sley_diff_merge/
lib.rs

1use sley_core::{GitError, ObjectFormat, ObjectId, RepoPath, Result, object_id_for_bytes};
2
3pub mod render;
4pub mod ws;
5
6pub use sley_core::BString;
7use sley_index::{BorrowedIndex, Index, IndexStatCache};
8use sley_object::{Commit, EncodedObject, ObjectType, Tree, TreeEntries, TreeEntry};
9use sley_odb::{FileObjectDatabase, ObjectReader, ObjectWriter};
10use sley_refs::{FileRefStore, RefTarget};
11use std::collections::{BTreeMap, BTreeSet, HashMap};
12use std::fs;
13use std::path::{Path, PathBuf};
14
15// ===========================================================================
16// Gitlink (submodule) resolution helpers.
17//
18// A gitlink is a mode-160000 tree/index entry whose oid names the commit an
19// embedded repository has checked out. These helpers resolve, for a directory
20// in the working tree, (a) the embedded repository's git directory — either a
21// `.git` directory or a `.git` *file* carrying a `gitdir: <path>` pointer (the
22// layout `git submodule add`/`update` creates, pointing into the
23// superproject's `.git/modules/<name>`) — and (b) the commit its HEAD names.
24// They are the native equivalent of upstream's `resolve_gitlink_ref()`.
25// ===========================================================================
26
27/// Resolve the git directory of an embedded repository whose working tree is
28/// at `sub_root`. A `.git` directory is returned as-is; a `.git` file is
29/// followed through its `gitdir: <path>` pointer (a relative pointer resolves
30/// against `sub_root`). Returns `None` when there is no `.git` entry or the
31/// pointer does not name an existing directory.
32pub fn gitlink_git_dir(sub_root: &Path) -> Option<PathBuf> {
33    let dot_git = sub_root.join(".git");
34    let metadata = fs::symlink_metadata(&dot_git).ok()?;
35    if metadata.is_dir() {
36        return Some(dot_git);
37    }
38    if !metadata.is_file() {
39        return None;
40    }
41    let contents = fs::read_to_string(&dot_git).ok()?;
42    let target = contents.strip_prefix("gitdir:")?.trim();
43    if target.is_empty() {
44        return None;
45    }
46    let target = PathBuf::from(target);
47    let git_dir = if target.is_absolute() {
48        target
49    } else {
50        sub_root.join(target)
51    };
52    if git_dir.is_dir() {
53        Some(git_dir)
54    } else {
55        None
56    }
57}
58
59/// Resolve the commit checked out in the embedded repository at `sub_root`
60/// (the value a gitlink entry for that path records): its git directory's
61/// HEAD, followed through symbolic refs. `None` when `sub_root` is not a
62/// repository or its HEAD does not resolve to a commit (e.g. an unborn
63/// branch) — upstream's `resolve_gitlink_ref() < 0` case.
64pub fn gitlink_head_oid(sub_root: &Path, format: ObjectFormat) -> Option<ObjectId> {
65    let git_dir = gitlink_git_dir(sub_root)?;
66    let store = FileRefStore::new(&git_dir, format);
67    let mut target = store.read_ref("HEAD").ok()??;
68    // Follow symbolic-ref chains defensively (git caps the depth too).
69    for _ in 0..10 {
70        match target {
71            RefTarget::Direct(oid) => return Some(oid),
72            RefTarget::Symbolic(name) => target = store.read_ref(&name).ok()??,
73        }
74    }
75    None
76}
77
78// ===========================================================================
79// Line-level diff (Myers O(ND)) and 3-way blob merge (diff3).
80//
81// These operate purely on in-memory blobs and never touch the ODB or the
82// filesystem. They are the engine the CLI layers `git merge`, `cherry-pick`,
83// and `revert` on top of.
84// ===========================================================================
85
86/// A single line of a blob, slicing into the original buffer.
87///
88/// `content` includes the line's own trailing newline byte when present;
89/// `has_newline` records whether this line ended with `\n` in the source. Only
90/// the final line of a blob can have `has_newline == false` (a file with "no
91/// newline at end of file"). Comparing two `DiffLine`s for equality compares
92/// both the bytes and the trailing-newline flag, so a line that gained or lost
93/// its terminating newline is treated as a real change, matching git.
94#[derive(Debug, Clone, Copy, PartialEq, Eq)]
95pub struct DiffLine<'a> {
96    /// The raw bytes of the line, including the trailing `\n` if it had one.
97    pub content: &'a [u8],
98    /// Whether the line was terminated by a newline in the source blob.
99    pub has_newline: bool,
100}
101
102impl<'a> DiffLine<'a> {
103    /// The line bytes without any trailing newline.
104    pub fn bytes_without_newline(&self) -> &'a [u8] {
105        if self.has_newline {
106            self.content.strip_suffix(b"\n").unwrap_or(self.content)
107        } else {
108            self.content
109        }
110    }
111}
112
113/// Split a blob into lines, preserving the exact bytes of each line.
114///
115/// Each returned [`DiffLine`] borrows from `blob`; its `content` includes the
116/// terminating `\n`. The returned vector is empty for an empty blob. A blob
117/// whose final byte is not `\n` yields a final line with `has_newline ==
118/// false` — git's "\ No newline at end of file" case.
119pub fn split_lines(blob: &[u8]) -> Vec<DiffLine<'_>> {
120    let mut lines = Vec::new();
121    let mut start = 0usize;
122    let len = blob.len();
123    let mut idx = 0usize;
124    while idx < len {
125        if blob[idx] == b'\n' {
126            lines.push(DiffLine {
127                content: &blob[start..=idx],
128                has_newline: true,
129            });
130            idx += 1;
131            start = idx;
132        } else {
133            idx += 1;
134        }
135    }
136    if start < len {
137        lines.push(DiffLine {
138            content: &blob[start..len],
139            has_newline: false,
140        });
141    }
142    lines
143}
144
145/// A run-length entry in a Myers edit script.
146///
147/// Each variant carries the number of consecutive lines it applies to:
148/// - [`DiffOp::Equal`] — `n` lines common to both `old` and `new`.
149/// - [`DiffOp::Delete`] — `n` lines present in `old` but not `new`.
150/// - [`DiffOp::Insert`] — `n` lines present in `new` but not `old`.
151///
152/// Walking the script in order and consuming `old`/`new` lines accordingly
153/// reconstructs `new` from `old`.
154#[derive(Debug, Clone, Copy, PartialEq, Eq)]
155pub enum DiffOp {
156    /// `n` lines are identical in both sequences.
157    Equal(usize),
158    /// `n` lines are removed from the old sequence.
159    Delete(usize),
160    /// `n` lines are added in the new sequence.
161    Insert(usize),
162}
163
164/// Compute a minimal line-level edit script transforming `old` into `new`
165/// using Myers' O(ND) difference algorithm.
166///
167/// Lines are compared for equality by their full bytes (see [`DiffLine`]). The
168/// result is a coalesced sequence of [`DiffOp`] runs; consecutive ops of the
169/// same kind are merged so the script is compact. The script is a standard
170/// (shortest-edit-script) diff: the number of `Delete` + `Insert` lines is
171/// minimal.
172pub fn myers_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
173    // Trim a common prefix and suffix first. This keeps the O(ND) search small
174    // for the typical case of a localized edit and does not affect minimality.
175    let n_total = old.len();
176    let m_total = new.len();
177    let mut prefix = 0usize;
178    while prefix < n_total && prefix < m_total && old[prefix] == new[prefix] {
179        prefix += 1;
180    }
181    let mut suffix = 0usize;
182    while suffix < n_total - prefix
183        && suffix < m_total - prefix
184        && old[n_total - 1 - suffix] == new[m_total - 1 - suffix]
185    {
186        suffix += 1;
187    }
188
189    let old_mid = &old[prefix..n_total - suffix];
190    let new_mid = &new[prefix..m_total - suffix];
191
192    let mut ops: Vec<DiffOp> = Vec::new();
193    if prefix > 0 {
194        ops.push(DiffOp::Equal(prefix));
195    }
196    myers_core(old_mid, new_mid, &mut ops);
197    if suffix > 0 {
198        ops.push(DiffOp::Equal(suffix));
199    }
200    coalesce_ops(ops)
201}
202
203/// Classic forward Myers O(ND) shortest-edit-script search over the trimmed
204/// sub-problem, followed by a backtrack through the stored traces.
205///
206/// `old`/`new` are the trimmed (no common prefix/suffix) line slices. Per-line
207/// ops are appended to `out` in order; they are coalesced by the caller. This
208/// is the algorithm from Myers' 1986 paper, which yields a shortest edit script
209/// (minimal number of insertions + deletions).
210fn myers_core(old: &[DiffLine<'_>], new: &[DiffLine<'_>], out: &mut Vec<DiffOp>) {
211    let n = old.len() as isize;
212    let m = new.len() as isize;
213    if n == 0 {
214        if m > 0 {
215            out.push(DiffOp::Insert(m as usize));
216        }
217        return;
218    }
219    if m == 0 {
220        out.push(DiffOp::Delete(n as usize));
221        return;
222    }
223
224    let max = (n + m) as usize;
225    let offset = max as isize; // shift so diagonal k maps to index (k + offset)
226    let width = 2 * max + 1;
227    // v[k + offset] holds the furthest-reaching x on diagonal k for the current d.
228    let mut v = vec![0isize; width];
229    // Save a snapshot of v after each d so we can backtrack the chosen path.
230    let mut trace: Vec<Vec<isize>> = Vec::new();
231
232    let mut found_d: Option<usize> = None;
233    'search: for d in 0..=(max as isize) {
234        trace.push(v.clone());
235        let mut k = -d;
236        while k <= d {
237            let kidx = (k + offset) as usize;
238            // Decide whether we arrived here by moving down (insert, from k+1)
239            // or right (delete, from k-1). Prefer the move that reaches further.
240            let mut x = if k == -d
241                || (k != d && v[(k - 1 + offset) as usize] < v[(k + 1 + offset) as usize])
242            {
243                // Move down: x stays, y increases (insertion from new).
244                v[(k + 1 + offset) as usize]
245            } else {
246                // Move right: x increases (deletion from old).
247                v[(k - 1 + offset) as usize] + 1
248            };
249            let mut y = x - k;
250            // Follow the diagonal (matching lines) as far as possible.
251            while x < n && y < m && old[x as usize] == new[y as usize] {
252                x += 1;
253                y += 1;
254            }
255            v[kidx] = x;
256            if x >= n && y >= m {
257                found_d = Some(d as usize);
258                break 'search;
259            }
260            k += 2;
261        }
262    }
263
264    // A shortest edit path always exists, so found_d is set; if somehow not,
265    // fall back to a delete-all/insert-all script (still correct, not minimal).
266    let Some(d_end) = found_d else {
267        out.push(DiffOp::Delete(n as usize));
268        out.push(DiffOp::Insert(m as usize));
269        return;
270    };
271
272    backtrack(n, m, &trace, d_end, offset, out);
273}
274
275/// Reconstruct the edit script from the saved Myers traces.
276///
277/// Walks backward from `(n, m)` to `(0, 0)`, emitting per-line `Delete`,
278/// `Insert`, and `Equal` ops, then reverses them into forward order before
279/// appending to `out`. `n`/`m` are the lengths of the (trimmed) old/new slices.
280fn backtrack(
281    n: isize,
282    m: isize,
283    trace: &[Vec<isize>],
284    d_end: usize,
285    offset: isize,
286    out: &mut Vec<DiffOp>,
287) {
288    let mut x = n;
289    let mut y = m;
290    let mut rev: Vec<DiffOp> = Vec::new();
291
292    for d in (0..=d_end).rev() {
293        let v = &trace[d];
294        let k = x - y;
295        // Determine the predecessor diagonal, mirroring the forward step rule.
296        let prev_k = if k == -(d as isize)
297            || (k != d as isize && v[(k - 1 + offset) as usize] < v[(k + 1 + offset) as usize])
298        {
299            k + 1 // came from a down move (insert)
300        } else {
301            k - 1 // came from a right move (delete)
302        };
303        let prev_x = v[(prev_k + offset) as usize];
304        let prev_y = prev_x - prev_k;
305
306        // Emit the diagonal (equal) moves taken after reaching the predecessor.
307        while x > prev_x && y > prev_y {
308            rev.push(DiffOp::Equal(1));
309            x -= 1;
310            y -= 1;
311        }
312        if d > 0 {
313            if x == prev_x {
314                // Down move: an insertion of new[prev_y].
315                rev.push(DiffOp::Insert(1));
316            } else {
317                // Right move: a deletion of old[prev_x].
318                rev.push(DiffOp::Delete(1));
319            }
320            x = prev_x;
321            y = prev_y;
322        }
323    }
324
325    rev.reverse();
326    out.extend(rev);
327}
328
329/// Merge adjacent ops of the same kind so the script is compact.
330fn coalesce_ops(ops: Vec<DiffOp>) -> Vec<DiffOp> {
331    let mut out: Vec<DiffOp> = Vec::with_capacity(ops.len());
332    for op in ops {
333        match (out.last_mut(), op) {
334            (Some(DiffOp::Equal(prev)), DiffOp::Equal(n)) => *prev += n,
335            (Some(DiffOp::Delete(prev)), DiffOp::Delete(n)) => *prev += n,
336            (Some(DiffOp::Insert(prev)), DiffOp::Insert(n)) => *prev += n,
337            _ => out.push(op),
338        }
339    }
340    out
341}
342
343// ===========================================================================
344// Alternative diff algorithms: patience and histogram.
345//
346// Both share the recursive "anchor and recurse" shape used by git's xdiff
347// implementations of `--patience` and `--histogram`:
348//
349//   1. trim the common prefix and suffix of the current line range,
350//   2. pick one or more common lines that are confidently aligned (the
351//      "anchors") according to the algorithm's rule,
352//   3. recurse on the gaps to the left of, between, and to the right of the
353//      anchors,
354//   4. when no anchor can be found, fall back to the Myers shortest-edit-script
355//      search for that range so the result is still a valid LCS-correct diff.
356//
357// They operate purely on slices of [`DiffLine`]s and emit the same coalesced
358// [`DiffOp`] run sequence as [`myers_diff_lines`], so any caller can swap
359// algorithms freely. The two functions differ only in the anchor-selection
360// rule in steps 2/3.
361// ===========================================================================
362
363/// A hashable key for a line, used to bucket equal lines when finding anchors.
364///
365/// Mirrors [`DiffLine`]'s `PartialEq`: two lines are the same iff their bytes
366/// and their trailing-newline flag match. Keying on this tuple lets us hash
367/// lines without changing the public [`DiffLine`] type.
368type LineKey<'a> = (&'a [u8], bool);
369
370#[inline]
371fn line_key<'a>(line: &DiffLine<'a>) -> LineKey<'a> {
372    (line.content, line.has_newline)
373}
374
375/// Compute a line-level edit script transforming `old` into `new` using the
376/// patience diff algorithm (Bram Cohen's algorithm, as in `git diff
377/// --patience`).
378///
379/// Patience diff anchors on lines that occur *exactly once* in both `old` and
380/// `new`; it aligns those unique lines via a longest-increasing-subsequence
381/// ("patience sorting") pass and recurses into the gaps, falling back to Myers
382/// when a gap has no unique common line. The result is a valid LCS-correct edit
383/// script with the same shape as [`myers_diff_lines`]: walking it reconstructs
384/// `new` from `old`, and every [`DiffOp::Equal`] run covers genuinely equal
385/// lines. Patience tends to produce more human-readable hunks than Myers when
386/// blocks of lines are moved or repeated, though it is not guaranteed to be a
387/// shortest edit script.
388pub fn patience_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
389    let mut ops: Vec<DiffOp> = Vec::new();
390    patience_recurse(old, new, 0, old.len(), 0, new.len(), &mut ops);
391    coalesce_ops(ops)
392}
393
394/// Compute a line-level edit script transforming `old` into `new` using the
395/// histogram diff algorithm (as in `git diff --histogram`, derived from JGit).
396///
397/// Histogram diff is a patience-style unique-anchor algorithm with a fallback:
398/// it builds an occurrence histogram of `old` and, scanning `new`, picks the
399/// longest run of matching lines whose `old` line has the *fewest* occurrences
400/// (preferring truly unique lines, like patience, but still able to anchor on
401/// low-frequency lines when no globally-unique line exists). It then recurses
402/// on the regions on either side of that run, falling back to Myers only when
403/// no common line exists in a region. The result is a valid LCS-correct edit
404/// script with the same shape as [`myers_diff_lines`].
405pub fn histogram_diff_lines(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
406    let mut ops: Vec<DiffOp> = Vec::new();
407    histogram_recurse(old, new, 0, old.len(), 0, new.len(), &mut ops);
408    coalesce_ops(ops)
409}
410
411/// Dispatch to the line-diff implementation selected by `algorithm`.
412///
413/// All variants return the same coalesced [`DiffOp`] run sequence as
414/// [`myers_diff_lines`], so callers can switch algorithms without changing how
415/// they consume the result.
416///
417/// - [`DiffAlgorithm::Myers`] and [`DiffAlgorithm::Minimal`] use the Myers
418///   O(ND) shortest-edit-script search ([`myers_diff_lines`]); that search is
419///   already minimal in deletions + insertions, so `Minimal` is an alias for
420///   it here rather than a distinct slower mode.
421/// - [`DiffAlgorithm::Patience`] uses [`patience_diff_lines`].
422/// - [`DiffAlgorithm::Histogram`] uses [`histogram_diff_lines`].
423pub fn diff_lines_with_algorithm(
424    old: &[DiffLine<'_>],
425    new: &[DiffLine<'_>],
426    algorithm: DiffAlgorithm,
427) -> Vec<DiffOp> {
428    match algorithm {
429        DiffAlgorithm::Myers | DiffAlgorithm::Minimal => myers_diff_lines(old, new),
430        DiffAlgorithm::Patience => patience_diff_lines(old, new),
431        DiffAlgorithm::Histogram => histogram_diff_lines(old, new),
432    }
433}
434
435/// Emit ops for an empty-on-one-side range; returns `true` if it handled it.
436///
437/// Covers the recursion base cases where one side of `old[a0..a1]` /
438/// `new[b0..b1]` is empty: a pure deletion, a pure insertion, or nothing at
439/// all. Used by both the patience and histogram recursions before they look
440/// for an anchor.
441fn emit_trivial_range(a0: usize, a1: usize, b0: usize, b1: usize, out: &mut Vec<DiffOp>) -> bool {
442    let old_len = a1 - a0;
443    let new_len = b1 - b0;
444    if old_len == 0 && new_len == 0 {
445        return true;
446    }
447    if old_len == 0 {
448        out.push(DiffOp::Insert(new_len));
449        return true;
450    }
451    if new_len == 0 {
452        out.push(DiffOp::Delete(old_len));
453        return true;
454    }
455    false
456}
457
458/// Trim the common prefix/suffix of `old[a0..a1]` vs `new[b0..b1]`.
459///
460/// Emits an `Equal` for the matched prefix immediately, returns the inner
461/// (still-differing) range, and reports the matched-suffix length so the caller
462/// can emit its `Equal` *after* it has processed the inner range. This keeps
463/// the per-range work proportional to the actual edit, mirroring the prefix /
464/// suffix trim in [`myers_diff_lines`].
465fn trim_common(
466    old: &[DiffLine<'_>],
467    new: &[DiffLine<'_>],
468    mut a0: usize,
469    mut a1: usize,
470    mut b0: usize,
471    mut b1: usize,
472    out: &mut Vec<DiffOp>,
473) -> (usize, usize, usize, usize, usize) {
474    let mut prefix = 0usize;
475    while a0 < a1 && b0 < b1 && old[a0] == new[b0] {
476        a0 += 1;
477        b0 += 1;
478        prefix += 1;
479    }
480    if prefix > 0 {
481        out.push(DiffOp::Equal(prefix));
482    }
483    let mut suffix = 0usize;
484    while a1 > a0 && b1 > b0 && old[a1 - 1] == new[b1 - 1] {
485        a1 -= 1;
486        b1 -= 1;
487        suffix += 1;
488    }
489    (a0, a1, b0, b1, suffix)
490}
491
492/// Recursive patience-diff worker over `old[a0..a1]` vs `new[b0..b1]`.
493fn patience_recurse(
494    old: &[DiffLine<'_>],
495    new: &[DiffLine<'_>],
496    a0: usize,
497    a1: usize,
498    b0: usize,
499    b1: usize,
500    out: &mut Vec<DiffOp>,
501) {
502    if emit_trivial_range(a0, a1, b0, b1, out) {
503        return;
504    }
505    let (a0, a1, b0, b1, suffix) = trim_common(old, new, a0, a1, b0, b1, out);
506    if !emit_trivial_range(a0, a1, b0, b1, out) {
507        match patience_anchors(old, new, a0, a1, b0, b1) {
508            Some(anchors) => {
509                // Walk the aligned anchors in order, recursing into each gap
510                // before emitting the anchor line as Equal.
511                let mut cur_a = a0;
512                let mut cur_b = b0;
513                for (ai, bi) in anchors {
514                    patience_recurse(old, new, cur_a, ai, cur_b, bi, out);
515                    out.push(DiffOp::Equal(1));
516                    cur_a = ai + 1;
517                    cur_b = bi + 1;
518                }
519                // Tail after the last anchor.
520                patience_recurse(old, new, cur_a, a1, cur_b, b1, out);
521            }
522            // No unique common line in this range: defer to Myers, which always
523            // yields a valid (and minimal) script for the leftover block.
524            None => myers_core(&old[a0..a1], &new[b0..b1], out),
525        }
526    }
527    if suffix > 0 {
528        out.push(DiffOp::Equal(suffix));
529    }
530}
531
532/// Find the patience anchors for `old[a0..a1]` vs `new[b0..b1]`.
533///
534/// An anchor is a line that occurs exactly once in `old[a0..a1]` and exactly
535/// once in `new[b0..b1]`. The matched (old_index, new_index) pairs are reduced
536/// to their longest increasing subsequence by new-index (the patience-sort LCS)
537/// so the returned anchors are strictly increasing in *both* indices and can be
538/// used as split points. Returns `None` when there are no such unique common
539/// lines (the caller then falls back to Myers).
540fn patience_anchors(
541    old: &[DiffLine<'_>],
542    new: &[DiffLine<'_>],
543    a0: usize,
544    a1: usize,
545    b0: usize,
546    b1: usize,
547) -> Option<Vec<(usize, usize)>> {
548    // Count occurrences and remember the (single) position of each line in each
549    // side's range. `count > 1` poisons the position so we can ignore it.
550    struct Occ {
551        count: usize,
552        pos: usize,
553    }
554    let mut in_old: HashMap<LineKey<'_>, Occ> = HashMap::new();
555    for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
556        in_old
557            .entry(line_key(line))
558            .and_modify(|o| o.count += 1)
559            .or_insert(Occ { count: 1, pos: i });
560    }
561    let mut in_new: HashMap<LineKey<'_>, Occ> = HashMap::new();
562    for (j, line) in new.iter().enumerate().take(b1).skip(b0) {
563        in_new
564            .entry(line_key(line))
565            .and_modify(|o| o.count += 1)
566            .or_insert(Occ { count: 1, pos: j });
567    }
568
569    // Collect lines unique in both, ordered by their position in `old`.
570    let mut pairs: Vec<(usize, usize)> = Vec::new();
571    for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
572        let key = line_key(line);
573        let Some(o) = in_old.get(&key) else { continue };
574        if o.count != 1 || o.pos != i {
575            continue;
576        }
577        // A line unique in both ranges is a candidate anchor.
578        if let Some(n) = in_new.get(&key)
579            && n.count == 1
580        {
581            pairs.push((i, n.pos));
582        }
583    }
584    if pairs.is_empty() {
585        return None;
586    }
587
588    // Patience sort: longest increasing subsequence of new-indices. `pairs` is
589    // already sorted by old-index, so an LIS by new-index yields a set of
590    // anchors increasing in both coordinates.
591    let lis = longest_increasing_by_new(&pairs);
592    if lis.is_empty() { None } else { Some(lis) }
593}
594
595/// Longest increasing subsequence of `pairs` (sorted by old-index) keyed on the
596/// new-index, returned as the chosen (old_index, new_index) pairs in order.
597///
598/// This is the patience-sorting core: standard O(k log k) LIS with predecessor
599/// links so the actual subsequence (not just its length) is recovered. Because
600/// the input is pre-sorted by old-index and the new-indices are distinct, the
601/// result is strictly increasing in both coordinates.
602fn longest_increasing_by_new(pairs: &[(usize, usize)]) -> Vec<(usize, usize)> {
603    if pairs.is_empty() {
604        return Vec::new();
605    }
606    // tails[len-1] = index into `pairs` of the smallest possible tail value of
607    // an increasing subsequence of length `len`.
608    let mut tails: Vec<usize> = Vec::new();
609    // prev[i] = index into `pairs` of the predecessor of pairs[i] in its LIS.
610    let mut prev: Vec<Option<usize>> = vec![None; pairs.len()];
611
612    for i in 0..pairs.len() {
613        let val = pairs[i].1;
614        // Binary search for the first tail whose new-index is >= val.
615        let mut lo = 0usize;
616        let mut hi = tails.len();
617        while lo < hi {
618            let mid = lo + (hi - lo) / 2;
619            if pairs[tails[mid]].1 < val {
620                lo = mid + 1;
621            } else {
622                hi = mid;
623            }
624        }
625        if lo > 0 {
626            prev[i] = Some(tails[lo - 1]);
627        }
628        if lo == tails.len() {
629            tails.push(i);
630        } else {
631            tails[lo] = i;
632        }
633    }
634
635    // Reconstruct by following predecessor links from the last tail.
636    let mut result: Vec<(usize, usize)> = Vec::with_capacity(tails.len());
637    let mut cur = tails.last().copied();
638    while let Some(i) = cur {
639        result.push(pairs[i]);
640        cur = prev[i];
641    }
642    result.reverse();
643    result
644}
645
646/// Recursive histogram-diff worker over `old[a0..a1]` vs `new[b0..b1]`.
647fn histogram_recurse(
648    old: &[DiffLine<'_>],
649    new: &[DiffLine<'_>],
650    a0: usize,
651    a1: usize,
652    b0: usize,
653    b1: usize,
654    out: &mut Vec<DiffOp>,
655) {
656    if emit_trivial_range(a0, a1, b0, b1, out) {
657        return;
658    }
659    let (a0, a1, b0, b1, suffix) = trim_common(old, new, a0, a1, b0, b1, out);
660    if !emit_trivial_range(a0, a1, b0, b1, out) {
661        match histogram_region(old, new, a0, a1, b0, b1) {
662            Some(region) => {
663                // Recurse left of the matched run, emit the run as Equal, then
664                // recurse right of it.
665                histogram_recurse(old, new, a0, region.old_start, b0, region.new_start, out);
666                out.push(DiffOp::Equal(region.len));
667                histogram_recurse(
668                    old,
669                    new,
670                    region.old_start + region.len,
671                    a1,
672                    region.new_start + region.len,
673                    b1,
674                    out,
675                );
676            }
677            // No common line at all in this range: hand it to Myers.
678            None => myers_core(&old[a0..a1], &new[b0..b1], out),
679        }
680    }
681    if suffix > 0 {
682        out.push(DiffOp::Equal(suffix));
683    }
684}
685
686/// The longest common run chosen by the histogram heuristic for one range.
687struct HistogramRegion {
688    old_start: usize,
689    new_start: usize,
690    len: usize,
691}
692
693/// Choose the histogram anchor run for `old[a0..a1]` vs `new[b0..b1]`.
694///
695/// Builds an occurrence histogram of the `old` range, then scans the `new`
696/// range. For each `new` line that also appears in `old`, it extends a matching
697/// run backward and forward and scores candidate alignments, preferring the run
698/// whose anchoring `old` line has the *fewest* occurrences (ties broken by run
699/// length, then by earliest position). This is the JGit/`git --histogram`
700/// heuristic: rare lines make the most reliable anchors. Returns `None` if no
701/// `new` line appears in the `old` range.
702fn histogram_region(
703    old: &[DiffLine<'_>],
704    new: &[DiffLine<'_>],
705    a0: usize,
706    a1: usize,
707    b0: usize,
708    b1: usize,
709) -> Option<HistogramRegion> {
710    // Occurrence count and the list of positions of each line within old[a0..a1].
711    let mut buckets: HashMap<LineKey<'_>, Vec<usize>> = HashMap::new();
712    for (i, line) in old.iter().enumerate().take(a1).skip(a0) {
713        buckets.entry(line_key(line)).or_default().push(i);
714    }
715
716    let mut best: Option<HistogramRegion> = None;
717    // Lower occurrence count is better; among equal counts, longer run wins.
718    let mut best_count = usize::MAX;
719    let mut best_len = 0usize;
720
721    let mut bj = b0;
722    while bj < b1 {
723        let key = line_key(&new[bj]);
724        let Some(positions) = buckets.get(&key) else {
725            bj += 1;
726            continue;
727        };
728        let occ = positions.len();
729        // For every place this line sits in `old`, measure the maximal matching
730        // run that passes through (positions[*], bj).
731        let mut next_bj = bj + 1;
732        for &ai in positions {
733            // Extend backward while lines keep matching and we stay in range.
734            let mut start_a = ai;
735            let mut start_b = bj;
736            while start_a > a0 && start_b > b0 && old[start_a - 1] == new[start_b - 1] {
737                start_a -= 1;
738                start_b -= 1;
739            }
740            // Extend forward from the run start.
741            let mut len = 0usize;
742            while start_a + len < a1
743                && start_b + len < b1
744                && old[start_a + len] == new[start_b + len]
745            {
746                len += 1;
747            }
748            // Score this run by the rarest occurrence count along it; using the
749            // anchor line's own count is the standard, cheaper approximation.
750            let run_count = occ;
751            let better = run_count < best_count || (run_count == best_count && len > best_len);
752            if better && len > 0 {
753                best_count = run_count;
754                best_len = len;
755                best = Some(HistogramRegion {
756                    old_start: start_a,
757                    new_start: start_b,
758                    len,
759                });
760                // Skip past this matched run in `new` so we do not re-evaluate
761                // every interior line of the same run from scratch.
762                if start_b + len > next_bj {
763                    next_bj = start_b + len;
764                }
765            }
766        }
767        bj = next_bj.max(bj + 1);
768    }
769
770    best
771}
772
773/// Which conflict-marker style [`merge_blobs`] emits.
774#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
775pub enum ConflictStyle {
776    /// Standard two-section markers (`<<<<<<<` / `=======` / `>>>>>>>`).
777    #[default]
778    Merge,
779    /// `diff3` style: also include the common-ancestor section between `ours`
780    /// and the `=======` divider, delimited by `|||||||`.
781    Diff3,
782}
783
784/// Labels and style controlling [`merge_blobs`] conflict markers.
785#[derive(Debug, Clone, Copy)]
786pub struct MergeBlobOptions<'a> {
787    /// Label after the opening `<<<<<<<` marker (typically the local branch).
788    pub ours_label: &'a str,
789    /// Label after the closing `>>>>>>>` marker (typically the other branch).
790    pub theirs_label: &'a str,
791    /// Label after the `|||||||` marker (only used for [`ConflictStyle::Diff3`]).
792    pub base_label: &'a str,
793    /// Which marker style to emit.
794    pub style: ConflictStyle,
795}
796
797impl Default for MergeBlobOptions<'_> {
798    fn default() -> Self {
799        Self {
800            ours_label: "ours",
801            theirs_label: "theirs",
802            base_label: "base",
803            style: ConflictStyle::Merge,
804        }
805    }
806}
807
808/// The outcome of a 3-way blob merge.
809#[derive(Debug, Clone, PartialEq, Eq)]
810pub struct MergeBlobResult {
811    /// The merged blob bytes, including any conflict markers.
812    pub content: Vec<u8>,
813    /// True when at least one region conflicted and markers were written.
814    pub conflicted: bool,
815}
816
817/// Perform a 3-way merge of three blobs using the diff3 algorithm.
818///
819/// `base` is the common ancestor; `ours` and `theirs` are the two sides. The
820/// merge diffs base→ours and base→theirs (with [`myers_diff_lines`]) and walks
821/// the base in lockstep:
822/// - regions unchanged on both sides emit the base lines unchanged;
823/// - regions changed on exactly one side take that side's lines;
824/// - regions changed on both sides emit the side lines if they are
825///   byte-identical, otherwise a conflict (and [`MergeBlobResult::conflicted`]
826///   is set).
827///
828/// An empty `base` is supported: every line is then "added on both sides", so
829/// the result is the shared content if `ours == theirs`, else a single
830/// conflict (add/add).
831pub fn merge_blobs(
832    base: &[u8],
833    ours: &[u8],
834    theirs: &[u8],
835    options: &MergeBlobOptions<'_>,
836) -> MergeBlobResult {
837    let base_lines = split_lines(base);
838    let ours_lines = split_lines(ours);
839    let theirs_lines = split_lines(theirs);
840
841    // Per-side matched (equal) base regions, paired with the corresponding side
842    // ranges, computed via Myers.
843    let ours_matches = matching_regions(&base_lines, &ours_lines);
844    let theirs_matches = matching_regions(&base_lines, &theirs_lines);
845
846    // Intersect the two match lists to get segments of base that are unchanged
847    // on BOTH sides, each carrying the exact aligned side indices. Between these
848    // common-stable segments lie the (potentially conflicting) changed regions.
849    let stable = common_stable_segments(&ours_matches, &theirs_matches);
850
851    let mut writer = MergeWriter::new(options);
852    // Cursors: next unconsumed line in base, ours, theirs.
853    let mut base_idx = 0usize;
854    let mut our_idx = 0usize;
855    let mut their_idx = 0usize;
856
857    for seg in &stable {
858        // Unstable (changed) region preceding this stable segment.
859        let base_region = &base_lines[base_idx..seg.base_start];
860        let our_region = &ours_lines[our_idx..seg.ours_start];
861        let their_region = &theirs_lines[their_idx..seg.theirs_start];
862        emit_region(&mut writer, base_region, our_region, their_region);
863
864        // The stable segment itself is identical on all three: emit base lines.
865        writer.emit_lines(&base_lines[seg.base_start..seg.base_start + seg.len]);
866
867        base_idx = seg.base_start + seg.len;
868        our_idx = seg.ours_start + seg.len;
869        their_idx = seg.theirs_start + seg.len;
870    }
871
872    // Trailing unstable region after the last stable segment (or the whole input
873    // when there are no common-stable segments).
874    emit_region(
875        &mut writer,
876        &base_lines[base_idx..],
877        &ours_lines[our_idx..],
878        &theirs_lines[their_idx..],
879    );
880
881    writer.finish()
882}
883
884/// Resolve and emit one changed region (the gap between two common-stable
885/// segments) according to diff3 rules.
886fn emit_region(
887    writer: &mut MergeWriter<'_>,
888    base_region: &[DiffLine<'_>],
889    our_region: &[DiffLine<'_>],
890    their_region: &[DiffLine<'_>],
891) {
892    if our_region.is_empty() && their_region.is_empty() {
893        return;
894    }
895    let our_changed = our_region != base_region;
896    let their_changed = their_region != base_region;
897    match (our_changed, their_changed) {
898        (false, false) => writer.emit_lines(base_region),
899        (true, false) => writer.emit_lines(our_region),
900        (false, true) => writer.emit_lines(their_region),
901        (true, true) => {
902            if our_region == their_region {
903                // Both sides made the same change: no conflict.
904                writer.emit_lines(our_region);
905            } else {
906                writer.emit_conflict(our_region, base_region, their_region);
907            }
908        }
909    }
910}
911
912/// A matched (equal) region between `base` and one side: `base_start..+len`
913/// lines of base equal `side_start..+len` lines of that side.
914#[derive(Debug, Clone, Copy)]
915struct MatchRegion {
916    base_start: usize,
917    side_start: usize,
918    len: usize,
919}
920
921/// A run of base lines unchanged on *both* sides, with the aligned side starts.
922#[derive(Debug, Clone, Copy)]
923struct StableSegment {
924    base_start: usize,
925    ours_start: usize,
926    theirs_start: usize,
927    len: usize,
928}
929
930/// Compute the matched regions between base and a side using [`myers_diff_lines`].
931///
932/// Each `Equal(n)` run becomes a [`MatchRegion`]; the regions are returned in
933/// increasing base order. (Equal runs are coalesced by the diff, so adjacent
934/// regions are already maximal.)
935fn matching_regions(base: &[DiffLine<'_>], side: &[DiffLine<'_>]) -> Vec<MatchRegion> {
936    let ops = myers_diff_lines(base, side);
937    let mut regions = Vec::new();
938    let mut base_idx = 0usize;
939    let mut side_idx = 0usize;
940    for op in ops {
941        match op {
942            DiffOp::Equal(n) => {
943                regions.push(MatchRegion {
944                    base_start: base_idx,
945                    side_start: side_idx,
946                    len: n,
947                });
948                base_idx += n;
949                side_idx += n;
950            }
951            DiffOp::Delete(n) => base_idx += n,
952            DiffOp::Insert(n) => side_idx += n,
953        }
954    }
955    regions
956}
957
958/// Intersect the ours/theirs match lists (both in base coordinates) to find the
959/// base ranges unchanged on both sides, recording the aligned side indices.
960///
961/// For each overlapping pair of base ranges `[bs, be)` the ours-side index of
962/// `bs` is `o.side_start + (bs - o.base_start)` and likewise for theirs; both
963/// map contiguously across the overlap. The returned segments are in increasing
964/// base order and never overlap.
965fn common_stable_segments(ours: &[MatchRegion], theirs: &[MatchRegion]) -> Vec<StableSegment> {
966    let mut segments = Vec::new();
967    let mut oi = 0usize;
968    let mut ti = 0usize;
969    while oi < ours.len() && ti < theirs.len() {
970        let o = ours[oi];
971        let t = theirs[ti];
972        let o_end = o.base_start + o.len;
973        let t_end = t.base_start + t.len;
974        let lo = o.base_start.max(t.base_start);
975        let hi = o_end.min(t_end);
976        if lo < hi {
977            segments.push(StableSegment {
978                base_start: lo,
979                ours_start: o.side_start + (lo - o.base_start),
980                theirs_start: t.side_start + (lo - t.base_start),
981                len: hi - lo,
982            });
983        }
984        // Advance whichever range ends first.
985        if o_end <= t_end {
986            oi += 1;
987        } else {
988            ti += 1;
989        }
990    }
991    segments
992}
993
994/// Accumulates merged output and renders conflict markers byte-for-byte like
995/// upstream git.
996struct MergeWriter<'a> {
997    out: Vec<u8>,
998    conflicted: bool,
999    options: &'a MergeBlobOptions<'a>,
1000}
1001
1002impl<'a> MergeWriter<'a> {
1003    fn new(options: &'a MergeBlobOptions<'a>) -> Self {
1004        Self {
1005            out: Vec::new(),
1006            conflicted: false,
1007            options,
1008        }
1009    }
1010
1011    /// Append raw line bytes (each line already carries its own newline, except
1012    /// possibly a final no-newline line).
1013    fn emit_lines(&mut self, lines: &[DiffLine<'_>]) {
1014        for line in lines {
1015            self.out.extend_from_slice(line.content);
1016        }
1017    }
1018
1019    /// Emit a conflict hunk. Conflict markers always begin on their own line,
1020    /// so if the preceding emitted content did not end in a newline (a
1021    /// no-newline-at-end side), insert one first — matching git, which prints
1022    /// the "\ No newline at end of file" content followed by a newline before
1023    /// the next marker.
1024    fn emit_conflict(
1025        &mut self,
1026        ours: &[DiffLine<'_>],
1027        base: &[DiffLine<'_>],
1028        theirs: &[DiffLine<'_>],
1029    ) {
1030        self.conflicted = true;
1031        self.write_marker(b'<', self.options.ours_label);
1032        self.emit_section(ours);
1033        if self.options.style == ConflictStyle::Diff3 {
1034            self.ensure_newline();
1035            self.write_marker(b'|', self.options.base_label);
1036            self.emit_section(base);
1037        }
1038        self.ensure_newline();
1039        self.write_divider();
1040        self.emit_section(theirs);
1041        self.ensure_newline();
1042        self.write_marker(b'>', self.options.theirs_label);
1043    }
1044
1045    /// Emit one side's lines inside a conflict, preserving their exact bytes.
1046    fn emit_section(&mut self, lines: &[DiffLine<'_>]) {
1047        for line in lines {
1048            self.out.extend_from_slice(line.content);
1049        }
1050    }
1051
1052    /// Ensure the buffer ends with a newline before writing the next marker, so
1053    /// markers always start a fresh line even after a no-newline final line.
1054    fn ensure_newline(&mut self) {
1055        if !self.out.is_empty() && self.out.last() != Some(&b'\n') {
1056            self.out.push(b'\n');
1057        }
1058    }
1059
1060    /// Write a marker line: 7 copies of `ch`, then (if the label is non-empty)
1061    /// a space and the label, then a newline. No trailing space for an empty
1062    /// label — byte-for-byte with upstream git.
1063    fn write_marker(&mut self, ch: u8, label: &str) {
1064        for _ in 0..7 {
1065            self.out.push(ch);
1066        }
1067        if !label.is_empty() {
1068            self.out.push(b' ');
1069            self.out.extend_from_slice(label.as_bytes());
1070        }
1071        self.out.push(b'\n');
1072    }
1073
1074    /// Write the `=======` divider line (never labelled).
1075    fn write_divider(&mut self) {
1076        for _ in 0..7 {
1077            self.out.push(b'=');
1078        }
1079        self.out.push(b'\n');
1080    }
1081
1082    fn finish(self) -> MergeBlobResult {
1083        MergeBlobResult {
1084            content: self.out,
1085            conflicted: self.conflicted,
1086        }
1087    }
1088}
1089
1090#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1091pub enum DiffAlgorithm {
1092    Myers,
1093    Minimal,
1094    Patience,
1095    Histogram,
1096}
1097
1098#[derive(Debug, Clone, PartialEq, Eq)]
1099pub enum FileChange {
1100    Add { path: RepoPath },
1101    Delete { path: RepoPath },
1102    Modify { path: RepoPath },
1103    Rename { old: RepoPath, new: RepoPath },
1104    Copy { source: RepoPath, dest: RepoPath },
1105}
1106
1107#[derive(Debug, Clone, PartialEq, Eq)]
1108pub struct Conflict {
1109    pub path: RepoPath,
1110    pub ours: Vec<u8>,
1111    pub theirs: Vec<u8>,
1112}
1113
1114#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1115pub enum NameStatus {
1116    Added,
1117    Deleted,
1118    Modified,
1119    Renamed(u8),
1120    Copied(u8),
1121}
1122
1123impl NameStatus {
1124    pub const fn code(self) -> char {
1125        match self {
1126            Self::Added => 'A',
1127            Self::Deleted => 'D',
1128            Self::Modified => 'M',
1129            Self::Renamed(_) => 'R',
1130            Self::Copied(_) => 'C',
1131        }
1132    }
1133
1134    pub fn label(self) -> String {
1135        match self {
1136            Self::Renamed(score) => format!("R{score:03}"),
1137            Self::Copied(score) => format!("C{score:03}"),
1138            _ => self.code().to_string(),
1139        }
1140    }
1141}
1142
1143#[derive(Debug, Clone, PartialEq, Eq)]
1144pub struct NameStatusEntry {
1145    pub status: NameStatus,
1146    pub path: BString,
1147    pub old_path: Option<BString>,
1148    pub old_mode: Option<u32>,
1149    pub new_mode: Option<u32>,
1150    pub old_oid: Option<ObjectId>,
1151    pub new_oid: Option<ObjectId>,
1152}
1153
1154impl NameStatusEntry {
1155    pub fn line(&self) -> String {
1156        if let Some(old_path) = &self.old_path {
1157            format!(
1158                "{}\t{}\t{}",
1159                self.status.label(),
1160                String::from_utf8_lossy(old_path.as_bytes()),
1161                String::from_utf8_lossy(self.path.as_bytes())
1162            )
1163        } else {
1164            format!(
1165                "{}\t{}",
1166                self.status.label(),
1167                String::from_utf8_lossy(self.path.as_bytes())
1168            )
1169        }
1170    }
1171}
1172
1173#[derive(Debug, Clone, PartialEq, Eq)]
1174pub struct IndexGitlinkEntry {
1175    pub path: BString,
1176    pub oid: ObjectId,
1177}
1178
1179#[derive(Debug, Clone, PartialEq, Eq)]
1180pub struct IndexWorktreeDiff {
1181    pub entries: Vec<NameStatusEntry>,
1182    pub staged_gitlinks: Vec<IndexGitlinkEntry>,
1183}
1184
1185#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1186pub struct DiffNameStatusOptions {
1187    pub detect_renames: bool,
1188    pub detect_copies: bool,
1189    pub find_copies_harder: bool,
1190    pub rename_empty: bool,
1191}
1192
1193impl Default for DiffNameStatusOptions {
1194    fn default() -> Self {
1195        Self {
1196            detect_renames: true,
1197            detect_copies: false,
1198            find_copies_harder: false,
1199            rename_empty: true,
1200        }
1201    }
1202}
1203
1204/// git's default minimum similarity (as a percentage) for a pair of files to be
1205/// reported as a rename or copy. Matches `git`'s built-in `-M`/`-C` threshold
1206/// of 50% (`DEFAULT_RENAME_SCORE` is `MAX_SCORE / 2`).
1207pub const DEFAULT_RENAME_THRESHOLD: u8 = 50;
1208
1209/// Options controlling inexact (similarity-based) rename and copy detection,
1210/// layered additively on top of [`DiffNameStatusOptions`].
1211///
1212/// This is a separate struct rather than new fields on [`DiffNameStatusOptions`]
1213/// so that existing callers — which build `DiffNameStatusOptions` with a struct
1214/// literal — keep compiling unchanged. Code that wants inexact detection uses
1215/// the `*_with_rename_options` entry points and this type instead.
1216///
1217/// [`Default`] preserves the existing behaviour exactly: `detect_inexact` is
1218/// `false`, so unless a caller opts in, only exact-OID rename/copy detection
1219/// runs (identical to the plain `*_with_options` functions). When
1220/// `detect_inexact` is enabled, files added on one side are paired with the most
1221/// similar deleted/modified file on the other side whose similarity meets the
1222/// relevant threshold; exact-OID matches still take priority and are always
1223/// scored 100.
1224#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1225pub struct RenameDetectionOptions {
1226    /// The base name-status options (rename/copy enable flags, find-copies-harder,
1227    /// rename-empty). Exact detection honours these exactly as before.
1228    pub base: DiffNameStatusOptions,
1229    /// Enable inexact (content-similarity) detection. When `false`, only exact
1230    /// OID matches are detected, matching the legacy `*_with_options` behaviour.
1231    pub detect_inexact: bool,
1232    /// Minimum similarity percentage (`0..=100`) for an inexact *rename*. Pairs
1233    /// scoring below this are not reported as renames. Defaults to
1234    /// [`DEFAULT_RENAME_THRESHOLD`].
1235    pub rename_threshold: u8,
1236    /// Minimum similarity percentage (`0..=100`) for an inexact *copy*. Defaults
1237    /// to [`DEFAULT_RENAME_THRESHOLD`]; git uses the same default for `-C` as for
1238    /// `-M` unless `-C<n>` overrides it.
1239    pub copy_threshold: u8,
1240}
1241
1242impl Default for RenameDetectionOptions {
1243    fn default() -> Self {
1244        Self {
1245            base: DiffNameStatusOptions::default(),
1246            detect_inexact: false,
1247            rename_threshold: DEFAULT_RENAME_THRESHOLD,
1248            copy_threshold: DEFAULT_RENAME_THRESHOLD,
1249        }
1250    }
1251}
1252
1253impl RenameDetectionOptions {
1254    /// Build inexact-enabled options from a base [`DiffNameStatusOptions`], using
1255    /// the default thresholds for both renames and copies.
1256    pub fn inexact(base: DiffNameStatusOptions) -> Self {
1257        Self {
1258            base,
1259            detect_inexact: true,
1260            ..Self::default()
1261        }
1262    }
1263}
1264
1265pub fn diff_name_status_head_worktree(
1266    worktree_root: impl AsRef<Path>,
1267    git_dir: impl AsRef<Path>,
1268    format: ObjectFormat,
1269) -> Result<Vec<NameStatusEntry>> {
1270    diff_name_status_head_worktree_with_options(
1271        worktree_root,
1272        git_dir,
1273        format,
1274        DiffNameStatusOptions::default(),
1275    )
1276}
1277
1278pub fn diff_name_status_head_worktree_with_options(
1279    worktree_root: impl AsRef<Path>,
1280    git_dir: impl AsRef<Path>,
1281    format: ObjectFormat,
1282    options: DiffNameStatusOptions,
1283) -> Result<Vec<NameStatusEntry>> {
1284    let worktree_root = worktree_root.as_ref();
1285    let git_dir = git_dir.as_ref();
1286    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1287    let head = head_tree_entries(git_dir, format, &db)?;
1288    let IndexSnapshot {
1289        entries: index,
1290        stat_cache,
1291    } = read_index_snapshot(git_dir, format)?;
1292    let index_gitlinks = index_gitlinks(&index);
1293    let candidate_paths = candidate_path_set(head.keys().chain(index.keys()));
1294    let worktree = worktree_entries_for_path_set(
1295        worktree_root,
1296        format,
1297        &candidate_paths,
1298        &index_gitlinks,
1299        Some(&stat_cache),
1300    )?;
1301    let changes = diff_name_status_maps_for_path_set(&head, &worktree, &candidate_paths, options)?;
1302    Ok(mark_unstaged_worktree_oids_unresolved(
1303        changes, &index, &worktree,
1304    ))
1305}
1306
1307/// HEAD-vs-worktree name-status with full rename/copy options, including inexact
1308/// (similarity) detection when enabled. Worktree blob content is read directly
1309/// from the working tree; HEAD-side blobs come from the object database.
1310pub fn diff_name_status_head_worktree_with_rename_options(
1311    worktree_root: impl AsRef<Path>,
1312    git_dir: impl AsRef<Path>,
1313    format: ObjectFormat,
1314    options: RenameDetectionOptions,
1315) -> Result<Vec<NameStatusEntry>> {
1316    let worktree_root = worktree_root.as_ref();
1317    let git_dir = git_dir.as_ref();
1318    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1319    let head = head_tree_entries(git_dir, format, &db)?;
1320    let IndexSnapshot {
1321        entries: index,
1322        stat_cache,
1323    } = read_index_snapshot(git_dir, format)?;
1324    let index_gitlinks = index_gitlinks(&index);
1325    let candidate_paths = candidate_path_set(head.keys().chain(index.keys()));
1326    let worktree = worktree_entries_for_path_set(
1327        worktree_root,
1328        format,
1329        &candidate_paths,
1330        &index_gitlinks,
1331        Some(&stat_cache),
1332    )?;
1333    let cache = worktree_blob_cache_for_path_set(
1334        worktree_root,
1335        &head,
1336        &worktree,
1337        &candidate_paths,
1338        options,
1339    )?;
1340    let changes = diff_name_status_maps_with_renames_for_path_set(
1341        &head,
1342        &worktree,
1343        &candidate_paths,
1344        options,
1345        |oid| cache_or_odb_blob(&cache, &db, oid),
1346    )?;
1347    Ok(mark_unstaged_worktree_oids_unresolved(
1348        changes, &index, &worktree,
1349    ))
1350}
1351
1352pub fn diff_name_status_head_index(
1353    git_dir: impl AsRef<Path>,
1354    format: ObjectFormat,
1355) -> Result<Vec<NameStatusEntry>> {
1356    diff_name_status_head_index_with_options(git_dir, format, DiffNameStatusOptions::default())
1357}
1358
1359pub fn diff_name_status_head_index_with_options(
1360    git_dir: impl AsRef<Path>,
1361    format: ObjectFormat,
1362    options: DiffNameStatusOptions,
1363) -> Result<Vec<NameStatusEntry>> {
1364    let git_dir = git_dir.as_ref();
1365    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1366    let head = head_tree_entries(git_dir, format, &db)?;
1367    let index = read_index_entries(git_dir, format)?;
1368    diff_name_status_maps(&head, &index, head.keys().chain(index.keys()), options)
1369}
1370
1371/// HEAD-vs-index name-status with full rename/copy options, including inexact
1372/// (similarity) detection when enabled. All blob content (both sides) comes from
1373/// the object database.
1374pub fn diff_name_status_head_index_with_rename_options(
1375    git_dir: impl AsRef<Path>,
1376    format: ObjectFormat,
1377    options: RenameDetectionOptions,
1378) -> Result<Vec<NameStatusEntry>> {
1379    let git_dir = git_dir.as_ref();
1380    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1381    let head = head_tree_entries(git_dir, format, &db)?;
1382    let index = read_index_entries(git_dir, format)?;
1383    diff_name_status_maps_with_renames(
1384        &head,
1385        &index,
1386        head.keys().chain(index.keys()),
1387        options,
1388        |oid| read_blob_bytes(&db, oid),
1389    )
1390}
1391
1392/// Read an arbitrary tree object's flattened blob entries (recursively) keyed by
1393/// repository-relative path. This is the tree-side counterpart used by
1394/// `git diff-index <tree-ish>`: unlike [`head_tree_entries`] it does not consult
1395/// `HEAD`, so any commit/tag (peeled to a tree) or tree oid can be compared.
1396///
1397/// The canonical empty tree (`git hash-object -t tree /dev/null`) is treated as
1398/// always present and yields no entries, even when the object was never written
1399/// to the database. git makes the same guarantee, which keeps the common idiom
1400/// `git diff-index --cached <empty-tree-sha>` working in a fresh repository.
1401fn tree_entries(
1402    tree_oid: &ObjectId,
1403    format: ObjectFormat,
1404    db: &FileObjectDatabase,
1405) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
1406    let mut entries = BTreeMap::new();
1407    if *tree_oid == empty_tree_oid(format)? {
1408        return Ok(entries);
1409    }
1410    collect_tree_entries(db, format, tree_oid, Vec::new(), &mut entries)?;
1411    Ok(entries)
1412}
1413
1414/// The well-known oid of the empty tree for `format` (the hash of a zero-length
1415/// tree object). git hard-codes this value and treats it as always existing.
1416fn empty_tree_oid(format: ObjectFormat) -> Result<ObjectId> {
1417    object_id_for_bytes(format, "tree", b"")
1418}
1419
1420/// Name-status diff of an arbitrary tree against the index, the engine behind
1421/// `git diff-index --cached <tree-ish>`. Exact rename/copy detection follows
1422/// `options`; all blob content comes from the object database.
1423pub fn diff_name_status_tree_index_with_options(
1424    git_dir: impl AsRef<Path>,
1425    format: ObjectFormat,
1426    tree_oid: &ObjectId,
1427    options: DiffNameStatusOptions,
1428) -> Result<Vec<NameStatusEntry>> {
1429    let git_dir = git_dir.as_ref();
1430    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1431    let tree = tree_entries(tree_oid, format, &db)?;
1432    let index = read_index_entries(git_dir, format)?;
1433    diff_name_status_maps(&tree, &index, tree.keys().chain(index.keys()), options)
1434}
1435
1436/// Tree-vs-index name-status with full rename/copy options, including inexact
1437/// (similarity) detection when enabled. Both sides read blob content from the
1438/// object database. Counterpart of
1439/// [`diff_name_status_head_index_with_rename_options`] for an arbitrary tree.
1440pub fn diff_name_status_tree_index_with_rename_options(
1441    git_dir: impl AsRef<Path>,
1442    format: ObjectFormat,
1443    tree_oid: &ObjectId,
1444    options: RenameDetectionOptions,
1445) -> Result<Vec<NameStatusEntry>> {
1446    let git_dir = git_dir.as_ref();
1447    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1448    let tree = tree_entries(tree_oid, format, &db)?;
1449    let index = read_index_entries(git_dir, format)?;
1450    diff_name_status_maps_with_renames(
1451        &tree,
1452        &index,
1453        tree.keys().chain(index.keys()),
1454        options,
1455        |oid| read_blob_bytes(&db, oid),
1456    )
1457}
1458
1459/// Name-status diff of an arbitrary tree against the working tree, the engine
1460/// behind plain `git diff-index <tree-ish>` (no `--cached`). New-side oids for
1461/// paths whose worktree contents differ from the index are cleared (rendered as
1462/// zeros), matching git, which only reports the worktree blob oid when it is
1463/// known-clean against the index.
1464pub fn diff_name_status_tree_worktree_with_options(
1465    worktree_root: impl AsRef<Path>,
1466    git_dir: impl AsRef<Path>,
1467    format: ObjectFormat,
1468    tree_oid: &ObjectId,
1469    options: DiffNameStatusOptions,
1470) -> Result<Vec<NameStatusEntry>> {
1471    let worktree_root = worktree_root.as_ref();
1472    let git_dir = git_dir.as_ref();
1473    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1474    let tree = tree_entries(tree_oid, format, &db)?;
1475    let IndexSnapshot {
1476        entries: index,
1477        stat_cache,
1478    } = read_index_snapshot(git_dir, format)?;
1479    let index_gitlinks = index_gitlinks(&index);
1480    let candidate_paths = candidate_path_set(tree.keys().chain(index.keys()));
1481    let worktree = worktree_entries_for_path_set(
1482        worktree_root,
1483        format,
1484        &candidate_paths,
1485        &index_gitlinks,
1486        Some(&stat_cache),
1487    )?;
1488    let changes = diff_name_status_maps_for_path_set(&tree, &worktree, &candidate_paths, options)?;
1489    Ok(mark_unstaged_worktree_oids_unresolved(
1490        changes, &index, &worktree,
1491    ))
1492}
1493
1494/// Tree-vs-worktree name-status with full rename/copy options, including inexact
1495/// (similarity) detection when enabled. Worktree blob content is read directly
1496/// from the working tree (via an oid-keyed cache); tree-side blobs come from the
1497/// object database. As with [`diff_name_status_tree_worktree_with_options`],
1498/// new-side oids for paths that differ from the index are cleared.
1499pub fn diff_name_status_tree_worktree_with_rename_options(
1500    worktree_root: impl AsRef<Path>,
1501    git_dir: impl AsRef<Path>,
1502    format: ObjectFormat,
1503    tree_oid: &ObjectId,
1504    options: RenameDetectionOptions,
1505) -> Result<Vec<NameStatusEntry>> {
1506    let worktree_root = worktree_root.as_ref();
1507    let git_dir = git_dir.as_ref();
1508    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1509    let tree = tree_entries(tree_oid, format, &db)?;
1510    let IndexSnapshot {
1511        entries: index,
1512        stat_cache,
1513    } = read_index_snapshot(git_dir, format)?;
1514    let index_gitlinks = index_gitlinks(&index);
1515    let candidate_paths = candidate_path_set(tree.keys().chain(index.keys()));
1516    let worktree = worktree_entries_for_path_set(
1517        worktree_root,
1518        format,
1519        &candidate_paths,
1520        &index_gitlinks,
1521        Some(&stat_cache),
1522    )?;
1523    let cache = worktree_blob_cache_for_path_set(
1524        worktree_root,
1525        &tree,
1526        &worktree,
1527        &candidate_paths,
1528        options,
1529    )?;
1530    let changes = diff_name_status_maps_with_renames_for_path_set(
1531        &tree,
1532        &worktree,
1533        &candidate_paths,
1534        options,
1535        |oid| cache_or_odb_blob(&cache, &db, oid),
1536    )?;
1537    Ok(mark_unstaged_worktree_oids_unresolved(
1538        changes, &index, &worktree,
1539    ))
1540}
1541
1542pub fn diff_name_status_index_worktree(
1543    worktree_root: impl AsRef<Path>,
1544    git_dir: impl AsRef<Path>,
1545    format: ObjectFormat,
1546) -> Result<Vec<NameStatusEntry>> {
1547    diff_name_status_index_worktree_with_options(
1548        worktree_root,
1549        git_dir,
1550        format,
1551        DiffNameStatusOptions::default(),
1552    )
1553}
1554
1555pub fn diff_name_status_index_worktree_with_options(
1556    worktree_root: impl AsRef<Path>,
1557    git_dir: impl AsRef<Path>,
1558    format: ObjectFormat,
1559    options: DiffNameStatusOptions,
1560) -> Result<Vec<NameStatusEntry>> {
1561    Ok(diff_name_status_index_worktree_with_options_and_gitlinks(
1562        worktree_root,
1563        git_dir,
1564        format,
1565        options,
1566    )?
1567    .entries)
1568}
1569
1570pub fn diff_name_status_index_worktree_with_options_and_gitlinks(
1571    worktree_root: impl AsRef<Path>,
1572    git_dir: impl AsRef<Path>,
1573    format: ObjectFormat,
1574    options: DiffNameStatusOptions,
1575) -> Result<IndexWorktreeDiff> {
1576    let IndexWorktreeDiff {
1577        entries,
1578        staged_gitlinks,
1579    } = diff_name_status_index_worktree_changes(worktree_root.as_ref(), git_dir.as_ref(), format)?;
1580    let entries = apply_name_status_options_to_index_worktree_changes(entries, options)?;
1581    Ok(IndexWorktreeDiff {
1582        entries,
1583        staged_gitlinks,
1584    })
1585}
1586
1587/// Index-vs-worktree name-status with full rename/copy options, including inexact
1588/// (similarity) detection when enabled. Worktree blob content is read directly
1589/// from the working tree; index-side blobs come from the object database.
1590pub fn diff_name_status_index_worktree_with_rename_options(
1591    worktree_root: impl AsRef<Path>,
1592    git_dir: impl AsRef<Path>,
1593    format: ObjectFormat,
1594    options: RenameDetectionOptions,
1595) -> Result<Vec<NameStatusEntry>> {
1596    Ok(
1597        diff_name_status_index_worktree_with_rename_options_and_gitlinks(
1598            worktree_root,
1599            git_dir,
1600            format,
1601            options,
1602        )?
1603        .entries,
1604    )
1605}
1606
1607pub fn diff_name_status_index_worktree_with_rename_options_and_gitlinks(
1608    worktree_root: impl AsRef<Path>,
1609    git_dir: impl AsRef<Path>,
1610    format: ObjectFormat,
1611    options: RenameDetectionOptions,
1612) -> Result<IndexWorktreeDiff> {
1613    let IndexWorktreeDiff {
1614        entries,
1615        staged_gitlinks,
1616    } = diff_name_status_index_worktree_changes(worktree_root.as_ref(), git_dir.as_ref(), format)?;
1617    // Index-vs-worktree diffs only consider tracked index paths; untracked
1618    // worktree files are not additions, so rename/copy detection has no add
1619    // destinations to pair. Apply the base options for completeness.
1620    let entries = apply_name_status_options_to_index_worktree_changes(entries, options.base)?;
1621    Ok(IndexWorktreeDiff {
1622        entries,
1623        staged_gitlinks,
1624    })
1625}
1626
1627fn diff_name_status_index_worktree_changes(
1628    worktree_root: &Path,
1629    git_dir: &Path,
1630    format: ObjectFormat,
1631) -> Result<IndexWorktreeDiff> {
1632    let index_path = sley_index::repository_index_path(git_dir);
1633    let index_metadata = match fs::metadata(&index_path) {
1634        Ok(metadata) => metadata,
1635        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
1636            return Ok(IndexWorktreeDiff {
1637                entries: Vec::new(),
1638                staged_gitlinks: Vec::new(),
1639            });
1640        }
1641        Err(err) => return Err(err.into()),
1642    };
1643    let index_bytes = fs::read(&index_path)?;
1644    if let Ok(index) = BorrowedIndex::parse(&index_bytes, format) {
1645        let (has_non_normal_stage, staged_gitlinks) =
1646            index_worktree_metadata_for_entries(&index.entries);
1647        if has_non_normal_stage {
1648            return diff_name_status_index_worktree_changes_from_snapshot(
1649                worktree_root,
1650                git_dir,
1651                format,
1652            );
1653        }
1654        let stat_cache =
1655            IndexStatCache::from_index_mtime_only(sley_index::file_mtime_parts(&index_metadata));
1656        let entries = diff_name_status_index_worktree_changes_for_borrowed_entries(
1657            worktree_root,
1658            format,
1659            &index.entries,
1660            &stat_cache,
1661        )?;
1662        return Ok(IndexWorktreeDiff {
1663            entries,
1664            staged_gitlinks,
1665        });
1666    }
1667    let index = Index::parse(&index_bytes, format)?;
1668    let (has_non_normal_stage, staged_gitlinks) =
1669        index_worktree_metadata_for_entries(&index.entries);
1670    if has_non_normal_stage {
1671        return diff_name_status_index_worktree_changes_from_snapshot(
1672            worktree_root,
1673            git_dir,
1674            format,
1675        );
1676    }
1677    let stat_cache =
1678        IndexStatCache::from_index_mtime_only(sley_index::file_mtime_parts(&index_metadata));
1679    let entries = diff_name_status_index_worktree_changes_for_entries(
1680        worktree_root,
1681        format,
1682        &index.entries,
1683        &stat_cache,
1684    )?;
1685    Ok(IndexWorktreeDiff {
1686        entries,
1687        staged_gitlinks,
1688    })
1689}
1690
1691fn diff_name_status_index_worktree_changes_for_borrowed_entries(
1692    worktree_root: &Path,
1693    format: ObjectFormat,
1694    entries: &[sley_index::IndexEntryRef<'_>],
1695    stat_cache: &IndexStatCache,
1696) -> Result<Vec<NameStatusEntry>> {
1697    const PARALLEL_SCAN_MIN_ENTRIES: usize = 2048;
1698    let workers = std::thread::available_parallelism()
1699        .map(|count| count.get())
1700        .unwrap_or(1)
1701        .min(8);
1702    if workers <= 1 || entries.len() < PARALLEL_SCAN_MIN_ENTRIES {
1703        return diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
1704            worktree_root,
1705            format,
1706            entries,
1707            stat_cache,
1708        );
1709    }
1710    let chunk_size = entries.len().div_ceil(workers);
1711    std::thread::scope(|scope| {
1712        let mut handles = Vec::new();
1713        for chunk in entries.chunks(chunk_size) {
1714            handles.push(scope.spawn(move || {
1715                diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
1716                    worktree_root,
1717                    format,
1718                    chunk,
1719                    stat_cache,
1720                )
1721            }));
1722        }
1723        let mut changes = Vec::new();
1724        for handle in handles {
1725            let chunk_changes = handle
1726                .join()
1727                .map_err(|_| GitError::Command("diff worker panicked".into()))??;
1728            changes.extend(chunk_changes);
1729        }
1730        Ok(changes)
1731    })
1732}
1733
1734fn diff_name_status_index_worktree_changes_for_entries(
1735    worktree_root: &Path,
1736    format: ObjectFormat,
1737    entries: &[sley_index::IndexEntry],
1738    stat_cache: &IndexStatCache,
1739) -> Result<Vec<NameStatusEntry>> {
1740    const PARALLEL_SCAN_MIN_ENTRIES: usize = 2048;
1741    let workers = std::thread::available_parallelism()
1742        .map(|count| count.get())
1743        .unwrap_or(1)
1744        .min(8);
1745    if workers <= 1 || entries.len() < PARALLEL_SCAN_MIN_ENTRIES {
1746        return diff_name_status_index_worktree_changes_for_entry_chunk(
1747            worktree_root,
1748            format,
1749            entries,
1750            stat_cache,
1751        );
1752    }
1753    let chunk_size = entries.len().div_ceil(workers);
1754    std::thread::scope(|scope| {
1755        let mut handles = Vec::new();
1756        for chunk in entries.chunks(chunk_size) {
1757            handles.push(scope.spawn(move || {
1758                diff_name_status_index_worktree_changes_for_entry_chunk(
1759                    worktree_root,
1760                    format,
1761                    chunk,
1762                    stat_cache,
1763                )
1764            }));
1765        }
1766        let mut changes = Vec::new();
1767        for handle in handles {
1768            let chunk_changes = handle
1769                .join()
1770                .map_err(|_| GitError::Command("diff worker panicked".into()))??;
1771            changes.extend(chunk_changes);
1772        }
1773        Ok(changes)
1774    })
1775}
1776
1777fn diff_name_status_index_worktree_changes_for_entry_chunk(
1778    worktree_root: &Path,
1779    format: ObjectFormat,
1780    entries: &[sley_index::IndexEntry],
1781    stat_cache: &IndexStatCache,
1782) -> Result<Vec<NameStatusEntry>> {
1783    let mut changes = Vec::new();
1784    let mut path = PathBuf::from(worktree_root);
1785    for entry in entries {
1786        worktree_path_for_repo_path_into(&mut path, worktree_root, entry.path.as_bytes());
1787        if let Some(change) = index_worktree_change_for_entry(&path, format, entry, &stat_cache)? {
1788            changes.push(change);
1789        }
1790    }
1791    Ok(changes)
1792}
1793
1794fn diff_name_status_index_worktree_changes_for_borrowed_entry_chunk(
1795    worktree_root: &Path,
1796    format: ObjectFormat,
1797    entries: &[sley_index::IndexEntryRef<'_>],
1798    stat_cache: &IndexStatCache,
1799) -> Result<Vec<NameStatusEntry>> {
1800    let mut changes = Vec::new();
1801    let mut path = PathBuf::from(worktree_root);
1802    for entry in entries {
1803        worktree_path_for_repo_path_into(&mut path, worktree_root, entry.path);
1804        if let Some(change) = index_worktree_change_for_entry(&path, format, entry, &stat_cache)? {
1805            changes.push(change);
1806        }
1807    }
1808    Ok(changes)
1809}
1810
1811fn index_worktree_metadata_for_entries(
1812    entries: &[impl WorktreeIndexEntry],
1813) -> (bool, Vec<IndexGitlinkEntry>) {
1814    let mut has_non_normal_stage = false;
1815    let mut staged_gitlinks = Vec::new();
1816    for entry in entries {
1817        if entry.stage() != sley_index::Stage::Normal {
1818            has_non_normal_stage = true;
1819        }
1820        if entry.mode() == 0o160000 {
1821            staged_gitlinks.push(IndexGitlinkEntry {
1822                path: BString::from_bytes(entry.git_path()),
1823                oid: entry.oid(),
1824            });
1825        }
1826    }
1827    (has_non_normal_stage, staged_gitlinks)
1828}
1829
1830fn diff_name_status_index_worktree_changes_from_snapshot(
1831    worktree_root: &Path,
1832    git_dir: &Path,
1833    format: ObjectFormat,
1834) -> Result<IndexWorktreeDiff> {
1835    let IndexSnapshot {
1836        entries: index,
1837        stat_cache,
1838    } = read_index_snapshot(git_dir, format)?;
1839    let index_gitlinks = index_gitlinks(&index);
1840    let staged_gitlinks = index_gitlinks
1841        .iter()
1842        .map(|(path, oid)| IndexGitlinkEntry {
1843            path: BString::from_bytes(path),
1844            oid: *oid,
1845        })
1846        .collect();
1847    let mut changes = Vec::new();
1848    for (git_path, left) in &index {
1849        let right = worktree_entry_for_path(
1850            worktree_root,
1851            format,
1852            git_path,
1853            &index_gitlinks,
1854            Some(&stat_cache),
1855        )?;
1856        let Some(right) = right else {
1857            changes.push(NameStatusEntry {
1858                status: NameStatus::Deleted,
1859                path: git_path.clone().into(),
1860                old_path: None,
1861                old_mode: Some(left.mode),
1862                new_mode: None,
1863                old_oid: Some(left.oid),
1864                new_oid: None,
1865            });
1866            continue;
1867        };
1868        if right != *left {
1869            changes.push(NameStatusEntry {
1870                status: NameStatus::Modified,
1871                path: git_path.clone().into(),
1872                old_path: None,
1873                old_mode: Some(left.mode),
1874                new_mode: Some(right.mode),
1875                old_oid: Some(left.oid),
1876                new_oid: Some(right.oid),
1877            });
1878        }
1879    }
1880    Ok(IndexWorktreeDiff {
1881        entries: changes,
1882        staged_gitlinks,
1883    })
1884}
1885
1886fn apply_name_status_options_to_index_worktree_changes(
1887    mut changes: Vec<NameStatusEntry>,
1888    options: DiffNameStatusOptions,
1889) -> Result<Vec<NameStatusEntry>> {
1890    if options.detect_renames || options.detect_copies {
1891        changes.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
1892    }
1893    Ok(changes)
1894}
1895
1896/// Index-vs-worktree name-status for **`git diff-files`** (plumbing), which
1897/// selects changed paths by the cached *stat* rather than by content.
1898///
1899/// This is the crucial difference from [`diff_name_status_index_worktree_with_options`]
1900/// (the engine behind porcelain `git diff`): porcelain `git diff` refreshes the
1901/// index first, so a stat-dirty-but-content-identical entry (a `touch`ed file, or
1902/// a freshly `rm --cached`-then-`reset --no-refresh` entry with a zeroed cached
1903/// stat) is re-stamped clean and suppressed. `git diff-files` does **not** refresh
1904/// — it reports every entry whose cached stat fails to prove it clean as `M`,
1905/// without re-hashing the content to "rescue" it (`builtin/diff.c` →
1906/// `run_diff_files` → `ie_match_stat`). The raw / name-only / name-status output
1907/// and the `--quiet`/`--exit-code` status therefore list such entries even when
1908/// the content is byte-identical; patch/stat output, which diffs actual content,
1909/// renders them as an empty hunk.
1910///
1911/// We layer that stat-based selection on top of the content-based diff: the
1912/// content diff already catches adds/deletes/genuine-content modifies (with
1913/// rename detection), and we then append a `Modified` entry for any stage-0 path
1914/// whose worktree file is present and whose cached stat is dirty per
1915/// [`IndexStatCache::index_entry_worktree_stat_dirty`] but which the content diff
1916/// did not already report. Content-identical stat-dirty entries cannot be rename
1917/// sources/targets (their content is unchanged), so they never interact with the
1918/// rename machinery — they are plain `M`.
1919pub fn diff_name_status_index_worktree_for_diff_files_with_options(
1920    worktree_root: impl AsRef<Path>,
1921    git_dir: impl AsRef<Path>,
1922    format: ObjectFormat,
1923    options: DiffNameStatusOptions,
1924) -> Result<Vec<NameStatusEntry>> {
1925    let worktree_root = worktree_root.as_ref();
1926    let git_dir = git_dir.as_ref();
1927    let changes =
1928        diff_name_status_index_worktree_with_options(worktree_root, git_dir, format, options)?;
1929    augment_with_stat_dirty_entries(worktree_root, git_dir, format, changes)
1930}
1931
1932/// As [`diff_name_status_index_worktree_for_diff_files_with_options`], but with
1933/// full rename/copy options (the `git diff-files -M/-C` path). The stat-dirty
1934/// augmentation is identical; only the underlying content diff differs.
1935pub fn diff_name_status_index_worktree_for_diff_files_with_rename_options(
1936    worktree_root: impl AsRef<Path>,
1937    git_dir: impl AsRef<Path>,
1938    format: ObjectFormat,
1939    options: RenameDetectionOptions,
1940) -> Result<Vec<NameStatusEntry>> {
1941    let worktree_root = worktree_root.as_ref();
1942    let git_dir = git_dir.as_ref();
1943    let changes = diff_name_status_index_worktree_with_rename_options(
1944        worktree_root,
1945        git_dir,
1946        format,
1947        options,
1948    )?;
1949    augment_with_stat_dirty_entries(worktree_root, git_dir, format, changes)
1950}
1951
1952/// Append a `Modified` entry for every stage-0 index path whose worktree file is
1953/// present and whose cached stat is dirty (`ce_match_stat` "changed") but which
1954/// `content_changes` did not already report. The result is re-sorted by path so
1955/// the merged set keeps git's diff-queue ordering. New-side oids on the added
1956/// entries are left `None` (rendered as zeros in raw output), matching git, which
1957/// reports the worktree blob oid only for entries it has hashed.
1958fn augment_with_stat_dirty_entries(
1959    worktree_root: &Path,
1960    git_dir: &Path,
1961    format: ObjectFormat,
1962    mut content_changes: Vec<NameStatusEntry>,
1963) -> Result<Vec<NameStatusEntry>> {
1964    let IndexSnapshot {
1965        entries: index,
1966        stat_cache,
1967    } = read_index_snapshot(git_dir, format)?;
1968    // Paths the content diff already accounts for (by new-side path, the position
1969    // git queues a pair at — a rename's destination, a modify/add/delete's path).
1970    let already_reported: BTreeSet<&[u8]> = content_changes
1971        .iter()
1972        .map(|entry| entry.path.as_bytes())
1973        .collect();
1974    let mut extras = Vec::new();
1975    for (git_path, tracked) in &index {
1976        if already_reported.contains(git_path.as_slice()) {
1977            continue;
1978        }
1979        let Some(cached) = stat_cache.entry_for_git_path(git_path) else {
1980            continue;
1981        };
1982        // Gitlinks (submodules) have their own dirtiness model and are not stat-
1983        // compared here; the content diff already handles changed gitlink oids.
1984        if tracked.mode == 0o160000 {
1985            continue;
1986        }
1987        let path = worktree_path_for_repo_path(worktree_root, git_path);
1988        let Ok(metadata) = fs::symlink_metadata(&path) else {
1989            // A missing worktree file is a deletion, which the content diff
1990            // already reports; nothing to add here.
1991            continue;
1992        };
1993        if !(metadata.is_file() || metadata.file_type().is_symlink()) {
1994            continue;
1995        }
1996        match stat_cache.index_entry_worktree_stat_verdict(cached, &metadata) {
1997            sley_index::StatVerdict::Clean => continue,
1998            sley_index::StatVerdict::Dirty => {}
1999            // A racily-clean entry must be resolved by content: git re-hashes it
2000            // (`ce_compare_data`) and only reports `M` when the worktree bytes
2001            // actually differ from the cached oid — so a `touch`ed-then-re-`add`ed
2002            // file (same-second mtime as the index) stays clean.
2003            sley_index::StatVerdict::RacyNeedsContentCheck => {
2004                if worktree_oid_matches_index(worktree_root, git_path, &metadata, tracked, format)? {
2005                    continue;
2006                }
2007            }
2008        }
2009        extras.push(NameStatusEntry {
2010            status: NameStatus::Modified,
2011            path: git_path.clone().into(),
2012            old_path: None,
2013            old_mode: Some(tracked.mode),
2014            new_mode: Some(tracked.mode),
2015            old_oid: Some(tracked.oid),
2016            new_oid: None,
2017        });
2018    }
2019    if !extras.is_empty() {
2020        content_changes.extend(extras);
2021        content_changes
2022            .sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2023    }
2024    Ok(content_changes)
2025}
2026
2027/// Whether the worktree file at `git_path` hashes to the index entry's oid (mode
2028/// included). Used to resolve a racily-clean `diff-files` entry: git re-hashes the
2029/// content and only reports it changed when the bytes truly differ. Mirrors the
2030/// worktree-oid computation in [`worktree_entry_for_path`].
2031fn worktree_oid_matches_index(
2032    worktree_root: &Path,
2033    git_path: &[u8],
2034    metadata: &fs::Metadata,
2035    index_entry: &TrackedEntry,
2036    format: ObjectFormat,
2037) -> Result<bool> {
2038    let file_type = metadata.file_type();
2039    let path = worktree_path_for_repo_path(worktree_root, git_path);
2040    let body = if file_type.is_symlink() {
2041        symlink_target_bytes(&path)?
2042    } else {
2043        fs::read(&path)?
2044    };
2045    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
2046    let mode = if file_type.is_symlink() {
2047        0o120000
2048    } else {
2049        file_mode(metadata)
2050    };
2051    Ok(oid == index_entry.oid && mode == index_entry.mode)
2052}
2053
2054pub fn diff_name_status_trees_with_options(
2055    db: &FileObjectDatabase,
2056    format: ObjectFormat,
2057    left_tree: &ObjectId,
2058    right_tree: &ObjectId,
2059    options: DiffNameStatusOptions,
2060) -> Result<Vec<NameStatusEntry>> {
2061    // `--find-copies-harder` may pair an *unchanged* left-side file as a copy
2062    // source, so it needs the complete left map; every other mode only consults
2063    // changed paths, so the pruned simultaneous walk (which skips identical
2064    // subtrees) suffices and produces byte-identical output.
2065    let needs_full_maps = options.detect_copies && options.find_copies_harder;
2066    let (left_entries, right_entries) = if needs_full_maps {
2067        collect_full_tree_pair(db, format, left_tree, right_tree)?
2068    } else {
2069        changed_tree_entries(db, format, left_tree, right_tree)?
2070    };
2071    diff_name_status_maps(
2072        &left_entries,
2073        &right_entries,
2074        left_entries.keys().chain(right_entries.keys()),
2075        options,
2076    )
2077}
2078
2079pub fn diff_name_status_empty_tree_with_options(
2080    db: &FileObjectDatabase,
2081    format: ObjectFormat,
2082    right_tree: &ObjectId,
2083    options: DiffNameStatusOptions,
2084) -> Result<Vec<NameStatusEntry>> {
2085    let left_entries = BTreeMap::new();
2086    let mut right_entries = BTreeMap::new();
2087    collect_tree_entries(db, format, right_tree, Vec::new(), &mut right_entries)?;
2088    diff_name_status_maps(&left_entries, &right_entries, right_entries.keys(), options)
2089}
2090
2091/// Diff two trees with full rename/copy options, including inexact (similarity)
2092/// detection when [`RenameDetectionOptions::detect_inexact`] is set.
2093///
2094/// Blob bytes for similarity scoring are read from `db`. This is the inexact-
2095/// aware counterpart of [`diff_name_status_trees_with_options`]; passing
2096/// `RenameDetectionOptions::default()` (or `RenameDetectionOptions { base, ..
2097/// default }` with `detect_inexact: false`) reproduces the exact-only behaviour.
2098pub fn diff_name_status_trees_with_rename_options(
2099    db: &FileObjectDatabase,
2100    format: ObjectFormat,
2101    left_tree: &ObjectId,
2102    right_tree: &ObjectId,
2103    options: RenameDetectionOptions,
2104) -> Result<Vec<NameStatusEntry>> {
2105    // See `diff_name_status_trees_with_options`: only `--find-copies-harder`
2106    // needs unchanged left entries as copy sources; otherwise the pruned walk
2107    // (skipping identical subtrees) yields identical output far more cheaply.
2108    let needs_full_maps = options.base.detect_copies && options.base.find_copies_harder;
2109    let (left_entries, right_entries) = if needs_full_maps {
2110        collect_full_tree_pair(db, format, left_tree, right_tree)?
2111    } else {
2112        changed_tree_entries(db, format, left_tree, right_tree)?
2113    };
2114    diff_name_status_maps_with_renames(
2115        &left_entries,
2116        &right_entries,
2117        left_entries.keys().chain(right_entries.keys()),
2118        options,
2119        |oid| read_blob_bytes(db, oid),
2120    )
2121}
2122
2123/// Diff the empty tree against `right_tree` with full rename/copy options.
2124///
2125/// As with [`diff_name_status_trees_with_rename_options`], inexact detection is
2126/// gated on [`RenameDetectionOptions::detect_inexact`]; the left (empty) side
2127/// has no sources, so only copies among the right-side additions can match when
2128/// `find_copies_harder` is set.
2129pub fn diff_name_status_empty_tree_with_rename_options(
2130    db: &FileObjectDatabase,
2131    format: ObjectFormat,
2132    right_tree: &ObjectId,
2133    options: RenameDetectionOptions,
2134) -> Result<Vec<NameStatusEntry>> {
2135    let left_entries = BTreeMap::new();
2136    let mut right_entries = BTreeMap::new();
2137    collect_tree_entries(db, format, right_tree, Vec::new(), &mut right_entries)?;
2138    diff_name_status_maps_with_renames(
2139        &left_entries,
2140        &right_entries,
2141        right_entries.keys(),
2142        options,
2143        |oid| read_blob_bytes(db, oid),
2144    )
2145}
2146
2147/// Read a blob's raw bytes from the ODB, returning `None` if the object cannot
2148/// be read or is not a blob. Used as the similarity-scoring blob fetcher; a
2149/// missing object simply makes a candidate pair non-similar rather than failing
2150/// the whole diff.
2151fn read_blob_bytes(db: &FileObjectDatabase, oid: &ObjectId) -> Option<Vec<u8>> {
2152    match db.read_object(oid) {
2153        Ok(object) if object.object_type == ObjectType::Blob => Some(object.body.clone()),
2154        _ => None,
2155    }
2156}
2157
2158/// Build the raw per-path add/delete/modify change list (before any rename or
2159/// copy detection) from the two entry maps and the candidate path set.
2160fn raw_name_status_changes_for_unique_paths<'a>(
2161    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2162    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2163    paths: impl Iterator<Item = &'a Vec<u8>>,
2164) -> Vec<NameStatusEntry> {
2165    let mut changes = Vec::new();
2166    for path in paths {
2167        let left = left_entries.get(path);
2168        let right = right_entries.get(path);
2169        let status = match (left, right) {
2170            (None, Some(_)) => Some(NameStatus::Added),
2171            (Some(_), None) => Some(NameStatus::Deleted),
2172            (Some(left), Some(right)) if left != right => Some(NameStatus::Modified),
2173            _ => None,
2174        };
2175        if let Some(status) = status {
2176            changes.push(NameStatusEntry {
2177                status,
2178                path: path.clone().into(),
2179                old_path: None,
2180                old_mode: left.map(|entry| entry.mode),
2181                new_mode: right.map(|entry| entry.mode),
2182                old_oid: left.map(|entry| entry.oid),
2183                new_oid: right.map(|entry| entry.oid),
2184            });
2185        }
2186    }
2187    changes
2188}
2189
2190fn diff_name_status_maps<'a>(
2191    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2192    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2193    candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2194    options: DiffNameStatusOptions,
2195) -> Result<Vec<NameStatusEntry>> {
2196    let paths = candidate_path_set(candidate_paths);
2197    diff_name_status_maps_for_path_set(left_entries, right_entries, &paths, options)
2198}
2199
2200fn diff_name_status_maps_for_path_set(
2201    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2202    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2203    candidate_paths: &BTreeSet<Vec<u8>>,
2204    options: DiffNameStatusOptions,
2205) -> Result<Vec<NameStatusEntry>> {
2206    diff_name_status_maps_for_unique_paths(
2207        left_entries,
2208        right_entries,
2209        candidate_paths.iter(),
2210        options,
2211    )
2212}
2213
2214fn diff_name_status_maps_for_unique_paths<'a>(
2215    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2216    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2217    candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2218    options: DiffNameStatusOptions,
2219) -> Result<Vec<NameStatusEntry>> {
2220    let mut changes =
2221        raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
2222    if options.detect_renames {
2223        changes = detect_exact_renames(changes, left_entries, right_entries, options.rename_empty);
2224    }
2225    if options.detect_copies {
2226        changes = detect_exact_copies(
2227            changes,
2228            left_entries,
2229            right_entries,
2230            options.find_copies_harder,
2231            options.rename_empty,
2232        );
2233    }
2234    Ok(changes)
2235}
2236
2237/// Like [`diff_name_status_maps`], but additionally runs inexact (similarity)
2238/// rename/copy detection when `options.detect_inexact` is set.
2239///
2240/// `fetch_blob` resolves an [`ObjectId`] to that blob's raw bytes; it is only
2241/// consulted for the candidate pairs considered during inexact detection, and
2242/// only when inexact detection is enabled. A pair whose blob bytes cannot be
2243/// fetched is simply skipped (treated as not similar), so a missing object never
2244/// fails the whole diff.
2245fn diff_name_status_maps_with_renames<'a>(
2246    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2247    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2248    candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2249    options: RenameDetectionOptions,
2250    fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
2251) -> Result<Vec<NameStatusEntry>> {
2252    let paths = candidate_path_set(candidate_paths);
2253    diff_name_status_maps_with_renames_for_path_set(
2254        left_entries,
2255        right_entries,
2256        &paths,
2257        options,
2258        fetch_blob,
2259    )
2260}
2261
2262fn diff_name_status_maps_with_renames_for_path_set(
2263    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2264    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2265    candidate_paths: &BTreeSet<Vec<u8>>,
2266    options: RenameDetectionOptions,
2267    fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
2268) -> Result<Vec<NameStatusEntry>> {
2269    diff_name_status_maps_with_renames_for_unique_paths(
2270        left_entries,
2271        right_entries,
2272        candidate_paths.iter(),
2273        options,
2274        fetch_blob,
2275    )
2276}
2277
2278fn diff_name_status_maps_with_renames_for_unique_paths<'a>(
2279    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2280    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2281    candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
2282    options: RenameDetectionOptions,
2283    fetch_blob: impl Fn(&ObjectId) -> Option<Vec<u8>>,
2284) -> Result<Vec<NameStatusEntry>> {
2285    let base = options.base;
2286    let mut changes =
2287        raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
2288    if base.detect_renames {
2289        changes = detect_exact_renames(changes, left_entries, right_entries, base.rename_empty);
2290    }
2291    // Inexact rename detection runs after exact renames so exact matches keep
2292    // priority (and their score of 100). It only fires when rename detection is
2293    // enabled at all, mirroring git's `-M`.
2294    if base.detect_renames && options.detect_inexact {
2295        changes = detect_inexact_renames(changes, &options, &fetch_blob);
2296    }
2297    if base.detect_copies {
2298        changes = detect_exact_copies(
2299            changes,
2300            left_entries,
2301            right_entries,
2302            base.find_copies_harder,
2303            base.rename_empty,
2304        );
2305    }
2306    if base.detect_copies && options.detect_inexact {
2307        changes = detect_inexact_copies(changes, left_entries, &options, &fetch_blob);
2308    }
2309    Ok(changes)
2310}
2311
2312fn detect_exact_renames(
2313    changes: Vec<NameStatusEntry>,
2314    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2315    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2316    rename_empty: bool,
2317) -> Vec<NameStatusEntry> {
2318    let added = changes
2319        .iter()
2320        .enumerate()
2321        .filter(|(_, entry)| entry.status == NameStatus::Added)
2322        .map(|(idx, entry)| (idx, entry.path.clone()))
2323        .collect::<Vec<_>>();
2324    let deleted = changes
2325        .iter()
2326        .filter(|entry| entry.status == NameStatus::Deleted)
2327        .map(|entry| entry.path.clone())
2328        .collect::<Vec<_>>();
2329    let mut consumed = BTreeSet::new();
2330    let mut renamed_old_paths = BTreeSet::new();
2331    let mut result = Vec::new();
2332
2333    for old_path in deleted {
2334        let Some(left) = left_entries.get(old_path.as_bytes()) else {
2335            continue;
2336        };
2337        if let Some((idx, new_path)) = added.iter().find(|(idx, new_path)| {
2338            !consumed.contains(idx)
2339                && right_entries.get(new_path.as_bytes()).is_some_and(|right| {
2340                    right.oid == left.oid && (rename_empty || !is_empty_blob_oid(&left.oid))
2341                })
2342        }) {
2343            consumed.insert(*idx);
2344            renamed_old_paths.insert(old_path.clone());
2345            let right = right_entries.get(new_path.as_bytes());
2346            result.push(NameStatusEntry {
2347                status: NameStatus::Renamed(100),
2348                path: new_path.clone(),
2349                old_path: Some(old_path),
2350                old_mode: Some(left.mode),
2351                new_mode: right.map(|entry| entry.mode),
2352                old_oid: Some(left.oid),
2353                new_oid: right.map(|entry| entry.oid),
2354            });
2355        }
2356    }
2357
2358    for (idx, entry) in changes.into_iter().enumerate() {
2359        if entry.status == NameStatus::Added && consumed.contains(&idx) {
2360            continue;
2361        }
2362        if entry.status == NameStatus::Deleted && renamed_old_paths.contains(&entry.path) {
2363            continue;
2364        }
2365        result.push(entry);
2366    }
2367    result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2368    result
2369}
2370
2371fn detect_exact_copies(
2372    changes: Vec<NameStatusEntry>,
2373    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2374    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2375    find_copies_harder: bool,
2376    rename_empty: bool,
2377) -> Vec<NameStatusEntry> {
2378    let changed_sources = changes
2379        .iter()
2380        .filter(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified))
2381        .map(|entry| entry.path.clone())
2382        .collect::<BTreeSet<_>>();
2383    let source_paths = left_entries
2384        .keys()
2385        .filter(|path| find_copies_harder || changed_sources.contains(path.as_slice()))
2386        .cloned()
2387        .collect::<Vec<_>>();
2388
2389    let mut result = Vec::new();
2390    for entry in changes {
2391        if entry.status != NameStatus::Added {
2392            result.push(entry);
2393            continue;
2394        }
2395        let Some(right) = right_entries.get(entry.path.as_bytes()) else {
2396            result.push(entry);
2397            continue;
2398        };
2399        if let Some(old_path) = source_paths.iter().find(|old_path| {
2400            old_path.as_slice() != entry.path.as_bytes()
2401                && left_entries.get(*old_path).is_some_and(|left| {
2402                    left.oid == right.oid && (rename_empty || !is_empty_blob_oid(&left.oid))
2403                })
2404        }) {
2405            result.push(NameStatusEntry {
2406                status: NameStatus::Copied(100),
2407                path: entry.path,
2408                old_path: Some(old_path.clone().into()),
2409                old_mode: left_entries
2410                    .get(old_path.as_slice())
2411                    .map(|entry| entry.mode),
2412                new_mode: entry.new_mode,
2413                old_oid: left_entries.get(old_path.as_slice()).map(|entry| entry.oid),
2414                new_oid: entry.new_oid,
2415            });
2416        } else {
2417            result.push(entry);
2418        }
2419    }
2420    result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2421    result
2422}
2423
2424/// Old-side metadata of a rename source, snapshotted before the source delete
2425/// entry is consumed so it can be attached to the renamed destination.
2426#[derive(Debug, Clone)]
2427struct RenameSourceMeta {
2428    path: BString,
2429    mode: Option<u32>,
2430    oid: Option<ObjectId>,
2431}
2432
2433/// A scored candidate pairing of a deleted source with an added destination,
2434/// used to order inexact-rename assignment best-match-first.
2435struct ScoredPair {
2436    /// Index into the `deleted` candidate list.
2437    src: usize,
2438    /// Index into the `added` candidate list.
2439    dst: usize,
2440    /// Similarity percentage in `0..=100`.
2441    score: u8,
2442}
2443
2444/// Inexact rename detection: pair still-unmatched deleted files with still-
2445/// unmatched added files by content similarity, replacing the best matches
2446/// (similarity >= `rename_threshold`) with [`NameStatus::Renamed`].
2447///
2448/// Exact renames have already run, so the only `Deleted`/`Added` entries left
2449/// here are ones with no identical-OID partner. Assignment is greedy by
2450/// descending score (then by source/destination order for determinism), and
2451/// each source and destination is used at most once — matching git's
2452/// `diffcore-rename` behaviour. Empty blobs are never used as a rename source
2453/// when `rename_empty` is false, mirroring exact detection.
2454fn detect_inexact_renames(
2455    changes: Vec<NameStatusEntry>,
2456    options: &RenameDetectionOptions,
2457    fetch_blob: &impl Fn(&ObjectId) -> Option<Vec<u8>>,
2458) -> Vec<NameStatusEntry> {
2459    let threshold = options.rename_threshold;
2460    // A threshold above 100 can never be met; nothing to do.
2461    if threshold > 100 {
2462        return changes;
2463    }
2464
2465    // Collect the candidate sources (Deletes) and destinations (Adds) with their
2466    // positions in `changes`, fetching blob bytes once each.
2467    let mut deleted: Vec<(usize, Vec<u8>)> = Vec::new();
2468    let mut added: Vec<(usize, Vec<u8>)> = Vec::new();
2469    for (idx, entry) in changes.iter().enumerate() {
2470        match entry.status {
2471            NameStatus::Deleted => {
2472                let Some(oid) = entry.old_oid.as_ref() else {
2473                    continue;
2474                };
2475                if !options.base.rename_empty && is_empty_blob_oid(oid) {
2476                    continue;
2477                }
2478                if let Some(bytes) = fetch_blob(oid) {
2479                    deleted.push((idx, bytes));
2480                }
2481            }
2482            NameStatus::Added => {
2483                let Some(oid) = entry.new_oid.as_ref() else {
2484                    continue;
2485                };
2486                if !options.base.rename_empty && is_empty_blob_oid(oid) {
2487                    continue;
2488                }
2489                if let Some(bytes) = fetch_blob(oid) {
2490                    added.push((idx, bytes));
2491                }
2492            }
2493            _ => {}
2494        }
2495    }
2496
2497    if deleted.is_empty() || added.is_empty() {
2498        return changes;
2499    }
2500
2501    // Score every (delete, add) pair; keep only those meeting the threshold.
2502    let mut pairs: Vec<ScoredPair> = Vec::new();
2503    for (si, (_, src_bytes)) in deleted.iter().enumerate() {
2504        for (di, (_, dst_bytes)) in added.iter().enumerate() {
2505            let score = blob_similarity(src_bytes, dst_bytes);
2506            if score >= threshold {
2507                pairs.push(ScoredPair {
2508                    src: si,
2509                    dst: di,
2510                    score,
2511                });
2512            }
2513        }
2514    }
2515    // Best score first; ties broken by source then destination order so the
2516    // result is deterministic regardless of input ordering.
2517    pairs.sort_by(|a, b| {
2518        b.score
2519            .cmp(&a.score)
2520            .then_with(|| a.src.cmp(&b.src))
2521            .then_with(|| a.dst.cmp(&b.dst))
2522    });
2523
2524    // Greedily assign each source/destination once.
2525    let mut src_used = vec![false; deleted.len()];
2526    let mut dst_used = vec![false; added.len()];
2527    // destination changes-index -> (source changes-index, score).
2528    let mut rename_of: BTreeMap<usize, (usize, u8)> = BTreeMap::new();
2529    for pair in pairs {
2530        if src_used[pair.src] || dst_used[pair.dst] {
2531            continue;
2532        }
2533        src_used[pair.src] = true;
2534        dst_used[pair.dst] = true;
2535        let src_change_idx = deleted[pair.src].0;
2536        let dst_change_idx = added[pair.dst].0;
2537        rename_of.insert(dst_change_idx, (src_change_idx, pair.score));
2538    }
2539
2540    if rename_of.is_empty() {
2541        return changes;
2542    }
2543
2544    // Snapshot the source (delete) entries' metadata before we consume them, so
2545    // each renamed destination can carry the correct old path/mode/oid.
2546    let consumed_sources: BTreeSet<usize> =
2547        rename_of.values().map(|(src_idx, _)| *src_idx).collect();
2548    let source_meta: BTreeMap<usize, RenameSourceMeta> = consumed_sources
2549        .iter()
2550        .map(|&src_idx| {
2551            let src = &changes[src_idx];
2552            (
2553                src_idx,
2554                RenameSourceMeta {
2555                    path: src.path.clone(),
2556                    mode: src.old_mode,
2557                    oid: src.old_oid,
2558                },
2559            )
2560        })
2561        .collect();
2562
2563    let mut result = Vec::with_capacity(changes.len());
2564    for (idx, entry) in changes.into_iter().enumerate() {
2565        if consumed_sources.contains(&idx) {
2566            // This delete became the source of a rename; drop it.
2567            continue;
2568        }
2569        if let Some((src_idx, score)) = rename_of.get(&idx) {
2570            // The destination becomes a rename from the matched source. Pull the
2571            // old-side metadata from the snapshot; the new-side metadata stays as
2572            // the destination's.
2573            let meta = source_meta
2574                .get(src_idx)
2575                .cloned()
2576                .unwrap_or(RenameSourceMeta {
2577                    path: BString::default(),
2578                    mode: None,
2579                    oid: None,
2580                });
2581            result.push(NameStatusEntry {
2582                status: NameStatus::Renamed(*score),
2583                path: entry.path,
2584                old_path: Some(meta.path),
2585                old_mode: meta.mode,
2586                new_mode: entry.new_mode,
2587                old_oid: meta.oid,
2588                new_oid: entry.new_oid,
2589            });
2590            continue;
2591        }
2592        result.push(entry);
2593    }
2594
2595    result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2596    result
2597}
2598
2599/// Inexact copy detection: for each still-`Added` file, find the most similar
2600/// candidate *source* on the left side (similarity >= `copy_threshold`) and, if
2601/// found, report it as a [`NameStatus::Copied`]. The source is not removed
2602/// (copies leave the original in place).
2603///
2604/// Candidate sources follow the same rule as exact copy detection: with
2605/// `find_copies_harder` every left-side path is eligible; otherwise only paths
2606/// that were themselves changed (deleted or modified) on this diff. Exact copies
2607/// have already run, so any remaining `Added` here had no identical-OID source.
2608fn detect_inexact_copies(
2609    changes: Vec<NameStatusEntry>,
2610    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2611    options: &RenameDetectionOptions,
2612    fetch_blob: &impl Fn(&ObjectId) -> Option<Vec<u8>>,
2613) -> Vec<NameStatusEntry> {
2614    let threshold = options.copy_threshold;
2615    if threshold > 100 {
2616        return changes;
2617    }
2618
2619    let changed_sources = changes
2620        .iter()
2621        .filter(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified))
2622        .map(|entry| entry.path.clone())
2623        .collect::<BTreeSet<_>>();
2624    // Eligible source paths, paired with their bytes (fetched lazily/once).
2625    let mut sources: Vec<(Vec<u8>, &TrackedEntry, Vec<u8>)> = Vec::new();
2626    for (path, tracked) in left_entries {
2627        if !(options.base.find_copies_harder || changed_sources.contains(path.as_slice())) {
2628            continue;
2629        }
2630        if !options.base.rename_empty && is_empty_blob_oid(&tracked.oid) {
2631            continue;
2632        }
2633        if let Some(bytes) = fetch_blob(&tracked.oid) {
2634            sources.push((path.clone(), tracked, bytes));
2635        }
2636    }
2637    if sources.is_empty() {
2638        return changes;
2639    }
2640
2641    let mut result = Vec::with_capacity(changes.len());
2642    for entry in changes {
2643        if entry.status != NameStatus::Added {
2644            result.push(entry);
2645            continue;
2646        }
2647        let Some(new_oid) = entry.new_oid.as_ref() else {
2648            result.push(entry);
2649            continue;
2650        };
2651        let Some(dst_bytes) = fetch_blob(new_oid) else {
2652            result.push(entry);
2653            continue;
2654        };
2655
2656        // Pick the best-scoring source path that meets the threshold. Ties are
2657        // broken by path order (BTreeMap iteration is sorted) so the choice is
2658        // deterministic.
2659        let mut best: Option<(usize, u8)> = None;
2660        for (i, (src_path, _, src_bytes)) in sources.iter().enumerate() {
2661            if src_path.as_slice() == entry.path.as_bytes() {
2662                continue;
2663            }
2664            let score = blob_similarity(src_bytes, &dst_bytes);
2665            if score < threshold {
2666                continue;
2667            }
2668            match best {
2669                Some((_, best_score)) if best_score >= score => {}
2670                _ => best = Some((i, score)),
2671            }
2672        }
2673
2674        if let Some((src_idx, score)) = best {
2675            let (src_path, src_tracked, _) = &sources[src_idx];
2676            result.push(NameStatusEntry {
2677                status: NameStatus::Copied(score),
2678                path: entry.path,
2679                old_path: Some(src_path.clone().into()),
2680                old_mode: Some(src_tracked.mode),
2681                new_mode: entry.new_mode,
2682                old_oid: Some(src_tracked.oid),
2683                new_oid: entry.new_oid,
2684            });
2685        } else {
2686            result.push(entry);
2687        }
2688    }
2689    result.sort_by(|left, right| diff_entry_sort_path(left).cmp(diff_entry_sort_path(right)));
2690    result
2691}
2692
2693fn is_empty_blob_oid(oid: &ObjectId) -> bool {
2694    object_id_for_bytes(oid.format(), "blob", b"").is_ok_and(|empty| empty == *oid)
2695}
2696
2697// ===========================================================================
2698// Content similarity (the engine for inexact `-M`/`-C` rename/copy detection).
2699//
2700// This mirrors upstream git's similarity estimate from `diffcore-delta.c`
2701// (the span-hash counting) and `diffcore-rename.c` (the score formula), so the
2702// `R<score>`/`C<score>` we emit match git's percentages.
2703//
2704// The metric, precisely:
2705//
2706//   1. Each blob is broken into *spans*. Starting at a byte, we accumulate a
2707//      rolling hash of the bytes and end the span at the first `\n` (inclusive)
2708//      or once the span reaches `MAX_SPAN_BYTES` (64) bytes, whichever comes
2709//      first. (The 64-byte cap keeps a file with no/few newlines — e.g. a
2710//      binary blob or one very long line — from collapsing into a single span,
2711//      so similarity still tracks shared substrings.) Each span yields a
2712//      `(hash, byte_count)` pair, where `byte_count` is the span's length in
2713//      bytes. This is the exact loop git uses in `hash_chars()`.
2714//
2715//   2. The two blobs' spans are reduced to multisets keyed by hash: for each
2716//      hash we keep the total number of bytes spanned by entries with that
2717//      hash, on each side. `common_bytes` is then the sum over all hashes of
2718//      `min(bytes_on_src, bytes_on_dst)` — the bytes that exist on both sides.
2719//      This is git's `src_copied`.
2720//
2721//   3. The score is `common_bytes / max(size_src, size_dst)`, scaled to a
2722//      percentage and rounded to the nearest integer:
2723//
2724//          score% = round(common_bytes * 100 / max(size_src, size_dst))
2725//
2726//      git computes an internal score `src_copied * MAX_SCORE / max_size` with
2727//      `MAX_SCORE == 60000` and reports `round(score * 100 / MAX_SCORE)`; that
2728//      is algebraically the same rounded percentage, which we compute directly
2729//      to avoid intermediate precision loss.
2730//
2731// Edge cases match git: two empty blobs are 100% similar (identical content);
2732// an empty blob vs a non-empty one is 0%. Equal byte buffers are always 100%.
2733
2734/// Maximum number of bytes in a single similarity span before it is force-cut.
2735///
2736/// git uses 64 (`hash_chars()` breaks a span once `++chunks >= 64`).
2737const MAX_SPAN_BYTES: usize = 64;
2738
2739/// Compute the content similarity of two blobs as an integer percentage in
2740/// `0..=100`, using git's span-hash counting metric (see the module comment
2741/// above for the exact definition).
2742///
2743/// The result is symmetric (`blob_similarity(a, b) == blob_similarity(b, a)`)
2744/// because the score divides the common-byte count by the larger of the two
2745/// sizes. Byte-identical blobs return `100`; a non-empty blob compared against
2746/// an empty one returns `0`; two empty blobs return `100`.
2747///
2748/// This is the same number git prints as `similarity index N%` and uses to
2749/// decide `-M`/`-C` rename and copy detection.
2750pub fn blob_similarity(a: &[u8], b: &[u8]) -> u8 {
2751    // Fast paths that also pin down the empty-blob conventions.
2752    if a == b {
2753        return 100;
2754    }
2755    let max_size = a.len().max(b.len());
2756    if max_size == 0 {
2757        // Both empty (and not caught by `a == b` only if both are empty, which
2758        // they are here) -> identical.
2759        return 100;
2760    }
2761
2762    let src = span_hash_counts(a);
2763    let dst = span_hash_counts(b);
2764    let common = common_span_bytes(&src, &dst);
2765
2766    // Match git's diffcore-rename integer math exactly. git computes an internal
2767    // score `src_copied * MAX_SCORE / max_size` (MAX_SCORE == 60000) with integer
2768    // truncation, then reports the similarity index as `score * 100 / MAX_SCORE`,
2769    // truncated again. This two-step truncation -- *not* a single rounded
2770    // `common * 100 / max_size` -- is what yields git's exact percentages: e.g.
2771    // common=4, max_size=6 gives 4*60000/6=40000 then 40000*100/60000=66 (git's
2772    // `R066`), whereas a rounded single step would give 67.
2773    const MAX_SCORE: u64 = 60000;
2774    let internal = (common as u64 * MAX_SCORE) / max_size as u64;
2775    let score = internal * 100 / MAX_SCORE;
2776    score.min(100) as u8
2777}
2778
2779/// Break `data` into spans and return, per span hash, the total number of bytes
2780/// covered by spans with that hash. Spans end at a newline (inclusive) or once
2781/// they reach [`MAX_SPAN_BYTES`] bytes — exactly git's `hash_chars()` loop.
2782///
2783/// The returned map is `hash -> total_span_bytes`. Summing all values yields
2784/// `data.len()`, so the byte accounting is exact.
2785fn span_hash_counts(data: &[u8]) -> BTreeMap<u64, usize> {
2786    let mut counts: BTreeMap<u64, usize> = BTreeMap::new();
2787    let mut idx = 0usize;
2788    let len = data.len();
2789    while idx < len {
2790        // Roll a hash over the bytes of this span. The mixing mirrors git's
2791        // two-accumulator scheme from `diffcore-delta.c`; the exact constants do
2792        // not matter for correctness (any good per-span hash works), only that
2793        // identical spans collide and distinct spans rarely do.
2794        let mut accum1: u32 = 0;
2795        let mut accum2: u32 = 0;
2796        let mut span_len = 0usize;
2797        loop {
2798            let c = data[idx] as u32;
2799            idx += 1;
2800            span_len += 1;
2801            accum1 = (accum1 << 7) ^ (accum2 >> 25);
2802            accum2 = (accum2 << 7) ^ (accum1 >> 25);
2803            accum1 = accum1.wrapping_add(c);
2804            let newline = c == u32::from(b'\n');
2805            if span_len >= MAX_SPAN_BYTES || newline || idx >= len {
2806                break;
2807            }
2808        }
2809        // Fold the two accumulators (and the span length) into one 64-bit key.
2810        // Including the length keeps spans of different lengths from colliding
2811        // when their rolling-hash states happen to coincide.
2812        let hash = ((accum1 as u64) << 32) ^ (accum2 as u64) ^ ((span_len as u64) << 1);
2813        *counts.entry(hash).or_insert(0) += span_len;
2814    }
2815    counts
2816}
2817
2818/// Sum, over every hash present in both maps, the smaller of the two byte
2819/// counts. This is git's `src_copied`: the number of bytes that appear on both
2820/// sides (counting multiplicity via the per-hash byte totals).
2821/// git `diffcore_count_changes()`: span-hash byte accounting between two
2822/// blobs. Returns `(src_copied, literal_added)` — the bytes of `src` that
2823/// survive into `dst`, and the bytes of `dst` not accounted for by `src`.
2824/// `--dirstat`'s default "changes" damage is
2825/// `(src.len() - src_copied) + literal_added`.
2826pub fn count_changes(src: &[u8], dst: &[u8]) -> (usize, usize) {
2827    let src_counts = span_hash_counts(src);
2828    let dst_counts = span_hash_counts(dst);
2829    let copied = common_span_bytes(&src_counts, &dst_counts);
2830    (copied, dst.len() - copied)
2831}
2832
2833fn common_span_bytes(src: &BTreeMap<u64, usize>, dst: &BTreeMap<u64, usize>) -> usize {
2834    let mut common = 0usize;
2835    // Iterate the smaller map for a few less lookups.
2836    let (small, large) = if src.len() <= dst.len() {
2837        (src, dst)
2838    } else {
2839        (dst, src)
2840    };
2841    for (hash, small_bytes) in small {
2842        if let Some(large_bytes) = large.get(hash) {
2843            common += (*small_bytes).min(*large_bytes);
2844        }
2845    }
2846    common
2847}
2848
2849fn diff_entry_sort_path(entry: &NameStatusEntry) -> &[u8] {
2850    // git's diffcore re-inserts rename/copy pairs at their *destination*'s
2851    // position, so the queue (raw, numstat, stat, ...) sorts by the new path.
2852    entry.path.as_bytes()
2853}
2854
2855fn mark_unstaged_worktree_oids_unresolved(
2856    changes: Vec<NameStatusEntry>,
2857    index_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2858    worktree_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
2859) -> Vec<NameStatusEntry> {
2860    changes
2861        .into_iter()
2862        .map(|mut entry| {
2863            let worktree_entry = worktree_entries.get(entry.path.as_bytes());
2864            if worktree_entry != index_entries.get(entry.path.as_bytes()) {
2865                entry.new_oid = None;
2866            }
2867            entry
2868        })
2869        .collect()
2870}
2871
2872#[derive(Debug, Clone, PartialEq, Eq)]
2873struct TrackedEntry {
2874    mode: u32,
2875    oid: ObjectId,
2876}
2877
2878/// A path-keyed map of tracked entries: one flattened side of a tree (or index/
2879/// worktree) snapshot.
2880type TrackedEntryMap = BTreeMap<Vec<u8>, TrackedEntry>;
2881
2882/// The `(left, right)` sides produced by a tree-vs-tree comparison.
2883type TrackedEntryPair = (TrackedEntryMap, TrackedEntryMap);
2884
2885struct IndexSnapshot {
2886    entries: BTreeMap<Vec<u8>, TrackedEntry>,
2887    stat_cache: IndexStatCache,
2888}
2889
2890fn read_index_entries(
2891    git_dir: &Path,
2892    format: ObjectFormat,
2893) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
2894    Ok(read_index_snapshot(git_dir, format)?.entries)
2895}
2896
2897fn read_index_snapshot(git_dir: &Path, format: ObjectFormat) -> Result<IndexSnapshot> {
2898    let index_path = sley_index::repository_index_path(git_dir);
2899    let index_metadata = match fs::metadata(&index_path) {
2900        Ok(metadata) => metadata,
2901        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
2902            return Ok(IndexSnapshot {
2903                entries: BTreeMap::new(),
2904                stat_cache: IndexStatCache::default(),
2905            });
2906        }
2907        Err(err) => return Err(err.into()),
2908    };
2909    let index = Index::parse(&fs::read(&index_path)?, format)?;
2910    let stat_cache =
2911        IndexStatCache::from_index_mtime(&index, sley_index::file_mtime_parts(&index_metadata));
2912    let entries = index
2913        .entries
2914        .into_iter()
2915        .map(|entry| {
2916            (
2917                entry.path.into_bytes(),
2918                TrackedEntry {
2919                    mode: entry.mode,
2920                    oid: entry.oid,
2921                },
2922            )
2923        })
2924        .collect();
2925    Ok(IndexSnapshot {
2926        entries,
2927        stat_cache,
2928    })
2929}
2930
2931trait WorktreeIndexEntry {
2932    fn git_path(&self) -> &[u8];
2933    fn stage(&self) -> sley_index::Stage;
2934    fn mode(&self) -> u32;
2935    fn oid(&self) -> ObjectId;
2936    fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool;
2937}
2938
2939impl WorktreeIndexEntry for sley_index::IndexEntry {
2940    fn git_path(&self) -> &[u8] {
2941        self.path.as_bytes()
2942    }
2943
2944    fn stage(&self) -> sley_index::Stage {
2945        sley_index::IndexEntry::stage(self)
2946    }
2947
2948    fn mode(&self) -> u32 {
2949        self.mode
2950    }
2951
2952    fn oid(&self) -> ObjectId {
2953        self.oid
2954    }
2955
2956    fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool {
2957        stat_cache.reusable_index_entry(self, metadata).is_some()
2958    }
2959}
2960
2961impl WorktreeIndexEntry for sley_index::IndexEntryRef<'_> {
2962    fn git_path(&self) -> &[u8] {
2963        self.path
2964    }
2965
2966    fn stage(&self) -> sley_index::Stage {
2967        sley_index::IndexEntryRef::stage(self)
2968    }
2969
2970    fn mode(&self) -> u32 {
2971        self.mode
2972    }
2973
2974    fn oid(&self) -> ObjectId {
2975        self.oid
2976    }
2977
2978    fn reusable_with(&self, stat_cache: &IndexStatCache, metadata: &fs::Metadata) -> bool {
2979        stat_cache.reusable_index_entry_ref(self, metadata)
2980    }
2981}
2982
2983fn tracked_entry_from_index(entry: &impl WorktreeIndexEntry) -> TrackedEntry {
2984    TrackedEntry {
2985        mode: entry.mode(),
2986        oid: entry.oid(),
2987    }
2988}
2989
2990fn head_tree_entries(
2991    git_dir: &Path,
2992    format: ObjectFormat,
2993    db: &FileObjectDatabase,
2994) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
2995    let refs = FileRefStore::new(git_dir, format);
2996    let Some(head) = refs.read_ref("HEAD")? else {
2997        return Ok(BTreeMap::new());
2998    };
2999    let commit_oid = match head {
3000        RefTarget::Direct(oid) => Some(oid),
3001        RefTarget::Symbolic(name) => match refs.read_ref(&name)? {
3002            Some(RefTarget::Direct(oid)) => Some(oid),
3003            _ => None,
3004        },
3005    };
3006    let Some(commit_oid) = commit_oid else {
3007        return Ok(BTreeMap::new());
3008    };
3009    let object = db.read_object(&commit_oid)?;
3010    if object.object_type != ObjectType::Commit {
3011        return Err(GitError::InvalidObject(format!(
3012            "HEAD {commit_oid} is not a commit"
3013        )));
3014    }
3015    let commit = Commit::parse_ref(format, &object.body)?;
3016    let mut entries = BTreeMap::new();
3017    collect_tree_entries(db, format, &commit.tree, Vec::new(), &mut entries)?;
3018    Ok(entries)
3019}
3020
3021/// Flatten `tree_oid` into `entries` (keyed by `prefix`-rooted full paths),
3022/// adapting the canonical [`flatten_tree`] tuples into [`TrackedEntry`].
3023///
3024/// `flatten_tree` flattens from an empty prefix; each of its paths is rejoined
3025/// under `prefix` with [`join_tree_path`], reproducing the recursive
3026/// prefix-building this helper previously did inline. Used by the full
3027/// (non-pruned) flatten paths: `--find-copies-harder` and the changed-subtree
3028/// add/delete sides of the simultaneous diff walk.
3029fn collect_tree_entries(
3030    db: &FileObjectDatabase,
3031    format: ObjectFormat,
3032    tree_oid: &ObjectId,
3033    prefix: Vec<u8>,
3034    entries: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3035) -> Result<()> {
3036    for (rel_path, (mode, oid)) in flatten_tree(db, format, tree_oid)? {
3037        let path = join_tree_path(&prefix, &rel_path);
3038        entries.insert(path, TrackedEntry { mode, oid });
3039    }
3040    Ok(())
3041}
3042
3043/// Git's mode value for a subtree (directory) entry inside a tree object.
3044const TREE_ENTRY_MODE: u32 = 0o040000;
3045
3046/// Read `tree_oid` and parse it as a tree, erroring if the object is some other
3047/// type. Shared by the simultaneous tree-diff walk so both sides validate the
3048/// object type identically to [`collect_tree_entries`].
3049fn read_tree_object(
3050    db: &FileObjectDatabase,
3051    format: ObjectFormat,
3052    tree_oid: &ObjectId,
3053) -> Result<Tree> {
3054    let object = db.read_object(tree_oid)?;
3055    if object.object_type != ObjectType::Tree {
3056        return Err(GitError::InvalidObject(format!(
3057            "expected tree {tree_oid}, found {}",
3058            object.object_type.as_str()
3059        )));
3060    }
3061    Tree::parse(format, &object.body)
3062}
3063
3064/// Append `name` to `prefix` with a `/` separator (mirroring the path
3065/// construction in [`collect_tree_entries`]), returning the joined path.
3066fn join_tree_path(prefix: &[u8], name: &[u8]) -> Vec<u8> {
3067    let mut path = Vec::with_capacity(prefix.len() + 1 + name.len());
3068    path.extend_from_slice(prefix);
3069    if !path.is_empty() {
3070        path.push(b'/');
3071    }
3072    path.extend_from_slice(name);
3073    path
3074}
3075
3076/// Fully flatten both trees into independent `left`/`right` maps (every blob on
3077/// each side, no pruning). Used only on the `--find-copies-harder` path, where
3078/// copy detection may reach into otherwise-unchanged subtrees for a source.
3079fn collect_full_tree_pair(
3080    db: &FileObjectDatabase,
3081    format: ObjectFormat,
3082    left_tree: &ObjectId,
3083    right_tree: &ObjectId,
3084) -> Result<TrackedEntryPair> {
3085    let mut left = BTreeMap::new();
3086    collect_tree_entries(db, format, left_tree, Vec::new(), &mut left)?;
3087    let mut right = BTreeMap::new();
3088    collect_tree_entries(db, format, right_tree, Vec::new(), &mut right)?;
3089    Ok((left, right))
3090}
3091
3092/// Walk two trees *simultaneously*, collecting into `left` and `right` only the
3093/// blob entries that differ between the two sides — every entry that is present
3094/// and byte-identical (same mode + same OID) on both sides is omitted, and any
3095/// subtree whose OID is identical on both sides is skipped wholesale without
3096/// being read or recursed into. This is the core optimization git relies on to
3097/// make tree diffs cheap: equal subtrees are pruned in O(1).
3098///
3099/// The resulting `left`/`right` maps are exactly the subset of the fully
3100/// flattened maps (as produced by [`collect_tree_entries`]) restricted to the
3101/// paths that participate in an Added/Deleted/Modified change. Because
3102/// [`raw_name_status_changes`] emits nothing for a path that is identical on both
3103/// sides, diffing these pruned maps yields byte-identical name-status output to
3104/// diffing the full maps. (Callers that need the *complete* left map — i.e.
3105/// `--find-copies-harder`, where an unchanged file may be a copy source — must
3106/// still use [`collect_tree_entries`]; see the tree-diff entry points.)
3107fn changed_tree_entries(
3108    db: &FileObjectDatabase,
3109    format: ObjectFormat,
3110    left_tree: &ObjectId,
3111    right_tree: &ObjectId,
3112) -> Result<TrackedEntryPair> {
3113    let mut left = BTreeMap::new();
3114    let mut right = BTreeMap::new();
3115    // Identical root trees produce no changes at all and need not be read.
3116    if left_tree != right_tree {
3117        diff_tree_pair(
3118            db,
3119            format,
3120            left_tree,
3121            right_tree,
3122            &[],
3123            &mut left,
3124            &mut right,
3125        )?;
3126    }
3127    Ok((left, right))
3128}
3129
3130/// Recursively diff two subtrees rooted at `prefix`, appending differing blob
3131/// entries to `left` / `right`. Invariant: the two OIDs are already known to
3132/// differ (identical subtrees are pruned by the caller before recursing).
3133fn diff_tree_pair(
3134    db: &FileObjectDatabase,
3135    format: ObjectFormat,
3136    left_tree: &ObjectId,
3137    right_tree: &ObjectId,
3138    prefix: &[u8],
3139    left: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3140    right: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3141) -> Result<()> {
3142    let left_entries = read_tree_object(db, format, left_tree)?.entries;
3143    let right_entries = read_tree_object(db, format, right_tree)?.entries;
3144
3145    // Index the right side by name so the union of names can be walked without
3146    // relying on git's directory-aware entry ordering. (Iterating the union of
3147    // names, rather than a positional merge, keeps correctness independent of
3148    // entry order.)
3149    let mut right_by_name: HashMap<&[u8], &TreeEntry> = HashMap::with_capacity(right_entries.len());
3150    for entry in &right_entries {
3151        right_by_name.insert(entry.name.as_bytes(), entry);
3152    }
3153
3154    for left_entry in &left_entries {
3155        match right_by_name.remove(left_entry.name.as_bytes()) {
3156            Some(right_entry) => {
3157                merge_tree_entry(
3158                    db,
3159                    format,
3160                    prefix,
3161                    Some(left_entry),
3162                    Some(right_entry),
3163                    left,
3164                    right,
3165                )?;
3166            }
3167            None => {
3168                merge_tree_entry(db, format, prefix, Some(left_entry), None, left, right)?;
3169            }
3170        }
3171    }
3172    // Names only present on the right are pure additions.
3173    for right_entry in &right_entries {
3174        if right_by_name.contains_key(right_entry.name.as_bytes()) {
3175            merge_tree_entry(db, format, prefix, None, Some(right_entry), left, right)?;
3176        }
3177    }
3178    Ok(())
3179}
3180
3181/// Reconcile a single name that may appear on the left side, the right side, or
3182/// both, recording any resulting blob change(s) into `left` / `right`. This
3183/// reproduces exactly the union-of-flattened-maps semantics:
3184///
3185/// * tree vs tree with equal OID -> pruned (no read, no recursion);
3186/// * tree vs tree with differing OID -> recurse;
3187/// * blob vs blob, equal mode+OID -> unchanged, emitted nowhere;
3188/// * blob vs blob, differing mode or OID -> both sides recorded (a Modify);
3189/// * a tree on one side and a non-tree on the other (or a name present on only
3190///   one side) -> the flattened paths differ (`name/...` vs `name`), so the two
3191///   are unrelated: the tree side is flattened wholesale and the blob side is
3192///   recorded independently (an Add and/or a Delete).
3193fn merge_tree_entry(
3194    db: &FileObjectDatabase,
3195    format: ObjectFormat,
3196    prefix: &[u8],
3197    left_entry: Option<&TreeEntry>,
3198    right_entry: Option<&TreeEntry>,
3199    left: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3200    right: &mut BTreeMap<Vec<u8>, TrackedEntry>,
3201) -> Result<()> {
3202    let left_is_tree = left_entry.is_some_and(|entry| entry.mode == TREE_ENTRY_MODE);
3203    let right_is_tree = right_entry.is_some_and(|entry| entry.mode == TREE_ENTRY_MODE);
3204
3205    if let (Some(left_entry), Some(right_entry)) = (left_entry, right_entry) {
3206        if left_is_tree && right_is_tree {
3207            // Two subtrees under the same name: prune if identical, else recurse.
3208            if left_entry.oid == right_entry.oid {
3209                return Ok(());
3210            }
3211            let path = join_tree_path(prefix, left_entry.name.as_bytes());
3212            return diff_tree_pair(
3213                db,
3214                format,
3215                &left_entry.oid,
3216                &right_entry.oid,
3217                &path,
3218                left,
3219                right,
3220            );
3221        }
3222        if !left_is_tree && !right_is_tree {
3223            // Two blobs under the same name. Identical mode+OID means unchanged
3224            // (nothing emitted); otherwise both sides are recorded so the diff
3225            // sees a Modify, matching the full-map `left != right` comparison.
3226            if left_entry.mode == right_entry.mode && left_entry.oid == right_entry.oid {
3227                return Ok(());
3228            }
3229            let path = join_tree_path(prefix, left_entry.name.as_bytes());
3230            left.insert(
3231                path.clone(),
3232                TrackedEntry {
3233                    mode: left_entry.mode,
3234                    oid: left_entry.oid,
3235                },
3236            );
3237            right.insert(
3238                path,
3239                TrackedEntry {
3240                    mode: right_entry.mode,
3241                    oid: right_entry.oid,
3242                },
3243            );
3244            return Ok(());
3245        }
3246        // Mixed: tree on one side, blob on the other. Their flattened paths
3247        // never collide, so handle each side as if the name existed only there.
3248    }
3249
3250    // Left side (if any): record as deletions.
3251    if let Some(left_entry) = left_entry {
3252        let path = join_tree_path(prefix, left_entry.name.as_bytes());
3253        if left_is_tree {
3254            collect_tree_entries(db, format, &left_entry.oid, path, left)?;
3255        } else {
3256            left.insert(
3257                path,
3258                TrackedEntry {
3259                    mode: left_entry.mode,
3260                    oid: left_entry.oid,
3261                },
3262            );
3263        }
3264    }
3265    // Right side (if any): record as additions.
3266    if let Some(right_entry) = right_entry {
3267        let path = join_tree_path(prefix, right_entry.name.as_bytes());
3268        if right_is_tree {
3269            collect_tree_entries(db, format, &right_entry.oid, path, right)?;
3270        } else {
3271            right.insert(
3272                path,
3273                TrackedEntry {
3274                    mode: right_entry.mode,
3275                    oid: right_entry.oid,
3276                },
3277            );
3278        }
3279    }
3280    Ok(())
3281}
3282
3283fn index_gitlinks(index: &BTreeMap<Vec<u8>, TrackedEntry>) -> BTreeMap<Vec<u8>, ObjectId> {
3284    index
3285        .iter()
3286        .filter(|(_, entry)| entry.mode == 0o160000)
3287        .map(|(path, entry)| (path.clone(), entry.oid))
3288        .collect()
3289}
3290
3291fn candidate_path_set<'a>(candidate_paths: impl Iterator<Item = &'a Vec<u8>>) -> BTreeSet<Vec<u8>> {
3292    candidate_paths.cloned().collect()
3293}
3294
3295fn worktree_entries_for_path_set(
3296    worktree_root: &Path,
3297    format: ObjectFormat,
3298    candidates: &BTreeSet<Vec<u8>>,
3299    index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
3300    stat_cache: Option<&IndexStatCache>,
3301) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
3302    worktree_entries_for_unique_paths(
3303        worktree_root,
3304        format,
3305        candidates.iter(),
3306        index_gitlinks,
3307        stat_cache,
3308    )
3309}
3310
3311fn worktree_entries_for_unique_paths<'a>(
3312    worktree_root: &Path,
3313    format: ObjectFormat,
3314    candidates: impl Iterator<Item = &'a Vec<u8>>,
3315    index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
3316    stat_cache: Option<&IndexStatCache>,
3317) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
3318    let mut entries = BTreeMap::new();
3319    for git_path in candidates {
3320        if let Some(entry) =
3321            worktree_entry_for_path(worktree_root, format, &git_path, index_gitlinks, stat_cache)?
3322        {
3323            entries.insert(git_path.clone(), entry);
3324        }
3325    }
3326    Ok(entries)
3327}
3328
3329fn worktree_entry_for_path(
3330    worktree_root: &Path,
3331    format: ObjectFormat,
3332    git_path: &[u8],
3333    index_gitlinks: &BTreeMap<Vec<u8>, ObjectId>,
3334    stat_cache: Option<&IndexStatCache>,
3335) -> Result<Option<TrackedEntry>> {
3336    let path = worktree_path_for_repo_path(worktree_root, git_path);
3337    let metadata = match fs::symlink_metadata(&path) {
3338        Ok(metadata) => metadata,
3339        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
3340        Err(err) => return Err(GitError::Io(err.to_string())),
3341    };
3342    let file_type = metadata.file_type();
3343    if let Some(staged_oid) = index_gitlinks.get(git_path)
3344        && metadata.is_dir()
3345    {
3346        let oid = gitlink_head_oid(&path, format).unwrap_or(*staged_oid);
3347        return Ok(Some(TrackedEntry {
3348            mode: 0o160000,
3349            oid,
3350        }));
3351    }
3352    if metadata.is_dir() {
3353        if let Some(oid) = gitlink_head_oid(&path, format) {
3354            return Ok(Some(TrackedEntry {
3355                mode: 0o160000,
3356                oid,
3357            }));
3358        }
3359        return Ok(None);
3360    }
3361    if !(metadata.is_file() || file_type.is_symlink()) {
3362        return Ok(None);
3363    }
3364    if let Some(entry) = stat_cache.and_then(|cache| cache.reusable_entry(git_path, &metadata)) {
3365        return Ok(Some(tracked_entry_from_index(entry)));
3366    }
3367    let body = if file_type.is_symlink() {
3368        symlink_target_bytes(&path)?
3369    } else {
3370        fs::read(&path)?
3371    };
3372    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
3373    let mode = if file_type.is_symlink() {
3374        0o120000
3375    } else {
3376        file_mode(&metadata)
3377    };
3378    Ok(Some(TrackedEntry { mode, oid }))
3379}
3380
3381fn index_worktree_change_for_entry(
3382    path: &Path,
3383    format: ObjectFormat,
3384    index_entry: &impl WorktreeIndexEntry,
3385    stat_cache: &IndexStatCache,
3386) -> Result<Option<NameStatusEntry>> {
3387    let git_path = index_entry.git_path();
3388    let metadata = match fs::symlink_metadata(path) {
3389        Ok(metadata) => metadata,
3390        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
3391            return Ok(Some(index_worktree_deleted_entry(index_entry)));
3392        }
3393        Err(err) => return Err(GitError::Io(err.to_string())),
3394    };
3395    let file_type = metadata.file_type();
3396    let right = if metadata.is_dir() {
3397        if index_entry.mode() == 0o160000 {
3398            let oid = gitlink_head_oid(path, format).unwrap_or(index_entry.oid());
3399            Some(TrackedEntry {
3400                mode: 0o160000,
3401                oid,
3402            })
3403        } else if let Some(oid) = gitlink_head_oid(path, format) {
3404            Some(TrackedEntry {
3405                mode: 0o160000,
3406                oid,
3407            })
3408        } else {
3409            None
3410        }
3411    } else if metadata.is_file() || file_type.is_symlink() {
3412        if index_entry.reusable_with(stat_cache, &metadata) {
3413            return Ok(None);
3414        }
3415        let body = if file_type.is_symlink() {
3416            symlink_target_bytes(path)?
3417        } else {
3418            fs::read(path)?
3419        };
3420        let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
3421        let mode = if file_type.is_symlink() {
3422            0o120000
3423        } else {
3424            file_mode(&metadata)
3425        };
3426        Some(TrackedEntry { mode, oid })
3427    } else {
3428        None
3429    };
3430    let Some(right) = right else {
3431        return Ok(Some(index_worktree_deleted_entry(index_entry)));
3432    };
3433    let left = tracked_entry_from_index(index_entry);
3434    if right == left {
3435        return Ok(None);
3436    }
3437    Ok(Some(NameStatusEntry {
3438        status: NameStatus::Modified,
3439        path: git_path.to_vec().into(),
3440        old_path: None,
3441        old_mode: Some(left.mode),
3442        new_mode: Some(right.mode),
3443        old_oid: Some(left.oid),
3444        new_oid: Some(right.oid),
3445    }))
3446}
3447
3448fn index_worktree_deleted_entry(index_entry: &impl WorktreeIndexEntry) -> NameStatusEntry {
3449    NameStatusEntry {
3450        status: NameStatus::Deleted,
3451        path: index_entry.git_path().to_vec().into(),
3452        old_path: None,
3453        old_mode: Some(index_entry.mode()),
3454        new_mode: None,
3455        old_oid: Some(index_entry.oid()),
3456        new_oid: None,
3457    }
3458}
3459
3460fn worktree_blob_cache_for_path_set(
3461    worktree_root: &Path,
3462    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3463    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3464    candidate_paths: &BTreeSet<Vec<u8>>,
3465    options: RenameDetectionOptions,
3466) -> Result<HashMap<ObjectId, Vec<u8>>> {
3467    worktree_blob_cache_for_unique_paths(
3468        worktree_root,
3469        left_entries,
3470        right_entries,
3471        candidate_paths.iter(),
3472        options,
3473    )
3474}
3475
3476fn worktree_blob_cache_for_unique_paths<'a>(
3477    worktree_root: &Path,
3478    left_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3479    right_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
3480    candidate_paths: impl Iterator<Item = &'a Vec<u8>>,
3481    options: RenameDetectionOptions,
3482) -> Result<HashMap<ObjectId, Vec<u8>>> {
3483    if !options.detect_inexact || !(options.base.detect_renames || options.base.detect_copies) {
3484        return Ok(HashMap::new());
3485    }
3486    let base = options.base;
3487    let mut changes =
3488        raw_name_status_changes_for_unique_paths(left_entries, right_entries, candidate_paths);
3489    if base.detect_renames {
3490        changes = detect_exact_renames(changes, left_entries, right_entries, base.rename_empty);
3491    }
3492    if base.detect_copies {
3493        changes = detect_exact_copies(
3494            changes,
3495            left_entries,
3496            right_entries,
3497            base.find_copies_harder,
3498            base.rename_empty,
3499        );
3500    }
3501    let has_rename_source = base.detect_renames
3502        && changes.iter().any(|entry| {
3503            entry.status == NameStatus::Deleted
3504                && entry
3505                    .old_oid
3506                    .as_ref()
3507                    .is_some_and(|oid| base.rename_empty || !is_empty_blob_oid(oid))
3508        });
3509    let has_copy_source = base.detect_copies
3510        && (base.find_copies_harder
3511            || changes
3512                .iter()
3513                .any(|entry| matches!(entry.status, NameStatus::Deleted | NameStatus::Modified)));
3514    if !has_rename_source && !has_copy_source {
3515        return Ok(HashMap::new());
3516    }
3517    let candidate_oids = changes
3518        .iter()
3519        .filter(|entry| entry.status == NameStatus::Added)
3520        .filter_map(|entry| entry.new_oid)
3521        .filter(|oid| base.rename_empty || !is_empty_blob_oid(oid))
3522        .collect::<BTreeSet<_>>();
3523    if candidate_oids.is_empty() {
3524        return Ok(HashMap::new());
3525    }
3526    let mut cache = HashMap::new();
3527    for (git_path, entry) in right_entries {
3528        if entry.mode == 0o160000 || !candidate_oids.contains(&entry.oid) {
3529            continue;
3530        }
3531        let path = worktree_path_for_repo_path(worktree_root, git_path);
3532        let body = if entry.mode == 0o120000 {
3533            symlink_target_bytes(&path)?
3534        } else {
3535            fs::read(&path)?
3536        };
3537        cache.entry(entry.oid).or_insert(body);
3538    }
3539    Ok(cache)
3540}
3541
3542/// A blob fetcher that consults an in-memory `oid -> bytes` cache first (e.g.
3543/// freshly-read worktree files) and falls back to the object database.
3544fn cache_or_odb_blob(
3545    cache: &HashMap<ObjectId, Vec<u8>>,
3546    db: &FileObjectDatabase,
3547    oid: &ObjectId,
3548) -> Option<Vec<u8>> {
3549    if let Some(bytes) = cache.get(oid) {
3550        return Some(bytes.clone());
3551    }
3552    read_blob_bytes(db, oid)
3553}
3554
3555#[cfg(unix)]
3556fn worktree_path_for_repo_path(worktree_root: &Path, path: &[u8]) -> PathBuf {
3557    use std::ffi::OsStr;
3558    use std::os::unix::ffi::OsStrExt;
3559
3560    let mut out = PathBuf::from(worktree_root);
3561    out.push(OsStr::from_bytes(path));
3562    out
3563}
3564
3565#[cfg(unix)]
3566fn worktree_path_for_repo_path_into(out: &mut PathBuf, worktree_root: &Path, path: &[u8]) {
3567    use std::ffi::OsStr;
3568    use std::os::unix::ffi::OsStrExt;
3569
3570    out.clear();
3571    out.push(worktree_root);
3572    out.push(OsStr::from_bytes(path));
3573}
3574
3575#[cfg(not(unix))]
3576fn worktree_path_for_repo_path(worktree_root: &Path, path: &[u8]) -> PathBuf {
3577    worktree_root.join(repo_path_to_path(path))
3578}
3579
3580#[cfg(not(unix))]
3581fn worktree_path_for_repo_path_into(out: &mut PathBuf, worktree_root: &Path, path: &[u8]) {
3582    out.clear();
3583    out.push(worktree_root);
3584    out.push(repo_path_to_path(path));
3585}
3586
3587#[cfg(not(unix))]
3588fn repo_path_to_path(path: &[u8]) -> PathBuf {
3589    let mut out = PathBuf::new();
3590    for component in String::from_utf8_lossy(path).split('/') {
3591        if !component.is_empty() {
3592            out.push(component);
3593        }
3594    }
3595    out
3596}
3597
3598#[cfg(unix)]
3599fn file_mode(metadata: &fs::Metadata) -> u32 {
3600    use std::os::unix::fs::PermissionsExt;
3601    if metadata.permissions().mode() & 0o111 != 0 {
3602        0o100755
3603    } else {
3604        0o100644
3605    }
3606}
3607
3608#[cfg(not(unix))]
3609fn file_mode(_metadata: &fs::Metadata) -> u32 {
3610    0o100644
3611}
3612
3613#[cfg(unix)]
3614fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
3615    use std::os::unix::ffi::OsStrExt;
3616    let target = fs::read_link(path)?;
3617    Ok(target.as_os_str().as_bytes().to_vec())
3618}
3619
3620#[cfg(not(unix))]
3621fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
3622    let target = fs::read_link(path)?;
3623    Ok(target.to_string_lossy().replace('\\', "/").into_bytes())
3624}
3625
3626// ---------------------------------------------------------------------------
3627// Unified / git diff patch parsing and application (engine for `git apply`/`git am`).
3628//
3629// Operates purely on in-memory byte buffers; the caller is responsible for
3630// reading/writing blobs from the working tree or the object database. The
3631// parser understands the textual format git produces (`diff --git`, `---`/`+++`
3632// file headers, `@@` hunk headers, context/`+`/`-` body lines, the
3633// `\ No newline at end of file` marker, `/dev/null` for added/deleted files,
3634// file mode headers, and `rename from`/`rename to` headers).
3635// ---------------------------------------------------------------------------
3636
3637/// A single line inside a hunk. The stored bytes never include the trailing
3638/// line terminator; whether the line is terminated by `\n` is tracked
3639/// separately on the [`Hunk`] (see [`Hunk::old_no_newline`] /
3640/// [`Hunk::new_no_newline`]) so the no-final-newline case can be reproduced
3641/// byte-for-byte.
3642#[derive(Debug, Clone, PartialEq, Eq)]
3643pub enum HunkLine {
3644    /// A line present in both the old and new versions.
3645    Context(Vec<u8>),
3646    /// A line added by the patch (present only in the new version).
3647    Insert(Vec<u8>),
3648    /// A line removed by the patch (present only in the old version).
3649    Delete(Vec<u8>),
3650}
3651
3652impl HunkLine {
3653    /// The line content, without any trailing newline.
3654    pub fn content(&self) -> &[u8] {
3655        match self {
3656            Self::Context(bytes) | Self::Insert(bytes) | Self::Delete(bytes) => bytes,
3657        }
3658    }
3659}
3660
3661/// A single `@@ -old_start,old_len +new_start,new_len @@` hunk.
3662///
3663/// `old_start` / `new_start` are 1-based line numbers as they appear in the
3664/// patch header. The `*_no_newline` flags record that the final line on that
3665/// side of the hunk is *not* terminated by a newline (the `\ No newline at end
3666/// of file` marker).
3667#[derive(Debug, Clone, PartialEq, Eq)]
3668pub struct Hunk {
3669    pub old_start: usize,
3670    pub old_len: usize,
3671    pub new_start: usize,
3672    pub new_len: usize,
3673    pub lines: Vec<HunkLine>,
3674    /// The last context/deleted line of the old file lacks a trailing newline.
3675    pub old_no_newline: bool,
3676    /// The last context/inserted line of the new file lacks a trailing newline.
3677    pub new_no_newline: bool,
3678}
3679
3680/// A patch targeting a single file. Produced by [`parse_unified_patch`].
3681#[derive(Debug, Clone, PartialEq, Eq)]
3682pub struct FilePatch {
3683    /// Path on the `a/` (old) side, or `None` for a newly created file.
3684    pub old_path: Option<Vec<u8>>,
3685    /// Path on the `b/` (new) side, or `None` for a deleted file.
3686    pub new_path: Option<Vec<u8>>,
3687    /// Mode of the old file, when a mode header was present.
3688    pub old_mode: Option<u32>,
3689    /// Mode of the new file, when a mode header was present.
3690    pub new_mode: Option<u32>,
3691    pub hunks: Vec<Hunk>,
3692    /// The patch creates a new file (`--- /dev/null` / `new file mode`).
3693    pub is_new: bool,
3694    /// The patch deletes the file (`+++ /dev/null` / `deleted file mode`).
3695    pub is_delete: bool,
3696    /// The patch renames the file (`rename from`/`rename to`).
3697    pub is_rename: bool,
3698}
3699
3700/// Outcome of applying a [`FilePatch`] to a base buffer.
3701#[derive(Debug, Clone, PartialEq, Eq)]
3702pub enum ApplyOutcome {
3703    /// The patch applied cleanly; carries the resulting file bytes.
3704    Applied(Vec<u8>),
3705    /// At least one hunk's context/deleted lines did not match the base.
3706    Rejected,
3707}
3708
3709/// The minimum number of context lines git's `apply` insists on keeping when
3710/// it tries to fuzz a hunk into place — git's `apply_state.p_context`, which is
3711/// initialised to `UINT_MAX` (the `-C<n>` option lowers it). The fuzz loop in
3712/// `apply_one_fragment` stops the moment both leading and trailing context have
3713/// been reduced to this floor; with the default `UINT_MAX` floor that test is
3714/// already satisfied on the first failure, so **the default `git apply` / `git
3715/// am` path does no context fuzz and no begin/end relaxation at all** — a hunk
3716/// whose full preimage does not match at a valid position is simply rejected.
3717/// We keep the floor configurable so the structure mirrors git's, but the
3718/// shared apply engine only ever runs with the default.
3719const MIN_FUZZ_CONTEXT: usize = usize::MAX;
3720
3721/// Parse a unified/git diff into one [`FilePatch`] per file it touches.
3722///
3723/// The parser is intentionally lenient about leading commentary (commit
3724/// messages, `index <oid>..<oid>` lines, etc.): anything that is not part of a
3725/// recognised header or hunk body is skipped. It errors only on structurally
3726/// invalid hunks (bad `@@` headers, body lines that overflow the declared hunk
3727/// counts, or hunk bodies that appear with no preceding file header).
3728pub fn parse_unified_patch(input: &[u8]) -> Result<Vec<FilePatch>> {
3729    let lines = split_patch_lines(input);
3730    let mut parser = PatchParser {
3731        lines: &lines,
3732        index: 0,
3733    };
3734    parser.parse()
3735}
3736
3737/// Apply a single-file patch to `base`, returning the patched bytes.
3738///
3739/// This mirrors git's `apply.c` (`apply_one_fragment` / `find_pos` /
3740/// `match_fragment`) for the default, no-whitespace-fuzz settings `git am`
3741/// and `git apply` use:
3742///
3743/// * Each hunk builds a *preimage* (context + deleted lines) and *postimage*
3744///   (context + inserted lines).
3745/// * A hunk anchored at the file start (`old_start <= 1`) must match the
3746///   beginning of the file (`match_beginning`); a hunk with no trailing context
3747///   must match the end of the file (`match_end`).
3748/// * The full preimage is matched byte-for-byte; the search starts at the
3749///   recorded position and ping-pongs outward across the whole image.
3750/// * Fuzz is applied *only* by dropping leading/trailing context lines (never
3751///   by jumping to a spurious context-only match); if no position matches even
3752///   after dropping all context, the hunk — and thus the whole patch — is
3753///   [`ApplyOutcome::Rejected`].
3754///
3755/// Rejecting (rather than spuriously applying at a wrong offset) is what lets
3756/// `git am -3` correctly fall back to its 3-way merge path.
3757///
3758/// New-file patches (empty/ignored base) and the no-final-newline case are
3759/// handled byte-accurately. Clean exact-position applies are byte-identical to
3760/// the previous behaviour.
3761pub fn apply_file_patch(base: &[u8], patch: &FilePatch) -> ApplyOutcome {
3762    // A pure deletion with no hunks yields an empty file.
3763    if patch.is_delete && patch.hunks.is_empty() {
3764        return ApplyOutcome::Applied(Vec::new());
3765    }
3766    // A new file: the only sensible base is empty; ignore whatever was passed
3767    // and build the result from the inserted lines.
3768    let base_for_match: &[u8] = if patch.is_new { b"" } else { base };
3769
3770    // The "image" git mutates as each hunk applies. We splice in place so later
3771    // hunks see the effect of earlier ones (git carries the running offset for
3772    // the same reason).
3773    let mut image = split_blob_lines(base_for_match);
3774
3775    // git seeds the search for hunk N at `newpos-1` *plus* the offset earlier
3776    // hunks drifted by, so a uniform shift only costs the search once.
3777    let mut running_offset: isize = 0;
3778
3779    for hunk in &patch.hunks {
3780        match apply_one_hunk(&mut image, hunk, running_offset) {
3781            Some(drift) => running_offset += drift,
3782            None => return ApplyOutcome::Rejected,
3783        }
3784    }
3785
3786    ApplyOutcome::Applied(join_lines(&image))
3787}
3788
3789/// Splice a single hunk into `image`, returning the offset (applied position −
3790/// expected position) so later hunks can carry it forward, or `None` if the
3791/// hunk cannot be located (which rejects the whole patch).
3792///
3793/// Faithful to git's `apply_one_fragment`: build preimage/postimage, try the
3794/// full preimage at progressively-reduced context, and on a match replace the
3795/// matched preimage region with the postimage.
3796fn apply_one_hunk(image: &mut Vec<Line>, hunk: &Hunk, running_offset: isize) -> Option<isize> {
3797    // preimage = context + deletes (the old side we must find in the image).
3798    // postimage = context + inserts (what replaces it). They share their
3799    // leading/trailing *context* runs, which fuzz peels off symmetrically.
3800    let mut preimage: Vec<Line> = Vec::new();
3801    let mut postimage: Vec<Line> = Vec::new();
3802    let mut leading = 0usize; // context lines before the first +/-
3803    let mut trailing = 0usize; // context lines after the last +/-
3804    let mut seen_change = false;
3805    for hl in &hunk.lines {
3806        match hl {
3807            HunkLine::Context(bytes) => {
3808                preimage.push(Line {
3809                    content: bytes.clone(),
3810                    no_newline: false,
3811                });
3812                postimage.push(Line {
3813                    content: bytes.clone(),
3814                    no_newline: false,
3815                });
3816                if !seen_change {
3817                    leading += 1;
3818                }
3819                trailing += 1;
3820            }
3821            HunkLine::Delete(bytes) => {
3822                preimage.push(Line {
3823                    content: bytes.clone(),
3824                    no_newline: false,
3825                });
3826                seen_change = true;
3827                trailing = 0;
3828            }
3829            HunkLine::Insert(bytes) => {
3830                postimage.push(Line {
3831                    content: bytes.clone(),
3832                    no_newline: false,
3833                });
3834                seen_change = true;
3835                trailing = 0;
3836            }
3837        }
3838    }
3839
3840    // Mark the no-final-newline state on the last preimage/postimage line so the
3841    // exact-match check and the spliced result reproduce a missing terminal
3842    // newline byte-for-byte.
3843    if hunk.old_no_newline && let Some(last) = preimage.last_mut() {
3844        last.no_newline = true;
3845    }
3846    if hunk.new_no_newline && let Some(last) = postimage.last_mut() {
3847        last.no_newline = true;
3848    }
3849
3850    // A hunk that is `@@ -1,L ... @@` (or `@@ -0,0 ... @@` for an add-to-empty)
3851    // must match the beginning. A hunk with no trailing context must match the
3852    // end. (`git am`/`apply` do not pass `--unidiff-zero`, so old_start == 1
3853    // still implies match_beginning.)
3854    let mut match_beginning = hunk.old_start <= 1;
3855    let mut match_end = trailing == 0;
3856
3857    // git anchors the search at `newpos-1` (0-based), carried by the running
3858    // offset from earlier hunks. The anchor (`pos` in git) shifts up whenever a
3859    // *leading* context line is peeled, because the preimage then begins one
3860    // line later in its own content.
3861    let mut expected = expected_position(hunk, running_offset);
3862    // The full hunk's expected position never moves, so the returned drift is
3863    // measured against it (not the context-reduced anchor).
3864    let hunk_expected = expected;
3865
3866    loop {
3867        if let Some(pos) = find_hunk_pos(image, &preimage, expected, match_beginning, match_end) {
3868            // Splice: drop the matched preimage lines, insert the postimage.
3869            let take = preimage.len();
3870            let replacement: Vec<Line> = postimage.clone();
3871            image.splice(pos..pos + take, replacement);
3872            return Some(pos as isize - hunk_expected);
3873        }
3874
3875        // No position matched. Mirror git's guard *order* exactly: it first
3876        // checks whether context is already at the floor (`p_context`) and, if
3877        // so, gives up BEFORE relaxing match_beginning/match_end or peeling
3878        // context. With the default `UINT_MAX` floor this fires on the very
3879        // first failure, so the default path never fuzzes and never relaxes the
3880        // begin/end anchors — it rejects. (The comparison is intentionally
3881        // against the floor so the structure stays faithful to git even though
3882        // the default floor makes it unconditionally true.)
3883        #[allow(clippy::absurd_extreme_comparisons)]
3884        if leading <= MIN_FUZZ_CONTEXT && trailing <= MIN_FUZZ_CONTEXT {
3885            return None;
3886        }
3887
3888        // git relaxes the begin/end anchors before peeling context: a hunk that
3889        // "must match the start/end" but didn't is retried free-floating first.
3890        if match_beginning || match_end {
3891            match_beginning = false;
3892            match_end = false;
3893            continue;
3894        }
3895
3896        // Reduce context: peel the larger side (both if equal), exactly as git.
3897        if leading >= trailing {
3898            // Drop the first context line from pre+post; the anchor slides up.
3899            preimage.remove(0);
3900            postimage.remove(0);
3901            expected -= 1;
3902            leading -= 1;
3903        }
3904        if trailing > leading {
3905            preimage.pop();
3906            postimage.pop();
3907            trailing -= 1;
3908        }
3909    }
3910}
3911
3912/// A line with its content (sans terminator) and whether it is newline-terminated.
3913#[derive(Debug, Clone, PartialEq, Eq)]
3914struct Line {
3915    content: Vec<u8>,
3916    no_newline: bool,
3917}
3918
3919/// Split a blob into [`Line`]s. A trailing `\n` does not produce an empty final
3920/// line; instead the last real line is marked `no_newline = false`. A file that
3921/// does not end in `\n` marks its final line `no_newline = true`. An empty blob
3922/// produces no lines.
3923fn split_blob_lines(data: &[u8]) -> Vec<Line> {
3924    let mut lines = Vec::new();
3925    let mut start = 0usize;
3926    while start < data.len() {
3927        match data[start..].iter().position(|&b| b == b'\n') {
3928            Some(rel) => {
3929                let end = start + rel;
3930                lines.push(Line {
3931                    content: data[start..end].to_vec(),
3932                    no_newline: false,
3933                });
3934                start = end + 1;
3935            }
3936            None => {
3937                lines.push(Line {
3938                    content: data[start..].to_vec(),
3939                    no_newline: true,
3940                });
3941                start = data.len();
3942            }
3943        }
3944    }
3945    lines
3946}
3947
3948/// Reassemble lines into a byte buffer, honouring per-line newline state.
3949fn join_lines(lines: &[Line]) -> Vec<u8> {
3950    let mut out = Vec::new();
3951    for line in lines {
3952        out.extend_from_slice(&line.content);
3953        if !line.no_newline {
3954            out.push(b'\n');
3955        }
3956    }
3957    out
3958}
3959
3960/// The naive 0-based position where a hunk expects to apply, given the running
3961/// offset accumulated from earlier hunks.
3962fn expected_position(hunk: &Hunk, running_offset: isize) -> isize {
3963    // `old_start` is 1-based; an empty old side (new-file hunk) uses 0.
3964    let base = if hunk.old_start == 0 {
3965        0
3966    } else {
3967        hunk.old_start as isize - 1
3968    };
3969    base + running_offset
3970}
3971
3972/// Find the 0-based line index in `image` where `preimage` (the hunk's context
3973/// + deleted lines, possibly already context-reduced by fuzz) matches.
3974///
3975/// Port of git's `find_pos`: start the search at `expected` (clamped, or forced
3976/// to 0/end when `match_beginning`/`match_end`), then ping-pong outward across
3977/// the *whole* image — backward and forward alternately — until both ends are
3978/// exhausted. Returns the first matching line index, or `None`.
3979fn find_hunk_pos(
3980    image: &[Line],
3981    preimage: &[Line],
3982    expected: isize,
3983    match_beginning: bool,
3984    match_end: bool,
3985) -> Option<usize> {
3986    let line_nr = image.len();
3987    let pre_nr = preimage.len();
3988
3989    // git: if we must match the beginning, start at 0; if we must match the
3990    // end, start where the preimage would end exactly at EOF.
3991    let mut line: isize = if match_beginning {
3992        0
3993    } else if match_end {
3994        line_nr as isize - pre_nr as isize
3995    } else {
3996        expected
3997    };
3998    if line < 0 {
3999        line = 0;
4000    }
4001    if line as usize > line_nr {
4002        line = line_nr as isize;
4003    }
4004
4005    let start = line as usize;
4006    let mut backwards = start;
4007    let mut forwards = start;
4008    let mut current = start;
4009
4010    let mut i: u64 = 0;
4011    loop {
4012        if preimage_matches_at(image, preimage, current, match_beginning, match_end) {
4013            return Some(current);
4014        }
4015
4016        loop {
4017            // Both ends exhausted: no match anywhere.
4018            if backwards == 0 && forwards == line_nr {
4019                return None;
4020            }
4021            if i & 1 == 1 {
4022                // Step backward.
4023                if backwards == 0 {
4024                    i += 1;
4025                    continue;
4026                }
4027                backwards -= 1;
4028                current = backwards;
4029            } else {
4030                // Step forward.
4031                if forwards == line_nr {
4032                    i += 1;
4033                    continue;
4034                }
4035                forwards += 1;
4036                current = forwards;
4037            }
4038            break;
4039        }
4040        i += 1;
4041    }
4042}
4043
4044/// Whether `preimage` matches `image` starting at line `pos`.
4045///
4046/// Port of git's `match_fragment` for the default (no whitespace-fuzz) path:
4047/// a byte-exact full-preimage match. Honours `match_beginning` (pos must be 0)
4048/// and `match_end` (the preimage must reach *exactly* the end of the image),
4049/// and reproduces git's terminal-newline semantics — a preimage line marked
4050/// "no newline" only matches when it is the image's final line and that line is
4051/// itself newline-free.
4052fn preimage_matches_at(
4053    image: &[Line],
4054    preimage: &[Line],
4055    pos: usize,
4056    match_beginning: bool,
4057    match_end: bool,
4058) -> bool {
4059    if match_beginning && pos != 0 {
4060        return false;
4061    }
4062    // The whole preimage must fall within the image.
4063    if pos + preimage.len() > image.len() {
4064        return false;
4065    }
4066    if match_end && pos + preimage.len() != image.len() {
4067        return false;
4068    }
4069    for (i, pre) in preimage.iter().enumerate() {
4070        let img = &image[pos + i];
4071        if img.content != pre.content {
4072            return false;
4073        }
4074        // git compares the raw byte buffers, so a missing terminal newline on
4075        // either side only matches the other when both agree. A preimage line
4076        // that lacks a newline can only sit on the image's final line (which
4077        // must itself lack one); a preimage line that *has* a newline cannot
4078        // match a newline-free image line.
4079        if pre.no_newline != img.no_newline {
4080            return false;
4081        }
4082    }
4083    true
4084}
4085
4086/// Split raw patch bytes into lines, preserving the *content* without the
4087/// trailing `\n` (a final unterminated line is kept). Carriage returns are kept
4088/// as-is so CRLF patch bodies round-trip.
4089fn split_patch_lines(input: &[u8]) -> Vec<&[u8]> {
4090    let mut lines = Vec::new();
4091    let mut start = 0usize;
4092    while start < input.len() {
4093        match input[start..].iter().position(|&b| b == b'\n') {
4094            Some(rel) => {
4095                let end = start + rel;
4096                lines.push(&input[start..end]);
4097                start = end + 1;
4098            }
4099            None => {
4100                lines.push(&input[start..]);
4101                start = input.len();
4102            }
4103        }
4104    }
4105    lines
4106}
4107
4108struct PatchParser<'a> {
4109    lines: &'a [&'a [u8]],
4110    index: usize,
4111}
4112
4113impl<'a> PatchParser<'a> {
4114    fn parse(&mut self) -> Result<Vec<FilePatch>> {
4115        let mut patches = Vec::new();
4116        while self.index < self.lines.len() {
4117            let line = self.lines[self.index];
4118            if line.starts_with(b"diff --git ") {
4119                patches.push(self.parse_file(Some(line))?);
4120            } else if line.starts_with(b"--- ") {
4121                // A bare unified diff with no `diff --git` header.
4122                patches.push(self.parse_file(None)?);
4123            } else if line.starts_with(b"@@ ") {
4124                return Err(GitError::InvalidFormat(
4125                    "hunk header encountered before any file header".to_string(),
4126                ));
4127            } else {
4128                // Skip commentary / unrelated lines.
4129                self.index += 1;
4130            }
4131        }
4132        Ok(patches)
4133    }
4134
4135    /// Parse one file's headers and hunks. When `diff_line` is `Some`, the
4136    /// current line is the `diff --git` header (already inspected by the
4137    /// caller); otherwise parsing starts at a `--- ` line.
4138    fn parse_file(&mut self, diff_line: Option<&[u8]>) -> Result<FilePatch> {
4139        let mut patch = FilePatch {
4140            old_path: None,
4141            new_path: None,
4142            old_mode: None,
4143            new_mode: None,
4144            hunks: Vec::new(),
4145            is_new: false,
4146            is_delete: false,
4147            is_rename: false,
4148        };
4149        // Default paths from `diff --git a/x b/x` if present (overridden by
4150        // `---`/`+++` lines when those carry real paths).
4151        if let Some(diff_line) = diff_line {
4152            if let Some((a, b)) = parse_diff_git_paths(diff_line) {
4153                patch.old_path = Some(a);
4154                patch.new_path = Some(b);
4155            }
4156            self.index += 1;
4157        }
4158
4159        // Extended headers until the first `---`/`@@`/next `diff --git`.
4160        while self.index < self.lines.len() {
4161            let line = self.lines[self.index];
4162            if line.starts_with(b"--- ") {
4163                self.parse_old_file_header(line, &mut patch);
4164                self.index += 1;
4165                break;
4166            } else if line.starts_with(b"@@ ") {
4167                // No `---`/`+++` (e.g. pure rename or mode change with no body).
4168                break;
4169            } else if line.starts_with(b"diff --git ") {
4170                // Next file began with no body for this one.
4171                return Ok(patch);
4172            } else if let Some(rest) = strip_prefix(line, b"old mode ") {
4173                patch.old_mode = parse_octal(rest);
4174            } else if let Some(rest) = strip_prefix(line, b"new mode ") {
4175                patch.new_mode = parse_octal(rest);
4176            } else if let Some(rest) = strip_prefix(line, b"new file mode ") {
4177                patch.is_new = true;
4178                patch.new_mode = parse_octal(rest);
4179            } else if let Some(rest) = strip_prefix(line, b"deleted file mode ") {
4180                patch.is_delete = true;
4181                patch.old_mode = parse_octal(rest);
4182            } else if let Some(rest) = strip_prefix(line, b"rename from ") {
4183                patch.is_rename = true;
4184                patch.old_path = Some(rest.to_vec());
4185            } else if let Some(rest) = strip_prefix(line, b"rename to ") {
4186                patch.is_rename = true;
4187                patch.new_path = Some(rest.to_vec());
4188            } else {
4189                // `index ..`, `similarity index`, `copy from/to`, etc. — ignore.
4190                self.index += 1;
4191                continue;
4192            }
4193            self.index += 1;
4194        }
4195
4196        // `+++` header (the old-file branch above already advanced past `---`).
4197        if self.index < self.lines.len() && self.lines[self.index].starts_with(b"+++ ") {
4198            self.parse_new_file_header(self.lines[self.index], &mut patch);
4199            self.index += 1;
4200        }
4201
4202        // Hunks.
4203        while self.index < self.lines.len() {
4204            let line = self.lines[self.index];
4205            if line.starts_with(b"@@ ") {
4206                let hunk = self.parse_hunk()?;
4207                patch.hunks.push(hunk);
4208            } else if line.starts_with(b"diff --git ") {
4209                break;
4210            } else if line.starts_with(b"--- ") {
4211                // Start of a subsequent bare diff.
4212                break;
4213            } else {
4214                // Trailing commentary between/after hunks.
4215                self.index += 1;
4216            }
4217        }
4218
4219        Ok(patch)
4220    }
4221
4222    fn parse_old_file_header(&self, line: &[u8], patch: &mut FilePatch) {
4223        let rest = strip_prefix(line, b"--- ").unwrap_or(line);
4224        let path = strip_header_path(rest);
4225        match path {
4226            HeaderPath::DevNull => {
4227                patch.is_new = true;
4228                patch.old_path = None;
4229            }
4230            HeaderPath::Path(p) => {
4231                // Only override if we did not already learn a real path.
4232                if patch.old_path.is_none() || !patch.is_rename {
4233                    patch.old_path = Some(p);
4234                }
4235            }
4236        }
4237    }
4238
4239    fn parse_new_file_header(&self, line: &[u8], patch: &mut FilePatch) {
4240        let rest = strip_prefix(line, b"+++ ").unwrap_or(line);
4241        let path = strip_header_path(rest);
4242        match path {
4243            HeaderPath::DevNull => {
4244                patch.is_delete = true;
4245                patch.new_path = None;
4246            }
4247            HeaderPath::Path(p) => {
4248                if patch.new_path.is_none() || !patch.is_rename {
4249                    patch.new_path = Some(p);
4250                }
4251            }
4252        }
4253    }
4254
4255    fn parse_hunk(&mut self) -> Result<Hunk> {
4256        let header = self.lines[self.index];
4257        let (old_start, old_len, new_start, new_len) = parse_hunk_header(header)?;
4258        self.index += 1;
4259
4260        let mut hunk = Hunk {
4261            old_start,
4262            old_len,
4263            new_start,
4264            new_len,
4265            lines: Vec::new(),
4266            old_no_newline: false,
4267            new_no_newline: false,
4268        };
4269        let mut old_seen = 0usize;
4270        let mut new_seen = 0usize;
4271
4272        while self.index < self.lines.len() {
4273            // Stop when both sides are satisfied.
4274            if old_seen >= old_len && new_seen >= new_len {
4275                break;
4276            }
4277            let line = self.lines[self.index];
4278            if line.is_empty() {
4279                // A wholly empty line in a unified diff is a context line whose
4280                // content is the empty string (git emits a bare ` `, but some
4281                // tooling/email transport strips the trailing space).
4282                hunk.lines.push(HunkLine::Context(Vec::new()));
4283                old_seen += 1;
4284                new_seen += 1;
4285                self.index += 1;
4286                continue;
4287            }
4288            match line[0] {
4289                b' ' => {
4290                    hunk.lines.push(HunkLine::Context(line[1..].to_vec()));
4291                    old_seen += 1;
4292                    new_seen += 1;
4293                }
4294                b'+' => {
4295                    hunk.lines.push(HunkLine::Insert(line[1..].to_vec()));
4296                    new_seen += 1;
4297                }
4298                b'-' => {
4299                    hunk.lines.push(HunkLine::Delete(line[1..].to_vec()));
4300                    old_seen += 1;
4301                }
4302                b'\\' => {
4303                    // `\ No newline at end of file` — applies to the line just
4304                    // emitted. Set the appropriate side flag(s).
4305                    self.mark_no_newline(&mut hunk);
4306                    self.index += 1;
4307                    continue;
4308                }
4309                _ => {
4310                    // Anything else terminates the hunk body.
4311                    break;
4312                }
4313            }
4314            self.index += 1;
4315        }
4316
4317        // A trailing `\ No newline` may follow the final body line even after
4318        // the counts are satisfied; consume it.
4319        if self.index < self.lines.len() && self.lines[self.index].starts_with(b"\\") {
4320            self.mark_no_newline(&mut hunk);
4321            self.index += 1;
4322        }
4323
4324        if old_seen != old_len || new_seen != new_len {
4325            return Err(GitError::InvalidFormat(format!(
4326                "hunk body line counts mismatch: header declared -{old_len},+{new_len} \
4327                 but body had -{old_seen},+{new_seen}"
4328            )));
4329        }
4330
4331        Ok(hunk)
4332    }
4333
4334    /// Set the no-newline flag based on the kind of the most recently pushed
4335    /// hunk line.
4336    fn mark_no_newline(&self, hunk: &mut Hunk) {
4337        match hunk.lines.last() {
4338            Some(HunkLine::Context(_)) => {
4339                hunk.old_no_newline = true;
4340                hunk.new_no_newline = true;
4341            }
4342            Some(HunkLine::Insert(_)) => hunk.new_no_newline = true,
4343            Some(HunkLine::Delete(_)) => hunk.old_no_newline = true,
4344            None => {}
4345        }
4346    }
4347}
4348
4349enum HeaderPath {
4350    DevNull,
4351    Path(Vec<u8>),
4352}
4353
4354/// Extract the path from a `---`/`+++` header tail, stripping a leading `a/` or
4355/// `b/` prefix, an optional trailing timestamp (separated by a tab), and
4356/// recognising `/dev/null`.
4357fn strip_header_path(rest: &[u8]) -> HeaderPath {
4358    // Cut a trailing tab-delimited timestamp if present.
4359    let path = match rest.iter().position(|&b| b == b'\t') {
4360        Some(tab) => &rest[..tab],
4361        None => rest,
4362    };
4363    let path = trim_ascii_end(path);
4364    if path == b"/dev/null" {
4365        return HeaderPath::DevNull;
4366    }
4367    // Strip a leading `a/` or `b/` (git's default prefixes).
4368    let stripped = if path.starts_with(b"a/") || path.starts_with(b"b/") {
4369        &path[2..]
4370    } else {
4371        path
4372    };
4373    HeaderPath::Path(stripped.to_vec())
4374}
4375
4376/// Parse the two paths out of `diff --git a/<x> b/<y>`. Returns the paths with
4377/// their `a/`/`b/` prefixes stripped. Returns `None` when the line cannot be
4378/// split unambiguously (e.g. paths containing spaces, which git would quote).
4379fn parse_diff_git_paths(line: &[u8]) -> Option<(Vec<u8>, Vec<u8>)> {
4380    let rest = strip_prefix(line, b"diff --git ")?;
4381    // Quoted paths are uncommon in this engine's inputs; bail and let the
4382    // `---`/`+++` headers supply the names instead.
4383    if rest.first() == Some(&b'"') {
4384        return None;
4385    }
4386    // Find the split point: the boundary between the `a/...` and `b/...` halves.
4387    // git separates them with a single space; the simplest robust heuristic is
4388    // to look for ` b/` preceded by an `a/` start.
4389    if !rest.starts_with(b"a/") {
4390        return None;
4391    }
4392    let sep = find_subslice(rest, b" b/")?;
4393    let a = &rest[2..sep];
4394    let b = &rest[sep + 3..];
4395    Some((a.to_vec(), b.to_vec()))
4396}
4397
4398/// Parse an `@@ -l,s +l,s @@` header into `(old_start, old_len, new_start,
4399/// new_len)`. A missing `,s` means a length of 1.
4400fn parse_hunk_header(line: &[u8]) -> Result<(usize, usize, usize, usize)> {
4401    let err = || GitError::InvalidFormat(format!("malformed hunk header: {}", lossy(line)));
4402    let rest = strip_prefix(line, b"@@ ").ok_or_else(err)?;
4403    // Up to the closing ` @@`.
4404    let close = find_subslice(rest, b" @@").ok_or_else(err)?;
4405    let ranges = &rest[..close];
4406    let mut parts = ranges.split(|&b| b == b' ').filter(|p| !p.is_empty());
4407    let old = parts.next().ok_or_else(err)?;
4408    let new = parts.next().ok_or_else(err)?;
4409    let old = strip_prefix(old, b"-").ok_or_else(err)?;
4410    let new = strip_prefix(new, b"+").ok_or_else(err)?;
4411    let (old_start, old_len) = parse_range(old).ok_or_else(err)?;
4412    let (new_start, new_len) = parse_range(new).ok_or_else(err)?;
4413    Ok((old_start, old_len, new_start, new_len))
4414}
4415
4416/// Parse `start[,len]` into `(start, len)`, defaulting `len` to 1.
4417fn parse_range(range: &[u8]) -> Option<(usize, usize)> {
4418    match range.iter().position(|&b| b == b',') {
4419        Some(comma) => {
4420            let start = parse_usize(&range[..comma])?;
4421            let len = parse_usize(&range[comma + 1..])?;
4422            Some((start, len))
4423        }
4424        None => Some((parse_usize(range)?, 1)),
4425    }
4426}
4427
4428fn parse_usize(bytes: &[u8]) -> Option<usize> {
4429    if bytes.is_empty() {
4430        return None;
4431    }
4432    let mut value: usize = 0;
4433    for &b in bytes {
4434        if !b.is_ascii_digit() {
4435            return None;
4436        }
4437        value = value.checked_mul(10)?.checked_add((b - b'0') as usize)?;
4438    }
4439    Some(value)
4440}
4441
4442fn parse_octal(bytes: &[u8]) -> Option<u32> {
4443    let trimmed = trim_ascii_end(bytes);
4444    if trimmed.is_empty() {
4445        return None;
4446    }
4447    let mut value: u32 = 0;
4448    for &b in trimmed {
4449        if !(b'0'..=b'7').contains(&b) {
4450            return None;
4451        }
4452        value = value.checked_mul(8)?.checked_add((b - b'0') as u32)?;
4453    }
4454    Some(value)
4455}
4456
4457fn strip_prefix<'b>(line: &'b [u8], prefix: &[u8]) -> Option<&'b [u8]> {
4458    if line.starts_with(prefix) {
4459        Some(&line[prefix.len()..])
4460    } else {
4461        None
4462    }
4463}
4464
4465fn find_subslice(haystack: &[u8], needle: &[u8]) -> Option<usize> {
4466    if needle.is_empty() || needle.len() > haystack.len() {
4467        return None;
4468    }
4469    haystack
4470        .windows(needle.len())
4471        .position(|window| window == needle)
4472}
4473
4474fn trim_ascii_end(bytes: &[u8]) -> &[u8] {
4475    let mut end = bytes.len();
4476    while end > 0 && (bytes[end - 1] == b' ' || bytes[end - 1] == b'\r') {
4477        end -= 1;
4478    }
4479    &bytes[..end]
4480}
4481
4482fn lossy(bytes: &[u8]) -> String {
4483    String::from_utf8_lossy(bytes).into_owned()
4484}
4485
4486// ===========================================================================
4487// Library tree-merge seam (`merge_trees`).
4488//
4489// This is the single 3-way tree-merge engine that every merge porcelain calls.
4490// Before it existed the logic was duplicated across the CLI: `merge-tree
4491// --write-tree` had its own copy and `git merge` / `cherry-pick` / `revert`
4492// had a second copy. Both copies implemented the identical per-path diff3
4493// resolution; the only differences were *rendering* (write-tree emits a tree +
4494// stage list + messages; the porcelains stage an index + materialize a
4495// worktree). This seam computes the merge once and returns a per-path result
4496// rich enough for both renderings, so the resolution lives in exactly one
4497// place.
4498//
4499// The result is byte-identical to the old per-command copies on every cell
4500// they already handled (clean merges, content / add-add / modify-delete
4501// conflicts, mode merges). On top of that it adds rename-aware resolution: a
4502// file renamed on one side and modified on the other follows the rename,
4503// gated by [`MergeTreesOptions::detect_renames`] (the classic merge-ort
4504// non-recursive rename case).
4505// ===========================================================================
4506
4507/// Flattened tree: repository-relative path -> (mode, blob/symlink/gitlink oid).
4508pub type MergeEntryMap = BTreeMap<Vec<u8>, (u32, ObjectId)>;
4509
4510/// Whether to favour one side wholesale for textual conflicts (`-Xours` /
4511/// `-Xtheirs`), or to leave conflict markers in place.
4512#[derive(Clone, Copy, PartialEq, Eq, Debug)]
4513pub enum MergeFavor {
4514    /// Leave conflict markers in place (the default).
4515    None,
4516    /// On a textual conflict, take ours' content wholesale.
4517    Ours,
4518    /// On a textual conflict, take theirs' content wholesale.
4519    Theirs,
4520}
4521
4522/// Options controlling a [`merge_trees`] run.
4523pub struct MergeTreesOptions<'a> {
4524    /// Conflict-marker label for ours (e.g. a branch name or `HEAD`).
4525    pub ours_label: &'a str,
4526    /// Conflict-marker label for theirs.
4527    pub theirs_label: &'a str,
4528    /// Diff3 ancestor label (the `|||||||` side); merge porcelains use
4529    /// `"merged common ancestors"`.
4530    pub ancestor_label: &'a str,
4531    /// `-Xours` / `-Xtheirs` favouring for textual conflicts.
4532    pub favor: MergeFavor,
4533    /// Enable rename-aware merging: a file renamed on one side and modified on
4534    /// the other follows the rename. When `false`, the merge is purely
4535    /// path-keyed (the historical behaviour).
4536    pub detect_renames: bool,
4537    /// Minimum similarity (`0..=100`) for inexact rename detection.
4538    pub rename_threshold: u8,
4539    /// Directory-rename detection mode. When [`DirectoryRenames::False`], a file
4540    /// added on one side under a directory that the *other* side renamed stays
4541    /// put. When enabled, such files are re-homed into the renamed directory,
4542    /// matching `merge.directoryRenames`. Requires `detect_renames` to have any
4543    /// effect (directory renames are inferred from the file renames it finds).
4544    pub directory_renames: DirectoryRenames,
4545    /// Conflict-marker style for textual conflicts (`merge.conflictStyle`).
4546    pub style: ConflictStyle,
4547}
4548
4549/// How directory-rename detection behaves, mirroring git's
4550/// `merge.directoryRenames` configuration.
4551#[derive(Clone, Copy, PartialEq, Eq, Debug, Default)]
4552pub enum DirectoryRenames {
4553    /// Disable directory-rename detection (`merge.directoryRenames=false`).
4554    #[default]
4555    False,
4556    /// Apply directory renames silently (`merge.directoryRenames=true`).
4557    True,
4558    /// Detect directory renames but treat each re-homed path as a conflict
4559    /// requiring confirmation (`merge.directoryRenames=conflict`). git's default.
4560    Conflict,
4561}
4562
4563impl Default for MergeTreesOptions<'_> {
4564    fn default() -> Self {
4565        Self {
4566            ours_label: "ours",
4567            theirs_label: "theirs",
4568            ancestor_label: "merged common ancestors",
4569            favor: MergeFavor::None,
4570            detect_renames: false,
4571            rename_threshold: DEFAULT_RENAME_THRESHOLD,
4572            directory_renames: DirectoryRenames::False,
4573            style: ConflictStyle::Merge,
4574        }
4575    }
4576}
4577
4578/// The kind of conflict recorded for a path, used to render the stable
4579/// conflict-type token and human message.
4580#[derive(Debug, Clone, PartialEq, Eq)]
4581pub enum MergeConflictKind {
4582    /// Both sides changed the file content differently (or both added it with
4583    /// differing content — an add/add).
4584    Content { add_add: bool },
4585    /// The file was deleted on one side and modified on the other.
4586    ModifyDelete {
4587        /// The side label that deleted the path.
4588        deleted_in: String,
4589        /// The side label that modified (and thus kept) the path.
4590        modified_in: String,
4591    },
4592    /// A file renamed on one side, with a content conflict against the other
4593    /// side's change at the destination.
4594    RenameContent {
4595        /// The original (pre-rename) path.
4596        old_path: Vec<u8>,
4597    },
4598    /// A file renamed on one side whose source was deleted on the other side.
4599    RenameDelete {
4600        /// The pre-rename source path.
4601        old_path: Vec<u8>,
4602        /// The side label that performed the rename.
4603        renamed_in: String,
4604        /// The side label that deleted the source.
4605        deleted_in: String,
4606    },
4607}
4608
4609/// One resolved/conflicted path in the merged tree.
4610#[derive(Debug, Clone)]
4611pub struct MergedPath {
4612    /// Destination path in the merged tree.
4613    pub path: Vec<u8>,
4614    /// The per-stage (1=base, 2=ours, 3=theirs) entries when conflicted; all
4615    /// `None` for a clean resolution.
4616    pub stages: MergeStages,
4617    /// `Some((mode, oid))` is the final leaf written to the merged tree; `None`
4618    /// means the path is absent in the result (a clean delete).
4619    pub result: Option<(u32, ObjectId)>,
4620    /// When conflicted, the worktree bytes + mode to materialize (content with
4621    /// conflict markers, or the surviving side's bytes). `None` for a clean
4622    /// path.
4623    pub worktree: Option<(u32, Vec<u8>)>,
4624    /// `Some(..)` exactly when this path conflicted.
4625    pub conflict: Option<MergeConflictKind>,
4626    /// True when this path went through a textual 3-way content merge (both
4627    /// sides diverged and both were mergeable files). Drives the "Auto-merging
4628    /// <path>" informational message, which `git merge-tree` emits for every
4629    /// such path — clean or conflicted.
4630    pub auto_merged: bool,
4631}
4632
4633impl MergedPath {
4634    /// True when this path resolved cleanly (no conflict recorded).
4635    pub fn is_clean(&self) -> bool {
4636        self.conflict.is_none()
4637    }
4638}
4639
4640/// Per-stage higher-order index entries for a conflicted path.
4641#[derive(Debug, Clone, Default)]
4642pub struct MergeStages {
4643    pub base: Option<(u32, ObjectId)>,
4644    pub ours: Option<(u32, ObjectId)>,
4645    pub theirs: Option<(u32, ObjectId)>,
4646}
4647
4648/// The outcome of a 3-way tree merge: the merged top-level tree plus per-path
4649/// detail and a clean/conflicted flag.
4650#[derive(Debug, Clone)]
4651pub struct MergeTreesResult {
4652    /// Object id of the merged top-level tree (always written, even on
4653    /// conflict — conflicted blobs go in with their marker content).
4654    pub tree: ObjectId,
4655    /// Per-path results, sorted by path.
4656    pub paths: Vec<MergedPath>,
4657    /// False if any path conflicted.
4658    pub clean: bool,
4659}
4660
4661impl MergeTreesResult {
4662    /// Iterate over the paths that conflicted, in path order.
4663    pub fn conflicts(&self) -> impl Iterator<Item = &MergedPath> {
4664        self.paths.iter().filter(|entry| entry.conflict.is_some())
4665    }
4666}
4667
4668/// Read a tree object (by oid) into a flattened path -> (mode, oid) map,
4669/// descending into subtrees. The canonical empty tree yields an empty map.
4670pub fn flatten_tree(
4671    reader: &impl ObjectReader,
4672    format: ObjectFormat,
4673    tree_oid: &ObjectId,
4674) -> Result<MergeEntryMap> {
4675    let mut entries = BTreeMap::new();
4676    if *tree_oid == empty_tree_oid(format)? {
4677        return Ok(entries);
4678    }
4679    collect_flat_tree(reader, format, tree_oid, Vec::new(), &mut entries)?;
4680    Ok(entries)
4681}
4682
4683fn collect_flat_tree(
4684    reader: &impl ObjectReader,
4685    format: ObjectFormat,
4686    tree_oid: &ObjectId,
4687    prefix: Vec<u8>,
4688    entries: &mut MergeEntryMap,
4689) -> Result<()> {
4690    let object = reader.read_object(tree_oid)?;
4691    if object.object_type != ObjectType::Tree {
4692        return Err(GitError::InvalidObject(format!(
4693            "expected tree {}, found {}",
4694            tree_oid,
4695            object.object_type.as_str()
4696        )));
4697    }
4698    for entry in TreeEntries::new(format, &object.body) {
4699        let entry = entry?;
4700        let mut path = prefix.clone();
4701        if !path.is_empty() {
4702            path.push(b'/');
4703        }
4704        path.extend_from_slice(entry.name);
4705        if entry.mode == 0o040000 {
4706            collect_flat_tree(reader, format, &entry.oid, path, entries)?;
4707        } else {
4708            entries.insert(path, (entry.mode, entry.oid));
4709        }
4710    }
4711    Ok(())
4712}
4713
4714/// True for a plain file blob (regular or executable) — i.e. a mode whose
4715/// content can be textually 3-way merged. Symlinks and gitlinks are excluded.
4716pub fn is_mergeable_file_mode(mode: u32) -> bool {
4717    mode == 0o100644 || mode == 0o100755
4718}
4719
4720/// 3-way merge of three trees into a single merged tree.
4721///
4722/// `base` is the common-ancestor tree (`None` for unrelated histories — every
4723/// path is then treated as added on both sides). `ours`/`theirs` are the two
4724/// sides. Cleanly-merged blob content and the resulting (sub)trees are written
4725/// to `db`; the returned [`MergeTreesResult`] carries the merged top-level tree
4726/// oid plus per-path detail.
4727///
4728/// This is the shared engine behind `git merge-tree --write-tree`, `git merge`,
4729/// `git cherry-pick`, and `git revert`. It is behaviour-preserving relative to
4730/// the per-command copies it replaced, and additionally resolves renames when
4731/// [`MergeTreesOptions::detect_renames`] is set.
4732pub fn merge_trees(
4733    db: &FileObjectDatabase,
4734    format: ObjectFormat,
4735    base: Option<&ObjectId>,
4736    ours: &ObjectId,
4737    theirs: &ObjectId,
4738    options: &MergeTreesOptions<'_>,
4739) -> Result<MergeTreesResult> {
4740    let base_map = match base {
4741        Some(tree) => flatten_tree(db, format, tree)?,
4742        None => MergeEntryMap::new(),
4743    };
4744    let ours_map = flatten_tree(db, format, ours)?;
4745    let theirs_map = flatten_tree(db, format, theirs)?;
4746    merge_entry_maps(db, format, &base_map, &ours_map, &theirs_map, options)
4747}
4748
4749/// [`merge_trees`] operating on already-flattened entry maps. The merge
4750/// porcelains often hold the flattened maps already (e.g. cherry-pick builds
4751/// `theirs` from a picked commit's tree), so this avoids re-reading them.
4752pub fn merge_entry_maps(
4753    db: &FileObjectDatabase,
4754    format: ObjectFormat,
4755    base_map: &MergeEntryMap,
4756    ours_map: &MergeEntryMap,
4757    theirs_map: &MergeEntryMap,
4758    options: &MergeTreesOptions<'_>,
4759) -> Result<MergeTreesResult> {
4760    // Rename-aware step: detect files renamed on exactly one side relative to
4761    // base, so a modification on the other side follows the rename. This is the
4762    // non-recursive merge-ort rename case. We compute a rewrite map that, for a
4763    // one-sided rename old->new, presents the *other* side's `old` content at
4764    // `new` (and drops `old`), letting the path-keyed core below do the 3-way
4765    // content merge at the destination.
4766    let (renames, side_renames) = if options.detect_renames {
4767        let (renames, ours_side, theirs_side) =
4768            detect_merge_renames(db, format, base_map, ours_map, theirs_map, options)?;
4769        (renames, Some((ours_side, theirs_side)))
4770    } else {
4771        (MergeRenames::default(), None)
4772    };
4773
4774    // Build the effective per-side maps with file renames applied.
4775    let (eff_base, mut eff_ours, mut eff_theirs) =
4776        apply_merge_renames(base_map, ours_map, theirs_map, &renames);
4777
4778    // Directory-rename detection: when one side renamed a whole directory and
4779    // the other added files under the old directory, re-home those additions
4780    // into the renamed directory (the merge.directoryRenames behaviour). This
4781    // runs on top of the file-rename rewrite, using the *original* side maps to
4782    // decide which paths were freshly added.
4783    if options.directory_renames != DirectoryRenames::False
4784        && let Some((ours_side, theirs_side)) = &side_renames
4785    {
4786        let dir_renames =
4787            compute_directory_renames(base_map, ours_map, theirs_map, ours_side, theirs_side);
4788        let (new_ours, new_theirs, _rehomed) =
4789            apply_directory_renames(&eff_base, &eff_ours, &eff_theirs, &dir_renames);
4790        eff_ours = new_ours;
4791        eff_theirs = new_theirs;
4792    }
4793
4794    let mut all_paths = BTreeSet::new();
4795    all_paths.extend(eff_base.keys().cloned());
4796    all_paths.extend(eff_ours.keys().cloned());
4797    all_paths.extend(eff_theirs.keys().cloned());
4798
4799    let mut paths: Vec<MergedPath> = Vec::new();
4800    let mut leaves: MergeEntryMap = BTreeMap::new();
4801    let mut clean = true;
4802
4803    for path in all_paths {
4804        let base = eff_base.get(&path).cloned();
4805        let ours = eff_ours.get(&path).cloned();
4806        let theirs = eff_theirs.get(&path).cloned();
4807        let rename = renames.dest_to_source.get(&path);
4808        let old_path = rename.map(|r| r.source.clone());
4809
4810        // Trivial resolutions (identical to the historical per-command logic).
4811        if ours == theirs {
4812            if let Some(entry) = ours {
4813                leaves.insert(path.clone(), entry);
4814            }
4815            paths.push(clean_path(path, ours));
4816            continue;
4817        }
4818        if ours == base {
4819            if let Some(entry) = &theirs {
4820                leaves.insert(path.clone(), *entry);
4821            }
4822            paths.push(clean_path(path, theirs));
4823            continue;
4824        }
4825        if theirs == base {
4826            if let Some(entry) = &ours {
4827                leaves.insert(path.clone(), *entry);
4828            }
4829            paths.push(clean_path(path, ours));
4830            continue;
4831        }
4832
4833        // Both sides diverged. Decide how to combine.
4834        let content_mergeable = matches!(&ours, Some((mode, _)) if is_mergeable_file_mode(*mode))
4835            && matches!(&theirs, Some((mode, _)) if is_mergeable_file_mode(*mode))
4836            && match &base {
4837                Some((mode, _)) => is_mergeable_file_mode(*mode),
4838                None => true,
4839            };
4840
4841        if let (true, Some((ours_mode, ours_oid)), Some((theirs_mode, theirs_oid))) =
4842            (content_mergeable, &ours, &theirs)
4843        {
4844            let add_add = base.is_none();
4845            let base_bytes = match &base {
4846                Some((_, oid)) => merge_blob_bytes(db, oid)?,
4847                None => Vec::new(),
4848            };
4849            let ours_bytes = merge_blob_bytes(db, ours_oid)?;
4850            let theirs_bytes = merge_blob_bytes(db, theirs_oid)?;
4851            // When this destination came from a one-sided rename, git qualifies
4852            // the conflict-marker labels with the per-side path (the renaming
4853            // side shows the new path, the other side the old path), e.g.
4854            // `<<<<<<< HEAD:old.txt` / `>>>>>>> feature:new.txt`.
4855            let (ours_label, theirs_label) = match rename {
4856                Some(MergeRename { source, side }) => {
4857                    let (ours_path, theirs_path) = match side {
4858                        // theirs renamed -> ours kept the source path.
4859                        RenameSide::Theirs => (source.as_slice(), path.as_slice()),
4860                        // ours renamed -> theirs kept the source path.
4861                        RenameSide::Ours => (path.as_slice(), source.as_slice()),
4862                    };
4863                    (
4864                        qualify_label(options.ours_label, ours_path),
4865                        qualify_label(options.theirs_label, theirs_path),
4866                    )
4867                }
4868                None => (
4869                    options.ours_label.to_string(),
4870                    options.theirs_label.to_string(),
4871                ),
4872            };
4873            let result = merge_blobs(
4874                &base_bytes,
4875                &ours_bytes,
4876                &theirs_bytes,
4877                &MergeBlobOptions {
4878                    ours_label: &ours_label,
4879                    theirs_label: &theirs_label,
4880                    base_label: options.ancestor_label,
4881                    style: options.style,
4882                },
4883            );
4884
4885            let base_mode = base.as_ref().map(|(mode, _)| *mode);
4886            let (resolved_mode, mode_conflict) =
4887                merge_file_modes(base_mode, *ours_mode, *theirs_mode);
4888
4889            if !result.conflicted && !mode_conflict {
4890                let oid = db.write_object(EncodedObject::new(ObjectType::Blob, result.content))?;
4891                leaves.insert(path.clone(), (resolved_mode, oid));
4892                paths.push(clean_path_auto(path, Some((resolved_mode, oid)), true));
4893            } else if options.favor != MergeFavor::None && !mode_conflict {
4894                let chosen = if options.favor == MergeFavor::Ours {
4895                    ours
4896                } else {
4897                    theirs
4898                };
4899                if let Some(entry) = chosen {
4900                    leaves.insert(path.clone(), entry);
4901                }
4902                paths.push(clean_path_auto(path, chosen, true));
4903            } else {
4904                clean = false;
4905                let oid =
4906                    db.write_object(EncodedObject::new(ObjectType::Blob, result.content.clone()))?;
4907                leaves.insert(path.clone(), (resolved_mode, oid));
4908                let worktree_mode = if *ours_mode == *theirs_mode {
4909                    *ours_mode
4910                } else {
4911                    0o100644
4912                };
4913                let conflict = match &old_path {
4914                    Some(old) => MergeConflictKind::RenameContent {
4915                        old_path: old.clone(),
4916                    },
4917                    None => MergeConflictKind::Content { add_add },
4918                };
4919                paths.push(MergedPath {
4920                    path: path.clone(),
4921                    stages: stages_for(&base, &ours, &theirs),
4922                    result: Some((resolved_mode, oid)),
4923                    worktree: Some((worktree_mode, result.content)),
4924                    conflict: Some(conflict),
4925                    auto_merged: true,
4926                });
4927            }
4928        } else if base.is_some() && (ours.is_none() || theirs.is_none()) {
4929            // modify/delete.
4930            clean = false;
4931            let (deleted_in, modified_in, surviving) = if ours.is_none() {
4932                (
4933                    options.ours_label.to_string(),
4934                    options.theirs_label.to_string(),
4935                    theirs,
4936                )
4937            } else {
4938                (
4939                    options.theirs_label.to_string(),
4940                    options.ours_label.to_string(),
4941                    ours,
4942                )
4943            };
4944            let worktree = match &surviving {
4945                Some((mode, oid)) => Some((*mode, merge_blob_bytes(db, oid)?)),
4946                None => None,
4947            };
4948            if let Some(entry) = surviving {
4949                leaves.insert(path.clone(), entry);
4950            }
4951            paths.push(MergedPath {
4952                path: path.clone(),
4953                stages: stages_for(&base, &ours, &theirs),
4954                result: surviving,
4955                worktree,
4956                conflict: Some(MergeConflictKind::ModifyDelete {
4957                    deleted_in,
4958                    modified_in,
4959                }),
4960                auto_merged: false,
4961            });
4962        } else {
4963            // add/add of non-files, type changes, mode changes, etc. Keep the
4964            // surviving side's content and record a generic content conflict.
4965            clean = false;
4966            let add_add = base.is_none();
4967            let surviving = ours.or(theirs);
4968            let worktree = match &surviving {
4969                Some((mode, oid)) => Some((*mode, merge_blob_bytes(db, oid)?)),
4970                None => None,
4971            };
4972            if let Some(entry) = surviving {
4973                leaves.insert(path.clone(), entry);
4974            }
4975            paths.push(MergedPath {
4976                path: path.clone(),
4977                stages: stages_for(&base, &ours, &theirs),
4978                result: surviving,
4979                worktree,
4980                conflict: Some(MergeConflictKind::Content { add_add }),
4981                auto_merged: false,
4982            });
4983        }
4984    }
4985
4986    // Rename/delete conflicts: a file renamed on one side whose source the other
4987    // side deleted. The merge core resolved the destination cleanly (only the
4988    // renaming side has it), but git flags this as a conflict — keep the renamed
4989    // content in the tree, record higher-order stages, and mark the merge dirty.
4990    if !renames.rename_deletes.is_empty() {
4991        for (dest, rd) in &renames.rename_deletes {
4992            // Skip if another conflict already claimed this destination.
4993            let Some(slot) = paths.iter_mut().find(|p| &p.path == dest) else {
4994                continue;
4995            };
4996            if slot.conflict.is_some() {
4997                continue;
4998            }
4999            let base_entry = base_map.get(&rd.source).copied();
5000            let renamed_entry = slot.result;
5001            // The renamed content sits on the renaming side; the deleting side
5002            // contributes no stage at the destination.
5003            let (ours_stage, theirs_stage) = match rd.side {
5004                RenameSide::Ours => (renamed_entry, None),
5005                RenameSide::Theirs => (None, renamed_entry),
5006            };
5007            let (renamed_in, deleted_in) = match rd.side {
5008                RenameSide::Ours => (
5009                    options.ours_label.to_string(),
5010                    options.theirs_label.to_string(),
5011                ),
5012                RenameSide::Theirs => (
5013                    options.theirs_label.to_string(),
5014                    options.ours_label.to_string(),
5015                ),
5016            };
5017            let worktree = match &renamed_entry {
5018                Some((mode, oid)) => Some((*mode, merge_blob_bytes(db, oid)?)),
5019                None => None,
5020            };
5021            slot.stages = MergeStages {
5022                base: base_entry,
5023                ours: ours_stage,
5024                theirs: theirs_stage,
5025            };
5026            slot.worktree = worktree;
5027            slot.conflict = Some(MergeConflictKind::RenameDelete {
5028                old_path: rd.source.clone(),
5029                renamed_in,
5030                deleted_in,
5031            });
5032            clean = false;
5033        }
5034    }
5035
5036    let tree = write_merged_tree(db, &leaves)?;
5037
5038    Ok(MergeTreesResult { tree, paths, clean })
5039}
5040
5041/// Construct a clean (non-conflicted) [`MergedPath`].
5042fn clean_path(path: Vec<u8>, result: Option<(u32, ObjectId)>) -> MergedPath {
5043    clean_path_auto(path, result, false)
5044}
5045
5046/// Like [`clean_path`] but records whether the path went through a textual
5047/// 3-way content merge (for the "Auto-merging" message).
5048fn clean_path_auto(
5049    path: Vec<u8>,
5050    result: Option<(u32, ObjectId)>,
5051    auto_merged: bool,
5052) -> MergedPath {
5053    MergedPath {
5054        path,
5055        stages: MergeStages::default(),
5056        result,
5057        worktree: None,
5058        conflict: None,
5059        auto_merged,
5060    }
5061}
5062
5063/// Snapshot the present stages for a conflicted path.
5064fn stages_for(
5065    base: &Option<(u32, ObjectId)>,
5066    ours: &Option<(u32, ObjectId)>,
5067    theirs: &Option<(u32, ObjectId)>,
5068) -> MergeStages {
5069    MergeStages {
5070        base: *base,
5071        ours: *ours,
5072        theirs: *theirs,
5073    }
5074}
5075
5076/// Read a blob's raw bytes, requiring it to be a blob object.
5077fn merge_blob_bytes(reader: &impl ObjectReader, oid: &ObjectId) -> Result<Vec<u8>> {
5078    let object = reader.read_object(oid)?;
5079    if object.object_type != ObjectType::Blob {
5080        return Err(GitError::InvalidObject(format!(
5081            "expected blob {}, found {}",
5082            oid,
5083            object.object_type.as_str()
5084        )));
5085    }
5086    Ok(object.body.clone())
5087}
5088
5089/// 3-way merge of a file mode. Returns the resolved mode and whether the modes
5090/// conflict (both sides changed it to different non-base values).
5091fn merge_file_modes(base: Option<u32>, ours: u32, theirs: u32) -> (u32, bool) {
5092    if ours == theirs {
5093        return (ours, false);
5094    }
5095    match base {
5096        Some(base) if ours == base => (theirs, false),
5097        Some(base) if theirs == base => (ours, false),
5098        _ => (ours, true),
5099    }
5100}
5101
5102/// Build a top-level tree object from a flat map of `path -> (mode, oid)`
5103/// leaves, writing every (sub)tree object to `db`.
5104fn write_merged_tree(db: &FileObjectDatabase, leaves: &MergeEntryMap) -> Result<ObjectId> {
5105    let mut root = MergeTreeNode::default();
5106    for (path, (mode, oid)) in leaves {
5107        root.insert(path, *mode, *oid);
5108    }
5109    root.write(db)
5110}
5111
5112#[derive(Default)]
5113struct MergeTreeNode {
5114    blobs: BTreeMap<Vec<u8>, (u32, ObjectId)>,
5115    subtrees: BTreeMap<Vec<u8>, MergeTreeNode>,
5116}
5117
5118impl MergeTreeNode {
5119    fn insert(&mut self, path: &[u8], mode: u32, oid: ObjectId) {
5120        match path.iter().position(|byte| *byte == b'/') {
5121            Some(slash) => {
5122                let component = path[..slash].to_vec();
5123                let rest = &path[slash + 1..];
5124                self.subtrees
5125                    .entry(component)
5126                    .or_default()
5127                    .insert(rest, mode, oid);
5128            }
5129            None => {
5130                self.blobs.insert(path.to_vec(), (mode, oid));
5131            }
5132        }
5133    }
5134
5135    fn write(&self, db: &FileObjectDatabase) -> Result<ObjectId> {
5136        let mut entries: Vec<TreeEntry> = Vec::new();
5137        for (name, (mode, oid)) in &self.blobs {
5138            entries.push(TreeEntry {
5139                mode: *mode,
5140                name: BString::from(name.clone()),
5141                oid: *oid,
5142            });
5143        }
5144        for (name, subtree) in &self.subtrees {
5145            let oid = subtree.write(db)?;
5146            entries.push(TreeEntry {
5147                mode: 0o040000,
5148                name: BString::from(name.clone()),
5149                oid,
5150            });
5151        }
5152        entries.sort_by_key(merge_tree_sort_key);
5153        let tree = Tree { entries };
5154        db.write_object(EncodedObject::new(ObjectType::Tree, tree.write()))
5155    }
5156}
5157
5158fn merge_tree_sort_key(entry: &TreeEntry) -> Vec<u8> {
5159    let mut key = entry.name.as_bytes().to_vec();
5160    if entry.mode == 0o040000 {
5161        key.push(b'/');
5162    }
5163    key
5164}
5165
5166// --- Rename-aware non-recursive merge -------------------------------------
5167
5168/// Which side of the merge performed a rename.
5169#[derive(Clone, Copy, PartialEq, Eq)]
5170enum RenameSide {
5171    Ours,
5172    Theirs,
5173}
5174
5175/// One detected one-sided rename: its source path and which side renamed it.
5176#[derive(Clone)]
5177struct MergeRename {
5178    source: Vec<u8>,
5179    side: RenameSide,
5180}
5181
5182/// A file renamed on one side whose source was *deleted* on the other side — a
5183/// rename/delete conflict. git keeps the renamed content at the destination but
5184/// flags the merge as conflicted.
5185#[derive(Clone)]
5186struct RenameDelete {
5187    /// The pre-rename source path (deleted on the other side).
5188    source: Vec<u8>,
5189    /// Which side performed the rename (the other side deleted the source).
5190    side: RenameSide,
5191}
5192
5193/// The rename pairings discovered for one merge: which destination paths came
5194/// from which source path, and which side renamed (so the other side's change
5195/// can follow the rename and conflict labels can be path-qualified like git).
5196#[derive(Default)]
5197struct MergeRenames {
5198    /// One-sided renames keyed by *destination* path. Only renames where the
5199    /// OTHER side kept/modified the source in place are recorded (the case
5200    /// where the modification must follow the rename).
5201    dest_to_source: BTreeMap<Vec<u8>, MergeRename>,
5202    /// Rename/delete conflicts: a file renamed on one side whose source the
5203    /// other side deleted. Keyed by destination path.
5204    rename_deletes: BTreeMap<Vec<u8>, RenameDelete>,
5205}
5206
5207/// Every file rename observed on one side (base->side), as `(old, new)` pairs.
5208/// Unlike [`MergeRenames`] this is the *complete* rename set on a side — it is
5209/// the input to directory-rename inference, which needs to see all the per-file
5210/// moves between directories, not just the ones the other side kept in place.
5211struct SideRenames {
5212    pairs: Vec<(Vec<u8>, Vec<u8>)>,
5213}
5214
5215/// Detect one-sided renames usable for a non-recursive merge: a path present in
5216/// `base`, deleted on one side and present (renamed) at a new path on that same
5217/// side, while the OTHER side still has the original path (modified or
5218/// unchanged). Such a rename lets the other side's change move to the
5219/// destination.
5220///
5221/// Also returns the complete per-side rename set so the caller can infer
5222/// directory renames (which need every file move, not just the merge-relevant
5223/// ones).
5224fn detect_merge_renames(
5225    db: &FileObjectDatabase,
5226    format: ObjectFormat,
5227    base_map: &MergeEntryMap,
5228    ours_map: &MergeEntryMap,
5229    theirs_map: &MergeEntryMap,
5230    options: &MergeTreesOptions<'_>,
5231) -> Result<(MergeRenames, SideRenames, SideRenames)> {
5232    let mut renames = MergeRenames::default();
5233
5234    // Renames on ours: the other side that must carry its change is theirs.
5235    let ours_side = collect_side_renames(
5236        db,
5237        format,
5238        base_map,
5239        ours_map,
5240        theirs_map,
5241        RenameSide::Ours,
5242        options.rename_threshold,
5243        &mut renames,
5244    )?;
5245    // Renames on theirs: the other side that carries its change is ours.
5246    let theirs_side = collect_side_renames(
5247        db,
5248        format,
5249        base_map,
5250        theirs_map,
5251        ours_map,
5252        RenameSide::Theirs,
5253        options.rename_threshold,
5254        &mut renames,
5255    )?;
5256
5257    Ok((renames, ours_side, theirs_side))
5258}
5259
5260/// Collect renames that occurred on `side` (relative to `base`). Records the
5261/// merge-relevant subset (renames the `other` side still references) into
5262/// `renames`, and returns the *complete* per-side rename set for directory-rename
5263/// inference. `db`/`format` resolve blob bytes for similarity scoring.
5264#[allow(clippy::too_many_arguments)]
5265fn collect_side_renames(
5266    db: &FileObjectDatabase,
5267    format: ObjectFormat,
5268    base_map: &MergeEntryMap,
5269    side_map: &MergeEntryMap,
5270    other_map: &MergeEntryMap,
5271    side: RenameSide,
5272    threshold: u8,
5273    renames: &mut MergeRenames,
5274) -> Result<SideRenames> {
5275    // Diff base->side with inexact rename detection; the resulting `Renamed`
5276    // entries name (old_path -> new_path) pairs on this side.
5277    let base_tree = entry_map_as_tracked(base_map);
5278    let side_tree = entry_map_as_tracked(side_map);
5279    let options = RenameDetectionOptions {
5280        base: DiffNameStatusOptions {
5281            detect_renames: true,
5282            detect_copies: false,
5283            find_copies_harder: false,
5284            rename_empty: false,
5285        },
5286        detect_inexact: true,
5287        rename_threshold: threshold,
5288        copy_threshold: threshold,
5289    };
5290    let changes = diff_name_status_maps_with_renames(
5291        &base_tree,
5292        &side_tree,
5293        base_tree.keys().chain(side_tree.keys()),
5294        options,
5295        |oid| merge_blob_bytes(db, oid).ok(),
5296    )?;
5297
5298    let mut pairs = Vec::new();
5299    for change in changes {
5300        let NameStatus::Renamed(_) = change.status else {
5301            continue;
5302        };
5303        let Some(old_path) = change.old_path.as_ref() else {
5304            continue;
5305        };
5306        let old = old_path.as_bytes().to_vec();
5307        let new = change.path.as_bytes().to_vec();
5308        // Complete rename set, fed to directory-rename inference.
5309        pairs.push((old.clone(), new.clone()));
5310
5311        // Only act when the destination is genuinely new (not already present
5312        // in either side from a different origin) and the OTHER side still
5313        // references the source path — i.e. the other side modified/kept `old`,
5314        // and its change should follow the rename to `new`.
5315        if !other_map.contains_key(&old) {
5316            // The source path is gone on the other side. If it existed in base
5317            // (so the other side *deleted* it) and the other side did not also
5318            // produce `new`, this is a rename/delete conflict: this side renamed
5319            // the file, the other side deleted its source.
5320            if base_map.contains_key(&old) && !other_map.contains_key(&new) {
5321                renames
5322                    .rename_deletes
5323                    .entry(new.clone())
5324                    .or_insert(RenameDelete {
5325                        source: old.clone(),
5326                        side,
5327                    });
5328            }
5329            continue;
5330        }
5331        // If the other side ALSO renamed/created `new`, that is a rename/rename
5332        // or rename/add corner case we leave to the path-keyed core (stage-b).
5333        if other_map.contains_key(&new) {
5334            continue;
5335        }
5336        // Skip if both sides renamed the same source to the same dest (already
5337        // recorded) or to anything (first writer wins; the path-keyed core then
5338        // sees identical dest entries and resolves trivially).
5339        renames
5340            .dest_to_source
5341            .entry(new)
5342            .or_insert(MergeRename { source: old, side });
5343    }
5344
5345    let _ = format;
5346    Ok(SideRenames { pairs })
5347}
5348
5349/// Rewrite the three side maps so that each detected one-sided rename old->new
5350/// presents the OTHER side's `old` entry at `new`, and removes `old` from
5351/// every side. The path-keyed merge core then performs the 3-way content merge
5352/// at `new` with base=base[old], one side = the renaming side's new content,
5353/// the other side = the modifying side's old content.
5354fn apply_merge_renames(
5355    base_map: &MergeEntryMap,
5356    ours_map: &MergeEntryMap,
5357    theirs_map: &MergeEntryMap,
5358    renames: &MergeRenames,
5359) -> (MergeEntryMap, MergeEntryMap, MergeEntryMap) {
5360    if renames.dest_to_source.is_empty() {
5361        return (base_map.clone(), ours_map.clone(), theirs_map.clone());
5362    }
5363    let mut base = base_map.clone();
5364    let mut ours = ours_map.clone();
5365    let mut theirs = theirs_map.clone();
5366
5367    for (new, rename) in &renames.dest_to_source {
5368        let old = &rename.source;
5369        // Move base[old] to base[new] so the destination has a proper ancestor.
5370        if let Some(entry) = base.remove(old) {
5371            base.entry(new.clone()).or_insert(entry);
5372        }
5373        // For each side, if it still has `old`, move that entry to `new`.
5374        for side in [&mut ours, &mut theirs] {
5375            if let Some(entry) = side.remove(old) {
5376                side.entry(new.clone()).or_insert(entry);
5377            }
5378        }
5379    }
5380    (base, ours, theirs)
5381}
5382
5383// --- Directory-rename detection -------------------------------------------
5384
5385/// The parent directory of `path`, or `None` for a top-level path.
5386fn parent_dir(path: &[u8]) -> Option<&[u8]> {
5387    path.iter().rposition(|b| *b == b'/').map(|i| &path[..i])
5388}
5389
5390/// Apply a directory rename `old_dir -> new_dir` to `path` (which must live
5391/// under `old_dir`). E.g. `old_dir=z`, `new_dir=y`, `path=z/d` -> `y/d`; an
5392/// empty `new_dir` (rename into the repo root) drops the directory prefix.
5393fn apply_dir_rename(old_dir: &[u8], new_dir: &[u8], path: &[u8]) -> Vec<u8> {
5394    // The portion of `path` after `old_dir/` (handle root-target by stepping
5395    // past the separator, exactly as git's apply_dir_rename does).
5396    let rest_start = if new_dir.is_empty() {
5397        old_dir.len() + 1
5398    } else {
5399        old_dir.len()
5400    };
5401    let mut out = new_dir.to_vec();
5402    out.extend_from_slice(&path[rest_start..]);
5403    out
5404}
5405
5406/// Find the longest renamed ancestor directory of `path`: walk parent dirs from
5407/// the deepest up and return the first one present in `dir_renames`.
5408fn check_dir_renamed<'a>(
5409    path: &[u8],
5410    dir_renames: &'a BTreeMap<Vec<u8>, Vec<u8>>,
5411) -> Option<(&'a [u8], &'a [u8])> {
5412    let mut cur = parent_dir(path);
5413    while let Some(dir) = cur {
5414        if let Some((old_dir, new_dir)) = dir_renames.get_key_value(dir) {
5415            return Some((old_dir.as_slice(), new_dir.as_slice()));
5416        }
5417        cur = parent_dir(dir);
5418    }
5419    None
5420}
5421
5422/// A computed directory rename `old_dir -> new_dir` on one side of the merge.
5423struct DirectoryRenameMaps {
5424    /// Renames detected on ours' side (so files theirs added under `old_dir`
5425    /// re-home into `new_dir`).
5426    ours: BTreeMap<Vec<u8>, Vec<u8>>,
5427    /// Renames detected on theirs' side.
5428    theirs: BTreeMap<Vec<u8>, Vec<u8>>,
5429    /// Source directories whose split was unclear (no unique majority target);
5430    /// re-homing a file under one of these is a conflict, not silent.
5431    split_dirs: BTreeSet<Vec<u8>>,
5432}
5433
5434/// Infer directory renames from the complete per-side file-rename sets, mirroring
5435/// merge-ort's `get_provisional_directory_renames`: for every file moved
5436/// `old_dir/x -> new_dir/x`, tally `count[old_dir][new_dir]`, then collapse to
5437/// `old_dir -> best_new_dir` where `best` is the unique highest count. A tie
5438/// (no unique majority) marks the source directory as a "split" and is not
5439/// applied silently. A directory rename is only kept if the source directory was
5440/// *entirely removed* on that side (git's `dirs_removed` gate): if any file under
5441/// `old_dir` survives on the renaming side, the directory was not really renamed.
5442fn compute_directory_renames(
5443    base_map: &MergeEntryMap,
5444    ours_map: &MergeEntryMap,
5445    theirs_map: &MergeEntryMap,
5446    ours_side: &SideRenames,
5447    theirs_side: &SideRenames,
5448) -> DirectoryRenameMaps {
5449    let ours = compute_side_dir_renames(&ours_side.pairs, base_map, ours_map);
5450    let theirs = compute_side_dir_renames(&theirs_side.pairs, base_map, theirs_map);
5451
5452    // Collect split dirs from both sides.
5453    let mut split_dirs = BTreeSet::new();
5454    split_dirs.extend(ours.split.iter().cloned());
5455    split_dirs.extend(theirs.split.iter().cloned());
5456
5457    // A directory renamed on BOTH sides (to whatever target) is ambiguous;
5458    // git's handle_directory_level_conflicts drops it from both maps so neither
5459    // side's directory rename is applied.
5460    let mut ours_map_out = ours.renames;
5461    let mut theirs_map_out = theirs.renames;
5462    let dup: Vec<Vec<u8>> = ours_map_out
5463        .keys()
5464        .filter(|k| theirs_map_out.contains_key(*k))
5465        .cloned()
5466        .collect();
5467    for k in dup {
5468        ours_map_out.remove(&k);
5469        theirs_map_out.remove(&k);
5470    }
5471
5472    DirectoryRenameMaps {
5473        ours: ours_map_out,
5474        theirs: theirs_map_out,
5475        split_dirs,
5476    }
5477}
5478
5479/// Per-side directory-rename computation result.
5480struct SideDirRenames {
5481    renames: BTreeMap<Vec<u8>, Vec<u8>>,
5482    split: BTreeSet<Vec<u8>>,
5483}
5484
5485/// Compute one side's `old_dir -> new_dir` map from its file renames, gated on
5486/// the source directory being fully removed on that side.
5487fn compute_side_dir_renames(
5488    pairs: &[(Vec<u8>, Vec<u8>)],
5489    base_map: &MergeEntryMap,
5490    side_map: &MergeEntryMap,
5491) -> SideDirRenames {
5492    // count[old_dir][new_dir] = number of files moved from old_dir to new_dir.
5493    let mut counts: BTreeMap<Vec<u8>, BTreeMap<Vec<u8>, usize>> = BTreeMap::new();
5494    for (old, new) in pairs {
5495        // Only count moves that actually cross directories (the per-file basename
5496        // is preserved by the move; merge-ort tallies the *immediate* parent dir
5497        // move). We consider every ancestor pairing so nested renames register on
5498        // the directory that contains the file.
5499        let old_dir = parent_dir(old);
5500        let new_dir = parent_dir(new);
5501        if old_dir == new_dir {
5502            continue;
5503        }
5504        let od = old_dir.map(<[u8]>::to_vec).unwrap_or_default();
5505        let nd = new_dir.map(<[u8]>::to_vec).unwrap_or_default();
5506        *counts.entry(od).or_default().entry(nd).or_default() += 1;
5507    }
5508
5509    let mut renames = BTreeMap::new();
5510    let mut split = BTreeSet::new();
5511    for (old_dir, targets) in counts {
5512        let mut max = 0usize;
5513        let mut bad_max = 0usize;
5514        let mut best: Option<Vec<u8>> = None;
5515        for (target, count) in &targets {
5516            if *count == max {
5517                bad_max = max;
5518            } else if *count > max {
5519                max = *count;
5520                best = Some(target.clone());
5521            }
5522        }
5523        if max == 0 {
5524            continue;
5525        }
5526        if bad_max == max {
5527            split.insert(old_dir);
5528            continue;
5529        }
5530        // dirs_removed gate: the source directory must be entirely gone on this
5531        // side. If any base path under old_dir/ still exists on the side, the
5532        // directory was not renamed wholesale and we must not re-home into it.
5533        if let Some(best) = best
5534            && directory_fully_removed(&old_dir, base_map, side_map)
5535        {
5536            renames.insert(old_dir, best);
5537        }
5538    }
5539
5540    SideDirRenames { renames, split }
5541}
5542
5543/// True when every base path under `dir/` is absent on `side` (the directory was
5544/// entirely removed there). Mirrors merge-ort's `dirs_removed` precondition.
5545fn directory_fully_removed(dir: &[u8], base_map: &MergeEntryMap, side_map: &MergeEntryMap) -> bool {
5546    let mut prefix = dir.to_vec();
5547    prefix.push(b'/');
5548    for path in base_map.keys() {
5549        if path.starts_with(&prefix) && side_map.contains_key(path) {
5550            return false;
5551        }
5552    }
5553    true
5554}
5555
5556/// Re-home files added on one side under a directory the OTHER side renamed.
5557///
5558/// For each path that is newly added on a side (present there, absent in base)
5559/// and absent on the other side, if it lives under a directory the *other* side
5560/// renamed `old_dir -> new_dir`, move it to `new_dir/...`. Returns the rewritten
5561/// ours/theirs maps plus the set of re-homed destination paths (for `=conflict`
5562/// reporting) and any path that landed under a split directory.
5563fn apply_directory_renames(
5564    base_map: &MergeEntryMap,
5565    ours_map: &MergeEntryMap,
5566    theirs_map: &MergeEntryMap,
5567    dir_renames: &DirectoryRenameMaps,
5568) -> (MergeEntryMap, MergeEntryMap, BTreeSet<Vec<u8>>) {
5569    let mut ours = ours_map.clone();
5570    let mut theirs = theirs_map.clone();
5571    let mut rehomed = BTreeSet::new();
5572
5573    // Files added on theirs follow OURS' directory renames (ours renamed the
5574    // directory, so theirs' additions under the old directory re-home into the
5575    // new one) — and symmetrically for ours' additions following theirs' renames.
5576    rehome_side(
5577        base_map,
5578        &mut theirs,
5579        &dir_renames.ours,
5580        &dir_renames.split_dirs,
5581        &mut rehomed,
5582    );
5583    rehome_side(
5584        base_map,
5585        &mut ours,
5586        &dir_renames.theirs,
5587        &dir_renames.split_dirs,
5588        &mut rehomed,
5589    );
5590
5591    (ours, theirs, rehomed)
5592}
5593
5594/// Re-home freshly-added paths in `target` that live under a directory the OTHER
5595/// side renamed (`renamer_dirs`: `old_dir -> new_dir`). "Added" means present in
5596/// `target` but absent in `base_map`. Paths under a "split" directory (no clear
5597/// destination) are left in place. Re-homed destinations are recorded in
5598/// `rehomed`.
5599fn rehome_side(
5600    base_map: &MergeEntryMap,
5601    target: &mut MergeEntryMap,
5602    renamer_dirs: &BTreeMap<Vec<u8>, Vec<u8>>,
5603    split_dirs: &BTreeSet<Vec<u8>>,
5604    rehomed: &mut BTreeSet<Vec<u8>>,
5605) {
5606    if renamer_dirs.is_empty() {
5607        return;
5608    }
5609    // Snapshot the added paths first; we mutate `target` while iterating.
5610    let candidates: Vec<Vec<u8>> = target
5611        .keys()
5612        .filter(|p| !base_map.contains_key(*p))
5613        .cloned()
5614        .collect();
5615    for path in candidates {
5616        // A file under a "split" directory has no clear destination; leave it.
5617        if let Some(dir) = parent_dir(&path)
5618            && split_dirs.contains(dir)
5619        {
5620            continue;
5621        }
5622        let Some((old_dir, new_dir)) = check_dir_renamed(&path, renamer_dirs) else {
5623            continue;
5624        };
5625        let dest = apply_dir_rename(old_dir, new_dir, &path);
5626        if dest == path {
5627            continue;
5628        }
5629        if let Some(entry) = target.remove(&path) {
5630            target.entry(dest.clone()).or_insert(entry);
5631            rehomed.insert(dest);
5632        }
5633    }
5634}
5635
5636/// Build a path-qualified conflict-marker label `"<label>:<path>"`, as git does
5637/// for renamed files (so the two sides of a conflict name their distinct paths).
5638fn qualify_label(label: &str, path: &[u8]) -> String {
5639    format!("{label}:{}", String::from_utf8_lossy(path))
5640}
5641
5642/// Adapt a flat `path -> (mode, oid)` map into the `TrackedEntry` map the
5643/// name-status diff core consumes.
5644fn entry_map_as_tracked(map: &MergeEntryMap) -> BTreeMap<Vec<u8>, TrackedEntry> {
5645    map.iter()
5646        .map(|(path, (mode, oid))| {
5647            (
5648                path.clone(),
5649                TrackedEntry {
5650                    mode: *mode,
5651                    oid: *oid,
5652                },
5653            )
5654        })
5655        .collect()
5656}
5657
5658#[cfg(test)]
5659mod tests {
5660    use super::*;
5661    use sley_formats::RepositoryLayout;
5662    use sley_object::TreeEntry;
5663    use sley_odb::ObjectWriter;
5664    use std::path::PathBuf;
5665    use std::sync::atomic::{AtomicU64, Ordering};
5666
5667    static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
5668
5669    #[test]
5670    fn name_status_reports_added_from_index() {
5671        let root = temp_root();
5672        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
5673            .expect("test operation should succeed");
5674        let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
5675        let oid = db
5676            .write_object(EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec()))
5677            .expect("test operation should succeed");
5678        let index = Index {
5679            version: 2,
5680            entries: vec![sley_index::IndexEntry {
5681                ctime_seconds: 0,
5682                ctime_nanoseconds: 0,
5683                mtime_seconds: 0,
5684                mtime_nanoseconds: 0,
5685                dev: 0,
5686                ino: 0,
5687                mode: 0o100644,
5688                uid: 0,
5689                gid: 0,
5690                size: 6,
5691                oid,
5692                flags: "hello.txt".len() as u16,
5693                flags_extended: 0,
5694                path: BString::from(b"hello.txt"),
5695            }],
5696            extensions: Vec::new(),
5697            checksum: None,
5698        };
5699        fs::write(
5700            layout.git_dir.join("index"),
5701            index
5702                .write_v2_sha1()
5703                .expect("test operation should succeed"),
5704        )
5705        .expect("test operation should succeed");
5706        fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
5707        let changes = diff_name_status_head_worktree(&root, &layout.git_dir, ObjectFormat::Sha1)
5708            .expect("test operation should succeed");
5709        assert_eq!(changes[0].line(), "A\thello.txt");
5710        fs::remove_dir_all(root).expect("test operation should succeed");
5711    }
5712
5713    #[test]
5714    fn index_worktree_diff_returns_staged_gitlinks() {
5715        let root = temp_root();
5716        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
5717            .expect("test operation should succeed");
5718        let oid = ObjectId::from_hex(
5719            ObjectFormat::Sha1,
5720            "1111111111111111111111111111111111111111",
5721        )
5722        .expect("test operation should succeed");
5723        let index = Index {
5724            version: 2,
5725            entries: vec![sley_index::IndexEntry {
5726                ctime_seconds: 0,
5727                ctime_nanoseconds: 0,
5728                mtime_seconds: 0,
5729                mtime_nanoseconds: 0,
5730                dev: 0,
5731                ino: 0,
5732                mode: 0o160000,
5733                uid: 0,
5734                gid: 0,
5735                size: 0,
5736                oid,
5737                flags: "deps/sub".len() as u16,
5738                flags_extended: 0,
5739                path: BString::from(b"deps/sub"),
5740            }],
5741            extensions: Vec::new(),
5742            checksum: None,
5743        };
5744        fs::write(
5745            layout.git_dir.join("index"),
5746            index
5747                .write_v2_sha1()
5748                .expect("test operation should succeed"),
5749        )
5750        .expect("test operation should succeed");
5751
5752        let diff = diff_name_status_index_worktree_with_options_and_gitlinks(
5753            &root,
5754            &layout.git_dir,
5755            ObjectFormat::Sha1,
5756            DiffNameStatusOptions::default(),
5757        )
5758        .expect("test operation should succeed");
5759
5760        assert_eq!(diff.entries.len(), 1);
5761        let gitlinks = diff.staged_gitlinks;
5762        assert_eq!(gitlinks.len(), 1);
5763        assert_eq!(gitlinks[0].path.as_bytes(), b"deps/sub");
5764        assert_eq!(gitlinks[0].oid, oid);
5765        fs::remove_dir_all(root).expect("test operation should succeed");
5766    }
5767
5768    #[cfg(unix)]
5769    #[test]
5770    fn index_worktree_diff_ignores_untracked_dangling_symlink() {
5771        use std::os::unix::fs::symlink;
5772
5773        let root = temp_root();
5774        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
5775            .expect("test operation should succeed");
5776        let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
5777        let oid = db
5778            .write_object(EncodedObject::new(ObjectType::Blob, b"clean\n".to_vec()))
5779            .expect("test operation should succeed");
5780        let index = Index {
5781            version: 2,
5782            entries: vec![sley_index::IndexEntry {
5783                ctime_seconds: 0,
5784                ctime_nanoseconds: 0,
5785                mtime_seconds: 0,
5786                mtime_nanoseconds: 0,
5787                dev: 0,
5788                ino: 0,
5789                mode: 0o100644,
5790                uid: 0,
5791                gid: 0,
5792                size: 6,
5793                oid,
5794                flags: "tracked.txt".len() as u16,
5795                flags_extended: 0,
5796                path: BString::from(b"tracked.txt"),
5797            }],
5798            extensions: Vec::new(),
5799            checksum: None,
5800        };
5801        fs::write(
5802            layout.git_dir.join("index"),
5803            index
5804                .write_v2_sha1()
5805                .expect("test operation should succeed"),
5806        )
5807        .expect("test operation should succeed");
5808        fs::write(root.join("tracked.txt"), b"clean\n").expect("test operation should succeed");
5809        symlink("missing-target", root.join("untracked-link"))
5810            .expect("test operation should succeed");
5811
5812        let changes = diff_name_status_index_worktree_with_options(
5813            &root,
5814            &layout.git_dir,
5815            ObjectFormat::Sha1,
5816            DiffNameStatusOptions {
5817                detect_renames: false,
5818                detect_copies: false,
5819                find_copies_harder: false,
5820                rename_empty: true,
5821            },
5822        )
5823        .expect("untracked dangling symlink should be ignored");
5824        assert!(changes.is_empty());
5825        fs::remove_dir_all(root).expect("test operation should succeed");
5826    }
5827
5828    #[test]
5829    fn index_worktree_diff_trusts_non_racy_stat_cache() {
5830        let root = temp_root();
5831        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
5832            .expect("test operation should succeed");
5833        let worktree_path = root.join("tracked.txt");
5834        fs::write(&worktree_path, b"clean\n").expect("test operation should succeed");
5835        let metadata = fs::symlink_metadata(&worktree_path).expect("test operation should succeed");
5836        let (mtime_seconds, mtime_nanoseconds) =
5837            sley_index::file_mtime_parts(&metadata).expect("test operation should succeed");
5838        let bogus_oid = ObjectId::from_hex(
5839            ObjectFormat::Sha1,
5840            "1111111111111111111111111111111111111111",
5841        )
5842        .expect("test operation should succeed");
5843        let index = Index {
5844            version: 2,
5845            entries: vec![sley_index::IndexEntry {
5846                ctime_seconds: 0,
5847                ctime_nanoseconds: 0,
5848                mtime_seconds: mtime_seconds as u32,
5849                mtime_nanoseconds: mtime_nanoseconds as u32,
5850                dev: 0,
5851                ino: 0,
5852                mode: sley_index::worktree_metadata_mode(&metadata),
5853                uid: 0,
5854                gid: 0,
5855                size: metadata.len() as u32,
5856                oid: bogus_oid,
5857                flags: "tracked.txt".len() as u16,
5858                flags_extended: 0,
5859                path: BString::from(b"tracked.txt"),
5860            }],
5861            extensions: Vec::new(),
5862            checksum: None,
5863        };
5864        std::thread::sleep(std::time::Duration::from_millis(1100));
5865        fs::write(
5866            layout.git_dir.join("index"),
5867            index
5868                .write_v2_sha1()
5869                .expect("test operation should succeed"),
5870        )
5871        .expect("test operation should succeed");
5872
5873        let changes = diff_name_status_index_worktree(&root, &layout.git_dir, ObjectFormat::Sha1)
5874            .expect("test operation should succeed");
5875        assert!(
5876            changes.is_empty(),
5877            "a clean non-racy stat match must reuse the cached index oid"
5878        );
5879        fs::remove_dir_all(root).expect("test operation should succeed");
5880    }
5881
5882    fn temp_root() -> PathBuf {
5883        let path = std::env::temp_dir().join(format!(
5884            "sley-diff-{}-{}",
5885            std::process::id(),
5886            TEMP_COUNTER.fetch_add(1, Ordering::Relaxed)
5887        ));
5888        fs::create_dir_all(&path).expect("test operation should succeed");
5889        path
5890    }
5891
5892    // ---- line diff / blob merge tests ---------------------------------------
5893
5894    fn merge_opts() -> MergeBlobOptions<'static> {
5895        MergeBlobOptions {
5896            ours_label: "ours",
5897            theirs_label: "theirs",
5898            base_label: "base",
5899            style: ConflictStyle::Merge,
5900        }
5901    }
5902
5903    #[test]
5904    fn split_lines_preserves_content_and_newlines() {
5905        let lines = split_lines(b"a\nb\nc\n");
5906        assert_eq!(lines.len(), 3);
5907        assert_eq!(lines[0].content, b"a\n");
5908        assert!(lines[0].has_newline);
5909        assert_eq!(lines[2].content, b"c\n");
5910        assert!(lines[2].has_newline);
5911        assert!(split_lines(b"").is_empty());
5912    }
5913
5914    #[test]
5915    fn split_lines_tracks_missing_final_newline() {
5916        let lines = split_lines(b"a\nb");
5917        assert_eq!(lines.len(), 2);
5918        assert!(lines[0].has_newline);
5919        assert!(!lines[1].has_newline);
5920        assert_eq!(lines[1].content, b"b");
5921        assert_eq!(lines[1].bytes_without_newline(), b"b");
5922        // A line that lost its newline must not compare equal to one that has it.
5923        let with_nl = split_lines(b"b\n");
5924        assert_ne!(lines[1], with_nl[0]);
5925    }
5926
5927    #[test]
5928    fn myers_replace_single_line() {
5929        let old = split_lines(b"a\nb\nc\n");
5930        let new = split_lines(b"a\nx\nc\n");
5931        assert_eq!(
5932            myers_diff_lines(&old, &new),
5933            vec![
5934                DiffOp::Equal(1),
5935                DiffOp::Delete(1),
5936                DiffOp::Insert(1),
5937                DiffOp::Equal(1),
5938            ]
5939        );
5940    }
5941
5942    #[test]
5943    fn myers_identical_is_single_equal() {
5944        let old = split_lines(b"a\nb\nc\n");
5945        let new = split_lines(b"a\nb\nc\n");
5946        assert_eq!(myers_diff_lines(&old, &new), vec![DiffOp::Equal(3)]);
5947    }
5948
5949    #[test]
5950    fn myers_pure_insert_and_delete() {
5951        let empty = split_lines(b"");
5952        let two = split_lines(b"a\nb\n");
5953        assert_eq!(myers_diff_lines(&empty, &two), vec![DiffOp::Insert(2)]);
5954        assert_eq!(myers_diff_lines(&two, &empty), vec![DiffOp::Delete(2)]);
5955
5956        let old = split_lines(b"a\nb\nc\nd\n");
5957        let new = split_lines(b"a\nc\nd\n");
5958        assert_eq!(
5959            myers_diff_lines(&old, &new),
5960            vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Equal(2)]
5961        );
5962    }
5963
5964    #[test]
5965    fn myers_reconstructs_new_and_is_minimal() {
5966        // Apply the script to `old` and confirm it yields `new`; also count edits.
5967        let old = split_lines(b"the\nquick\nbrown\nfox\n");
5968        let new = split_lines(b"the\nlazy\nbrown\ncat\n");
5969        let ops = myers_diff_lines(&old, &new);
5970        let mut oi = 0usize;
5971        let mut ni = 0usize;
5972        let mut edits = 0usize;
5973        let mut rebuilt: Vec<u8> = Vec::new();
5974        for op in &ops {
5975            match *op {
5976                DiffOp::Equal(n) => {
5977                    for _ in 0..n {
5978                        assert_eq!(old[oi], new[ni]);
5979                        rebuilt.extend_from_slice(old[oi].content);
5980                        oi += 1;
5981                        ni += 1;
5982                    }
5983                }
5984                DiffOp::Delete(n) => {
5985                    oi += n;
5986                    edits += n;
5987                }
5988                DiffOp::Insert(n) => {
5989                    for _ in 0..n {
5990                        rebuilt.extend_from_slice(new[ni].content);
5991                        ni += 1;
5992                    }
5993                    edits += n;
5994                }
5995            }
5996        }
5997        assert_eq!(rebuilt, b"the\nlazy\nbrown\ncat\n");
5998        // Two lines changed -> 2 deletes + 2 inserts is the minimal SES here.
5999        assert_eq!(edits, 4);
6000    }
6001
6002    #[test]
6003    fn merge_non_overlapping_changes_is_clean() {
6004        let base = b"a\nb\nc\nd\ne\n";
6005        let ours = b"A\nb\nc\nd\ne\n";
6006        let theirs = b"a\nb\nc\nd\nE\n";
6007        let result = merge_blobs(base, ours, theirs, &merge_opts());
6008        assert!(!result.conflicted);
6009        assert_eq!(result.content, b"A\nb\nc\nd\nE\n");
6010    }
6011
6012    #[test]
6013    fn merge_identical_changes_no_conflict() {
6014        let base = b"a\nb\nc\n";
6015        let ours = b"a\nX\nc\n";
6016        let theirs = b"a\nX\nc\n";
6017        let result = merge_blobs(base, ours, theirs, &merge_opts());
6018        assert!(!result.conflicted);
6019        assert_eq!(result.content, b"a\nX\nc\n");
6020    }
6021
6022    #[test]
6023    fn merge_overlapping_change_emits_exact_markers() {
6024        let base = b"a\nb\nc\n";
6025        let ours = b"a\nOURS\nc\n";
6026        let theirs = b"a\nTHEIRS\nc\n";
6027        let result = merge_blobs(base, ours, theirs, &merge_opts());
6028        assert!(result.conflicted);
6029        assert_eq!(
6030            result.content,
6031            b"a\n<<<<<<< ours\nOURS\n=======\nTHEIRS\n>>>>>>> theirs\nc\n".to_vec(),
6032        );
6033    }
6034
6035    #[test]
6036    fn merge_diff3_style_includes_base_section() {
6037        let base = b"a\nb\nc\n";
6038        let ours = b"a\nOURS\nc\n";
6039        let theirs = b"a\nTHEIRS\nc\n";
6040        let options = MergeBlobOptions {
6041            style: ConflictStyle::Diff3,
6042            ..merge_opts()
6043        };
6044        let result = merge_blobs(base, ours, theirs, &options);
6045        assert!(result.conflicted);
6046        assert_eq!(
6047            result.content,
6048            b"a\n<<<<<<< ours\nOURS\n||||||| base\nb\n=======\nTHEIRS\n>>>>>>> theirs\nc\n"
6049                .to_vec(),
6050        );
6051    }
6052
6053    #[test]
6054    fn merge_empty_label_omits_trailing_space() {
6055        let base = b"a\nb\nc\n";
6056        let ours = b"a\nOURS\nc\n";
6057        let theirs = b"a\nTHEIRS\nc\n";
6058        let options = MergeBlobOptions {
6059            ours_label: "",
6060            theirs_label: "",
6061            base_label: "",
6062            style: ConflictStyle::Merge,
6063        };
6064        let result = merge_blobs(base, ours, theirs, &options);
6065        assert!(result.conflicted);
6066        // No trailing space after the 7 marker chars when the label is empty.
6067        assert_eq!(
6068            result.content,
6069            b"a\n<<<<<<<\nOURS\n=======\nTHEIRS\n>>>>>>>\nc\n".to_vec(),
6070        );
6071    }
6072
6073    #[test]
6074    fn merge_add_add_empty_base_conflicts() {
6075        let result = merge_blobs(b"", b"x\ny\n", b"p\nq\n", &merge_opts());
6076        assert!(result.conflicted);
6077        assert_eq!(
6078            result.content,
6079            b"<<<<<<< ours\nx\ny\n=======\np\nq\n>>>>>>> theirs\n".to_vec(),
6080        );
6081    }
6082
6083    #[test]
6084    fn merge_add_add_empty_base_identical_is_clean() {
6085        let result = merge_blobs(b"", b"x\ny\n", b"x\ny\n", &merge_opts());
6086        assert!(!result.conflicted);
6087        assert_eq!(result.content, b"x\ny\n");
6088    }
6089
6090    #[test]
6091    fn merge_deletion_one_side_takes_deletion() {
6092        // ours deletes line b; theirs leaves it -> clean, deletion wins.
6093        let result = merge_blobs(b"a\nb\nc\n", b"a\nc\n", b"a\nb\nc\n", &merge_opts());
6094        assert!(!result.conflicted);
6095        assert_eq!(result.content, b"a\nc\n");
6096    }
6097
6098    #[test]
6099    fn merge_deletion_vs_modification_conflicts() {
6100        // ours deletes b; theirs modifies b -> conflict.
6101        let result = merge_blobs(b"a\nb\nc\n", b"a\nc\n", b"a\nB!\nc\n", &merge_opts());
6102        assert!(result.conflicted);
6103        // ours side of the conflict is empty (the line was deleted).
6104        assert_eq!(
6105            result.content,
6106            b"a\n<<<<<<< ours\n=======\nB!\n>>>>>>> theirs\nc\n".to_vec(),
6107        );
6108    }
6109
6110    #[test]
6111    fn merge_missing_final_newline_marker_starts_on_own_line() {
6112        // Both sides drop the trailing newline AND conflict at the end. The
6113        // closing marker section must still begin on its own line.
6114        let base = b"a\nb";
6115        let ours = b"a\nOURS";
6116        let theirs = b"a\nTHEIRS";
6117        let result = merge_blobs(base, ours, theirs, &merge_opts());
6118        assert!(result.conflicted);
6119        assert_eq!(
6120            result.content,
6121            b"a\n<<<<<<< ours\nOURS\n=======\nTHEIRS\n>>>>>>> theirs\n".to_vec(),
6122        );
6123    }
6124
6125    #[test]
6126    fn merge_clean_preserves_missing_final_newline() {
6127        // ours removes the trailing newline; theirs is unchanged -> ours wins,
6128        // and the result keeps the missing newline.
6129        let result = merge_blobs(b"a\nb\n", b"a\nb", b"a\nb\n", &merge_opts());
6130        assert!(!result.conflicted);
6131        assert_eq!(result.content, b"a\nb");
6132    }
6133
6134    #[test]
6135    fn merge_both_append_identical_tail_is_clean() {
6136        let result = merge_blobs(b"a\n", b"a\nz\n", b"a\nz\n", &merge_opts());
6137        assert!(!result.conflicted);
6138        assert_eq!(result.content, b"a\nz\n");
6139    }
6140
6141    #[test]
6142    fn merge_when_ours_equals_base_yields_theirs() {
6143        // Regression: a side that did not change must not suppress the other
6144        // side's edits anywhere in the file.
6145        let base = b"b\na\n";
6146        let theirs = b"b\nb\nc\na\nc\n";
6147        let result = merge_blobs(base, base, theirs, &merge_opts());
6148        assert!(!result.conflicted);
6149        assert_eq!(result.content, theirs.to_vec());
6150    }
6151    fn applied(outcome: ApplyOutcome) -> Vec<u8> {
6152        match outcome {
6153            ApplyOutcome::Applied(bytes) => bytes,
6154            ApplyOutcome::Rejected => panic!("expected Applied, got Rejected"),
6155        }
6156    }
6157
6158    #[test]
6159    fn parse_multi_file_patch() {
6160        let patch = b"\
6161diff --git a/one.txt b/one.txt
6162index aaaaaaa..bbbbbbb 100644
6163--- a/one.txt
6164+++ b/one.txt
6165@@ -1,3 +1,3 @@
6166 alpha
6167-beta
6168+BETA
6169 gamma
6170diff --git a/two.txt b/two.txt
6171index ccccccc..ddddddd 100644
6172--- a/two.txt
6173+++ b/two.txt
6174@@ -1,2 +1,3 @@
6175 first
6176+inserted
6177 second
6178";
6179        let patches = parse_unified_patch(patch).expect("test operation should succeed");
6180        assert_eq!(patches.len(), 2);
6181
6182        assert_eq!(patches[0].old_path.as_deref(), Some(b"one.txt".as_slice()));
6183        assert_eq!(patches[0].new_path.as_deref(), Some(b"one.txt".as_slice()));
6184        assert_eq!(patches[0].old_mode, None);
6185        assert_eq!(patches[0].hunks.len(), 1);
6186        let h = &patches[0].hunks[0];
6187        assert_eq!(
6188            (h.old_start, h.old_len, h.new_start, h.new_len),
6189            (1, 3, 1, 3)
6190        );
6191        assert_eq!(
6192            h.lines,
6193            vec![
6194                HunkLine::Context(b"alpha".to_vec()),
6195                HunkLine::Delete(b"beta".to_vec()),
6196                HunkLine::Insert(b"BETA".to_vec()),
6197                HunkLine::Context(b"gamma".to_vec()),
6198            ]
6199        );
6200
6201        assert_eq!(patches[1].new_path.as_deref(), Some(b"two.txt".as_slice()));
6202        assert_eq!(patches[1].hunks[0].new_len, 3);
6203    }
6204
6205    #[test]
6206    fn parse_default_hunk_range_length() {
6207        // `@@ -1 +1,2 @@` (no comma) means a length of 1 on the old side.
6208        let patch = b"\
6209--- a/x
6210+++ b/x
6211@@ -1 +1,2 @@
6212 line
6213+added
6214";
6215        let patches = parse_unified_patch(patch).expect("test operation should succeed");
6216        let h = &patches[0].hunks[0];
6217        assert_eq!(
6218            (h.old_start, h.old_len, h.new_start, h.new_len),
6219            (1, 1, 1, 2)
6220        );
6221    }
6222
6223    #[test]
6224    fn parse_hunk_header_before_file_errors() {
6225        let patch = b"@@ -1,1 +1,1 @@\n context\n";
6226        assert!(parse_unified_patch(patch).is_err());
6227    }
6228
6229    #[test]
6230    fn parse_mismatched_counts_errors() {
6231        // Header promises two old lines but only one is present.
6232        let patch = b"--- a/x\n+++ b/x\n@@ -1,2 +1,2 @@\n only\n+new\n";
6233        assert!(parse_unified_patch(patch).is_err());
6234    }
6235
6236    #[test]
6237    fn apply_clean_hunk() {
6238        let base = b"alpha\nbeta\ngamma\n";
6239        let patch = parse_unified_patch(
6240            b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
6241        )
6242        .expect("test operation should succeed");
6243        let out = applied(apply_file_patch(base, &patch[0]));
6244        assert_eq!(out, b"alpha\nBETA\ngamma\n");
6245    }
6246
6247    #[test]
6248    fn apply_with_line_offset() {
6249        // The hunk's recorded position (line 2) is a couple of lines above where
6250        // the matching context actually lives (line 4); the outward search must
6251        // find it. The hunk is NOT anchored at the file start (old_start > 1, so
6252        // no match_beginning) and has trailing context (`tail`, so no
6253        // match_end), which is exactly the shape a real drifted patch takes —
6254        // verified against `git apply` ("Hunk #1 succeeded at 4 (offset 2)").
6255        let base = b"pre1\npre2\npre3\nalpha\nbeta\ngamma\ntail\n";
6256        let patch = parse_unified_patch(
6257            b"--- a/x\n+++ b/x\n@@ -2,4 +2,4 @@\n alpha\n-beta\n+BETA\n gamma\n tail\n",
6258        )
6259        .expect("test operation should succeed");
6260        let out = applied(apply_file_patch(base, &patch[0]));
6261        assert_eq!(out, b"pre1\npre2\npre3\nalpha\nBETA\ngamma\ntail\n");
6262    }
6263
6264    #[test]
6265    fn apply_with_negative_line_offset() {
6266        // Recorded position is well past the real location; search backward.
6267        let base = b"alpha\nbeta\ngamma\n";
6268        let patch = parse_unified_patch(
6269            b"--- a/x\n+++ b/x\n@@ -50,3 +50,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
6270        )
6271        .expect("test operation should succeed");
6272        let out = applied(apply_file_patch(base, &patch[0]));
6273        assert_eq!(out, b"alpha\nBETA\ngamma\n");
6274    }
6275
6276    #[test]
6277    fn apply_multiple_hunks() {
6278        let base = b"a\nb\nc\nd\ne\nf\ng\nh\n";
6279        let patch = parse_unified_patch(
6280            b"--- a/x\n+++ b/x\n\
6281@@ -1,3 +1,3 @@\n a\n-b\n+B\n c\n\
6282@@ -6,3 +6,3 @@\n f\n-g\n+G\n h\n",
6283        )
6284        .expect("test operation should succeed");
6285        let out = applied(apply_file_patch(base, &patch[0]));
6286        assert_eq!(out, b"a\nB\nc\nd\ne\nf\nG\nh\n");
6287    }
6288
6289    #[test]
6290    fn reject_on_context_mismatch() {
6291        let base = b"alpha\nDIFFERENT\ngamma\n";
6292        let patch = parse_unified_patch(
6293            b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
6294        )
6295        .expect("test operation should succeed");
6296        assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
6297    }
6298
6299    #[test]
6300    fn reject_when_match_end_required_but_not_at_eof() {
6301        // git's `apply.c`: a hunk with NO trailing context must match the END of
6302        // the file (`match_end`). Here the leading context (`tail`/`anchor`)
6303        // matches at the middle of the base, but there are further lines after
6304        // it, so the preimage does not reach EOF. git rejects this; the old
6305        // sley matcher wrongly applied it (duplicating the appended block). This
6306        // is the t4150-am cell-34 lever: rejection forces `am -3`'s 3-way path.
6307        let base = b"one\ntwo\nanchor\nalready\nappended\n";
6308        // Hunk: context `anchor`, then append `added1`/`added2`. No trailing
6309        // context => match_end. At line 3 (`anchor`) the preimage is just one
6310        // line and does not reach EOF, so it must be rejected.
6311        let patch = parse_unified_patch(
6312            b"--- a/x\n+++ b/x\n@@ -3,1 +3,3 @@\n anchor\n+added1\n+added2\n",
6313        )
6314        .expect("test operation should succeed");
6315        assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
6316    }
6317
6318    #[test]
6319    fn append_at_eof_matches_when_context_reaches_end() {
6320        // The mirror of the rejection case: the same shape applies cleanly when
6321        // the matching context IS the last line of the file (preimage reaches
6322        // EOF), so `match_end` is satisfied.
6323        let base = b"one\ntwo\nanchor\n";
6324        let patch = parse_unified_patch(
6325            b"--- a/x\n+++ b/x\n@@ -3,1 +3,3 @@\n anchor\n+added1\n+added2\n",
6326        )
6327        .expect("test operation should succeed");
6328        let out = applied(apply_file_patch(base, &patch[0]));
6329        assert_eq!(out, b"one\ntwo\nanchor\nadded1\nadded2\n");
6330    }
6331
6332    #[test]
6333    fn reject_when_match_beginning_required_but_not_at_start() {
6334        // A hunk anchored at line 1 (`old_start <= 1`) must match the START of
6335        // the file (`match_beginning`). If the matching context only appears
6336        // later, git rejects rather than wandering to it.
6337        let base = b"junk\nalpha\nbeta\ngamma\n";
6338        let patch = parse_unified_patch(
6339            b"--- a/x\n+++ b/x\n@@ -1,2 +1,3 @@\n alpha\n+INSERT\n beta\n",
6340        )
6341        .expect("test operation should succeed");
6342        assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
6343    }
6344
6345    #[test]
6346    fn no_default_fuzz_rejects_on_trailing_context_mismatch() {
6347        // `git apply` / `git am` keep `p_context = UINT_MAX` by default, so they
6348        // do NOT fuzz a hunk in by dropping context. Here the trailing context
6349        // line (`gamma`) differs from the base (`DIVERGED`), and because the
6350        // anchor is line 1 the hunk must match the beginning with its FULL
6351        // preimage. Verified against real `git apply`: this is rejected.
6352        let base = b"alpha\nbeta\nDIVERGED\n";
6353        let patch = parse_unified_patch(
6354            b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n alpha\n-beta\n+BETA\n gamma\n",
6355        )
6356        .expect("test operation should succeed");
6357        assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
6358    }
6359
6360    #[test]
6361    fn parse_and_apply_new_file() {
6362        let patch = parse_unified_patch(
6363            b"\
6364diff --git a/new.txt b/new.txt
6365new file mode 100644
6366index 0000000..1111111
6367--- /dev/null
6368+++ b/new.txt
6369@@ -0,0 +1,2 @@
6370+hello
6371+world
6372",
6373        )
6374        .expect("test operation should succeed");
6375        assert!(patches_first_is_new(&patch));
6376        assert_eq!(patch[0].old_path, None);
6377        assert_eq!(patch[0].new_path.as_deref(), Some(b"new.txt".as_slice()));
6378        assert_eq!(patch[0].new_mode, Some(0o100644));
6379        // Base is ignored for a new file.
6380        let out = applied(apply_file_patch(b"garbage that is ignored", &patch[0]));
6381        assert_eq!(out, b"hello\nworld\n");
6382    }
6383
6384    fn patches_first_is_new(patches: &[FilePatch]) -> bool {
6385        patches.first().map(|p| p.is_new).unwrap_or(false)
6386    }
6387
6388    #[test]
6389    fn parse_and_apply_delete_file() {
6390        let patch = parse_unified_patch(
6391            b"\
6392diff --git a/gone.txt b/gone.txt
6393deleted file mode 100644
6394index 1111111..0000000
6395--- a/gone.txt
6396+++ /dev/null
6397@@ -1,2 +0,0 @@
6398-hello
6399-world
6400",
6401        )
6402        .expect("test operation should succeed");
6403        assert!(patch[0].is_delete);
6404        assert_eq!(patch[0].old_path.as_deref(), Some(b"gone.txt".as_slice()));
6405        assert_eq!(patch[0].new_path, None);
6406        assert_eq!(patch[0].old_mode, Some(0o100644));
6407        let out = applied(apply_file_patch(b"hello\nworld\n", &patch[0]));
6408        assert_eq!(out, b"");
6409    }
6410
6411    #[test]
6412    fn parse_rename_headers() {
6413        let patch = parse_unified_patch(
6414            b"\
6415diff --git a/old/name.txt b/new/name.txt
6416similarity index 100%
6417rename from old/name.txt
6418rename to new/name.txt
6419",
6420        )
6421        .expect("test operation should succeed");
6422        assert!(patch[0].is_rename);
6423        assert_eq!(
6424            patch[0].old_path.as_deref(),
6425            Some(b"old/name.txt".as_slice())
6426        );
6427        assert_eq!(
6428            patch[0].new_path.as_deref(),
6429            Some(b"new/name.txt".as_slice())
6430        );
6431        assert!(patch[0].hunks.is_empty());
6432    }
6433
6434    #[test]
6435    fn parse_mode_change_headers() {
6436        let patch = parse_unified_patch(
6437            b"\
6438diff --git a/script.sh b/script.sh
6439old mode 100644
6440new mode 100755
6441",
6442        )
6443        .expect("test operation should succeed");
6444        assert_eq!(patch[0].old_mode, Some(0o100644));
6445        assert_eq!(patch[0].new_mode, Some(0o100755));
6446        assert!(!patch[0].is_new);
6447        assert!(!patch[0].is_delete);
6448    }
6449
6450    #[test]
6451    fn no_final_newline_base_preserved_when_untouched() {
6452        // The change is on line 1; the final line has no newline and is not
6453        // modified, so its no-newline state must survive. This uses the patch
6454        // shape real `git diff` emits for such a change — `@@ -1,3 +1,3 @@` with
6455        // the two unchanged lines as trailing context (the `\ No newline`
6456        // marker rides the last context line). A hand-rolled `@@ -1,1 +1,1 @@`
6457        // with NO trailing context would (correctly) be rejected by git, since
6458        // a no-trailing-context hunk anchored at line 1 must span the whole
6459        // file (`match_beginning` && `match_end`).
6460        let base = b"alpha\nbeta\nnotail"; // "notail" has no trailing \n
6461        let patch = parse_unified_patch(
6462            b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n-alpha\n+ALPHA\n beta\n notail\n\\ No newline at end of file\n",
6463        )
6464        .expect("test operation should succeed");
6465        let out = applied(apply_file_patch(base, &patch[0]));
6466        assert_eq!(out, b"ALPHA\nbeta\nnotail");
6467    }
6468
6469    #[test]
6470    fn no_final_newline_added_by_patch() {
6471        // Old file ends with a newline; patch rewrites the last line to one
6472        // without a trailing newline.
6473        let base = b"alpha\nbeta\n";
6474        let patch = parse_unified_patch(
6475            b"--- a/x\n+++ b/x\n@@ -2,1 +2,1 @@\n-beta\n+beta-notail\n\\ No newline at end of file\n",
6476        )
6477        .expect("test operation should succeed");
6478        assert!(patch[0].hunks[0].new_no_newline);
6479        assert!(!patch[0].hunks[0].old_no_newline);
6480        let out = applied(apply_file_patch(base, &patch[0]));
6481        assert_eq!(out, b"alpha\nbeta-notail");
6482    }
6483
6484    #[test]
6485    fn no_final_newline_in_base_matched_and_kept() {
6486        // Both sides lack a trailing newline; context match must require the
6487        // base's final line to itself be newline-free.
6488        let base = b"alpha\nbeta"; // no trailing newline
6489        let patch = parse_unified_patch(
6490            b"--- a/x\n+++ b/x\n@@ -1,2 +1,2 @@\n-alpha\n+ALPHA\n beta\n\\ No newline at end of file\n",
6491        )
6492        .expect("test operation should succeed");
6493        assert!(patch[0].hunks[0].old_no_newline);
6494        assert!(patch[0].hunks[0].new_no_newline);
6495        let out = applied(apply_file_patch(base, &patch[0]));
6496        assert_eq!(out, b"ALPHA\nbeta");
6497    }
6498
6499    #[test]
6500    fn no_final_newline_mismatch_rejected() {
6501        // Patch asserts the old file has no trailing newline, but the base does.
6502        // That must be rejected rather than silently mis-applied.
6503        let base = b"alpha\nbeta\n"; // HAS trailing newline
6504        let patch = parse_unified_patch(
6505            b"--- a/x\n+++ b/x\n@@ -2,1 +2,1 @@\n-beta\n\\ No newline at end of file\n+beta2\n",
6506        )
6507        .expect("test operation should succeed");
6508        assert!(patch[0].hunks[0].old_no_newline);
6509        assert_eq!(apply_file_patch(base, &patch[0]), ApplyOutcome::Rejected);
6510    }
6511
6512    #[test]
6513    fn delete_with_no_final_newline() {
6514        // Deleting the entire content of a file that had no trailing newline.
6515        let base = b"only line no newline";
6516        let patch = parse_unified_patch(
6517            b"--- a/x\n+++ /dev/null\n@@ -1,1 +0,0 @@\n-only line no newline\n\\ No newline at end of file\n",
6518        )
6519        .expect("test operation should succeed");
6520        assert!(patch[0].is_delete);
6521        let out = applied(apply_file_patch(base, &patch[0]));
6522        assert_eq!(out, b"");
6523    }
6524
6525    #[test]
6526    fn apply_pure_insertion_hunk() {
6527        let base = b"first\nsecond\n";
6528        let patch =
6529            parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,2 +1,3 @@\n first\n+middle\n second\n")
6530                .expect("test operation should succeed");
6531        let out = applied(apply_file_patch(base, &patch[0]));
6532        assert_eq!(out, b"first\nmiddle\nsecond\n");
6533    }
6534
6535    #[test]
6536    fn apply_pure_deletion_hunk() {
6537        let base = b"first\nmiddle\nsecond\n";
6538        let patch =
6539            parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,3 +1,2 @@\n first\n-middle\n second\n")
6540                .expect("test operation should succeed");
6541        let out = applied(apply_file_patch(base, &patch[0]));
6542        assert_eq!(out, b"first\nsecond\n");
6543    }
6544
6545    #[test]
6546    fn apply_then_reparse_round_trip() {
6547        // Hand-written unified diff -> apply -> the result is exactly the new
6548        // file content the diff describes. Re-parsing the same patch yields an
6549        // identical structure (idempotent parse).
6550        let base = b"l1\nl2\nl3\nl4\nl5\n";
6551        let text = b"--- a/f\n+++ b/f\n@@ -2,3 +2,4 @@\n l2\n-l3\n+L3\n+L3b\n l4\n";
6552        let p1 = parse_unified_patch(text).expect("test operation should succeed");
6553        let p2 = parse_unified_patch(text).expect("test operation should succeed");
6554        assert_eq!(p1, p2);
6555        let out = applied(apply_file_patch(base, &p1[0]));
6556        assert_eq!(out, b"l1\nl2\nL3\nL3b\nl4\nl5\n");
6557    }
6558
6559    #[test]
6560    fn empty_context_line_without_trailing_space() {
6561        // Some transports strip the single leading space from blank context
6562        // lines; the parser treats a wholly empty body line as blank context.
6563        let base = b"a\n\nb\n";
6564        let patch = parse_unified_patch(b"--- a/x\n+++ b/x\n@@ -1,3 +1,3 @@\n a\n\n-b\n+B\n")
6565            .expect("test operation should succeed");
6566        assert_eq!(patch[0].hunks[0].lines[1], HunkLine::Context(Vec::new()));
6567        let out = applied(apply_file_patch(base, &patch[0]));
6568        assert_eq!(out, b"a\n\nB\n");
6569    }
6570
6571    #[test]
6572    fn split_blob_lines_handles_edge_cases() {
6573        assert!(split_blob_lines(b"").is_empty());
6574        let single = split_blob_lines(b"abc");
6575        assert_eq!(single.len(), 1);
6576        assert!(single[0].no_newline);
6577        let terminated = split_blob_lines(b"abc\n");
6578        assert_eq!(terminated.len(), 1);
6579        assert!(!terminated[0].no_newline);
6580        let blank_then_eof = split_blob_lines(b"x\n");
6581        assert_eq!(blank_then_eof.len(), 1);
6582    }
6583
6584    // ---- content similarity & inexact rename/copy detection -----------------
6585
6586    #[test]
6587    fn similarity_identical_and_empty_conventions() {
6588        // Byte-identical blobs are always 100% similar.
6589        assert_eq!(blob_similarity(b"hello\nworld\n", b"hello\nworld\n"), 100);
6590        // Two empty blobs are identical -> 100.
6591        assert_eq!(blob_similarity(b"", b""), 100);
6592        // An empty blob vs a non-empty one shares nothing -> 0.
6593        assert_eq!(blob_similarity(b"", b"hello\n"), 0);
6594        assert_eq!(blob_similarity(b"hello\n", b""), 0);
6595    }
6596
6597    #[test]
6598    fn similarity_one_changed_line_is_75_and_symmetric() {
6599        // A = one/two/three/four/five (bytes: 4+4+6+5+5 = 24).
6600        // B changes "three\n" -> "THREE\n" (same total size 24).
6601        // Common spans: one,two,four,five = 4+4+5+5 = 18 bytes.
6602        // score = round(18 * 100 / max(24, 24)) = round(75) = 75.
6603        // Verified against `git diff -M` which reports "similarity index 75%".
6604        let a = b"one\ntwo\nthree\nfour\nfive\n";
6605        let b = b"one\ntwo\nTHREE\nfour\nfive\n";
6606        assert_eq!(blob_similarity(a, b), 75);
6607        // The metric is symmetric.
6608        assert_eq!(blob_similarity(b, a), 75);
6609    }
6610
6611    #[test]
6612    fn similarity_one_edited_line_of_three_is_66_not_67() {
6613        // "a\nb\nc\n" -> "a\nB\nc\n": one of three lines edited (4 common bytes of
6614        // 6). git reports `R066` / "similarity index 66%". git's two-step integer
6615        // math is `4 * 60000 / 6 = 40000`, then `40000 * 100 / 60000 = 66` (both
6616        // truncated); a single rounded `4 * 100 / 6` would give 67. This pins the
6617        // MAX_SCORE-based rounding so it stays aligned with diffcore-rename.
6618        assert_eq!(blob_similarity(b"a\nb\nc\n", b"a\nB\nc\n"), 66);
6619        assert_eq!(blob_similarity(b"a\nB\nc\n", b"a\nb\nc\n"), 66);
6620    }
6621
6622    #[test]
6623    fn similarity_small_append_is_88() {
6624        // A: 8 lines totalling 46 bytes. B: same 8 lines + "ADDED\n" (6 bytes) = 52.
6625        // Common = the 46 original bytes; score = round(46*100/52) = 88.
6626        // Verified against `git diff -M` -> "similarity index 88%".
6627        let a = b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\n";
6628        let b = b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\nADDED\n";
6629        assert_eq!(blob_similarity(a, b), 88);
6630    }
6631
6632    #[test]
6633    fn similarity_half_rewrite_is_50() {
6634        // 6 lines, last 3 rewritten. Common = l1,l2,l3 = 9 bytes; total each 18.
6635        // score = round(9*100/18) = 50. Verified against `git diff -M`.
6636        let a = b"l1\nl2\nl3\nl4\nl5\nl6\n";
6637        let b = b"l1\nl2\nl3\nX4\nX5\nX6\n";
6638        assert_eq!(blob_similarity(a, b), 50);
6639    }
6640
6641    // ---- tree-diff based inexact detection ----------------------------------
6642
6643    /// Write a blob and return its oid.
6644    fn write_blob(db: &mut FileObjectDatabase, bytes: &[u8]) -> ObjectId {
6645        db.write_object(EncodedObject::new(ObjectType::Blob, bytes.to_vec()))
6646            .expect("test operation should succeed")
6647    }
6648
6649    /// Write a tree from `(name, mode, oid)` entries (sorted by name as git
6650    /// requires) and return its oid.
6651    fn write_tree(db: &mut FileObjectDatabase, entries: &[(&[u8], u32, ObjectId)]) -> ObjectId {
6652        let mut tree_entries: Vec<TreeEntry> = entries
6653            .iter()
6654            .map(|(name, mode, oid)| TreeEntry {
6655                mode: *mode,
6656                name: BString::from(*name),
6657                oid: *oid,
6658            })
6659            .collect();
6660        tree_entries.sort_by(|a, b| a.name.cmp(&b.name));
6661        let tree = Tree {
6662            entries: tree_entries,
6663        };
6664        db.write_object(EncodedObject::new(ObjectType::Tree, tree.write()))
6665            .expect("test operation should succeed")
6666    }
6667
6668    #[test]
6669    fn inexact_rename_detected_with_plausible_score() {
6670        // a.txt (one changed line vs the new b.txt) should be detected as a
6671        // rename with score 75 (see `similarity_one_changed_line_is_75`).
6672        let root = temp_root();
6673        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
6674            .expect("test operation should succeed");
6675        let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
6676
6677        let old = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
6678        let new = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
6679        let left = write_tree(&mut db, &[(b"a.txt", 0o100644, old)]);
6680        let right = write_tree(&mut db, &[(b"b.txt", 0o100644, new)]);
6681
6682        let opts = RenameDetectionOptions {
6683            base: DiffNameStatusOptions {
6684                detect_renames: true,
6685                detect_copies: false,
6686                find_copies_harder: false,
6687                rename_empty: true,
6688            },
6689            detect_inexact: true,
6690            rename_threshold: DEFAULT_RENAME_THRESHOLD,
6691            copy_threshold: DEFAULT_RENAME_THRESHOLD,
6692        };
6693        let entries = diff_name_status_trees_with_rename_options(
6694            &db,
6695            ObjectFormat::Sha1,
6696            &left,
6697            &right,
6698            opts,
6699        )
6700        .expect("test operation should succeed");
6701
6702        assert_eq!(
6703            entries.len(),
6704            1,
6705            "expected a single rename entry: {entries:?}"
6706        );
6707        assert_eq!(entries[0].status, NameStatus::Renamed(75));
6708        assert_eq!(
6709            entries[0].old_path.as_ref().map(|p| p.as_bytes()),
6710            Some(b"a.txt".as_slice())
6711        );
6712        assert_eq!(entries[0].path, b"b.txt");
6713        assert_eq!(entries[0].line(), "R075\ta.txt\tb.txt");
6714        fs::remove_dir_all(root).expect("test operation should succeed");
6715    }
6716
6717    #[test]
6718    fn inexact_rename_below_threshold_not_detected() {
6719        // A half-rewrite scores 50%. With a 60% threshold it must NOT be paired;
6720        // the change shows up as a separate Add + Delete instead.
6721        let root = temp_root();
6722        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
6723            .expect("test operation should succeed");
6724        let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
6725
6726        let old = write_blob(&mut db, b"l1\nl2\nl3\nl4\nl5\nl6\n");
6727        let new = write_blob(&mut db, b"l1\nl2\nl3\nX4\nX5\nX6\n");
6728        let left = write_tree(&mut db, &[(b"a.txt", 0o100644, old)]);
6729        let right = write_tree(&mut db, &[(b"b.txt", 0o100644, new)]);
6730
6731        let opts = RenameDetectionOptions {
6732            base: DiffNameStatusOptions {
6733                detect_renames: true,
6734                detect_copies: false,
6735                find_copies_harder: false,
6736                rename_empty: true,
6737            },
6738            detect_inexact: true,
6739            rename_threshold: 60,
6740            copy_threshold: 60,
6741        };
6742        let entries = diff_name_status_trees_with_rename_options(
6743            &db,
6744            ObjectFormat::Sha1,
6745            &left,
6746            &right,
6747            opts,
6748        )
6749        .expect("test operation should succeed");
6750
6751        let statuses: Vec<_> = entries.iter().map(|e| e.status).collect();
6752        assert!(
6753            statuses.contains(&NameStatus::Added) && statuses.contains(&NameStatus::Deleted),
6754            "expected separate add/delete below threshold, got {entries:?}"
6755        );
6756        assert!(
6757            !statuses.iter().any(|s| matches!(s, NameStatus::Renamed(_))),
6758            "no rename should be reported below threshold: {entries:?}"
6759        );
6760
6761        // Sanity: lowering the threshold to 50 *does* detect it (boundary is
6762        // inclusive), and the score is exactly 50.
6763        let opts_low = RenameDetectionOptions {
6764            rename_threshold: 50,
6765            ..opts
6766        };
6767        let entries_low = diff_name_status_trees_with_rename_options(
6768            &db,
6769            ObjectFormat::Sha1,
6770            &left,
6771            &right,
6772            opts_low,
6773        )
6774        .expect("test operation should succeed");
6775        assert_eq!(entries_low.len(), 1);
6776        assert_eq!(entries_low[0].status, NameStatus::Renamed(50));
6777        fs::remove_dir_all(root).expect("test operation should succeed");
6778    }
6779
6780    #[test]
6781    fn exact_rename_scores_100_and_takes_priority() {
6782        // Identical content moved to a new path is an exact rename: score 100,
6783        // detected even with inexact disabled, and still 100 with it enabled.
6784        let root = temp_root();
6785        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
6786            .expect("test operation should succeed");
6787        let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
6788
6789        let oid = write_blob(&mut db, b"identical\ncontent\nhere\n");
6790        let left = write_tree(&mut db, &[(b"old.txt", 0o100644, oid)]);
6791        let right = write_tree(&mut db, &[(b"new.txt", 0o100644, oid)]);
6792
6793        for inexact in [false, true] {
6794            let opts = RenameDetectionOptions {
6795                base: DiffNameStatusOptions {
6796                    detect_renames: true,
6797                    detect_copies: false,
6798                    find_copies_harder: false,
6799                    rename_empty: true,
6800                },
6801                detect_inexact: inexact,
6802                rename_threshold: DEFAULT_RENAME_THRESHOLD,
6803                copy_threshold: DEFAULT_RENAME_THRESHOLD,
6804            };
6805            let entries = diff_name_status_trees_with_rename_options(
6806                &db,
6807                ObjectFormat::Sha1,
6808                &left,
6809                &right,
6810                opts,
6811            )
6812            .expect("test operation should succeed");
6813            assert_eq!(entries.len(), 1, "inexact={inexact}: {entries:?}");
6814            assert_eq!(entries[0].status, NameStatus::Renamed(100));
6815            assert_eq!(
6816                entries[0].old_path.as_ref().map(|p| p.as_bytes()),
6817                Some(b"old.txt".as_slice())
6818            );
6819            assert_eq!(entries[0].path, b"new.txt");
6820        }
6821        fs::remove_dir_all(root).expect("test operation should succeed");
6822    }
6823
6824    #[test]
6825    fn inexact_copy_detected_with_score() {
6826        // orig.txt is unchanged and a near-copy (one line differs, 80% similar)
6827        // is added. With copy detection + find_copies_harder + inexact, the new
6828        // file is reported as a copy with score 80 (matches `git diff -C
6829        // --find-copies-harder`).
6830        let root = temp_root();
6831        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
6832            .expect("test operation should succeed");
6833        let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
6834
6835        let orig = write_blob(&mut db, b"aaa\nbbb\nccc\nddd\neee\n");
6836        let copy = write_blob(&mut db, b"aaa\nbbb\nccc\nddd\nEEE\n");
6837        let left = write_tree(&mut db, &[(b"orig.txt", 0o100644, orig.clone())]);
6838        let right = write_tree(
6839            &mut db,
6840            &[(b"orig.txt", 0o100644, orig), (b"copy.txt", 0o100644, copy)],
6841        );
6842
6843        let opts = RenameDetectionOptions {
6844            base: DiffNameStatusOptions {
6845                detect_renames: true,
6846                detect_copies: true,
6847                find_copies_harder: true,
6848                rename_empty: true,
6849            },
6850            detect_inexact: true,
6851            rename_threshold: DEFAULT_RENAME_THRESHOLD,
6852            copy_threshold: DEFAULT_RENAME_THRESHOLD,
6853        };
6854        let entries = diff_name_status_trees_with_rename_options(
6855            &db,
6856            ObjectFormat::Sha1,
6857            &left,
6858            &right,
6859            opts,
6860        )
6861        .expect("test operation should succeed");
6862
6863        let copy_entry = entries
6864            .iter()
6865            .find(|e| e.path == b"copy.txt")
6866            .unwrap_or_else(|| panic!("no copy.txt entry: {entries:?}"));
6867        assert_eq!(copy_entry.status, NameStatus::Copied(80));
6868        assert_eq!(
6869            copy_entry.old_path.as_ref().map(|p| p.as_bytes()),
6870            Some(b"orig.txt".as_slice())
6871        );
6872        // The source remains present (copies do not consume the original).
6873        assert!(
6874            entries.iter().all(|e| e.status != NameStatus::Deleted),
6875            "copy must not delete the source: {entries:?}"
6876        );
6877        fs::remove_dir_all(root).expect("test operation should succeed");
6878    }
6879
6880    #[test]
6881    fn inexact_rename_with_small_edit_scores_88() {
6882        // A rename that also appends a single line scores 88% (see
6883        // `similarity_small_append_is_88`).
6884        let root = temp_root();
6885        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
6886            .expect("test operation should succeed");
6887        let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
6888
6889        let old = write_blob(
6890            &mut db,
6891            b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\n",
6892        );
6893        let new = write_blob(
6894            &mut db,
6895            b"alpha\nbeta\ngamma\ndelta\nepsilon\nzeta\neta\ntheta\nADDED\n",
6896        );
6897        let left = write_tree(&mut db, &[(b"src.txt", 0o100644, old)]);
6898        let right = write_tree(&mut db, &[(b"dst.txt", 0o100644, new)]);
6899
6900        let opts = RenameDetectionOptions::inexact(DiffNameStatusOptions {
6901            detect_renames: true,
6902            detect_copies: false,
6903            find_copies_harder: false,
6904            rename_empty: true,
6905        });
6906        let entries = diff_name_status_trees_with_rename_options(
6907            &db,
6908            ObjectFormat::Sha1,
6909            &left,
6910            &right,
6911            opts,
6912        )
6913        .expect("test operation should succeed");
6914
6915        assert_eq!(entries.len(), 1, "{entries:?}");
6916        assert_eq!(entries[0].status, NameStatus::Renamed(88));
6917        assert_eq!(
6918            entries[0].old_path.as_ref().map(|p| p.as_bytes()),
6919            Some(b"src.txt".as_slice())
6920        );
6921        assert_eq!(entries[0].path, b"dst.txt");
6922        fs::remove_dir_all(root).expect("test operation should succeed");
6923    }
6924
6925    #[test]
6926    fn inexact_disabled_default_preserves_exact_only_behavior() {
6927        // With RenameDetectionOptions::default() (detect_inexact == false), a
6928        // similar-but-not-identical pair is NOT a rename — identical to the
6929        // legacy exact-only path. Defaults must not silently turn on inexact.
6930        assert!(!RenameDetectionOptions::default().detect_inexact);
6931        assert_eq!(
6932            RenameDetectionOptions::default().rename_threshold,
6933            DEFAULT_RENAME_THRESHOLD
6934        );
6935
6936        let root = temp_root();
6937        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
6938            .expect("test operation should succeed");
6939        let mut db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
6940
6941        let old = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
6942        let new = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
6943        let left = write_tree(&mut db, &[(b"a.txt", 0o100644, old)]);
6944        let right = write_tree(&mut db, &[(b"b.txt", 0o100644, new)]);
6945
6946        let entries = diff_name_status_trees_with_rename_options(
6947            &db,
6948            ObjectFormat::Sha1,
6949            &left,
6950            &right,
6951            RenameDetectionOptions::default(),
6952        )
6953        .expect("test operation should succeed");
6954        let statuses: Vec<_> = entries.iter().map(|e| e.status).collect();
6955        assert!(statuses.contains(&NameStatus::Added));
6956        assert!(statuses.contains(&NameStatus::Deleted));
6957        assert!(!statuses.iter().any(|s| matches!(s, NameStatus::Renamed(_))));
6958        fs::remove_dir_all(root).expect("test operation should succeed");
6959    }
6960
6961    // ---- patience / histogram diff tests ------------------------------------
6962
6963    /// Apply an edit script to `old` and return the reconstructed `new` bytes.
6964    ///
6965    /// Panics (test-only) if the script ever references a line out of range or
6966    /// claims a line is `Equal` when the corresponding `old`/`new` lines differ
6967    /// — that is exactly the invariant a correct LCS diff must uphold.
6968    fn apply_ops(old: &[DiffLine<'_>], new: &[DiffLine<'_>], ops: &[DiffOp]) -> Vec<u8> {
6969        let mut oi = 0usize;
6970        let mut ni = 0usize;
6971        let mut rebuilt: Vec<u8> = Vec::new();
6972        for op in ops {
6973            match *op {
6974                DiffOp::Equal(n) => {
6975                    for _ in 0..n {
6976                        // Equal must mean genuinely-equal lines (LCS-correct).
6977                        assert_eq!(old[oi], new[ni], "Equal op covered unequal lines");
6978                        rebuilt.extend_from_slice(old[oi].content);
6979                        oi += 1;
6980                        ni += 1;
6981                    }
6982                }
6983                DiffOp::Delete(n) => oi += n,
6984                DiffOp::Insert(n) => {
6985                    for _ in 0..n {
6986                        rebuilt.extend_from_slice(new[ni].content);
6987                        ni += 1;
6988                    }
6989                }
6990            }
6991        }
6992        // The script must consume every line of both sides exactly once.
6993        assert_eq!(oi, old.len(), "script did not consume all of old");
6994        assert_eq!(ni, new.len(), "script did not consume all of new");
6995        rebuilt
6996    }
6997
6998    /// Assert that `ops` is a valid LCS-correct script: it reconstructs `new`
6999    /// from `old`, and consecutive ops are coalesced (no two same-kind in a row).
7000    fn assert_valid_script(old_bytes: &[u8], new_bytes: &[u8], ops: &[DiffOp]) {
7001        let old = split_lines(old_bytes);
7002        let new = split_lines(new_bytes);
7003        let rebuilt = apply_ops(&old, &new, ops);
7004        assert_eq!(rebuilt, new_bytes, "script did not rebuild new");
7005        for pair in ops.windows(2) {
7006            let same_kind = matches!(
7007                (pair[0], pair[1]),
7008                (DiffOp::Equal(_), DiffOp::Equal(_))
7009                    | (DiffOp::Delete(_), DiffOp::Delete(_))
7010                    | (DiffOp::Insert(_), DiffOp::Insert(_))
7011            );
7012            assert!(!same_kind, "ops not coalesced: {:?}", ops);
7013        }
7014    }
7015
7016    /// Run all three real algorithms over a byte pair and assert each produces a
7017    /// valid, coalesced, LCS-correct script.
7018    fn check_all_algorithms(old_bytes: &[u8], new_bytes: &[u8]) {
7019        let old = split_lines(old_bytes);
7020        let new = split_lines(new_bytes);
7021        for algo in [
7022            DiffAlgorithm::Myers,
7023            DiffAlgorithm::Minimal,
7024            DiffAlgorithm::Patience,
7025            DiffAlgorithm::Histogram,
7026        ] {
7027            let ops = diff_lines_with_algorithm(&old, &new, algo);
7028            assert_valid_script(old_bytes, new_bytes, &ops);
7029        }
7030    }
7031
7032    #[test]
7033    fn patience_and_histogram_match_myers_on_simple_cases() {
7034        // For localized single-line edits with no repeated lines, all three
7035        // algorithms agree with the canonical Myers script.
7036        let cases: &[(&[u8], &[u8], Vec<DiffOp>)] = &[
7037            (
7038                b"a\nb\nc\n",
7039                b"a\nx\nc\n",
7040                vec![
7041                    DiffOp::Equal(1),
7042                    DiffOp::Delete(1),
7043                    DiffOp::Insert(1),
7044                    DiffOp::Equal(1),
7045                ],
7046            ),
7047            (b"a\nb\nc\n", b"a\nb\nc\n", vec![DiffOp::Equal(3)]),
7048            (b"", b"a\nb\n", vec![DiffOp::Insert(2)]),
7049            (b"a\nb\n", b"", vec![DiffOp::Delete(2)]),
7050            (
7051                b"a\nb\nc\nd\n",
7052                b"a\nc\nd\n",
7053                vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Equal(2)],
7054            ),
7055        ];
7056        for (old_bytes, new_bytes, expected) in cases {
7057            let old = split_lines(old_bytes);
7058            let new = split_lines(new_bytes);
7059            assert_eq!(&patience_diff_lines(&old, &new), expected);
7060            assert_eq!(&histogram_diff_lines(&old, &new), expected);
7061            assert_eq!(&myers_diff_lines(&old, &new), expected);
7062        }
7063    }
7064
7065    #[test]
7066    fn patience_handles_both_empty() {
7067        let empty = split_lines(b"");
7068        assert!(patience_diff_lines(&empty, &empty).is_empty());
7069        assert!(histogram_diff_lines(&empty, &empty).is_empty());
7070    }
7071
7072    #[test]
7073    fn patience_aligns_unique_anchors_across_moved_block() {
7074        // Reordering two unique blocks: patience anchors on the unique lines and
7075        // produces a delete-then-insert (or insert-then-delete) that still
7076        // reconstructs `new`. Validity is the contract; exact shape may differ
7077        // from Myers, so we only assert reconstruction here.
7078        check_all_algorithms(
7079            b"alpha\nbeta\ngamma\ndelta\n",
7080            b"gamma\ndelta\nalpha\nbeta\n",
7081        );
7082    }
7083
7084    #[test]
7085    fn histogram_differs_from_myers_keeping_block_contiguous() {
7086        // A case where histogram diverges from Myers. With old = "b a" and a new
7087        // that surrounds an intact "b a" with inserted "b" lines, Myers splits
7088        // the common run into two single-line Equals (matching the leading and
7089        // trailing `b`/`a` separately), while histogram anchors on the rare line
7090        // and keeps the original two lines together as one Equal(2) block.
7091        let old = b"b\na\n";
7092        let new = b"a\nb\nb\na\nb\n";
7093        let old_l = split_lines(old);
7094        let new_l = split_lines(new);
7095
7096        let myers = myers_diff_lines(&old_l, &new_l);
7097        let histogram = histogram_diff_lines(&old_l, &new_l);
7098
7099        // All variants must reconstruct `new`.
7100        assert_valid_script(old, new, &myers);
7101        assert_valid_script(old, new, &histogram);
7102
7103        // Exact, pinned shapes: Myers interleaves single-line equals; histogram
7104        // keeps "b\na\n" contiguous.
7105        assert_eq!(
7106            myers,
7107            vec![
7108                DiffOp::Insert(1),
7109                DiffOp::Equal(1),
7110                DiffOp::Insert(1),
7111                DiffOp::Equal(1),
7112                DiffOp::Insert(1),
7113            ]
7114        );
7115        assert_eq!(
7116            histogram,
7117            vec![DiffOp::Insert(2), DiffOp::Equal(2), DiffOp::Insert(1)]
7118        );
7119        // The contract the task calls out: histogram differs from Myers here.
7120        assert_ne!(myers, histogram);
7121    }
7122
7123    #[test]
7124    fn patience_differs_from_myers_on_repeated_lines() {
7125        // A case where patience diverges from Myers. old = "b a", new = "a a b".
7126        // Myers deletes the leading `b` and appends; patience anchors on the
7127        // single unique-in-both line `a`... but `a` occurs twice in `new`, so it
7128        // is NOT unique there; patience instead falls through to its recursive
7129        // structure and produces the mirror script. Both reconstruct `new`.
7130        let old = b"b\na\n";
7131        let new = b"a\na\nb\n";
7132        let old_l = split_lines(old);
7133        let new_l = split_lines(new);
7134
7135        let myers = myers_diff_lines(&old_l, &new_l);
7136        let patience = patience_diff_lines(&old_l, &new_l);
7137
7138        assert_valid_script(old, new, &myers);
7139        assert_valid_script(old, new, &patience);
7140
7141        assert_eq!(
7142            myers,
7143            vec![DiffOp::Delete(1), DiffOp::Equal(1), DiffOp::Insert(2)]
7144        );
7145        assert_eq!(
7146            patience,
7147            vec![DiffOp::Insert(2), DiffOp::Equal(1), DiffOp::Delete(1)]
7148        );
7149        assert_ne!(myers, patience);
7150    }
7151
7152    #[test]
7153    fn realistic_function_insertion_all_valid() {
7154        // A more lifelike example: a new function is inserted ahead of an
7155        // existing one that shares structural lines ("}", blank line). We don't
7156        // pin exact shapes (they depend on trim interactions) but every
7157        // algorithm must produce a valid LCS-correct script.
7158        let old = b"int f() {\n    return 1;\n}\n";
7159        let new = b"int g() {\n    return 2;\n}\n\nint f() {\n    return 1;\n}\n";
7160        check_all_algorithms(old, new);
7161    }
7162
7163    #[test]
7164    fn histogram_anchors_on_rare_line_when_no_unique_line_exists() {
7165        // No line is globally unique on both sides (every distinct line repeats
7166        // on at least one side), so plain patience would fall straight to Myers.
7167        // Histogram still anchors on the least-frequent shared line. We assert
7168        // both produce valid, reconstructing scripts.
7169        check_all_algorithms(b"x\nx\nmid\nx\nx\n", b"x\nmid\nx\nx\nx\n");
7170        check_all_algorithms(
7171            b"dup\ndup\nrare\ndup\ndup\n",
7172            b"dup\nrare\ndup\ndup\ndup\ndup\n",
7173        );
7174    }
7175
7176    #[test]
7177    fn all_algorithms_treat_missing_final_newline_as_change() {
7178        // "b" (no newline) vs "b\n" is a real change for every algorithm.
7179        let old = split_lines(b"a\nb");
7180        let new = split_lines(b"a\nb\n");
7181        for algo in [
7182            DiffAlgorithm::Myers,
7183            DiffAlgorithm::Minimal,
7184            DiffAlgorithm::Patience,
7185            DiffAlgorithm::Histogram,
7186        ] {
7187            let ops = diff_lines_with_algorithm(&old, &new, algo);
7188            assert_eq!(
7189                ops,
7190                vec![DiffOp::Equal(1), DiffOp::Delete(1), DiffOp::Insert(1)],
7191                "algorithm {:?} mishandled missing final newline",
7192                algo
7193            );
7194        }
7195    }
7196
7197    #[test]
7198    fn dispatcher_routes_each_variant() {
7199        let old = split_lines(b"a\nb\nc\n");
7200        let new = split_lines(b"a\nx\nc\n");
7201        assert_eq!(
7202            diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Myers),
7203            myers_diff_lines(&old, &new)
7204        );
7205        // Minimal aliases Myers (the Myers search is already a minimal SES).
7206        assert_eq!(
7207            diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Minimal),
7208            myers_diff_lines(&old, &new)
7209        );
7210        assert_eq!(
7211            diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Patience),
7212            patience_diff_lines(&old, &new)
7213        );
7214        assert_eq!(
7215            diff_lines_with_algorithm(&old, &new, DiffAlgorithm::Histogram),
7216            histogram_diff_lines(&old, &new)
7217        );
7218    }
7219
7220    #[test]
7221    fn patience_recurses_into_gaps_between_anchors() {
7222        // Unique anchors `head`/`tail` bracket an inner edit; patience must
7223        // recurse into the middle gap and diff `mid1`->`MID` there.
7224        let old = b"head\nmid1\nmid2\ntail\n";
7225        let new = b"head\nMID\nmid2\ntail\n";
7226        let old_l = split_lines(old);
7227        let new_l = split_lines(new);
7228        let ops = patience_diff_lines(&old_l, &new_l);
7229        assert_eq!(
7230            ops,
7231            vec![
7232                DiffOp::Equal(1),
7233                DiffOp::Delete(1),
7234                DiffOp::Insert(1),
7235                DiffOp::Equal(2),
7236            ]
7237        );
7238        assert_valid_script(old, new, &ops);
7239    }
7240
7241    #[test]
7242    fn patience_falls_back_to_myers_with_no_unique_lines() {
7243        // Every line is duplicated within its own side, so there are no
7244        // unique-in-both anchors; patience must defer to Myers but still return
7245        // a valid script.
7246        let old = b"a\na\nb\nb\n";
7247        let new = b"a\na\na\nb\n";
7248        let old_l = split_lines(old);
7249        let new_l = split_lines(new);
7250        let ops = patience_diff_lines(&old_l, &new_l);
7251        // The contract for the fallback path is validity, not minimality: after
7252        // the greedy prefix/suffix trim (which git's patience does too) the
7253        // leftover block is handed to Myers, and the whole script must still
7254        // reconstruct `new`.
7255        assert_valid_script(old, new, &ops);
7256    }
7257
7258    #[test]
7259    fn algorithms_agree_with_myers_when_all_lines_distinct() {
7260        // When every line is globally unique, patience's anchor set is the full
7261        // LCS, so patience and histogram must produce exactly the Myers script.
7262        let cases: &[(&[u8], &[u8])] = &[
7263            (b"a\nb\nc\nd\ne\n", b"a\nc\nd\nf\ne\n"),
7264            (b"1\n2\n3\n4\n5\n6\n", b"1\n3\n2\n4\n6\n5\n"),
7265            (b"q\nw\ne\nr\nt\ny\n", b"q\nw\nx\nr\nt\nz\n"),
7266        ];
7267        for (old_bytes, new_bytes) in cases {
7268            let old = split_lines(old_bytes);
7269            let new = split_lines(new_bytes);
7270            let myers = myers_diff_lines(&old, &new);
7271            assert_eq!(
7272                patience_diff_lines(&old, &new),
7273                myers,
7274                "patience must equal Myers when all lines are distinct: {:?}",
7275                old_bytes
7276            );
7277            assert_eq!(
7278                histogram_diff_lines(&old, &new),
7279                myers,
7280                "histogram must equal Myers when all lines are distinct: {:?}",
7281                old_bytes
7282            );
7283        }
7284    }
7285
7286    #[test]
7287    fn fuzz_all_algorithms_reconstruct_new() {
7288        // A small deterministic LCG drives many random small inputs over a tiny
7289        // alphabet (so lines repeat and exercise the anchor/fallback paths).
7290        // Every algorithm must produce a valid LCS-correct script for each pair.
7291        let mut state: u64 = 0x9E37_79B9_7F4A_7C15;
7292        let mut next = || {
7293            state = state
7294                .wrapping_mul(6364136223846793005)
7295                .wrapping_add(1442695040888963407);
7296            (state >> 33) as u32
7297        };
7298        let alphabet = [b"a\n", b"b\n", b"c\n", b"d\n"];
7299        let build = |rng: &mut dyn FnMut() -> u32| -> Vec<u8> {
7300            let len = (rng() % 9) as usize; // 0..=8 lines
7301            let mut buf = Vec::new();
7302            for _ in 0..len {
7303                let pick = (rng() % alphabet.len() as u32) as usize;
7304                buf.extend_from_slice(alphabet[pick]);
7305            }
7306            // Occasionally drop the trailing newline to exercise that path.
7307            if !buf.is_empty() && rng().is_multiple_of(4) {
7308                buf.pop();
7309            }
7310            buf
7311        };
7312        for _ in 0..400 {
7313            let old_bytes = build(&mut next);
7314            let new_bytes = build(&mut next);
7315            check_all_algorithms(&old_bytes, &new_bytes);
7316        }
7317    }
7318
7319    #[test]
7320    fn exhaustive_small_inputs_all_algorithms_reconstruct() {
7321        // Brute force over a 3-symbol alphabet up to 5 lines per side: every
7322        // algorithm must produce a valid LCS-correct script for *every* pair.
7323        // This is the strongest correctness net for the recursion/fallback
7324        // paths; apply_ops asserts both reconstruction and Equal-correctness.
7325        let syms = [b"a\n".to_vec(), b"b\n".to_vec(), b"c\n".to_vec()];
7326        let make = |n: usize, mut code: usize| -> Vec<u8> {
7327            let mut v = Vec::new();
7328            for _ in 0..n {
7329                v.extend_from_slice(&syms[code % 3]);
7330                code /= 3;
7331            }
7332            v
7333        };
7334        for la in 0..=5usize {
7335            for lb in 0..=5usize {
7336                for ca in 0..3usize.pow(la as u32) {
7337                    for cb in 0..3usize.pow(lb as u32) {
7338                        let ob = make(la, ca);
7339                        let nb = make(lb, cb);
7340                        let ol = split_lines(&ob);
7341                        let nl = split_lines(&nb);
7342                        assert_eq!(apply_ops(&ol, &nl, &myers_diff_lines(&ol, &nl)), nb);
7343                        assert_eq!(apply_ops(&ol, &nl, &patience_diff_lines(&ol, &nl)), nb);
7344                        assert_eq!(apply_ops(&ol, &nl, &histogram_diff_lines(&ol, &nl)), nb);
7345                    }
7346                }
7347            }
7348        }
7349    }
7350
7351    #[test]
7352    fn fuzz_distinct_lines_patience_histogram_equal_myers() {
7353        // When inputs are permutations/subsequences of globally-unique lines,
7354        // patience and histogram must match Myers exactly. We generate sequences
7355        // of distinct tokens to guarantee global uniqueness on both sides.
7356        let mut state: u64 = 0x1234_5678_9ABC_DEF0;
7357        let mut next = || {
7358            state = state
7359                .wrapping_mul(6364136223846793005)
7360                .wrapping_add(1442695040888963407);
7361            (state >> 33) as u32
7362        };
7363        for _ in 0..200 {
7364            // Random subset+order of tokens "0\n".."9\n" for each side; tokens
7365            // are globally unique, so any common line is unique in both.
7366            let pick_subseq = |rng: &mut dyn FnMut() -> u32| -> Vec<u8> {
7367                let mut buf = Vec::new();
7368                for t in 0..10u32 {
7369                    if rng().is_multiple_of(2) {
7370                        buf.extend_from_slice(format!("{t}\n").as_bytes());
7371                    }
7372                }
7373                buf
7374            };
7375            let old_bytes = pick_subseq(&mut next);
7376            let new_bytes = pick_subseq(&mut next);
7377            let old = split_lines(&old_bytes);
7378            let new = split_lines(&new_bytes);
7379            let myers = myers_diff_lines(&old, &new);
7380            assert_eq!(patience_diff_lines(&old, &new), myers);
7381            assert_eq!(histogram_diff_lines(&old, &new), myers);
7382        }
7383    }
7384
7385    // ===================================================================
7386    // Subtree-skip-by-OID tree-diff optimization: the pruned simultaneous
7387    // walk (`changed_tree_entries`) must produce byte-identical name-status
7388    // output to the legacy "flatten both sides fully" walk
7389    // (`collect_full_tree_pair`) on every representative diff shape.
7390    // ===================================================================
7391
7392    /// Format a name-status result into stable, comparable lines.
7393    fn status_lines(entries: &[NameStatusEntry]) -> Vec<String> {
7394        entries.iter().map(|entry| entry.line()).collect()
7395    }
7396
7397    /// Assert the pruned walk and the full flatten agree, both as raw map diffs
7398    /// and through the public tree-diff entry points, for the given options.
7399    fn assert_tree_diff_matches_full(
7400        db: &FileObjectDatabase,
7401        left: &ObjectId,
7402        right: &ObjectId,
7403        options: DiffNameStatusOptions,
7404    ) {
7405        // Reference ("old") behaviour: fully flatten both trees, then diff.
7406        let (full_left, full_right) = collect_full_tree_pair(db, ObjectFormat::Sha1, left, right)
7407            .expect("test operation should succeed");
7408        let reference = diff_name_status_maps(
7409            &full_left,
7410            &full_right,
7411            full_left.keys().chain(full_right.keys()),
7412            options,
7413        )
7414        .expect("test operation should succeed");
7415
7416        // Optimized ("new") behaviour: prune identical subtrees, then diff.
7417        let (pruned_left, pruned_right) = changed_tree_entries(db, ObjectFormat::Sha1, left, right)
7418            .expect("test operation should succeed");
7419        let pruned = diff_name_status_maps(
7420            &pruned_left,
7421            &pruned_right,
7422            pruned_left.keys().chain(pruned_right.keys()),
7423            options,
7424        )
7425        .expect("test operation should succeed");
7426
7427        assert_eq!(
7428            status_lines(&reference),
7429            status_lines(&pruned),
7430            "pruned map diff diverged from full map diff for {options:?}"
7431        );
7432
7433        // And the public entry point (which itself selects pruned vs full) must
7434        // match the reference too.
7435        let public =
7436            diff_name_status_trees_with_options(db, ObjectFormat::Sha1, left, right, options)
7437                .expect("test operation should succeed");
7438        assert_eq!(
7439            status_lines(&reference),
7440            status_lines(&public),
7441            "public tree diff diverged from full map diff for {options:?}"
7442        );
7443
7444        // The pruned maps must be a subset of the full maps and must contain
7445        // exactly the paths that actually changed (no identical entries leak in,
7446        // no changed entries get dropped).
7447        for (path, tracked) in &pruned_left {
7448            assert_eq!(
7449                full_left.get(path),
7450                Some(tracked),
7451                "pruned left entry not present (or differs) in full left map: {:?}",
7452                String::from_utf8_lossy(path)
7453            );
7454        }
7455        for (path, tracked) in &pruned_right {
7456            assert_eq!(
7457                full_right.get(path),
7458                Some(tracked),
7459                "pruned right entry not present (or differs) in full right map: {:?}",
7460                String::from_utf8_lossy(path)
7461            );
7462        }
7463        // Every path the full diff reports as changed must survive pruning on
7464        // whichever side(s) it lives.
7465        for entry in &reference {
7466            let path = entry.path.as_bytes();
7467            match entry.status {
7468                NameStatus::Added => assert!(
7469                    pruned_right.contains_key(path),
7470                    "added path dropped by pruning: {:?}",
7471                    String::from_utf8_lossy(path)
7472                ),
7473                NameStatus::Deleted => assert!(
7474                    pruned_left.contains_key(path),
7475                    "deleted path dropped by pruning: {:?}",
7476                    String::from_utf8_lossy(path)
7477                ),
7478                NameStatus::Modified => {
7479                    assert!(
7480                        pruned_left.contains_key(path) && pruned_right.contains_key(path),
7481                        "modified path dropped by pruning: {:?}",
7482                        String::from_utf8_lossy(path)
7483                    );
7484                }
7485                _ => {}
7486            }
7487        }
7488    }
7489
7490    /// Run the equivalence assertion across the option matrix that the pruned
7491    /// path serves (everything except `--find-copies-harder`, which uses the
7492    /// full maps and is checked separately).
7493    fn assert_tree_diff_matches_full_all_modes(
7494        db: &FileObjectDatabase,
7495        left: &ObjectId,
7496        right: &ObjectId,
7497    ) {
7498        for detect_renames in [false, true] {
7499            for detect_copies in [false, true] {
7500                let options = DiffNameStatusOptions {
7501                    detect_renames,
7502                    detect_copies,
7503                    find_copies_harder: false,
7504                    rename_empty: true,
7505                };
7506                assert_tree_diff_matches_full(db, left, right, options);
7507            }
7508        }
7509    }
7510
7511    /// Build a DB pre-seeded with a fixed bank of blobs for the structural tests.
7512    fn structural_db() -> (PathBuf, FileObjectDatabase) {
7513        let root = temp_root();
7514        let layout = RepositoryLayout::init_at(&root, ObjectFormat::Sha1, false)
7515            .expect("test operation should succeed");
7516        let db = FileObjectDatabase::from_git_dir(&layout.git_dir, ObjectFormat::Sha1);
7517        (root, db)
7518    }
7519
7520    #[test]
7521    fn pruned_walk_skips_identical_subtree_and_matches_full() {
7522        // A large shared subtree (`shared/`) is byte-identical on both sides; the
7523        // only change lives in `app/`. The pruned walk must skip `shared/`
7524        // entirely yet still produce the exact same diff as flattening it.
7525        let (root, mut db) = structural_db();
7526
7527        // shared/ — identical on both sides, several nested files.
7528        let s1 = write_blob(&mut db, b"shared one\n");
7529        let s2 = write_blob(&mut db, b"shared two\n");
7530        let s3 = write_blob(&mut db, b"deep nested\n");
7531        let shared_inner = write_tree(&mut db, &[(b"c.txt", 0o100644, s3.clone())]);
7532        let shared = write_tree(
7533            &mut db,
7534            &[
7535                (b"a.txt", 0o100644, s1.clone()),
7536                (b"b.txt", 0o100644, s2.clone()),
7537                (b"inner", 0o040000, shared_inner.clone()),
7538            ],
7539        );
7540
7541        // app/ — one file modified between sides.
7542        let app_old = write_blob(&mut db, b"version 1\n");
7543        let app_new = write_blob(&mut db, b"version 2\n");
7544        let app_left = write_tree(&mut db, &[(b"main.rs", 0o100644, app_old)]);
7545        let app_right = write_tree(&mut db, &[(b"main.rs", 0o100644, app_new)]);
7546
7547        let left = write_tree(
7548            &mut db,
7549            &[
7550                (b"app", 0o040000, app_left),
7551                (b"shared", 0o040000, shared.clone()),
7552            ],
7553        );
7554        let right = write_tree(
7555            &mut db,
7556            &[(b"app", 0o040000, app_right), (b"shared", 0o040000, shared)],
7557        );
7558
7559        // Sanity: the only change is the nested app/main.rs modification.
7560        let (pruned_left, pruned_right) =
7561            changed_tree_entries(&db, ObjectFormat::Sha1, &left, &right)
7562                .expect("test operation should succeed");
7563        assert_eq!(
7564            pruned_left.keys().collect::<Vec<_>>(),
7565            vec![&b"app/main.rs".to_vec()],
7566            "pruning should leave only the changed path on the left"
7567        );
7568        assert_eq!(
7569            pruned_right.keys().collect::<Vec<_>>(),
7570            vec![&b"app/main.rs".to_vec()],
7571            "pruning should leave only the changed path on the right"
7572        );
7573        assert!(
7574            !pruned_left.contains_key(b"shared/a.txt".as_slice()),
7575            "identical shared subtree must not appear in pruned maps"
7576        );
7577
7578        assert_tree_diff_matches_full_all_modes(&db, &left, &right);
7579        fs::remove_dir_all(root).expect("test operation should succeed");
7580    }
7581
7582    #[test]
7583    fn pruned_walk_matches_full_for_add_delete_modify_nested() {
7584        // Mixed shape: a top-level add, a top-level delete, a nested modify, and
7585        // an untouched nested subtree that must be skipped.
7586        let (root, mut db) = structural_db();
7587
7588        let keep = write_blob(&mut db, b"unchanged\n");
7589        let untouched_dir = write_tree(&mut db, &[(b"keep.txt", 0o100644, keep.clone())]);
7590
7591        let nested_old = write_blob(&mut db, b"nested old\n");
7592        let nested_new = write_blob(&mut db, b"nested new\n");
7593        let dir_left = write_tree(
7594            &mut db,
7595            &[
7596                (b"changed.txt", 0o100644, nested_old),
7597                (b"stable.txt", 0o100644, keep.clone()),
7598            ],
7599        );
7600        let dir_right = write_tree(
7601            &mut db,
7602            &[
7603                (b"changed.txt", 0o100644, nested_new),
7604                (b"stable.txt", 0o100644, keep.clone()),
7605            ],
7606        );
7607
7608        let only_left = write_blob(&mut db, b"will be deleted\n");
7609        let only_right = write_blob(&mut db, b"freshly added\n");
7610
7611        let left = write_tree(
7612            &mut db,
7613            &[
7614                (b"dir", 0o040000, dir_left),
7615                (b"gone.txt", 0o100644, only_left),
7616                (b"untouched", 0o040000, untouched_dir.clone()),
7617            ],
7618        );
7619        let right = write_tree(
7620            &mut db,
7621            &[
7622                (b"dir", 0o040000, dir_right),
7623                (b"new.txt", 0o100644, only_right),
7624                (b"untouched", 0o040000, untouched_dir),
7625            ],
7626        );
7627
7628        let entries = diff_name_status_trees_with_options(
7629            &db,
7630            ObjectFormat::Sha1,
7631            &left,
7632            &right,
7633            DiffNameStatusOptions {
7634                detect_renames: false,
7635                detect_copies: false,
7636                find_copies_harder: false,
7637                rename_empty: true,
7638            },
7639        )
7640        .expect("test operation should succeed");
7641        assert_eq!(
7642            status_lines(&entries),
7643            vec![
7644                "M\tdir/changed.txt".to_string(),
7645                "D\tgone.txt".to_string(),
7646                "A\tnew.txt".to_string(),
7647            ],
7648            "unexpected raw status for mixed nested diff"
7649        );
7650
7651        assert_tree_diff_matches_full_all_modes(&db, &left, &right);
7652        fs::remove_dir_all(root).expect("test operation should succeed");
7653    }
7654
7655    #[test]
7656    fn pruned_walk_matches_full_for_rename_across_dirs() {
7657        // An exact rename (same blob oid) moving between directories. Rename
7658        // detection runs on the pruned add/delete set and must match the full
7659        // walk's result exactly.
7660        let (root, mut db) = structural_db();
7661
7662        let moved = write_blob(&mut db, b"i get moved across directories\n");
7663        let companion = write_blob(&mut db, b"i stay put\n");
7664        let stable_dir = write_tree(&mut db, &[(b"keep.txt", 0o100644, companion.clone())]);
7665
7666        let src_dir = write_tree(&mut db, &[(b"file.txt", 0o100644, moved.clone())]);
7667        let dst_dir = write_tree(&mut db, &[(b"renamed.txt", 0o100644, moved.clone())]);
7668
7669        let left = write_tree(
7670            &mut db,
7671            &[
7672                (b"src", 0o040000, src_dir),
7673                (b"stable", 0o040000, stable_dir.clone()),
7674            ],
7675        );
7676        let right = write_tree(
7677            &mut db,
7678            &[
7679                (b"dst", 0o040000, dst_dir),
7680                (b"stable", 0o040000, stable_dir),
7681            ],
7682        );
7683
7684        let entries = diff_name_status_trees_with_options(
7685            &db,
7686            ObjectFormat::Sha1,
7687            &left,
7688            &right,
7689            DiffNameStatusOptions {
7690                detect_renames: true,
7691                detect_copies: false,
7692                find_copies_harder: false,
7693                rename_empty: true,
7694            },
7695        )
7696        .expect("test operation should succeed");
7697        assert_eq!(
7698            status_lines(&entries),
7699            vec!["R100\tsrc/file.txt\tdst/renamed.txt".to_string()],
7700            "rename across dirs should be detected on pruned set"
7701        );
7702
7703        assert_tree_diff_matches_full_all_modes(&db, &left, &right);
7704        fs::remove_dir_all(root).expect("test operation should succeed");
7705    }
7706
7707    #[test]
7708    fn pruned_walk_matches_full_for_binary_and_mode_change() {
7709        // Binary blob modification plus an executable-bit (mode) change on an
7710        // otherwise-identical blob. Mode-only changes must still register as a
7711        // Modify (the pruned walk compares mode + oid, like the full map).
7712        let (root, mut db) = structural_db();
7713
7714        let bin_old = write_blob(&mut db, &[0u8, 159, 146, 150, 0, 255, 1, 2, 3]);
7715        let bin_new = write_blob(&mut db, &[0u8, 159, 146, 150, 0, 254, 9, 8, 7]);
7716        let script = write_blob(&mut db, b"#!/bin/sh\necho hi\n");
7717
7718        let left = write_tree(
7719            &mut db,
7720            &[
7721                (b"image.bin", 0o100644, bin_old),
7722                (b"run.sh", 0o100644, script.clone()),
7723            ],
7724        );
7725        let right = write_tree(
7726            &mut db,
7727            &[
7728                (b"image.bin", 0o100644, bin_new),
7729                // same blob oid, executable bit flipped on
7730                (b"run.sh", 0o100755, script),
7731            ],
7732        );
7733
7734        let entries = diff_name_status_trees_with_options(
7735            &db,
7736            ObjectFormat::Sha1,
7737            &left,
7738            &right,
7739            DiffNameStatusOptions {
7740                detect_renames: false,
7741                detect_copies: false,
7742                find_copies_harder: false,
7743                rename_empty: true,
7744            },
7745        )
7746        .expect("test operation should succeed");
7747        assert_eq!(
7748            status_lines(&entries),
7749            vec!["M\timage.bin".to_string(), "M\trun.sh".to_string()],
7750            "binary edit and mode-only change should both be Modify"
7751        );
7752
7753        assert_tree_diff_matches_full_all_modes(&db, &left, &right);
7754        fs::remove_dir_all(root).expect("test operation should succeed");
7755    }
7756
7757    #[test]
7758    fn pruned_walk_matches_full_for_dir_replaced_by_file() {
7759        // A name that is a directory on the left and a regular file on the right
7760        // (and vice versa). The flattened paths differ (`thing/...` vs `thing`),
7761        // so the pruned walk must treat them as unrelated add/delete pairs,
7762        // exactly as the full flatten does.
7763        let (root, mut db) = structural_db();
7764
7765        let inner_a = write_blob(&mut db, b"inner a\n");
7766        let inner_b = write_blob(&mut db, b"inner b\n");
7767        let thing_dir = write_tree(
7768            &mut db,
7769            &[(b"a.txt", 0o100644, inner_a), (b"b.txt", 0o100644, inner_b)],
7770        );
7771        let thing_file = write_blob(&mut db, b"now i am a file\n");
7772
7773        // other/ is a file on the left, a directory on the right.
7774        let other_file = write_blob(&mut db, b"i was a file\n");
7775        let other_inner = write_blob(&mut db, b"now nested\n");
7776        let other_dir = write_tree(&mut db, &[(b"x.txt", 0o100644, other_inner)]);
7777
7778        let left = write_tree(
7779            &mut db,
7780            &[
7781                (b"other", 0o100644, other_file),
7782                (b"thing", 0o040000, thing_dir),
7783            ],
7784        );
7785        let right = write_tree(
7786            &mut db,
7787            &[
7788                (b"other", 0o040000, other_dir),
7789                (b"thing", 0o100644, thing_file),
7790            ],
7791        );
7792
7793        let entries = diff_name_status_trees_with_options(
7794            &db,
7795            ObjectFormat::Sha1,
7796            &left,
7797            &right,
7798            DiffNameStatusOptions {
7799                detect_renames: false,
7800                detect_copies: false,
7801                find_copies_harder: false,
7802                rename_empty: true,
7803            },
7804        )
7805        .expect("test operation should succeed");
7806        assert_eq!(
7807            status_lines(&entries),
7808            vec![
7809                "D\tother".to_string(),
7810                "A\tother/x.txt".to_string(),
7811                "A\tthing".to_string(),
7812                "D\tthing/a.txt".to_string(),
7813                "D\tthing/b.txt".to_string(),
7814            ],
7815            "dir<->file swap should flatten to independent adds/deletes"
7816        );
7817
7818        assert_tree_diff_matches_full_all_modes(&db, &left, &right);
7819        fs::remove_dir_all(root).expect("test operation should succeed");
7820    }
7821
7822    #[test]
7823    fn pruned_walk_matches_full_for_identical_trees() {
7824        // Two identical root trees: zero changes, and the root must be skipped
7825        // without reading anything below it.
7826        let (root, mut db) = structural_db();
7827
7828        let blob = write_blob(&mut db, b"same\n");
7829        let sub = write_tree(&mut db, &[(b"f.txt", 0o100644, blob.clone())]);
7830        let tree = write_tree(
7831            &mut db,
7832            &[(b"sub", 0o040000, sub), (b"top.txt", 0o100644, blob)],
7833        );
7834
7835        let (pruned_left, pruned_right) =
7836            changed_tree_entries(&db, ObjectFormat::Sha1, &tree, &tree)
7837                .expect("test operation should succeed");
7838        assert!(
7839            pruned_left.is_empty() && pruned_right.is_empty(),
7840            "identical trees must produce no changed entries"
7841        );
7842
7843        let entries = diff_name_status_trees_with_options(
7844            &db,
7845            ObjectFormat::Sha1,
7846            &tree,
7847            &tree,
7848            DiffNameStatusOptions::default(),
7849        )
7850        .expect("test operation should succeed");
7851        assert!(entries.is_empty(), "identical trees must produce no diff");
7852
7853        assert_tree_diff_matches_full_all_modes(&db, &tree, &tree);
7854        fs::remove_dir_all(root).expect("test operation should succeed");
7855    }
7856
7857    #[test]
7858    fn find_copies_harder_uses_full_left_map_and_finds_unchanged_source() {
7859        // `--find-copies-harder` must still see an *unchanged* file as a copy
7860        // source. This is the case where the public entry point deliberately
7861        // falls back to the full flatten; verify the full-map fallback both
7862        // behaves correctly and matches a direct full-map computation.
7863        let (root, mut db) = structural_db();
7864
7865        // `template.txt` is unchanged between sides (lives in an untouched
7866        // subtree), and `copy.txt` is added on the right with the same content.
7867        let template = write_blob(&mut db, b"reusable boilerplate content\n");
7868        let lib_dir = write_tree(&mut db, &[(b"template.txt", 0o100644, template.clone())]);
7869
7870        let trigger_old = write_blob(&mut db, b"trigger old\n");
7871        let trigger_new = write_blob(&mut db, b"trigger new\n");
7872
7873        let left = write_tree(
7874            &mut db,
7875            &[
7876                (b"lib", 0o040000, lib_dir.clone()),
7877                (b"trigger.txt", 0o100644, trigger_old),
7878            ],
7879        );
7880        let right = write_tree(
7881            &mut db,
7882            &[
7883                (b"copy.txt", 0o100644, template.clone()),
7884                (b"lib", 0o040000, lib_dir),
7885                (b"trigger.txt", 0o100644, trigger_new),
7886            ],
7887        );
7888
7889        let options = DiffNameStatusOptions {
7890            detect_renames: true,
7891            detect_copies: true,
7892            find_copies_harder: true,
7893            rename_empty: true,
7894        };
7895
7896        // Reference via the full flatten (the old algorithm).
7897        let (full_left, full_right) =
7898            collect_full_tree_pair(&db, ObjectFormat::Sha1, &left, &right)
7899                .expect("test operation should succeed");
7900        let reference = diff_name_status_maps(
7901            &full_left,
7902            &full_right,
7903            full_left.keys().chain(full_right.keys()),
7904            options,
7905        )
7906        .expect("test operation should succeed");
7907
7908        let public =
7909            diff_name_status_trees_with_options(&db, ObjectFormat::Sha1, &left, &right, options)
7910                .expect("test operation should succeed");
7911        assert_eq!(
7912            status_lines(&reference),
7913            status_lines(&public),
7914            "find-copies-harder public diff must match full-map reference"
7915        );
7916        // The copy must be detected from the unchanged template source.
7917        assert!(
7918            public
7919                .iter()
7920                .any(|entry| matches!(entry.status, NameStatus::Copied(_))
7921                    && entry.old_path.as_ref().map(|p| p.as_bytes())
7922                        == Some(b"lib/template.txt".as_slice())
7923                    && entry.path == b"copy.txt"),
7924            "copy from unchanged source must be found with find_copies_harder: {public:?}"
7925        );
7926        fs::remove_dir_all(root).expect("test operation should succeed");
7927    }
7928
7929    #[test]
7930    fn pruned_walk_matches_full_with_inexact_rename_options() {
7931        // Exercise the rename-options entry point (which also selects pruned vs
7932        // full) with inexact detection enabled, across an untouched subtree.
7933        let (root, mut db) = structural_db();
7934
7935        let untouched = write_blob(&mut db, b"untouched file\n");
7936        let untouched_dir = write_tree(&mut db, &[(b"u.txt", 0o100644, untouched.clone())]);
7937
7938        // a.txt -> b.txt with one changed line (a 75% inexact rename).
7939        let old = write_blob(&mut db, b"one\ntwo\nthree\nfour\nfive\n");
7940        let new = write_blob(&mut db, b"one\ntwo\nTHREE\nfour\nfive\n");
7941
7942        let left = write_tree(
7943            &mut db,
7944            &[
7945                (b"a.txt", 0o100644, old),
7946                (b"keep", 0o040000, untouched_dir.clone()),
7947            ],
7948        );
7949        let right = write_tree(
7950            &mut db,
7951            &[
7952                (b"b.txt", 0o100644, new),
7953                (b"keep", 0o040000, untouched_dir),
7954            ],
7955        );
7956
7957        let options = RenameDetectionOptions {
7958            base: DiffNameStatusOptions {
7959                detect_renames: true,
7960                detect_copies: false,
7961                find_copies_harder: false,
7962                rename_empty: true,
7963            },
7964            detect_inexact: true,
7965            rename_threshold: DEFAULT_RENAME_THRESHOLD,
7966            copy_threshold: DEFAULT_RENAME_THRESHOLD,
7967        };
7968
7969        // Reference: full flatten + same detection.
7970        let (full_left, full_right) =
7971            collect_full_tree_pair(&db, ObjectFormat::Sha1, &left, &right)
7972                .expect("test operation should succeed");
7973        let reference = diff_name_status_maps_with_renames(
7974            &full_left,
7975            &full_right,
7976            full_left.keys().chain(full_right.keys()),
7977            options,
7978            |oid| read_blob_bytes(&db, oid),
7979        )
7980        .expect("test operation should succeed");
7981
7982        let public = diff_name_status_trees_with_rename_options(
7983            &db,
7984            ObjectFormat::Sha1,
7985            &left,
7986            &right,
7987            options,
7988        )
7989        .expect("test operation should succeed");
7990
7991        assert_eq!(
7992            status_lines(&reference),
7993            status_lines(&public),
7994            "inexact rename via pruned walk must match full-map reference"
7995        );
7996        assert_eq!(
7997            status_lines(&public),
7998            vec!["R075\ta.txt\tb.txt".to_string()],
7999            "expected a 75% inexact rename"
8000        );
8001        fs::remove_dir_all(root).expect("test operation should succeed");
8002    }
8003}
sley_diff_merge/lib.rs

sley_diff_merge/
lib.rs