mkit_core/ops/
diff.rs

1//! Tree-level structural diff.
2//!
3//! Compares two trees identified by their object hash and returns the
4//! minimal list of leaf-level changes (`added` / `removed` / `modified`
5//! / `mode_changed`). The walk is lockstep over the two sorted entry
6//! arrays, recurses into matching subtrees only when their hashes
7//! differ, and treats added/removed subtrees as bulk operations on every
8//! contained leaf.
9//!
10//! Also contains `status_diff` — the working-tree vs HEAD diff that
11//! powers `mkit status`.
12
13use std::path::Path;
14
15use crate::hash::Hash;
16use crate::index::{Index, IndexError};
17use crate::object::{EntryMode, Object, TreeEntry};
18use crate::store::{MAX_TREE_DEPTH, ObjectSource, ObjectStore, StoreError};
19use crate::worktree::{self, WorktreeError};
20
21/// What kind of change a [`DiffEntry`] represents.
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
23pub enum DiffKind {
24    /// Path was not present in the old tree, present in the new.
25    Added,
26    /// Path was present in the old tree, absent in the new.
27    Removed,
28    /// Same path, different content hash (and possibly different mode).
29    Modified,
30    /// Same path, same content hash, different [`EntryMode`].
31    ModeChanged,
32}
33
34/// One leaf-level change. `path` is `/`-joined relative to the root
35/// of the compared trees; subtree directories are NOT emitted as their
36/// own entries — only the leaves they contain.
37#[derive(Debug, Clone, PartialEq, Eq)]
38pub struct DiffEntry {
39    pub path: String,
40    pub kind: DiffKind,
41    pub old_hash: Option<Hash>,
42    pub new_hash: Option<Hash>,
43    /// File mode on each side (`None` when the side is absent), used to
44    /// render git-shaped `new file mode`/`index` header lines.
45    pub old_mode: Option<EntryMode>,
46    pub new_mode: Option<EntryMode>,
47}
48
49/// Sorted (by path) sequence of [`DiffEntry`].
50#[derive(Debug, Clone, Default, PartialEq, Eq)]
51pub struct DiffResult {
52    pub entries: Vec<DiffEntry>,
53}
54
55impl DiffResult {
56    #[must_use]
57    pub fn is_empty(&self) -> bool {
58        self.entries.is_empty()
59    }
60
61    #[must_use]
62    pub fn len(&self) -> usize {
63        self.entries.len()
64    }
65}
66
67/// Compare two trees and return their [`DiffResult`]. `None` for either
68/// hash represents the empty tree (use cases: comparing against the
69/// initial commit, against a rolled-back state).
70///
71/// # Errors
72///
73/// Propagates [`StoreError`] when an expected tree object is missing or
74/// fails its read-time hash check.
75pub fn diff_trees<S: ObjectSource + ?Sized>(
76    store: &S,
77    old_hash: Option<Hash>,
78    new_hash: Option<Hash>,
79) -> Result<DiffResult, StoreError> {
80    diff_trees_inner(store, old_hash, new_hash, false)
81}
82
83fn diff_trees_inner<S: ObjectSource + ?Sized>(
84    store: &S,
85    old_hash: Option<Hash>,
86    new_hash: Option<Hash>,
87    ignore_regular_executable_mode: bool,
88) -> Result<DiffResult, StoreError> {
89    // Trivial cases: both empty, or identical hashes -> empty diff.
90    match (old_hash, new_hash) {
91        (None, None) => return Ok(DiffResult::default()),
92        (Some(a), Some(b)) if a == b => return Ok(DiffResult::default()),
93        _ => {}
94    }
95
96    let old_entries = load_entries(store, old_hash)?;
97    let new_entries = load_entries(store, new_hash)?;
98
99    let mut out: Vec<DiffEntry> = Vec::new();
100    diff_entries_recursive(
101        store,
102        &old_entries,
103        &new_entries,
104        "",
105        &mut out,
106        ignore_regular_executable_mode,
107        0,
108    )?;
109    // git orders diff entries by pathname. The name-sorted walk is already
110    // sorted for the common cases; sort to also cover a dir/file replacement
111    // (`d` sorts before `d/x.txt`), where a single tree position emits both a
112    // shallower and a deeper path.
113    out.sort_by(|a, b| a.path.cmp(&b.path));
114    Ok(DiffResult { entries: out })
115}
116
117/// Lockstep walk of two name-sorted entry arrays.
118fn diff_entries_recursive<S: ObjectSource + ?Sized>(
119    store: &S,
120    old_entries: &[TreeEntry],
121    new_entries: &[TreeEntry],
122    prefix: &str,
123    out: &mut Vec<DiffEntry>,
124    ignore_regular_executable_mode: bool,
125    depth: usize,
126) -> Result<(), StoreError> {
127    if depth > MAX_TREE_DEPTH {
128        return Err(StoreError::TreeTooDeep);
129    }
130    let mut i = 0usize;
131    let mut j = 0usize;
132
133    while i < old_entries.len() && j < new_entries.len() {
134        let o = &old_entries[i];
135        let n = &new_entries[j];
136        match o.name.as_slice().cmp(n.name.as_slice()) {
137            std::cmp::Ordering::Less => {
138                add_removed_entries(store, o, prefix, out, depth)?;
139                i += 1;
140            }
141            std::cmp::Ordering::Greater => {
142                add_added_entries(store, n, prefix, out, depth)?;
143                j += 1;
144            }
145            std::cmp::Ordering::Equal => {
146                if o.mode == EntryMode::Tree && n.mode == EntryMode::Tree {
147                    if o.object_hash != n.object_hash {
148                        let sub_prefix = join_path(prefix, &o.name);
149                        let old_sub = load_tree(store, o.object_hash)?;
150                        let new_sub = load_tree(store, n.object_hash)?;
151                        diff_entries_recursive(
152                            store,
153                            &old_sub,
154                            &new_sub,
155                            &sub_prefix,
156                            out,
157                            ignore_regular_executable_mode,
158                            depth + 1,
159                        )?;
160                    }
161                    // identical subtree hashes -> nothing changed below
162                } else if o.mode == EntryMode::Tree || n.mode == EntryMode::Tree {
163                    // A directory replaced by a file (or vice versa). git
164                    // models this as the old subtree's leaves all deleted plus
165                    // the new entry added — never a single Modified blob whose
166                    // tree hash would be misread as a blob by the patch
167                    // renderer.
168                    add_removed_entries(store, o, prefix, out, depth)?;
169                    add_added_entries(store, n, prefix, out, depth)?;
170                } else if o.mode != n.mode && o.object_hash == n.object_hash {
171                    if !ignore_regular_executable_mode || !regular_executable_pair(o.mode, n.mode) {
172                        out.push(DiffEntry {
173                            path: join_path(prefix, &o.name),
174                            kind: DiffKind::ModeChanged,
175                            old_hash: Some(o.object_hash),
176                            new_hash: Some(n.object_hash),
177                            old_mode: Some(o.mode),
178                            new_mode: Some(n.mode),
179                        });
180                    }
181                } else if o.object_hash != n.object_hash || o.mode != n.mode {
182                    out.push(DiffEntry {
183                        path: join_path(prefix, &o.name),
184                        kind: DiffKind::Modified,
185                        old_hash: Some(o.object_hash),
186                        new_hash: Some(n.object_hash),
187                        old_mode: Some(o.mode),
188                        new_mode: Some(n.mode),
189                    });
190                }
191                i += 1;
192                j += 1;
193            }
194        }
195    }
196
197    while i < old_entries.len() {
198        add_removed_entries(store, &old_entries[i], prefix, out, depth)?;
199        i += 1;
200    }
201    while j < new_entries.len() {
202        add_added_entries(store, &new_entries[j], prefix, out, depth)?;
203        j += 1;
204    }
205    Ok(())
206}
207
208fn regular_executable_pair(a: EntryMode, b: EntryMode) -> bool {
209    matches!(
210        (a, b),
211        (EntryMode::Blob, EntryMode::Executable) | (EntryMode::Executable, EntryMode::Blob)
212    )
213}
214
215fn add_removed_entries<S: ObjectSource + ?Sized>(
216    store: &S,
217    entry: &TreeEntry,
218    prefix: &str,
219    out: &mut Vec<DiffEntry>,
220    depth: usize,
221) -> Result<(), StoreError> {
222    if depth > MAX_TREE_DEPTH {
223        return Err(StoreError::TreeTooDeep);
224    }
225    if entry.mode == EntryMode::Tree {
226        let sub_prefix = join_path(prefix, &entry.name);
227        let sub = load_tree(store, entry.object_hash)?;
228        for sub_entry in &sub {
229            add_removed_entries(store, sub_entry, &sub_prefix, out, depth + 1)?;
230        }
231    } else {
232        out.push(DiffEntry {
233            path: join_path(prefix, &entry.name),
234            kind: DiffKind::Removed,
235            old_hash: Some(entry.object_hash),
236            new_hash: None,
237            old_mode: Some(entry.mode),
238            new_mode: None,
239        });
240    }
241    Ok(())
242}
243
244fn add_added_entries<S: ObjectSource + ?Sized>(
245    store: &S,
246    entry: &TreeEntry,
247    prefix: &str,
248    out: &mut Vec<DiffEntry>,
249    depth: usize,
250) -> Result<(), StoreError> {
251    if depth > MAX_TREE_DEPTH {
252        return Err(StoreError::TreeTooDeep);
253    }
254    if entry.mode == EntryMode::Tree {
255        let sub_prefix = join_path(prefix, &entry.name);
256        let sub = load_tree(store, entry.object_hash)?;
257        for sub_entry in &sub {
258            add_added_entries(store, sub_entry, &sub_prefix, out, depth + 1)?;
259        }
260    } else {
261        out.push(DiffEntry {
262            path: join_path(prefix, &entry.name),
263            kind: DiffKind::Added,
264            old_hash: None,
265            new_hash: Some(entry.object_hash),
266            old_mode: None,
267            new_mode: Some(entry.mode),
268        });
269    }
270    Ok(())
271}
272
273fn load_entries<S: ObjectSource + ?Sized>(
274    store: &S,
275    hash: Option<Hash>,
276) -> Result<Vec<TreeEntry>, StoreError> {
277    match hash {
278        Some(h) => load_tree(store, h),
279        None => Ok(Vec::new()),
280    }
281}
282
283fn load_tree<S: ObjectSource + ?Sized>(store: &S, h: Hash) -> Result<Vec<TreeEntry>, StoreError> {
284    match store.read_object(&h)? {
285        Object::Tree(t) => Ok(t.entries),
286        other => Err(StoreError::Decode(
287            crate::object::MkitError::InvalidObjectType(other.object_type() as u8),
288        )),
289    }
290}
291
292/// Join a path prefix and an entry name with `/`. Lossy on non-UTF-8
293/// names: we use `String` rather than `Path` to avoid platform-specific
294/// separator handling. Tree names are constrained at the object layer
295/// to forbid `/` and `\\`, so the only lossy case is non-UTF-8 byte
296/// sequences in legacy data — the caller's `path` field will then be
297/// `String::from_utf8_lossy`'s replacement, which is acceptable for a
298/// diagnostic.
299fn join_path(prefix: &str, name: &[u8]) -> String {
300    let name_str = String::from_utf8_lossy(name);
301    if prefix.is_empty() {
302        name_str.into_owned()
303    } else {
304        let mut s = String::with_capacity(prefix.len() + 1 + name_str.len());
305        s.push_str(prefix);
306        s.push('/');
307        s.push_str(&name_str);
308        s
309    }
310}
311
312// =====================================================================
313// text_patch — line-based unified diff (for `mkit diff` hunks)
314// =====================================================================
315
316/// Number of unchanged context lines emitted on each side of a hunk.
317const PATCH_CONTEXT: usize = 3;
318
319/// Render a unified-diff patch between two byte blobs.
320///
321/// `old_path` / `new_path` are the `a/…` and `b/…` labels for the
322/// `---`/`+++` headers (callers conventionally pass the same repo path
323/// for both). The output is a Git-compatible unified diff:
324///
325/// ```text
326/// --- a/<old_path>
327/// +++ b/<new_path>
328/// @@ -<l>,<n> +<l>,<n> @@
329///  context
330/// -removed
331/// +added
332/// ```
333///
334/// Either side that is **binary** by Git's heuristic — a NUL byte in the
335/// first 8000 bytes — yields a single `Binary files a/<old> and b/<new>
336/// differ` line instead of hunks, matching Git.
337///
338/// The algorithm is the greedy Myers diff with Git-style hunk compaction
339/// (see [`unified_hunks`]); the hunks byte-match `git diff`. Trailing-newline
340/// handling follows Git's `\ No newline at end of file` convention.
341///
342/// Note this returns a `String` via lossy UTF-8 conversion, so a non-UTF-8
343/// (but non-binary) blob's raw bytes are not preserved here — use
344/// [`unified_hunks`] for byte-exact output.
345#[must_use]
346pub fn text_patch(old_bytes: &[u8], new_bytes: &[u8], old_path: &str, new_path: &str) -> String {
347    match unified_hunks(old_bytes, new_bytes) {
348        None => format!("Binary files a/{old_path} and b/{new_path} differ\n"),
349        Some(hunks) if hunks.is_empty() => String::new(),
350        Some(hunks) => {
351            format!(
352                "--- a/{old_path}\n+++ b/{new_path}\n{}",
353                String::from_utf8_lossy(&hunks)
354            )
355        }
356    }
357}
358
359/// The hunk body of a unified diff between two blobs: the `@@ … @@` headers
360/// and their `+`/`-`/context lines, with no `---`/`+++` file headers, as raw
361/// bytes (git diffs are byte-oriented and do not require UTF-8). Returns
362/// `None` when either side is **binary** by git's heuristic — a NUL byte in
363/// the first 8000 bytes — so a caller emits a `Binary files …` line instead.
364/// An empty result means the blobs are textually identical.
365///
366/// Splitting the hunk body out lets callers wrap it in a git-shaped
367/// `diff --git` header (with `/dev/null` for adds/deletes) while keeping the
368/// algorithm — Myers diff with git-style hunk compaction — in one place.
369#[must_use]
370pub fn unified_hunks(old_bytes: &[u8], new_bytes: &[u8]) -> Option<Vec<u8>> {
371    if is_binary(old_bytes) || is_binary(new_bytes) {
372        return None;
373    }
374    let old_lines = split_lines(old_bytes);
375    let new_lines = split_lines(new_bytes);
376    let ops = edit_script(&old_lines, &new_lines);
377    let hunks = group_hunks(&ops, PATCH_CONTEXT);
378    let mut out = Vec::new();
379    for hunk in &hunks {
380        render_hunk(&mut out, hunk, &old_lines, &new_lines);
381    }
382    Some(out)
383}
384
385/// Origin of a line within a [`PatchHunk`] — context (unchanged), added on
386/// the new side, or removed from the old side.
387#[derive(Debug, Clone, Copy, PartialEq, Eq)]
388pub enum HunkLineKind {
389    /// Unchanged line present on both sides.
390    Context,
391    /// Line added on the new side (`+`).
392    Added,
393    /// Line removed from the old side (`-`).
394    Removed,
395}
396
397/// One line of a hunk: its origin plus the raw bytes (no `+`/`-`/space
398/// prefix) and whether the source line had a trailing newline.
399#[derive(Debug, Clone, PartialEq, Eq)]
400pub struct HunkLine {
401    /// Whether the line is context, added, or removed.
402    pub kind: HunkLineKind,
403    /// Raw line bytes, without the unified-diff prefix or trailing `\n`.
404    pub text: Vec<u8>,
405    /// `true` when the source had a `\n` after this line.
406    pub has_newline: bool,
407}
408
409/// A single hunk for interactive staging (`add -p`): the 1-based old/new
410/// line ranges (as in the `@@` header) plus the ordered context/added/
411/// removed lines. Use [`apply_hunks_subset`] to materialize a chosen subset.
412#[derive(Debug, Clone, PartialEq, Eq)]
413pub struct PatchHunk {
414    /// 1-based first old-side line (0 when the old side is empty).
415    pub old_start: usize,
416    /// Number of old-side lines covered (context + removed).
417    pub old_len: usize,
418    /// 1-based first new-side line (0 when the new side is empty).
419    pub new_start: usize,
420    /// Number of new-side lines covered (context + added).
421    pub new_len: usize,
422    /// Ordered lines making up the hunk body.
423    pub lines: Vec<HunkLine>,
424}
425
426/// Enumerate the hunks between two blobs as structured [`PatchHunk`]s, using
427/// the same Myers diff + git-style compaction as [`unified_hunks`]. Returns
428/// `None` when either side is **binary** (git's NUL heuristic); an empty
429/// vector means the blobs are textually identical. Unlike `unified_hunks`,
430/// which renders bytes, this exposes each hunk's lines so a caller can let
431/// the user pick a subset to stage and rebuild the partial blob with
432/// [`apply_hunks_subset`].
433#[must_use]
434pub fn enumerate_hunks(old_bytes: &[u8], new_bytes: &[u8]) -> Option<Vec<PatchHunk>> {
435    if is_binary(old_bytes) || is_binary(new_bytes) {
436        return None;
437    }
438    let old_lines = split_lines(old_bytes);
439    let new_lines = split_lines(new_bytes);
440    let ops = edit_script(&old_lines, &new_lines);
441    let hunks = group_hunks(&ops, PATCH_CONTEXT);
442    Some(
443        hunks
444            .iter()
445            .map(|h| to_patch_hunk(h, &old_lines, &new_lines))
446            .collect(),
447    )
448}
449
450fn to_patch_hunk(hunk: &Hunk, old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> PatchHunk {
451    let lines = hunk
452        .ops
453        .iter()
454        .map(|op| {
455            let (kind, dl) = match *op {
456                DiffOp::Equal(oi, _) => (HunkLineKind::Context, &old[oi]),
457                DiffOp::Delete(oi) => (HunkLineKind::Removed, &old[oi]),
458                DiffOp::Insert(ni) => (HunkLineKind::Added, &new[ni]),
459            };
460            HunkLine {
461                kind,
462                text: dl.text.to_vec(),
463                has_newline: dl.has_newline,
464            }
465        })
466        .collect();
467    PatchHunk {
468        old_start: hunk.old_start,
469        old_len: hunk.old_len,
470        new_start: hunk.new_start,
471        new_len: hunk.new_len,
472        lines,
473    }
474}
475
476/// Rebuild a blob from `base_bytes` applying only the hunks whose index (into
477/// the slice returned by [`enumerate_hunks`]) is in `selected`. Selected
478/// hunks have their additions applied and removals dropped; unselected hunks
479/// keep the base content unchanged. `selected` order does not matter — hunks
480/// are always applied in file order — and out-of-range indices are ignored.
481///
482/// This is the staging primitive for `add -p`: pass the index/HEAD blob as
483/// the base and the accepted hunk indices to produce the partially-staged
484/// blob. Applying *all* hunks reproduces the new blob exactly; applying
485/// *none* reproduces the base.
486#[must_use]
487pub fn apply_hunks_subset(base_bytes: &[u8], hunks: &[PatchHunk], selected: &[usize]) -> Vec<u8> {
488    let old_lines = split_lines(base_bytes);
489    let sel: std::collections::HashSet<usize> = selected.iter().copied().collect();
490    let mut out: Vec<u8> = Vec::with_capacity(base_bytes.len());
491    let mut cursor = 0usize; // 0-based index into old_lines
492    for (i, h) in hunks.iter().enumerate() {
493        // 0-based start of this hunk's old-side region. A pure insertion into
494        // an empty base has old_len == 0 and old_start == 0; otherwise the
495        // hunk always carries context, so old_start is 1-based.
496        let region_start = if h.old_len == 0 {
497            h.old_start
498        } else {
499            h.old_start - 1
500        }
501        .min(old_lines.len());
502        let region_end = (region_start + h.old_len).min(old_lines.len());
503        // Untouched base lines before this hunk.
504        for dl in &old_lines[cursor..region_start] {
505            emit_raw_line(&mut out, dl.text, dl.has_newline);
506        }
507        if sel.contains(&i) {
508            // Apply: emit the new side (context + added).
509            for l in &h.lines {
510                if l.kind != HunkLineKind::Removed {
511                    emit_raw_line(&mut out, &l.text, l.has_newline);
512                }
513            }
514        } else {
515            // Keep the base region verbatim (identical to context + removed).
516            for dl in &old_lines[region_start..region_end] {
517                emit_raw_line(&mut out, dl.text, dl.has_newline);
518            }
519        }
520        cursor = region_end;
521    }
522    for dl in &old_lines[cursor..] {
523        emit_raw_line(&mut out, dl.text, dl.has_newline);
524    }
525    out
526}
527
528fn emit_raw_line(out: &mut Vec<u8>, text: &[u8], has_newline: bool) {
529    out.extend_from_slice(text);
530    if has_newline {
531        out.push(b'\n');
532    }
533}
534
535/// A contiguous run of base lines that one side changed, anchored on base
536/// line coordinates: `base_len` base lines (0 for a pure insertion) are
537/// replaced by `new`. Used by the 3-way merge to detect overlap and splice.
538struct MergeRegion {
539    base_start: usize,
540    base_len: usize,
541    new: Vec<(Vec<u8>, bool)>,
542}
543
544/// Line-level 3-way merge of three blobs (diff3-style, conservative).
545///
546/// Returns `Some(merged)` when `ours` and `theirs` changed **disjoint**
547/// regions of `base` (so the merge is unambiguous), and `None` when their
548/// changes overlap or any input is binary — the caller then records a
549/// conflict. Identical `ours`/`theirs` merge trivially. Unlike a full
550/// diff3 this does not auto-resolve *identical overlapping* edits (those
551/// stay a conflict): it errs toward conflict, never toward a wrong merge.
552/// (#298)
553#[must_use]
554pub fn merge_blob_3way(base: &[u8], ours: &[u8], theirs: &[u8]) -> Option<Vec<u8>> {
555    if ours == theirs {
556        return Some(ours.to_vec());
557    }
558    if is_binary(base) || is_binary(ours) || is_binary(theirs) {
559        return None;
560    }
561    let base_lines = split_lines(base);
562    let ours_regions = changed_regions(&base_lines, &split_lines(ours));
563    let theirs_regions = changed_regions(&base_lines, &split_lines(theirs));
564    if regions_overlap(&ours_regions, &theirs_regions) {
565        return None;
566    }
567    Some(apply_merge_regions(
568        &base_lines,
569        &ours_regions,
570        &theirs_regions,
571    ))
572}
573
574/// Group the edit script `base → side` into base-anchored changed regions
575/// (no context — adjacent equal lines bound each region).
576fn changed_regions(base: &[DiffLine<'_>], side: &[DiffLine<'_>]) -> Vec<MergeRegion> {
577    let mut regions: Vec<MergeRegion> = Vec::new();
578    let mut cur: Option<MergeRegion> = None;
579    let mut base_idx = 0usize; // next unconsumed base line
580    for op in edit_script(base, side) {
581        match op {
582            DiffOp::Equal(bi, _) => {
583                if let Some(r) = cur.take() {
584                    regions.push(r);
585                }
586                base_idx = bi + 1;
587            }
588            DiffOp::Delete(bi) => {
589                cur.get_or_insert(MergeRegion {
590                    base_start: base_idx,
591                    base_len: 0,
592                    new: Vec::new(),
593                })
594                .base_len += 1;
595                base_idx = bi + 1;
596            }
597            DiffOp::Insert(si) => {
598                let line = &side[si];
599                cur.get_or_insert(MergeRegion {
600                    base_start: base_idx,
601                    base_len: 0,
602                    new: Vec::new(),
603                })
604                .new
605                .push((line.text.to_vec(), line.has_newline));
606            }
607        }
608    }
609    if let Some(r) = cur.take() {
610        regions.push(r);
611    }
612    regions
613}
614
615/// `true` if any ours-region overlaps any theirs-region on base lines.
616/// Modify spans use half-open `[start, start+len)` intersection; a pure
617/// insertion conflicts only when it lands strictly inside a modify span,
618/// or coincides with another insertion (ambiguous order) — insertions at a
619/// modify boundary are adjacent and merge cleanly.
620fn regions_overlap(ours: &[MergeRegion], theirs: &[MergeRegion]) -> bool {
621    ours.iter().any(|a| {
622        theirs.iter().any(|b| {
623            let (a_s, a_e) = (a.base_start, a.base_start + a.base_len);
624            let (b_s, b_e) = (b.base_start, b.base_start + b.base_len);
625            match (a.base_len == 0, b.base_len == 0) {
626                (true, true) => a_s == b_s,
627                (true, false) => b_s < a_s && a_s < b_e,
628                (false, true) => a_s < b_s && b_s < a_e,
629                (false, false) => a_s < b_e && b_s < a_e,
630            }
631        })
632    })
633}
634
635/// Splice non-overlapping ours/theirs regions back onto `base`. Regions are
636/// applied in base order; at a shared start a pure insertion (len 0) sorts
637/// before a modify so inserted lines precede the replaced ones.
638fn apply_merge_regions(
639    base: &[DiffLine<'_>],
640    ours: &[MergeRegion],
641    theirs: &[MergeRegion],
642) -> Vec<u8> {
643    let mut all: Vec<&MergeRegion> = ours.iter().chain(theirs.iter()).collect();
644    all.sort_by_key(|r| (r.base_start, r.base_len));
645    let mut out: Vec<u8> = Vec::new();
646    let mut cursor = 0usize;
647    for r in all {
648        for line in &base[cursor..r.base_start] {
649            emit_raw_line(&mut out, line.text, line.has_newline);
650        }
651        for (text, nl) in &r.new {
652            emit_raw_line(&mut out, text, *nl);
653        }
654        cursor = r.base_start + r.base_len;
655    }
656    for line in &base[cursor..] {
657        emit_raw_line(&mut out, line.text, line.has_newline);
658    }
659    out
660}
661
662/// Added / deleted line counts between two blobs, from the same Myers edit
663/// script the unified patch uses. `None` when either side is **binary** —
664/// matching Git's heuristic of a NUL byte within the first 8000 bytes
665/// (independent of UTF-8 validity), so `diff --stat` renders `Bin …` for
666/// exactly the blobs Git would.
667///
668/// Used by `diff --stat`; kept here so the stat counts always agree with
669/// the `+`/`-` lines `text_patch` would emit for the same blobs. Lines are
670/// split on `\n` bytes, so the counts hold for any non-binary blob, including
671/// non-UTF-8 text.
672#[must_use]
673pub fn diff_line_counts(old_bytes: &[u8], new_bytes: &[u8]) -> Option<(usize, usize)> {
674    if is_binary(old_bytes) || is_binary(new_bytes) {
675        return None;
676    }
677    let old_lines = split_lines(old_bytes);
678    let new_lines = split_lines(new_bytes);
679    let mut added = 0;
680    let mut deleted = 0;
681    for op in edit_script(&old_lines, &new_lines) {
682        match op {
683            DiffOp::Insert(_) => added += 1,
684            DiffOp::Delete(_) => deleted += 1,
685            DiffOp::Equal(_, _) => {}
686        }
687    }
688    Some((added, deleted))
689}
690
691/// Git's binary heuristic (`buffer_is_binary`): a NUL byte within the
692/// first 8000 bytes marks the blob binary, regardless of UTF-8 validity.
693fn is_binary(bytes: &[u8]) -> bool {
694    const FIRST_FEW_BYTES: usize = 8000;
695    bytes.iter().take(FIRST_FEW_BYTES).any(|&b| b == 0)
696}
697
698/// A single line (raw bytes) plus whether the source had a trailing newline
699/// after it.
700struct DiffLine<'a> {
701    text: &'a [u8],
702    /// `true` when this line was terminated by `\n` in the source.
703    has_newline: bool,
704}
705
706/// Split bytes into lines on `\n`, preserving whether the final line had a
707/// trailing newline. An empty input yields no lines.
708fn split_lines(text: &[u8]) -> Vec<DiffLine<'_>> {
709    let mut lines = Vec::new();
710    let mut rest = text;
711    while !rest.is_empty() {
712        if let Some(idx) = rest.iter().position(|&b| b == b'\n') {
713            lines.push(DiffLine {
714                text: &rest[..idx],
715                has_newline: true,
716            });
717            rest = &rest[idx + 1..];
718        } else {
719            lines.push(DiffLine {
720                text: rest,
721                has_newline: false,
722            });
723            rest = b"";
724        }
725    }
726    lines
727}
728
729/// One element of a line-level edit script.
730#[derive(Debug, Clone, Copy, PartialEq, Eq)]
731enum DiffOp {
732    /// Line present in both sides (indices into old, new).
733    Equal(usize, usize),
734    /// Line only in the old side (index into old).
735    Delete(usize),
736    /// Line only in the new side (index into new).
737    Insert(usize),
738}
739
740/// Compute a line-level edit script using the greedy Myers diff, then
741/// canonicalize hunk boundaries the way git's xdiff does (slide each run of
742/// changed lines as far down as identical neighbours allow). The result
743/// matches `git diff`'s hunks for the common cases — same algorithm, same
744/// boundary convention — modulo git's optional indent heuristic.
745fn edit_script(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> Vec<DiffOp> {
746    let (mut old_changed, mut new_changed) = myers_changed(old, new);
747    compact_changes(old, &mut old_changed);
748    compact_changes(new, &mut new_changed);
749    script_from_flags(old, new, &old_changed, &new_changed)
750}
751
752/// Run the greedy Myers diff and mark which old lines are deletions and which
753/// new lines are insertions. Lines left unmarked are the matched (equal)
754/// lines that pair up in order.
755//
756// Myers indexes paths by signed diagonal `k = x - y`, so the V array and
757// backtrack inherently convert between `isize` (diagonals, offsets) and
758// `usize` (line indices). The values are bounded by `n + m`, well within
759// range, so the sign/wrap casts are safe; `x`/`y`/`k`/`d`/`v` are the
760// algorithm's canonical names.
761#[allow(
762    clippy::cast_sign_loss,
763    clippy::cast_possible_wrap,
764    clippy::many_single_char_names
765)]
766fn myers_changed(old: &[DiffLine<'_>], new: &[DiffLine<'_>]) -> (Vec<bool>, Vec<bool>) {
767    let n = old.len();
768    let m = new.len();
769    let mut old_changed = vec![false; n];
770    let mut new_changed = vec![false; m];
771    if n == 0 {
772        new_changed.fill(true);
773        return (old_changed, new_changed);
774    }
775    if m == 0 {
776        old_changed.fill(true);
777        return (old_changed, new_changed);
778    }
779
780    let max = n + m;
781    let offset = max as isize; // shift so diagonal k maps to a non-negative index
782    let mut v = vec![0isize; 2 * max + 1];
783    let mut trace: Vec<Vec<isize>> = Vec::new();
784
785    let idx = |k: isize| (k + offset) as usize;
786    let mut found = max as isize;
787    'outer: for d in 0..=max as isize {
788        trace.push(v.clone());
789        let mut k = -d;
790        while k <= d {
791            // Greedy: extend the furthest-reaching path on diagonal k.
792            let mut x = if k == -d || (k != d && v[idx(k - 1)] < v[idx(k + 1)]) {
793                v[idx(k + 1)] // down → an insertion (consume a new line)
794            } else {
795                v[idx(k - 1)] + 1 // right → a deletion (consume an old line)
796            };
797            let mut y = x - k;
798            while (x as usize) < n
799                && (y as usize) < m
800                && lines_equal(&old[x as usize], &new[y as usize])
801            {
802                x += 1;
803                y += 1;
804            }
805            v[idx(k)] = x;
806            if x as usize >= n && y as usize >= m {
807                found = d;
808                break 'outer;
809            }
810            k += 2;
811        }
812    }
813
814    // Backtrack through the saved V snapshots to recover the edits.
815    let mut x = n as isize;
816    let mut y = m as isize;
817    for d in (0..=found).rev() {
818        let vd = &trace[d as usize];
819        let k = x - y;
820        let down = k == -d || (k != d && vd[idx(k - 1)] < vd[idx(k + 1)]);
821        let prev_k = if down { k + 1 } else { k - 1 };
822        let prev_x = vd[idx(prev_k)];
823        let prev_y = prev_x - prev_k;
824        // Walk back down the snake (matched lines) — no flags set there.
825        while x > prev_x && y > prev_y {
826            x -= 1;
827            y -= 1;
828        }
829        if d > 0 {
830            if down {
831                new_changed[(y - 1) as usize] = true; // insertion
832                y -= 1;
833            } else {
834                old_changed[(x - 1) as usize] = true; // deletion
835                x -= 1;
836            }
837        }
838    }
839    (old_changed, new_changed)
840}
841
842/// Slide each maximal run of changed lines downward while the line leaving the
843/// top of the run equals the line entering at the bottom — git's
844/// `xdl_change_compact` canonical placement, so a change among identical
845/// neighbours lands where git puts it.
846fn compact_changes(lines: &[DiffLine<'_>], changed: &mut [bool]) {
847    let n = lines.len();
848    let mut i = 0;
849    while i < n {
850        if !changed[i] {
851            i += 1;
852            continue;
853        }
854        // [start, end) is a run of changed lines.
855        let start = i;
856        let mut end = i;
857        while end < n && changed[end] {
858            end += 1;
859        }
860        // Slide down: the line at `start` leaves the run and the line at
861        // `end` joins it, valid only when they are identical.
862        let (mut s, mut e) = (start, end);
863        while e < n && lines_equal(&lines[s], &lines[e]) {
864            changed[s] = false;
865            changed[e] = true;
866            s += 1;
867            e += 1;
868        }
869        i = e;
870    }
871}
872
873/// Build the `DiffOp` sequence from the per-side changed flags, in git's
874/// order: within each change region, all deletions precede all insertions;
875/// matched lines pair up as `Equal`.
876fn script_from_flags(
877    old: &[DiffLine<'_>],
878    new: &[DiffLine<'_>],
879    old_changed: &[bool],
880    new_changed: &[bool],
881) -> Vec<DiffOp> {
882    let (n, m) = (old.len(), new.len());
883    let mut ops = Vec::new();
884    let (mut i, mut j) = (0usize, 0usize);
885    while i < n || j < m {
886        if i < n && j < m && !old_changed[i] && !new_changed[j] {
887            ops.push(DiffOp::Equal(i, j));
888            i += 1;
889            j += 1;
890        } else {
891            while i < n && old_changed[i] {
892                ops.push(DiffOp::Delete(i));
893                i += 1;
894            }
895            while j < m && new_changed[j] {
896                ops.push(DiffOp::Insert(j));
897                j += 1;
898            }
899        }
900    }
901    ops
902}
903
904fn lines_equal(a: &DiffLine<'_>, b: &DiffLine<'_>) -> bool {
905    a.text == b.text && a.has_newline == b.has_newline
906}
907
908/// A contiguous group of edits plus surrounding context, with the
909/// 1-based starting line numbers and lengths for the `@@` header.
910struct Hunk {
911    old_start: usize,
912    old_len: usize,
913    new_start: usize,
914    new_len: usize,
915    ops: Vec<DiffOp>,
916}
917
918/// Group an edit script into hunks, each padded with up to `context`
919/// unchanged lines and merged when their context windows touch.
920fn group_hunks(ops: &[DiffOp], context: usize) -> Vec<Hunk> {
921    // Indices of ops that are actual changes.
922    let change_positions: Vec<usize> = ops
923        .iter()
924        .enumerate()
925        .filter(|(_, op)| !matches!(op, DiffOp::Equal(_, _)))
926        .map(|(idx, _)| idx)
927        .collect();
928    if change_positions.is_empty() {
929        return Vec::new();
930    }
931
932    // Build [start,end) op-index ranges around each change, merging
933    // ranges whose context windows overlap or abut.
934    let mut ranges: Vec<(usize, usize)> = Vec::new();
935    for &pos in &change_positions {
936        let start = pos.saturating_sub(context);
937        let end = (pos + context + 1).min(ops.len());
938        match ranges.last_mut() {
939            Some(last) if start <= last.1 => last.1 = last.1.max(end),
940            _ => ranges.push((start, end)),
941        }
942    }
943
944    ranges
945        .into_iter()
946        .map(|(start, end)| build_hunk(&ops[start..end]))
947        .collect()
948}
949
950fn build_hunk(slice: &[DiffOp]) -> Hunk {
951    let mut old_start = None;
952    let mut new_start = None;
953    let mut old_len = 0usize;
954    let mut new_len = 0usize;
955    for op in slice {
956        match *op {
957            DiffOp::Equal(oi, ni) => {
958                old_start.get_or_insert(oi);
959                new_start.get_or_insert(ni);
960                old_len += 1;
961                new_len += 1;
962            }
963            DiffOp::Delete(oi) => {
964                old_start.get_or_insert(oi);
965                old_len += 1;
966            }
967            DiffOp::Insert(ni) => {
968                new_start.get_or_insert(ni);
969                new_len += 1;
970            }
971        }
972    }
973    Hunk {
974        // Convert 0-based to 1-based; empty side starts at 0.
975        old_start: old_start.map_or(0, |s| s + 1),
976        old_len,
977        new_start: new_start.map_or(0, |s| s + 1),
978        new_len,
979        ops: slice.to_vec(),
980    }
981}
982
983/// Format one side of an `@@` range as git does: `start,len`, but the `,len`
984/// is omitted when `len == 1` (`@@ -1 +1 @@`).
985fn hunk_range(start: usize, len: usize) -> String {
986    if len == 1 {
987        start.to_string()
988    } else {
989        format!("{start},{len}")
990    }
991}
992
993fn render_hunk(out: &mut Vec<u8>, hunk: &Hunk, old: &[DiffLine<'_>], new: &[DiffLine<'_>]) {
994    let header = format!(
995        "@@ -{} +{} @@\n",
996        hunk_range(hunk.old_start, hunk.old_len),
997        hunk_range(hunk.new_start, hunk.new_len)
998    );
999    out.extend_from_slice(header.as_bytes());
1000    for op in &hunk.ops {
1001        match *op {
1002            DiffOp::Equal(oi, _) => emit_line(out, b' ', &old[oi]),
1003            DiffOp::Delete(oi) => emit_line(out, b'-', &old[oi]),
1004            DiffOp::Insert(ni) => emit_line(out, b'+', &new[ni]),
1005        }
1006    }
1007}
1008
1009fn emit_line(out: &mut Vec<u8>, prefix: u8, line: &DiffLine<'_>) {
1010    out.push(prefix);
1011    out.extend_from_slice(line.text);
1012    out.push(b'\n');
1013    if !line.has_newline {
1014        out.extend_from_slice(b"\\ No newline at end of file\n");
1015    }
1016}
1017
1018// =====================================================================
1019// status_diff — working-tree vs HEAD (for `mkit status`)
1020// =====================================================================
1021
1022/// Staging state of a [`StatusEntry`] relative to the index.
1023///
1024/// When no index is passed to [`status_diff`], every entry has
1025/// `StatusStaging::Unstaged`.
1026#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1027pub enum StatusStaging {
1028    /// Change is not staged (worktree differs from HEAD, not in index).
1029    Unstaged,
1030    /// Change is staged (in the index, matching the worktree).
1031    Staged,
1032    /// Change exists in both index and worktree with different content
1033    /// (partially staged scenario).
1034    PartiallyStaged,
1035}
1036
1037/// One entry in the `mkit status` output. Combines a [`DiffEntry`] with
1038/// index-awareness so the caller can render three-way status output.
1039#[derive(Debug, Clone, PartialEq, Eq)]
1040pub struct StatusEntry {
1041    /// Underlying diff entry (path, kind, old/new hashes).
1042    pub diff: DiffEntry,
1043    /// Relationship of this entry to the staging index.
1044    pub staging: StatusStaging,
1045}
1046
1047/// Error type for [`status_diff`].
1048#[derive(Debug, thiserror::Error)]
1049pub enum DiffError {
1050    /// Underlying object-store error.
1051    #[error(transparent)]
1052    Store(#[from] StoreError),
1053    /// Error building the worktree snapshot.
1054    #[error(transparent)]
1055    Worktree(#[from] WorktreeError),
1056    /// Error seeding the tracked set from a tree.
1057    #[error(transparent)]
1058    Index(#[from] IndexError),
1059}
1060
1061/// Compare HEAD ↔ index and index ↔ worktree, returning a list of
1062/// [`StatusEntry`] grouped by staging state.
1063///
1064/// Pre-#102 this function diffed only HEAD↔worktree and annotated
1065/// each entry with index-state. That hid one hazard: a path staged
1066/// to the index whose worktree was later reverted to match HEAD
1067/// would diff to nothing — but `mkit commit` (which signs HEAD↔
1068/// index post-#102) would still commit the staged content. The
1069/// staged change was invisible to `mkit status`.
1070///
1071/// New shape:
1072///
1073/// - `Staged` — path differs between HEAD and the index-built tree
1074///   (the change is what `mkit commit` will sign).
1075/// - `Unstaged` — path differs between the index-built tree and the
1076///   worktree (changes the user has not yet `mkit add`-ed).
1077/// - When the same path appears in both legs (e.g. staged v2, then
1078///   worktree edited to v3), one entry is emitted per leg so callers
1079///   render both sections — matching git's two-section layout. The
1080///   `PartiallyStaged` enum variant is retained for back-compat but
1081///   no longer produced by this function.
1082///
1083/// When `index` is `None`, falls back to the legacy HEAD↔worktree
1084/// diff and labels every entry `Unstaged` — used by callers that
1085/// haven't initialized a staging index yet.
1086///
1087/// # Errors
1088///
1089/// Propagates [`WorktreeError`] (I/O, symlink validation, chunker
1090/// limit) and [`StoreError`] (missing or corrupt objects).
1091#[allow(clippy::too_many_lines)]
1092pub fn status_diff(
1093    store: &ObjectStore,
1094    head_tree: Option<&Hash>,
1095    worktree_root: &Path,
1096    index: Option<&Index>,
1097) -> Result<Vec<StatusEntry>, DiffError> {
1098    status_diff_observed(store, head_tree, worktree_root, index).map(|(entries, _)| entries)
1099}
1100
1101/// [`status_diff`] that additionally returns the worktree walk's
1102/// [`worktree::StatObservation`]s — entries whose cache was absent or
1103/// racy-smudged but whose re-hash matched the staged hash. Callers
1104/// (the `status` CLI) use them to heal the stat cache from hash-time
1105/// stats; pairing a *later* stat with the earlier hash is unsound.
1106///
1107/// # Errors
1108/// See [`status_diff`].
1109#[allow(clippy::type_complexity)]
1110pub fn status_diff_observed(
1111    store: &ObjectStore,
1112    head_tree: Option<&Hash>,
1113    worktree_root: &Path,
1114    index: Option<&Index>,
1115) -> Result<(Vec<StatusEntry>, Vec<worktree::StatObservation>), DiffError> {
1116    // Always snapshot the worktree — the index↔worktree leg uses it. The
1117    // tracked set for ignore exemption is the staging index if present, else
1118    // the HEAD tree's paths (seeded here) — without it a tracked file
1119    // matching an ignore rule would be dropped and misreported as a deletion.
1120    let head_seed;
1121    let tracked = if let Some(i) = index {
1122        Some(i)
1123    } else if let Some(ht) = head_tree {
1124        head_seed = crate::index::from_tree(store, *ht)?;
1125        Some(&head_seed)
1126    } else {
1127        None
1128    };
1129    // Snapshot objects are ephemeral: they live in an in-memory overlay
1130    // (EphemeralSink) and never touch the durable store — status pays
1131    // no durability cost, leaves no garbage in objects/, and can never
1132    // make a non-durable object visible to another writer's dedup.
1133    // Reads fall through to the store for committed objects.
1134    let snapshot = crate::store::EphemeralSink::new(store);
1135    let mut observations = Vec::new();
1136    let work_tree_hash = worktree::build_tree_filtered_observed(
1137        &snapshot,
1138        worktree_root,
1139        tracked,
1140        &mut observations,
1141    )?;
1142
1143    let Some(idx) = index else {
1144        // Legacy fallback: HEAD↔worktree, everything labeled Unstaged.
1145        let diff = diff_worktree_trees(&snapshot, head_tree.copied(), Some(work_tree_hash))?;
1146        return Ok((
1147            diff.entries
1148                .into_iter()
1149                .map(|d| StatusEntry {
1150                    diff: d,
1151                    staging: StatusStaging::Unstaged,
1152                })
1153                .collect(),
1154            observations,
1155        ));
1156    };
1157
1158    // Build the index tree exactly the way `mkit commit` builds it.
1159    // This is the authoritative "what would be committed right now."
1160    // Ephemeral diff snapshot (no durable publish) — cheap shape check.
1161    let index_tree = worktree::build_tree_from_index_with(store, &snapshot, idx, false)?;
1162
1163    let staged = diff_trees(&snapshot, head_tree.copied(), Some(index_tree))?;
1164    let unstaged = diff_worktree_trees(&snapshot, Some(index_tree), Some(work_tree_hash))?;
1165
1166    // Emit one entry per (path, leg). A path appearing in both legs
1167    // produces two entries — one `Staged` and one `Unstaged` — so the
1168    // status renderer shows it under BOTH "Changes to be committed"
1169    // and "Changes not staged for commit", matching git's two-section
1170    // layout. The `PartiallyStaged` enum variant is retained for API
1171    // back-compat but no longer produced.
1172    let mut out: Vec<StatusEntry> =
1173        Vec::with_capacity(staged.entries.len() + unstaged.entries.len());
1174    for d in staged.entries {
1175        out.push(StatusEntry {
1176            diff: d,
1177            staging: StatusStaging::Staged,
1178        });
1179    }
1180    for d in unstaged.entries {
1181        out.push(StatusEntry {
1182            diff: d,
1183            staging: StatusStaging::Unstaged,
1184        });
1185    }
1186    out.sort_by(|a, b| {
1187        // Stable rendering order: by path, then staged before unstaged.
1188        a.diff.path.cmp(&b.diff.path).then_with(|| {
1189            #[allow(clippy::match_same_arms)]
1190            match (a.staging, b.staging) {
1191                (StatusStaging::Staged, StatusStaging::Staged) => std::cmp::Ordering::Equal,
1192                (StatusStaging::Staged, _) => std::cmp::Ordering::Less,
1193                (_, StatusStaging::Staged) => std::cmp::Ordering::Greater,
1194                _ => std::cmp::Ordering::Equal,
1195            }
1196        })
1197    });
1198    Ok((out, observations))
1199}
1200
1201#[cfg(unix)]
1202fn diff_worktree_trees<S: ObjectSource + ?Sized>(
1203    store: &S,
1204    old_hash: Option<Hash>,
1205    new_hash: Option<Hash>,
1206) -> Result<DiffResult, StoreError> {
1207    diff_trees(store, old_hash, new_hash)
1208}
1209
1210#[cfg(not(unix))]
1211fn diff_worktree_trees<S: ObjectSource + ?Sized>(
1212    store: &S,
1213    old_hash: Option<Hash>,
1214    new_hash: Option<Hash>,
1215) -> Result<DiffResult, StoreError> {
1216    diff_trees_inner(store, old_hash, new_hash, true)
1217}
1218
1219// =====================================================================
1220// Tests
1221// =====================================================================
1222
1223#[cfg(test)]
1224#[allow(clippy::many_single_char_names)] // single-letter blob/entry names keep the tables compact
1225mod tests {
1226    use super::*;
1227    use crate::object::{Blob, Tree};
1228    use crate::serialize;
1229    use tempfile::TempDir;
1230
1231    #[test]
1232    fn diff_line_counts_counts_text_and_flags_binary() {
1233        // Plain text: +2/-1 (b→B replaced, d,e added).
1234        assert_eq!(
1235            diff_line_counts(b"a\nb\nc\n", b"a\nB\nc\nd\ne\n"),
1236            Some((3, 1))
1237        );
1238        // A NUL byte → binary by git's heuristic, even though valid UTF-8.
1239        assert_eq!(diff_line_counts(b"a\nb\n", b"a\x00b\n"), None);
1240        assert_eq!(diff_line_counts(b"x\x00y", b"z"), None);
1241        // No NUL but invalid UTF-8 → still counted as text (lossy), not binary.
1242        assert!(diff_line_counts(b"\xff\xfe\n", b"\xff\xfe\nmore\n").is_some());
1243    }
1244
1245    fn fresh_store() -> (TempDir, ObjectStore) {
1246        let dir = TempDir::new().unwrap();
1247        let store = ObjectStore::init(dir.path()).unwrap();
1248        (dir, store)
1249    }
1250
1251    fn put_blob(store: &ObjectStore, data: &[u8]) -> Hash {
1252        let obj = Object::Blob(Blob {
1253            data: data.to_vec(),
1254        });
1255        let bytes = serialize::serialize(&obj).unwrap();
1256        store.write(&bytes).unwrap()
1257    }
1258
1259    fn put_tree(store: &ObjectStore, entries: Vec<TreeEntry>) -> Hash {
1260        let obj = Object::Tree(Tree { entries });
1261        let bytes = serialize::serialize(&obj).unwrap();
1262        store.write(&bytes).unwrap()
1263    }
1264
1265    fn entry(name: &[u8], mode: EntryMode, h: Hash) -> TreeEntry {
1266        TreeEntry {
1267            name: name.to_vec(),
1268            mode,
1269            object_hash: h,
1270        }
1271    }
1272
1273    #[test]
1274    fn identical_trees_no_diff() {
1275        let (_d, s) = fresh_store();
1276        let blob = put_blob(&s, b"content");
1277        let tree = put_tree(&s, vec![entry(b"a.txt", EntryMode::Blob, blob)]);
1278        let result = diff_trees(&s, Some(tree), Some(tree)).unwrap();
1279        assert_eq!(result.len(), 0);
1280    }
1281
1282    #[test]
1283    fn added_file_detected() {
1284        let (_d, s) = fresh_store();
1285        let blob_a = put_blob(&s, b"aaa");
1286        let blob_b = put_blob(&s, b"bbb");
1287        let old = put_tree(&s, vec![entry(b"a.txt", EntryMode::Blob, blob_a)]);
1288        let new = put_tree(
1289            &s,
1290            vec![
1291                entry(b"a.txt", EntryMode::Blob, blob_a),
1292                entry(b"b.txt", EntryMode::Blob, blob_b),
1293            ],
1294        );
1295        let r = diff_trees(&s, Some(old), Some(new)).unwrap();
1296        assert_eq!(r.entries.len(), 1);
1297        assert_eq!(r.entries[0].path, "b.txt");
1298        assert_eq!(r.entries[0].kind, DiffKind::Added);
1299        assert_eq!(r.entries[0].old_hash, None);
1300        assert_eq!(r.entries[0].new_hash, Some(blob_b));
1301    }
1302
1303    #[test]
1304    fn removed_file_detected() {
1305        let (_d, s) = fresh_store();
1306        let blob_a = put_blob(&s, b"aaa");
1307        let blob_b = put_blob(&s, b"bbb");
1308        let old = put_tree(
1309            &s,
1310            vec![
1311                entry(b"a.txt", EntryMode::Blob, blob_a),
1312                entry(b"b.txt", EntryMode::Blob, blob_b),
1313            ],
1314        );
1315        let new = put_tree(&s, vec![entry(b"a.txt", EntryMode::Blob, blob_a)]);
1316        let r = diff_trees(&s, Some(old), Some(new)).unwrap();
1317        assert_eq!(r.entries.len(), 1);
1318        assert_eq!(r.entries[0].path, "b.txt");
1319        assert_eq!(r.entries[0].kind, DiffKind::Removed);
1320        assert_eq!(r.entries[0].old_hash, Some(blob_b));
1321        assert_eq!(r.entries[0].new_hash, None);
1322    }
1323
1324    #[test]
1325    fn modified_file_detected() {
1326        let (_d, s) = fresh_store();
1327        let v1 = put_blob(&s, b"version 1");
1328        let v2 = put_blob(&s, b"version 2");
1329        let old = put_tree(&s, vec![entry(b"file.txt", EntryMode::Blob, v1)]);
1330        let new = put_tree(&s, vec![entry(b"file.txt", EntryMode::Blob, v2)]);
1331        let r = diff_trees(&s, Some(old), Some(new)).unwrap();
1332        assert_eq!(r.entries.len(), 1);
1333        assert_eq!(r.entries[0].path, "file.txt");
1334        assert_eq!(r.entries[0].kind, DiffKind::Modified);
1335        assert_eq!(r.entries[0].old_hash, Some(v1));
1336        assert_eq!(r.entries[0].new_hash, Some(v2));
1337    }
1338
1339    #[test]
1340    fn mode_change_detected() {
1341        let (_d, s) = fresh_store();
1342        let blob = put_blob(&s, b"content");
1343        let old = put_tree(&s, vec![entry(b"link", EntryMode::Blob, blob)]);
1344        let new = put_tree(&s, vec![entry(b"link", EntryMode::Symlink, blob)]);
1345        let r = diff_trees(&s, Some(old), Some(new)).unwrap();
1346        assert_eq!(r.entries.len(), 1);
1347        assert_eq!(r.entries[0].path, "link");
1348        assert_eq!(r.entries[0].kind, DiffKind::ModeChanged);
1349        assert_eq!(r.entries[0].old_hash, Some(blob));
1350        assert_eq!(r.entries[0].new_hash, Some(blob));
1351    }
1352
1353    #[test]
1354    fn nested_tree_diff() {
1355        let (_d, s) = fresh_store();
1356        let v1 = put_blob(&s, b"old content");
1357        let v2 = put_blob(&s, b"new content");
1358        let other = put_blob(&s, b"unchanged");
1359        let old_sub = put_tree(
1360            &s,
1361            vec![
1362                entry(b"file.txt", EntryMode::Blob, v1),
1363                entry(b"other.txt", EntryMode::Blob, other),
1364            ],
1365        );
1366        let new_sub = put_tree(
1367            &s,
1368            vec![
1369                entry(b"file.txt", EntryMode::Blob, v2),
1370                entry(b"other.txt", EntryMode::Blob, other),
1371            ],
1372        );
1373        let old_root = put_tree(&s, vec![entry(b"subdir", EntryMode::Tree, old_sub)]);
1374        let new_root = put_tree(&s, vec![entry(b"subdir", EntryMode::Tree, new_sub)]);
1375        let r = diff_trees(&s, Some(old_root), Some(new_root)).unwrap();
1376        assert_eq!(r.entries.len(), 1);
1377        assert_eq!(r.entries[0].path, "subdir/file.txt");
1378        assert_eq!(r.entries[0].kind, DiffKind::Modified);
1379    }
1380
1381    #[test]
1382    fn diff_against_empty_tree() {
1383        let (_d, s) = fresh_store();
1384        let blob_a = put_blob(&s, b"aaa");
1385        let blob_b = put_blob(&s, b"bbb");
1386        let new = put_tree(
1387            &s,
1388            vec![
1389                entry(b"a.txt", EntryMode::Blob, blob_a),
1390                entry(b"b.txt", EntryMode::Blob, blob_b),
1391            ],
1392        );
1393        let r = diff_trees(&s, None, Some(new)).unwrap();
1394        assert_eq!(r.entries.len(), 2);
1395        assert_eq!(r.entries[0].path, "a.txt");
1396        assert_eq!(r.entries[0].kind, DiffKind::Added);
1397        assert_eq!(r.entries[1].path, "b.txt");
1398        assert_eq!(r.entries[1].kind, DiffKind::Added);
1399    }
1400
1401    #[test]
1402    fn empty_tree_against_non_empty() {
1403        let (_d, s) = fresh_store();
1404        let blob_a = put_blob(&s, b"aaa");
1405        let blob_b = put_blob(&s, b"bbb");
1406        let old = put_tree(
1407            &s,
1408            vec![
1409                entry(b"a.txt", EntryMode::Blob, blob_a),
1410                entry(b"b.txt", EntryMode::Blob, blob_b),
1411            ],
1412        );
1413        let r = diff_trees(&s, Some(old), None).unwrap();
1414        assert_eq!(r.entries.len(), 2);
1415        assert_eq!(r.entries[0].kind, DiffKind::Removed);
1416        assert_eq!(r.entries[1].kind, DiffKind::Removed);
1417    }
1418
1419    #[test]
1420    fn sorted_output() {
1421        let (_d, s) = fresh_store();
1422        let a = put_blob(&s, b"a");
1423        let b = put_blob(&s, b"b");
1424        let c = put_blob(&s, b"c");
1425        let new = put_tree(
1426            &s,
1427            vec![
1428                entry(b"a.txt", EntryMode::Blob, a),
1429                entry(b"m.txt", EntryMode::Blob, b),
1430                entry(b"z.txt", EntryMode::Blob, c),
1431            ],
1432        );
1433        let r = diff_trees(&s, None, Some(new)).unwrap();
1434        assert_eq!(r.entries.len(), 3);
1435        assert_eq!(r.entries[0].path, "a.txt");
1436        assert_eq!(r.entries[1].path, "m.txt");
1437        assert_eq!(r.entries[2].path, "z.txt");
1438    }
1439
1440    #[test]
1441    fn max_length_entry_names() {
1442        let (_d, s) = fresh_store();
1443        let blob = put_blob(&s, b"data");
1444        let long_name = vec![b'A'; 255];
1445        let new = put_tree(&s, vec![entry(&long_name, EntryMode::Blob, blob)]);
1446        let r = diff_trees(&s, None, Some(new)).unwrap();
1447        assert_eq!(r.entries.len(), 1);
1448        assert_eq!(r.entries[0].path.len(), 255);
1449        assert_eq!(r.entries[0].kind, DiffKind::Added);
1450    }
1451
1452    #[test]
1453    fn both_none_is_empty() {
1454        let (_d, s) = fresh_store();
1455        let r = diff_trees(&s, None, None).unwrap();
1456        assert!(r.is_empty());
1457    }
1458
1459    // -----------------------------------------------------------------
1460    // status_diff unit tests
1461    // -----------------------------------------------------------------
1462
1463    fn fresh_workdir() -> TempDir {
1464        TempDir::new().unwrap()
1465    }
1466
1467    #[test]
1468    fn status_diff_does_not_fsync() {
1469        // `status` is read-only from the user's perspective; its
1470        // worktree-snapshot objects are ephemeral and must never pay
1471        // durability costs (no barriers, no full flushes, no dir
1472        // flushes) — renames only.
1473        use crate::batch::testing::{Ev, RecordingSyncer};
1474        use std::sync::Arc;
1475
1476        let (_sd, mut store) = fresh_store();
1477        let work = fresh_workdir();
1478        std::fs::write(work.path().join("a.txt"), b"some content").unwrap();
1479        std::fs::write(work.path().join("b.txt"), b"other content").unwrap();
1480
1481        let rec = Arc::new(RecordingSyncer::default());
1482        store.set_syncer(rec.clone());
1483
1484        let result = status_diff(&store, None, work.path(), None).unwrap();
1485        assert!(!result.is_empty(), "untracked files must surface");
1486
1487        let evs = rec.events();
1488        assert!(
1489            evs.iter().all(|e| matches!(e, Ev::Rename { .. })),
1490            "status must not emit any flush events; got {evs:?}"
1491        );
1492    }
1493
1494    #[test]
1495    fn status_empty_worktree_no_head() {
1496        // Empty worktree, no HEAD → nothing to report.
1497        let (_sd, store) = fresh_store();
1498        let work = fresh_workdir();
1499        let result = status_diff(&store, None, work.path(), None).unwrap();
1500        assert!(result.is_empty());
1501    }
1502
1503    #[test]
1504    fn status_worktree_equals_head_is_clean() {
1505        // Worktree identical to HEAD → no changes.
1506        let (_sd, store) = fresh_store();
1507        let work = fresh_workdir();
1508        std::fs::write(work.path().join("a.txt"), b"hello").unwrap();
1509        // Build a tree from the worktree and use it as HEAD.
1510        let head_hash = worktree::build_tree(&store, work.path()).unwrap();
1511        let result = status_diff(&store, Some(&head_hash), work.path(), None).unwrap();
1512        assert!(result.is_empty(), "expected clean, got {result:?}");
1513    }
1514
1515    #[cfg(not(unix))]
1516    #[test]
1517    fn status_no_index_ignores_unrepresentable_executable_mode_on_non_unix() {
1518        let (_sd, store) = fresh_store();
1519        let work = fresh_workdir();
1520        std::fs::write(work.path().join("run.sh"), b"#!/bin/sh\n").unwrap();
1521        let h = worktree::hash_file(&store, &work.path().join("run.sh")).unwrap();
1522        let head_hash = put_tree(&store, vec![entry(b"run.sh", EntryMode::Executable, h)]);
1523
1524        let result = status_diff(&store, Some(&head_hash), work.path(), None).unwrap();
1525        assert!(
1526            result.is_empty(),
1527            "non-Unix should not report executable-only noise, got {result:?}"
1528        );
1529    }
1530
1531    #[test]
1532    fn status_added_only() {
1533        // HEAD has {a.txt}; worktree has {a.txt, b.txt} → b.txt added.
1534        let (_sd, store) = fresh_store();
1535        let work = fresh_workdir();
1536        std::fs::write(work.path().join("a.txt"), b"hello").unwrap();
1537        let head_hash = worktree::build_tree(&store, work.path()).unwrap();
1538        std::fs::write(work.path().join("b.txt"), b"world").unwrap();
1539        let result = status_diff(&store, Some(&head_hash), work.path(), None).unwrap();
1540        assert_eq!(result.len(), 1);
1541        assert_eq!(result[0].diff.path, "b.txt");
1542        assert_eq!(result[0].diff.kind, DiffKind::Added);
1543        assert_eq!(result[0].staging, StatusStaging::Unstaged);
1544    }
1545
1546    #[test]
1547    fn status_removed_only() {
1548        // HEAD has {a.txt, b.txt}; worktree has only {a.txt} → b.txt removed.
1549        let (_sd, store) = fresh_store();
1550        let work = fresh_workdir();
1551        std::fs::write(work.path().join("a.txt"), b"hello").unwrap();
1552        std::fs::write(work.path().join("b.txt"), b"world").unwrap();
1553        let head_hash = worktree::build_tree(&store, work.path()).unwrap();
1554        std::fs::remove_file(work.path().join("b.txt")).unwrap();
1555        let result = status_diff(&store, Some(&head_hash), work.path(), None).unwrap();
1556        assert_eq!(result.len(), 1);
1557        assert_eq!(result[0].diff.path, "b.txt");
1558        assert_eq!(result[0].diff.kind, DiffKind::Removed);
1559        assert_eq!(result[0].staging, StatusStaging::Unstaged);
1560    }
1561
1562    #[test]
1563    fn status_modified_only() {
1564        // HEAD has {a.txt="old"}; worktree has {a.txt="new"} → a.txt modified.
1565        let (_sd, store) = fresh_store();
1566        let work = fresh_workdir();
1567        std::fs::write(work.path().join("a.txt"), b"old").unwrap();
1568        let head_hash = worktree::build_tree(&store, work.path()).unwrap();
1569        std::fs::write(work.path().join("a.txt"), b"new").unwrap();
1570        let result = status_diff(&store, Some(&head_hash), work.path(), None).unwrap();
1571        assert_eq!(result.len(), 1);
1572        assert_eq!(result[0].diff.path, "a.txt");
1573        assert_eq!(result[0].diff.kind, DiffKind::Modified);
1574        assert_eq!(result[0].staging, StatusStaging::Unstaged);
1575    }
1576
1577    #[test]
1578    fn status_mixed_changes() {
1579        // HEAD: {a.txt, b.txt}. Worktree: a.txt modified, b.txt removed, c.txt added.
1580        let (_sd, store) = fresh_store();
1581        let work = fresh_workdir();
1582        std::fs::write(work.path().join("a.txt"), b"original").unwrap();
1583        std::fs::write(work.path().join("b.txt"), b"stays").unwrap();
1584        let head_hash = worktree::build_tree(&store, work.path()).unwrap();
1585        std::fs::write(work.path().join("a.txt"), b"changed").unwrap();
1586        std::fs::remove_file(work.path().join("b.txt")).unwrap();
1587        std::fs::write(work.path().join("c.txt"), b"new").unwrap();
1588        let result = status_diff(&store, Some(&head_hash), work.path(), None).unwrap();
1589        assert_eq!(result.len(), 3);
1590        let paths: Vec<&str> = result.iter().map(|e| e.diff.path.as_str()).collect();
1591        assert!(paths.contains(&"a.txt"), "missing a.txt: {paths:?}");
1592        assert!(paths.contains(&"b.txt"), "missing b.txt: {paths:?}");
1593        assert!(paths.contains(&"c.txt"), "missing c.txt: {paths:?}");
1594    }
1595
1596    #[test]
1597    fn status_no_head_shows_all_as_added() {
1598        // No HEAD (initial repo state) → every file shows as added.
1599        let (_sd, store) = fresh_store();
1600        let work = fresh_workdir();
1601        std::fs::write(work.path().join("a.txt"), b"aaa").unwrap();
1602        std::fs::write(work.path().join("b.txt"), b"bbb").unwrap();
1603        let result = status_diff(&store, None, work.path(), None).unwrap();
1604        assert_eq!(result.len(), 2);
1605        for e in &result {
1606            assert_eq!(e.diff.kind, DiffKind::Added);
1607            assert_eq!(e.staging, StatusStaging::Unstaged);
1608        }
1609    }
1610
1611    /// HEAD is empty; index has b.txt; worktree has b.txt with the
1612    /// same content. The HEAD↔index leg picks up b.txt as Added
1613    /// (Staged); the index↔worktree leg sees no delta. Single Staged
1614    /// entry.
1615    #[test]
1616    fn status_staged_entry_is_classified_staged() {
1617        use crate::index::{EntryStatus, Index, IndexEntry};
1618        let (_sd, store) = fresh_store();
1619        let work = fresh_workdir();
1620        std::fs::write(work.path().join("b.txt"), b"world").unwrap();
1621        let b_hash = worktree::hash_file(&store, &work.path().join("b.txt")).unwrap();
1622        let mut idx = Index::new();
1623        idx.entries.push(IndexEntry {
1624            path: "b.txt".to_string(),
1625            status: EntryStatus::Blob,
1626            object_hash: b_hash,
1627            mtime_ns: 0,
1628            size: 0,
1629            ino: 0,
1630            ctime_ns: 0,
1631        });
1632        // No HEAD — first commit scenario.
1633        let result = status_diff(&store, None, work.path(), Some(&idx)).unwrap();
1634        assert_eq!(result.len(), 1);
1635        assert_eq!(result[0].diff.path, "b.txt");
1636        assert_eq!(result[0].staging, StatusStaging::Staged);
1637    }
1638
1639    #[cfg(not(unix))]
1640    #[test]
1641    fn status_ignores_unrepresentable_executable_mode_on_non_unix_worktree() {
1642        use crate::index::{EntryStatus, Index, IndexEntry};
1643
1644        let (_sd, store) = fresh_store();
1645        let work = fresh_workdir();
1646        std::fs::write(work.path().join("run.sh"), b"#!/bin/sh\n").unwrap();
1647        let h = worktree::hash_file(&store, &work.path().join("run.sh")).unwrap();
1648        let mut idx = Index::new();
1649        idx.entries.push(IndexEntry {
1650            path: "run.sh".to_string(),
1651            status: EntryStatus::Executable,
1652            object_hash: h,
1653            mtime_ns: 0,
1654            size: 0,
1655            ino: 0,
1656            ctime_ns: 0,
1657        });
1658
1659        let result = status_diff(&store, None, work.path(), Some(&idx)).unwrap();
1660        assert_eq!(result.len(), 1, "expected only the staged addition");
1661        assert_eq!(result[0].staging, StatusStaging::Staged);
1662    }
1663
1664    // -----------------------------------------------------------------
1665    // text_patch unit tests
1666    // -----------------------------------------------------------------
1667
1668    #[test]
1669    fn text_patch_modified_line_emits_hunk() {
1670        let old = b"line1\nline2\nline3\n";
1671        let new = b"line1\nCHANGED\nline3\n";
1672        let patch = text_patch(old, new, "f.txt", "f.txt");
1673        assert!(patch.starts_with("--- a/f.txt\n+++ b/f.txt\n"), "{patch}");
1674        assert!(patch.contains("@@ -1,3 +1,3 @@"), "{patch}");
1675        assert!(patch.contains("-line2\n"), "{patch}");
1676        assert!(patch.contains("+CHANGED\n"), "{patch}");
1677        assert!(patch.contains(" line1\n"), "{patch}");
1678        assert!(patch.contains(" line3\n"), "{patch}");
1679    }
1680
1681    #[test]
1682    fn text_patch_pure_addition() {
1683        let old = b"a\nb\n";
1684        let new = b"a\nb\nc\n";
1685        let patch = text_patch(old, new, "f", "f");
1686        assert!(patch.contains("+c\n"), "{patch}");
1687        // No deletion lines in the hunk body (the `---` header aside).
1688        assert!(
1689            !patch
1690                .lines()
1691                .any(|l| l.starts_with('-') && !l.starts_with("---")),
1692            "should be no deletions: {patch}"
1693        );
1694    }
1695
1696    #[test]
1697    fn text_patch_identical_is_empty() {
1698        let patch = text_patch(b"same\n", b"same\n", "f", "f");
1699        assert!(patch.is_empty(), "{patch}");
1700    }
1701
1702    #[test]
1703    fn text_patch_binary_reports_differ() {
1704        let old = &[0x00, 0xff, 0x01][..];
1705        let new = &[0x00, 0xfe, 0x02][..];
1706        let patch = text_patch(old, new, "bin", "bin");
1707        assert_eq!(patch, "Binary files a/bin and b/bin differ\n");
1708    }
1709
1710    #[test]
1711    fn text_patch_nul_byte_is_binary_like_git() {
1712        // A NUL byte makes the blob binary by git's heuristic even though it
1713        // is valid UTF-8 — must NOT emit a textual hunk with an embedded NUL.
1714        let patch = text_patch(b"a\0b\n", b"a\0c\n", "f", "f");
1715        assert_eq!(patch, "Binary files a/f and b/f differ\n");
1716    }
1717
1718    #[test]
1719    fn unified_hunks_non_utf8_without_nul_is_text_like_git() {
1720        // Invalid UTF-8 but no NUL: git treats it as text and diffs it
1721        // byte-wise. The raw bytes survive into the hunk (single-line hunk →
1722        // `@@ -1 +1 @@`, no `,1`).
1723        let hunks = unified_hunks(b"\xff\n", b"\xfe\n").expect("not binary");
1724        assert_eq!(
1725            hunks,
1726            b"@@ -1 +1 @@\n-\xff\n+\xfe\n".to_vec(),
1727            "got {hunks:?}"
1728        );
1729    }
1730
1731    #[test]
1732    fn hunk_header_omits_count_one_like_git() {
1733        // A one-line-each change: git writes `@@ -1 +1 @@`, not `-1,1 +1,1`.
1734        let patch = text_patch(b"old\n", b"new\n", "f", "f");
1735        assert_eq!(
1736            patch, "--- a/f\n+++ b/f\n@@ -1 +1 @@\n-old\n+new\n",
1737            "{patch}"
1738        );
1739    }
1740
1741    #[test]
1742    fn text_patch_no_trailing_newline_marker() {
1743        let old = b"x\ny";
1744        let new = b"x\nz";
1745        let patch = text_patch(old, new, "f", "f");
1746        assert!(patch.contains("\\ No newline at end of file\n"), "{patch}");
1747    }
1748
1749    #[test]
1750    fn text_patch_separate_hunks_for_distant_changes() {
1751        let old = b"a\nb\nc\nd\ne\nf\ng\nh\ni\nj\n";
1752        // Change line 1 and line 10; far enough apart for two hunks.
1753        let new = b"A\nb\nc\nd\ne\nf\ng\nh\ni\nJ\n";
1754        let patch = text_patch(old, new, "f", "f");
1755        let hunk_count = patch.matches("@@ ").count();
1756        assert_eq!(hunk_count, 2, "expected two hunks: {patch}");
1757    }
1758
1759    #[test]
1760    fn text_patch_inserts_compact_to_git_position() {
1761        // Inserting a line into a run of identical lines: git's change
1762        // compaction places the `+` at the *bottom* of the run (just before
1763        // the differing line). Verifies the Myers + compaction pipeline picks
1764        // the same canonical boundary git does.
1765        let patch = text_patch(b"a\na\nb\n", b"a\na\na\nb\n", "f", "f");
1766        assert_eq!(
1767            patch, "--- a/f\n+++ b/f\n@@ -1,3 +1,4 @@\n a\n a\n+a\n b\n",
1768            "{patch}"
1769        );
1770    }
1771
1772    #[test]
1773    fn text_patch_deletes_compact_to_git_position() {
1774        // The dual: deleting from a run of identical lines removes the bottom
1775        // one (the `-` lands just before the differing line).
1776        let patch = text_patch(b"a\na\na\nb\n", b"a\na\nb\n", "f", "f");
1777        assert_eq!(
1778            patch, "--- a/f\n+++ b/f\n@@ -1,4 +1,3 @@\n a\n a\n-a\n b\n",
1779            "{patch}"
1780        );
1781    }
1782
1783    /// HEAD empty; index has b.txt at v1; worktree has b.txt at v2.
1784    /// The HEAD↔index leg yields one Added (Staged) entry; the
1785    /// index↔worktree leg yields one Modified (Unstaged) entry. Same
1786    /// path appears in both sections — git's two-section layout.
1787    /// Pre-fix this collapsed to a single `PartiallyStaged` entry
1788    /// which hid the staged-vs-worktree distinction.
1789    #[test]
1790    fn status_partially_staged_entry_emits_both_legs() {
1791        use crate::index::{EntryStatus, Index, IndexEntry};
1792        let (_sd, store) = fresh_store();
1793        let work = fresh_workdir();
1794        // Write v1, hash it, then overwrite with v2.
1795        std::fs::write(work.path().join("b.txt"), b"v1").unwrap();
1796        let b_v1_hash = worktree::hash_file(&store, &work.path().join("b.txt")).unwrap();
1797        std::fs::write(work.path().join("b.txt"), b"v2").unwrap();
1798        let mut idx = Index::new();
1799        idx.entries.push(IndexEntry {
1800            path: "b.txt".to_string(),
1801            status: EntryStatus::Blob,
1802            object_hash: b_v1_hash,
1803            mtime_ns: 0,
1804            size: 0,
1805            ino: 0,
1806            ctime_ns: 0,
1807        });
1808        let result = status_diff(&store, None, work.path(), Some(&idx)).unwrap();
1809        assert_eq!(result.len(), 2, "expected staged + unstaged entries");
1810        let stagings: Vec<_> = result.iter().map(|e| e.staging).collect();
1811        assert!(stagings.contains(&StatusStaging::Staged));
1812        assert!(stagings.contains(&StatusStaging::Unstaged));
1813        assert!(result.iter().all(|e| e.diff.path == "b.txt"));
1814    }
1815
1816    // ---- enumerate_hunks / apply_hunks_subset (add -p primitives) ----
1817
1818    #[test]
1819    fn enumerate_hunks_binary_returns_none() {
1820        assert!(enumerate_hunks(b"a\0b", b"c").is_none());
1821        assert!(enumerate_hunks(b"text\n", b"with\0nul").is_none());
1822    }
1823
1824    #[test]
1825    fn enumerate_hunks_identical_is_empty() {
1826        assert_eq!(enumerate_hunks(b"a\nb\n", b"a\nb\n"), Some(vec![]));
1827    }
1828
1829    // 14 lines; edits at line 2 and line 13 are >2*context apart, so they
1830    // stay as two distinct hunks (changes <7 lines apart would merge).
1831    const HUNK2_OLD: &[u8] = b"l1\nl2\nl3\nl4\nl5\nl6\nl7\nl8\nl9\nl10\nl11\nl12\nl13\nl14\n";
1832    const HUNK2_NEW: &[u8] = b"l1\nL2\nl3\nl4\nl5\nl6\nl7\nl8\nl9\nl10\nl11\nl12\nL13\nl14\n";
1833
1834    #[test]
1835    fn apply_all_hunks_reproduces_new_apply_none_reproduces_base() {
1836        let hunks = enumerate_hunks(HUNK2_OLD, HUNK2_NEW).unwrap();
1837        assert_eq!(hunks.len(), 2, "expected two distinct hunks");
1838        let all: Vec<usize> = (0..hunks.len()).collect();
1839        assert_eq!(
1840            apply_hunks_subset(HUNK2_OLD, &hunks, &all),
1841            HUNK2_NEW,
1842            "apply all == new"
1843        );
1844        assert_eq!(
1845            apply_hunks_subset(HUNK2_OLD, &hunks, &[]),
1846            HUNK2_OLD,
1847            "apply none == old"
1848        );
1849    }
1850
1851    #[test]
1852    fn apply_single_hunk_picks_only_that_region() {
1853        let hunks = enumerate_hunks(HUNK2_OLD, HUNK2_NEW).unwrap();
1854        // Stage only the first hunk: line 2 → L2, line 13 stays l13.
1855        let staged = apply_hunks_subset(HUNK2_OLD, &hunks, &[0]);
1856        assert_eq!(
1857            staged, b"l1\nL2\nl3\nl4\nl5\nl6\nl7\nl8\nl9\nl10\nl11\nl12\nl13\nl14\n",
1858            "only the first hunk applied"
1859        );
1860        // Stage only the second hunk: line 13 → L13, line 2 stays l2.
1861        let staged = apply_hunks_subset(HUNK2_OLD, &hunks, &[1]);
1862        assert_eq!(
1863            staged, b"l1\nl2\nl3\nl4\nl5\nl6\nl7\nl8\nl9\nl10\nl11\nl12\nL13\nl14\n",
1864            "only the second hunk applied"
1865        );
1866    }
1867
1868    #[test]
1869    fn apply_hunks_into_empty_base_adds_lines() {
1870        let old = b"";
1871        let new = b"alpha\nbeta\n";
1872        let hunks = enumerate_hunks(old, new).unwrap();
1873        assert_eq!(apply_hunks_subset(old, &hunks, &[0]), new);
1874        assert_eq!(apply_hunks_subset(old, &hunks, &[]), old);
1875    }
1876
1877    #[test]
1878    fn apply_hunks_preserves_missing_eof_newline() {
1879        let old = b"a\nb\nc";
1880        let new = b"a\nB\nc"; // edit the middle line; file still lacks final \n
1881        let hunks = enumerate_hunks(old, new).unwrap();
1882        let staged = apply_hunks_subset(old, &hunks, &[0]);
1883        assert_eq!(staged, new, "no trailing newline must be preserved");
1884    }
1885
1886    // ---- merge_blob_3way (#298) ----
1887
1888    #[test]
1889    fn merge3_identical_sides_merge_trivially() {
1890        let x = b"a\nb\n";
1891        assert_eq!(merge_blob_3way(b"base\n", x, x), Some(x.to_vec()));
1892    }
1893
1894    #[test]
1895    fn merge3_disjoint_line_edits_auto_merge() {
1896        let base = b"a\nb\nc\nd\ne\n";
1897        let ours = b"A\nb\nc\nd\ne\n"; // line 1
1898        let theirs = b"a\nb\nc\nd\nE\n"; // line 5
1899        assert_eq!(
1900            merge_blob_3way(base, ours, theirs),
1901            Some(b"A\nb\nc\nd\nE\n".to_vec())
1902        );
1903    }
1904
1905    #[test]
1906    fn merge3_adjacent_line_edits_auto_merge() {
1907        // The motivating #298 case: line 1 changed on one side, line 2 on the
1908        // other. Adjacent but non-overlapping → clean merge, no conflict.
1909        let base = b"x\ny\n";
1910        let ours = b"X\ny\n"; // line 1
1911        let theirs = b"x\nY\n"; // line 2
1912        assert_eq!(
1913            merge_blob_3way(base, ours, theirs),
1914            Some(b"X\nY\n".to_vec())
1915        );
1916    }
1917
1918    #[test]
1919    fn merge3_overlapping_line_edits_conflict() {
1920        let base = b"a\nb\nc\n";
1921        let ours = b"A\nb\nc\n"; // line 1 -> A
1922        let theirs = b"Z\nb\nc\n"; // line 1 -> Z
1923        assert_eq!(merge_blob_3way(base, ours, theirs), None);
1924    }
1925
1926    #[test]
1927    fn merge3_one_side_only_takes_that_side() {
1928        // theirs == base (no change); ours changed → take ours.
1929        let base = b"a\nb\nc\n";
1930        let ours = b"a\nB\nc\n";
1931        assert_eq!(merge_blob_3way(base, ours, base), Some(ours.to_vec()));
1932    }
1933
1934    #[test]
1935    fn merge3_disjoint_insertions_auto_merge() {
1936        let base = b"a\nb\n";
1937        let ours = b"a\nX\nb\n"; // insert X after a
1938        let theirs = b"a\nb\nY\n"; // insert Y after b
1939        assert_eq!(
1940            merge_blob_3way(base, ours, theirs),
1941            Some(b"a\nX\nb\nY\n".to_vec())
1942        );
1943    }
1944
1945    #[test]
1946    fn merge3_coincident_insertions_conflict() {
1947        let base = b"a\nb\n";
1948        let ours = b"a\nX\nb\n"; // both insert at the same gap
1949        let theirs = b"a\nY\nb\n";
1950        assert_eq!(merge_blob_3way(base, ours, theirs), None);
1951    }
1952
1953    #[test]
1954    fn merge3_binary_side_conflicts() {
1955        assert_eq!(merge_blob_3way(b"a\nb\n", b"a\0\nb\n", b"a\nb\nc\n"), None);
1956    }
1957}
mkit_core/ops/diff.rs

mkit_core/ops/
diff.rs