Skip to main content

heddle_core/diff/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Embeddable diff facade and report model.
3
4use std::{
5    collections::BTreeSet,
6    path::{Path, PathBuf},
7};
8
9use anyhow::{Result, anyhow};
10use objects::{
11    HeddleError, RecoveryDetails,
12    object::{
13        AnnotationStatus, Blob, ChangeId, ContextTarget, DiffKind, EntryType, FileChangeSet,
14        FileMode, SemanticChange, State, Tree, TreeEntry,
15    },
16    store::ObjectStore,
17    worktree::{WorktreeStatus, diff_blobs},
18};
19use repo::{
20    ResolvePolicy, Repository, StateResolveError, StateResolveFailure,
21    resolve_state_for_command,
22};
23#[cfg(feature = "semantic")]
24use semantic::diff::{SemanticDiffOptions, WorktreeStatus as SemanticWorktreeStatus};
25use sley::{EntryKind, Repository as SleyRepository};
26
27use crate::ExecutionContext;
28
29mod patch;
30mod types;
31
32pub use patch::{render_diff_patch, render_diff_patch_bytes, write_diff_patch};
33pub use types::*;
34
35const BINARY_DIFF_ERROR: &str = "binary file";
36
37#[derive(Clone, Debug, Default)]
38struct SemanticDiffResult {
39    changes: Vec<SemanticChange>,
40    file_changes: FileChangeSet,
41}
42
/// Caller-supplied knobs for computing a diff report through the embeddable
/// facade.
#[derive(Debug, Clone)]
pub struct DiffOptions {
    /// State spec for the old side. `None` falls back to the repository head.
    pub from: Option<String>,
    /// State spec for the new side. `None` diffs against the worktree.
    pub to: Option<String>,
    /// Run the semantic diff and take the file change set from its result.
    pub semantic: bool,
    /// Report per-file line counts; line hunks are stripped from the final
    /// report.
    pub stat: bool,
    /// Build status-only entries (no hunks) unless patch text is requested.
    pub name_only: bool,
    /// Value forwarded to the unified-hunk builder (presumably the number of
    /// context lines — confirm against `unified_hunks`).
    pub unified: usize,
    /// Attach per-file context and state guidance to the report.
    pub show_context: bool,
    /// Whether the report should include the top-level patch string when a
    /// patch-compatible representation is available. CLI callers set this for
    /// `--patch` and for JSON output, preserving the existing machine contract.
    pub include_patch_text: bool,
}

impl Default for DiffOptions {
    /// Every switch off, no explicit endpoints, and `unified` set to 3.
    fn default() -> Self {
        const DEFAULT_UNIFIED: usize = 3;
        DiffOptions {
            unified: DEFAULT_UNIFIED,
            from: None,
            to: None,
            semantic: false,
            stat: false,
            name_only: false,
            show_context: false,
            include_patch_text: false,
        }
    }
}
73
74/// Core-friendly view of the plain-Git probe the CLI health layer discovers.
75#[derive(Debug)]
76pub struct PlainGitDiffProbe {
77    pub root: PathBuf,
78    pub changes: WorktreeStatus,
79}
80
81/// Compute a Heddle diff report without rendering to stdout.
82pub fn diff(ctx: &ExecutionContext, options: DiffOptions) -> Result<DiffReport> {
83    let repo = ctx.require_repo().map_err(anyhow::Error::new)?;
84    let to = options.to.as_ref();
85    let git_overlay_head_worktree_diff = repo.current_state()?.is_none()
86        && to.is_none()
87        && matches!(options.from.as_deref(), Some("HEAD" | "@"));
88
89    let from_id = if git_overlay_head_worktree_diff {
90        None
91    } else if let Some(ref spec) = options.from {
92        Some(resolve_state_id(repo, spec)?)
93    } else {
94        repo.head()?
95    };
96
97    let from_state = if let Some(id) = from_id {
98        Some(require_resolved_state(repo, &id)?)
99    } else {
100        None
101    };
102
103    let from_tree = if let Some(ref state) = from_state {
104        repo.store().get_tree(&state.tree)?
105    } else {
106        None
107    };
108    let to_state = if let Some(to_spec) = to {
109        let to_id = resolve_state_id(repo, to_spec)?;
110        Some(require_resolved_state(repo, &to_id)?)
111    } else {
112        None
113    };
114    let to_tree = if let Some(ref state) = to_state {
115        repo.store().get_tree(&state.tree)?
116    } else {
117        None
118    };
119    let status_options = ctx.config().worktree_status_options(Some(repo.config()));
120    let from_hash = from_state
121        .as_ref()
122        .map(|state| state.tree)
123        .unwrap_or_else(|| Tree::new().hash());
124
125    let semantic_diff_result = if options.semantic {
126        if let Some(ref to_state) = to_state {
127            Some(run_semantic_diff(repo, &from_hash, &to_state.tree)?)
128        } else {
129            Some(run_semantic_worktree_diff(
130                repo,
131                &from_hash,
132                &status_options,
133            )?)
134        }
135    } else {
136        None
137    };
138
139    let changes: FileChangeSet = if let Some(ref result) = semantic_diff_result {
140        result.file_changes.clone()
141    } else if let Some(ref to_state) = to_state {
142        repo.diff_trees(&from_hash, &to_state.tree)?
143    } else if git_overlay_head_worktree_diff {
144        file_change_set_from_status(&repo.git_overlay_worktree_status()?.unwrap_or_default())
145    } else {
146        let tree = from_tree.clone().unwrap_or_default();
147        file_change_set_from_status(
148            &repo.compare_worktree_cached_with_options(&tree, &status_options)?,
149        )
150    };
151
152    let patch_text_needed = options.include_patch_text;
153    let want_hunks = patch_text_needed || !(options.name_only || options.stat);
154    let file_changes = file_changes_from_change_set(
155        repo,
156        from_tree.as_ref(),
157        to_tree.as_ref(),
158        &changes,
159        &options,
160        want_hunks,
161        patch_text_needed,
162    )?;
163
164    let semantic_changes = semantic_diff_result.map(|result| {
165        result
166            .changes
167            .into_iter()
168            .map(SemanticChangeEntry::from)
169            .collect()
170    });
171
172    let context_state = if options.show_context {
173        if let Some(ref state) = to_state {
174            Some(state.clone())
175        } else if let Some(state) = from_state.clone() {
176            Some(state)
177        } else {
178            repo.current_state()?
179        }
180    } else {
181        None
182    };
183
184    let stats = DiffStats::from_changes(&file_changes, semantic_changes.as_deref());
185    let mut output = DiffReport::with_stats(
186        from_id.map(|id| id.short()),
187        options.to.clone(),
188        file_changes,
189        semantic_changes,
190        context_state
191            .as_ref()
192            .map(|state| collect_file_context(repo, state, &changes))
193            .transpose()?,
194        context_state
195            .as_ref()
196            .map(|state| collect_state_guidance(repo, state))
197            .transpose()?,
198        stats,
199    );
200    output.worktree_mode = options.to.is_none();
201    finalize_diff_report(output, &options)
202}
203
204fn file_changes_from_change_set(
205    repo: &Repository,
206    from_tree: Option<&Tree>,
207    to_tree: Option<&Tree>,
208    changes: &FileChangeSet,
209    options: &DiffOptions,
210    want_hunks: bool,
211    patch_text_needed: bool,
212) -> Result<Vec<FileChange>> {
213    let file_changes: Vec<FileChange> = if options.name_only && !patch_text_needed {
214        changes
215            .iter()
216            .map(|change| {
217                make_status_only_change(
218                    Some(repo),
219                    from_tree,
220                    to_tree,
221                    &change.path,
222                    &change.kind.to_string(),
223                )
224            })
225            .collect()
226    } else {
227        changes
228            .iter()
229            .map(|change| {
230                let effective_kind = if to_tree.is_none() {
231                    worktree_modified_type_change(repo.root(), &change.path, change.kind)
232                        .map(|(_, diff_kind)| diff_kind)
233                        .unwrap_or(change.kind)
234                } else {
235                    change.kind
236                };
237                let diff_result = if let Some(tree) = to_tree {
238                    get_state_diff(repo, from_tree, tree, &change.path, &effective_kind)
239                } else {
240                    get_worktree_diff(repo, from_tree, &change.path, &effective_kind)
241                };
242                let binary = diff_result.as_ref().err().is_some_and(is_binary_diff_error);
243                let (raw_lines, eol) = match diff_result {
244                    Ok((lines, eol)) => (Some(lines), eol),
245                    Err(_) => (None, FileEolState::default()),
246                };
247                let (lines, line_counts) = if options.stat && !patch_text_needed {
248                    let counts = change_line_counts(raw_lines.as_deref());
249                    (None, Some(counts))
250                } else {
251                    (
252                        raw_lines.map(|lines| unified_hunks(lines, options.unified, &eol)),
253                        None,
254                    )
255                };
256
257                let kind = effective_kind.to_string();
258                let (old_mode, mode) =
259                    change_file_modes(repo, from_tree, to_tree, &change.path, &kind);
260                let symlink = symlink_change_for_paths(
261                    repo,
262                    from_tree,
263                    to_tree,
264                    &kind,
265                    &change.path,
266                    &change.path,
267                    old_mode,
268                    mode,
269                );
270                FileChange {
271                    path: change.path.clone(),
272                    kind,
273                    binary: binary && symlink.is_none(),
274                    lines,
275                    line_counts,
276                    eol,
277                    mode,
278                    old_mode,
279                    symlink,
280                    ..Default::default()
281                }
282            })
283            .collect()
284    };
285    let file_changes = sort_changes_by_path(file_changes);
286    let file_changes = expand_type_changes(
287        repo,
288        from_tree,
289        to_tree,
290        file_changes,
291        want_hunks,
292        options.unified,
293    )?;
294    detect_clear_renames(
295        repo,
296        from_tree,
297        to_tree,
298        file_changes,
299        want_hunks,
300        options.unified,
301    )
302}
303
304/// Compute a HEAD-vs-worktree report from an existing status scan.
305pub fn diff_worktree_status(
306    status: &WorktreeStatus,
307    options: &DiffOptions,
308    repo: Option<&Repository>,
309    detect_renames: bool,
310) -> Result<DiffReport> {
311    let want_hunks = options.include_patch_text && repo.is_some();
312    let from_tree = match repo {
313        Some(repo) => head_from_tree(repo)?,
314        None => None,
315    };
316    let changes = file_changes_from_status(
317        status,
318        want_hunks,
319        repo,
320        from_tree.as_ref(),
321        options.unified,
322    );
323    let changes = match repo {
324        Some(repo) => expand_type_changes(
325            repo,
326            from_tree.as_ref(),
327            None,
328            changes,
329            want_hunks,
330            options.unified,
331        )?,
332        None => changes,
333    };
334    let changes = if detect_renames {
335        match repo {
336            Some(repo) => detect_clear_renames(
337                repo,
338                from_tree.as_ref(),
339                None,
340                changes,
341                want_hunks,
342                options.unified,
343            )?,
344            None => changes,
345        }
346    } else {
347        changes
348    };
349    let mut output = DiffReport::new(Some("HEAD".to_string()), None, changes, None, None, None);
350    output.worktree_mode = true;
351    finalize_diff_report(output, options)
352}
353
354/// Compute a HEAD-vs-worktree report for a plain Git repository discovered by
355/// the CLI health layer.
356pub fn plain_git_head_diff(probe: &PlainGitDiffProbe, options: &DiffOptions) -> Result<DiffReport> {
357    if options.include_patch_text {
358        let changes = plain_git_file_changes_with_hunks(probe, options.unified)?;
359        let mut output = DiffReport::new(Some("HEAD".to_string()), None, changes, None, None, None);
360        output.worktree_mode = true;
361        return finalize_diff_report(output, options);
362    }
363    diff_worktree_status(&probe.changes, options, None, false)
364}
365
366fn finalize_diff_report(mut output: DiffReport, options: &DiffOptions) -> Result<DiffReport> {
367    if options.include_patch_text {
368        populate_patch_text(&mut output);
369    }
370    if options.stat {
371        output.changes = strip_line_hunks(std::mem::take(&mut output.changes));
372    }
373    Ok(output)
374}
375
376/// Render and stash the standard unified-diff text on the output payload.
377fn populate_patch_text(output: &mut DiffReport) {
378    let text = render_diff_patch(output);
379    if !text.is_empty() {
380        output.patch = Some(text);
381    }
382}
383
384fn file_change_set_from_status(status: &WorktreeStatus) -> FileChangeSet {
385    let mut changes = FileChangeSet::with_capacity(status.change_count());
386    for path in &status.modified {
387        changes.push_modified(path.display().to_string());
388    }
389    for path in &status.added {
390        changes.push_added(path.display().to_string());
391    }
392    for path in &status.deleted {
393        changes.push_deleted(path.display().to_string());
394    }
395    changes
396}
397
398fn resolve_state_id(repository: &Repository, spec: &str) -> Result<ChangeId> {
399    resolve_state_for_command(repository, spec, ResolvePolicy::minimal())
400        .map(|resolved| resolved.change_id)
401        .map_err(|error| match error {
402            StateResolveError::Repository(err) => err.into(),
403            StateResolveError::Failure(StateResolveFailure::NotFound { spec }) => {
404                anyhow!(HeddleError::recovery(RecoveryDetails::state_not_found(spec)))
405            }
406            StateResolveError::Failure(other) => anyhow!("{other}"),
407        })
408}
409
410fn require_resolved_state(repo: &Repository, id: &ChangeId) -> Result<State> {
411    repo.store().get_state(id)?.ok_or_else(|| {
412        anyhow!(HeddleError::MissingObject {
413            object_type: "state".to_string(),
414            id: id.to_string_full(),
415        })
416    })
417}
418
419#[cfg(feature = "semantic")]
420fn run_semantic_diff(
421    repo: &Repository,
422    from_tree_hash: &objects::object::ContentHash,
423    to_tree_hash: &objects::object::ContentHash,
424) -> Result<SemanticDiffResult> {
425    let options = SemanticDiffOptions::default();
426    let result =
427        semantic::diff::semantic_diff(repo.store(), from_tree_hash, to_tree_hash, &options)?;
428    Ok(SemanticDiffResult {
429        changes: result.changes,
430        file_changes: result.file_changes,
431    })
432}
433
434#[cfg(not(feature = "semantic"))]
435fn run_semantic_diff(
436    _repo: &Repository,
437    _from_tree_hash: &objects::object::ContentHash,
438    _to_tree_hash: &objects::object::ContentHash,
439) -> Result<SemanticDiffResult> {
440    Err(anyhow!(HeddleError::recovery(
441        RecoveryDetails::feature_unavailable("semantic diff", "semantic")
442    )))
443}
444
445#[cfg(feature = "semantic")]
446fn run_semantic_worktree_diff(
447    repo: &Repository,
448    from_tree_hash: &objects::object::ContentHash,
449    status_options: &repo::WorktreeStatusOptions,
450) -> Result<SemanticDiffResult> {
451    let from_tree = repo.require_tree(from_tree_hash)?;
452    let status = repo.compare_worktree_cached_with_options(&from_tree, status_options)?;
453    let status = SemanticWorktreeStatus {
454        modified: status.modified,
455        added: status.added,
456        deleted: status.deleted,
457    };
458    let options = SemanticDiffOptions::default();
459    let result = semantic::diff::semantic_diff_worktree(
460        repo.store(),
461        from_tree_hash,
462        repo.root(),
463        &status,
464        &options,
465    )?;
466    Ok(SemanticDiffResult {
467        changes: result.changes,
468        file_changes: result.file_changes,
469    })
470}
471
472#[cfg(not(feature = "semantic"))]
473fn run_semantic_worktree_diff(
474    _repo: &Repository,
475    _from_tree_hash: &objects::object::ContentHash,
476    _status_options: &repo::WorktreeStatusOptions,
477) -> Result<SemanticDiffResult> {
478    Err(anyhow!(HeddleError::recovery(
479        RecoveryDetails::feature_unavailable("semantic diff", "semantic")
480    )))
481}
482
483/// Order a state-to-state change list by flat path. `diff_trees` emits a
484/// deterministic merge-join order over sorted tree entries, but recursive
485/// directory descent can still differ from this flat `String::cmp` order for
486/// paths such as `a.txt` and `a/file.txt`. git emits diff entries in flat path
487/// order; sorting here matches that and keeps every render of the same diff
488/// byte-identical. Sort *before* `expand_type_changes` so each type change's
489/// local delete-before-add ordering stays intact (the expansion replaces a
490/// single entry in place).
491fn sort_changes_by_path(mut changes: Vec<FileChange>) -> Vec<FileChange> {
492    changes.sort_by(|a, b| a.path.cmp(&b.path));
493    changes
494}
495/// Build one `FileChange` per status entry in the plain-Git probe,
496/// computing real hunks against the sley-read HEAD blobs so `--patch`
497/// emits a body the regular renderer can stamp newline markers onto.
498///
499/// Unborn HEAD (plain `git init` + staged file, no commit yet) has
500/// no tree to read; in that case we skip old-side lookup and the add-only path
501/// in `compute_plain_git_hunks` renders against `/dev/null`. Without
502/// this check, resolving old-side blobs propagates a "no HEAD commit" error and
503/// the whole `--patch` render fails, even though the only honest diff
504/// is "everything is new."
505fn plain_git_file_changes_with_hunks(
506    probe: &PlainGitDiffProbe,
507    unified: usize,
508) -> Result<Vec<FileChange>> {
509    let git_repo = SleyRepository::discover(&probe.root)?;
510    let head_has_tree = !git_repo.head()?.is_unborn();
511    // `plain_git_worktree_status` can report the same path as BOTH
512    // deleted (index-vs-HEAD) and added (untracked worktree) — e.g.
513    // `git rm --cached f` followed by editing the still-present untracked
514    // `f`. Emitting an add patch and a separate delete patch for one path
515    // produces a conflicting pair `git apply` rejects; git renders that
516    // state as a single modify (HEAD content -> worktree content), so we
517    // coalesce here.
518    let added_set: BTreeSet<&Path> = probe.changes.added.iter().map(PathBuf::as_path).collect();
519    let deleted_set: BTreeSet<&Path> = probe.changes.deleted.iter().map(PathBuf::as_path).collect();
520
521    let mut changes = Vec::with_capacity(probe.changes.change_count());
522    for path in &probe.changes.modified {
523        push_plain_git_modified(
524            &git_repo,
525            head_has_tree,
526            &probe.root,
527            path,
528            unified,
529            &mut changes,
530        )?;
531    }
532    for path in &probe.changes.added {
533        if deleted_set.contains(path.as_path()) {
534            // Coalesced HEAD→worktree modify (see above): route through the
535            // type-change classifier so a coalesced regular↔symlink swap
536            // splits into delete+add rather than emitting a cross-type chmod.
537            push_plain_git_modified(
538                &git_repo,
539                head_has_tree,
540                &probe.root,
541                path,
542                unified,
543                &mut changes,
544            )?;
545        } else {
546            changes.push(plain_git_file_change(
547                &git_repo,
548                head_has_tree,
549                &probe.root,
550                path,
551                "added",
552                DiffKind::Added,
553                unified,
554            )?);
555        }
556    }
557    for path in &probe.changes.deleted {
558        // Already emitted as a coalesced modify in the added loop.
559        if added_set.contains(path.as_path()) {
560            continue;
561        }
562        changes.push(plain_git_file_change(
563            &git_repo,
564            head_has_tree,
565            &probe.root,
566            path,
567            "deleted",
568            DiffKind::Deleted,
569            unified,
570        )?);
571    }
572    Ok(changes)
573}
574
575#[allow(clippy::too_many_arguments)]
576fn plain_git_file_change(
577    git_repo: &SleyRepository,
578    head_has_tree: bool,
579    root: &Path,
580    path: &std::path::Path,
581    kind: &str,
582    diff_kind: DiffKind,
583    unified: usize,
584) -> Result<FileChange> {
585    let (old_blob, old_mode) = match (head_has_tree, &diff_kind) {
586        (true, DiffKind::Modified | DiffKind::Deleted) => {
587            match plain_git_lookup_blob_and_mode(git_repo, path)? {
588                Some((blob, mode)) => (Some(blob), Some(mode)),
589                None => (None, None),
590            }
591        }
592        _ => (None, None),
593    };
594    let new_blob = match diff_kind {
595        DiffKind::Added | DiffKind::Modified => {
596            // A read error here means the file vanished between the
597            // status scan and the diff attempt — fall back to status-
598            // only so the rendered patch at least names the path.
599            read_worktree_blob_for_diff(&root.join(path)).ok()
600        }
601        _ => None,
602    };
603    // Added files take their mode from the live worktree; deleted files
604    // from the HEAD tree entry resolved above. A modify carries both: the
605    // HEAD-tree old mode and the live-worktree new mode, so a chmod
606    // (exec-bit flip) surfaces as `old mode`/`new mode`.
607    let (old_mode_field, mode) = match diff_kind {
608        DiffKind::Added => (None, worktree_file_mode(&root.join(path))),
609        DiffKind::Deleted => (None, old_mode),
610        DiffKind::Modified => (old_mode, worktree_file_mode(&root.join(path))),
611        DiffKind::Unchanged => (None, None),
612    };
613    let (lines, eol, binary) =
614        compute_plain_git_hunks(old_blob.as_ref(), new_blob.as_ref(), &diff_kind, unified);
615    let symlink = symlink_change_from_blobs(
616        kind,
617        old_blob.as_ref(),
618        old_mode_field,
619        new_blob.as_ref(),
620        mode,
621    );
622    Ok(FileChange {
623        path: path.display().to_string(),
624        kind: kind.to_string(),
625        binary: binary && symlink.is_none(),
626        lines,
627        eol,
628        mode,
629        old_mode: old_mode_field,
630        symlink,
631        ..Default::default()
632    })
633}
634
635fn plain_git_lookup_blob_and_mode(
636    git_repo: &SleyRepository,
637    path: &std::path::Path,
638) -> Result<Option<(Blob, FileMode)>> {
639    let tree_path = plain_git_tree_path(path);
640    let Ok(entry) = git_repo.resolve_path("HEAD", &tree_path) else {
641        return Ok(None);
642    };
643    let Some(entry_mode) = entry.mode else {
644        return Ok(None);
645    };
646    let mode = match EntryKind::from_mode(entry_mode) {
647        Some(EntryKind::Symlink) => FileMode::Symlink,
648        Some(EntryKind::BlobExecutable) => FileMode::Executable,
649        Some(EntryKind::Blob) => FileMode::Normal,
650        _ => return Ok(None),
651    };
652    let object = git_repo.read_object(&entry.oid)?;
653    Ok(Some((Blob::new(object.body.clone()), mode)))
654}
655
/// Convert a filesystem path into the `/`-separated form used to address an
/// entry inside a Git tree.
///
/// Components are stringified lossily and joined with `/`, so the platform's
/// native separator never leaks into the tree path. `.` components are
/// dropped: `Path::components` keeps a leading `./`, and a Git tree never
/// contains a `.` entry, so `./src/lib.rs` must address the same entry as
/// `src/lib.rs`. All other components (including `..` and a root) are kept
/// as-is.
fn plain_git_tree_path(path: &std::path::Path) -> String {
    path.components()
        .filter(|component| !matches!(component, std::path::Component::CurDir))
        .map(|component| component.as_os_str().to_string_lossy())
        .collect::<Vec<_>>()
        .join("/")
}
662
663/// Classify the HEAD-tree side of a plain-Git path. A tracked entry is a
664/// blob or symlink — git records no directory entries — so this returns
665/// `Regular` or `Symlink`; an absent entry (unborn HEAD, or a path not in
666/// HEAD) is `Absent`, which `is_type_change` treats as no type change so
667/// the modify renders as content.
668fn plain_git_old_side_kind(
669    git_repo: &SleyRepository,
670    head_has_tree: bool,
671    path: &std::path::Path,
672) -> Result<SideKind> {
673    if !head_has_tree {
674        return Ok(SideKind::Absent);
675    }
676    let tree_path = plain_git_tree_path(path);
677    let Ok(entry) = git_repo.resolve_path("HEAD", &tree_path) else {
678        return Ok(SideKind::Absent);
679    };
680    Ok(match entry.mode.and_then(EntryKind::from_mode) {
681        Some(EntryKind::Symlink) => SideKind::Symlink,
682        Some(EntryKind::Tree) => SideKind::Dir,
683        _ => SideKind::Regular,
684    })
685}
686
687/// Emit the plain-Git `FileChange`(s) for one `modified` (or coalesced-
688/// modify) path, splitting a *type change* into the delete+add pair git
689/// records rather than a cross-type chmod `git apply` rejects.
690///
691/// This is the plain-Git mirror of the heddle path's
692/// `worktree_modified_type_change` + `expand_type_changes`: it reuses the
693/// same `worktree_side_kind` / `is_type_change` decision so both backends
694/// classify identical input identically (a regular↔symlink swap splits, a
695/// file→dir change downgrades to a deletion whose new leaves arrive as
696/// their own `added` entries from status). A tracked old side is always a
697/// single blob/symlink, so there is never an old subtree to expand here.
698fn push_plain_git_modified(
699    git_repo: &SleyRepository,
700    head_has_tree: bool,
701    root: &Path,
702    path: &std::path::Path,
703    unified: usize,
704    out: &mut Vec<FileChange>,
705) -> Result<()> {
706    let new_kind = worktree_side_kind(&root.join(path));
707    let old_kind = plain_git_old_side_kind(git_repo, head_has_tree, path)?;
708    if is_type_change(old_kind, new_kind) {
709        out.push(plain_git_file_change(
710            git_repo,
711            head_has_tree,
712            root,
713            path,
714            "deleted",
715            DiffKind::Deleted,
716            unified,
717        )?);
718        // A new-side directory's leaves arrive as separate `added` status
719        // entries; only a non-directory new side adds here.
720        if new_kind != SideKind::Dir {
721            out.push(plain_git_file_change(
722                git_repo,
723                head_has_tree,
724                root,
725                path,
726                "added",
727                DiffKind::Added,
728                unified,
729            )?);
730        }
731    } else {
732        out.push(plain_git_file_change(
733            git_repo,
734            head_has_tree,
735            root,
736            path,
737            "modified",
738            DiffKind::Modified,
739            unified,
740        )?);
741    }
742    Ok(())
743}
744
745fn compute_plain_git_hunks(
746    old: Option<&Blob>,
747    new: Option<&Blob>,
748    diff_kind: &DiffKind,
749    unified: usize,
750) -> (Option<Vec<LineDiff>>, FileEolState, bool) {
751    let attempt = || -> Result<(Vec<LineDiff>, FileEolState)> {
752        match diff_kind {
753            DiffKind::Added => {
754                let Some(new) = new else {
755                    return Ok((Vec::new(), FileEolState::default()));
756                };
757                ensure_text_diffable(new)?;
758                let eol = eol_for_added(new);
759                Ok((number_lines(blob_lines(new, "+")?), eol))
760            }
761            DiffKind::Deleted => {
762                let Some(old) = old else {
763                    return Ok((Vec::new(), FileEolState::default()));
764                };
765                ensure_text_diffable(old)?;
766                let eol = eol_for_deleted(old);
767                Ok((number_lines(blob_lines(old, "-")?), eol))
768            }
769            DiffKind::Modified => match (old, new) {
770                (Some(old), Some(new)) => modified_blob_hunks(old, new),
771                (None, Some(new)) => {
772                    ensure_text_diffable(new)?;
773                    let eol = eol_for_added(new);
774                    Ok((number_lines(blob_lines(new, "+")?), eol))
775                }
776                (Some(old), None) => {
777                    ensure_text_diffable(old)?;
778                    let eol = eol_for_deleted(old);
779                    Ok((number_lines(blob_lines(old, "-")?), eol))
780                }
781                (None, None) => Ok((Vec::new(), FileEolState::default())),
782            },
783            DiffKind::Unchanged => Ok((Vec::new(), FileEolState::default())),
784        }
785    };
786    match attempt() {
787        Ok((lines, eol)) => (Some(unified_hunks(lines, unified, &eol)), eol, false),
788        Err(error) if is_binary_diff_error(&error) => (None, FileEolState::default(), true),
789        Err(_) => (None, FileEolState::default(), false),
790    }
791}
792/// Build `FileChange` entries from a `WorktreeStatus`, optionally
793/// computing the per-file hunk vector (with EOL metadata) so the
794/// patch renderer has something to render. When `want_hunks` is
795/// false the entries are status-only — same as the old behaviour.
796fn file_changes_from_status(
797    status: &objects::worktree::WorktreeStatus,
798    want_hunks: bool,
799    repo: Option<&Repository>,
800    from_tree: Option<&Tree>,
801    unified: usize,
802) -> Vec<FileChange> {
803    let mut changes = Vec::with_capacity(status.change_count());
804    for path in &status.modified {
805        changes.push(make_status_file_change(
806            path,
807            "modified",
808            DiffKind::Modified,
809            want_hunks,
810            repo,
811            from_tree,
812            unified,
813        ));
814    }
815    for path in &status.added {
816        changes.push(make_status_file_change(
817            path,
818            "added",
819            DiffKind::Added,
820            want_hunks,
821            repo,
822            from_tree,
823            unified,
824        ));
825    }
826    for path in &status.deleted {
827        changes.push(make_status_file_change(
828            path,
829            "deleted",
830            DiffKind::Deleted,
831            want_hunks,
832            repo,
833            from_tree,
834            unified,
835        ));
836    }
837    changes
838}
839
840#[allow(clippy::too_many_arguments)]
841fn make_status_file_change(
842    path: &std::path::Path,
843    kind: &str,
844    diff_kind: DiffKind,
845    want_hunks: bool,
846    repo: Option<&Repository>,
847    from_tree: Option<&Tree>,
848    unified: usize,
849) -> FileChange {
850    let path_str = path.display().to_string();
851    // Reclassify a `modified` path that is now a directory (file→dir type
852    // change) into a deletion so the renderer emits `+++ /dev/null` and
853    // `git apply` removes the blocking file before the nested adds land.
854    let (kind, diff_kind) = match repo
855        .and_then(|repo| worktree_modified_type_change(repo.root(), &path_str, diff_kind))
856    {
857        Some(reclassified) => reclassified,
858        None => (kind, diff_kind),
859    };
860    match repo {
861        Some(repo) if want_hunks => {
862            build_worktree_change(repo, from_tree, &path_str, kind, diff_kind, unified)
863        }
864        _ => make_status_only_change(repo, from_tree, None, &path_str, kind),
865    }
866}
867
868/// Build a status-only `FileChange` (no hunk body) that still carries its
869/// `(old_mode, mode)` pair. Modes are cheap metadata that *every* output mode
870/// needs, not just `--patch`/JSON: rename detection rejects a cross-type
871/// (regular↔symlink) collapse by comparing the two sides' modes, and the
872/// renderers stamp rename+mode headers from them. Gating mode capture on the
873/// hunk-only flag dropped them on the default/`--stat`/`--name-only` paths, so
874/// a cross-type move silently re-collapsed into a rename there while `--patch`
875/// (which kept the modes) correctly stayed split (cid 3321103601). This is the
876/// single chokepoint every status-only construction site routes through — the
877/// worktree-status path, the type-change split, and the `--name-only` builder
878/// — so the capture can't diverge between them again. `repo == None` is the
879/// plain-Git fast path, which has no object store to resolve modes from (and
880/// runs no rename collapse), so it stays modeless.
881fn make_status_only_change(
882    repo: Option<&Repository>,
883    from_tree: Option<&Tree>,
884    to_tree: Option<&Tree>,
885    path_str: &str,
886    kind: &str,
887) -> FileChange {
888    let (old_mode, mode) = match repo {
889        Some(repo) => change_file_modes(repo, from_tree, to_tree, path_str, kind),
890        None => (None, None),
891    };
892    FileChange {
893        path: path_str.to_string(),
894        kind: kind.to_string(),
895        mode,
896        old_mode,
897        ..Default::default()
898    }
899}
900
901/// Build a worktree-side `FileChange` with its hunk vector, EOL metadata,
902/// and `(old_mode, mode)` pair. Worktree status diffs have no `to_tree`:
903/// the new-side mode comes from the live worktree, the old-side mode from
904/// `from_tree`.
905fn build_worktree_change(
906    repo: &Repository,
907    from_tree: Option<&Tree>,
908    path_str: &str,
909    kind: &str,
910    diff_kind: DiffKind,
911    unified: usize,
912) -> FileChange {
913    let (old_mode, mode) = change_file_modes(repo, from_tree, None, path_str, kind);
914    let (lines, eol, binary) = match get_worktree_diff(repo, from_tree, path_str, &diff_kind) {
915        Ok((raw, eol)) => (Some(unified_hunks(raw, unified, &eol)), eol, false),
916        Err(error) if is_binary_diff_error(&error) => (None, FileEolState::default(), true),
917        // Worktree read errors on a status-listed file mean the file
918        // vanished between the status scan and the diff attempt. Fall back
919        // to status-only; the renderer prints the file header without a
920        // body, matching git's behaviour for transient races.
921        Err(_) => (None, FileEolState::default(), false),
922    };
923    let symlink = symlink_change_for_paths(
924        repo, from_tree, None, kind, path_str, path_str, old_mode, mode,
925    );
926    FileChange {
927        path: path_str.to_string(),
928        kind: kind.to_string(),
929        binary: binary && symlink.is_none(),
930        lines,
931        eol,
932        mode,
933        old_mode,
934        symlink,
935        ..Default::default()
936    }
937}
938
/// What kind of object, if any, a path names on one side of a diff.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SideKind {
    /// Nothing exists at the path on this side.
    Absent,
    /// The path names a directory.
    Dir,
    /// A regular or executable file (`100644` / `100755`).
    Regular,
    /// The path names a symbolic link.
    Symlink,
}
948
949/// Classify a path's kind within a tree (the old side of a diff, or the
950/// new side of a state-to-state diff). `find_entry_in_tree` resolves blob
951/// and symlink leaves; a `None` there means either a directory or a
952/// missing path, disambiguated by `dir_subtree_in_tree`.
953fn tree_side_kind(repo: &Repository, tree: Option<&Tree>, path: &str) -> Result<SideKind> {
954    let Some(tree) = tree else {
955        return Ok(SideKind::Absent);
956    };
957    if let Some(entry) = find_entry_in_tree(repo, tree, path)? {
958        return Ok(if entry.entry_type() == EntryType::Symlink {
959            SideKind::Symlink
960        } else {
961            SideKind::Regular
962        });
963    }
964    if dir_subtree_in_tree(repo, tree, path)?.is_some() {
965        Ok(SideKind::Dir)
966    } else {
967        Ok(SideKind::Absent)
968    }
969}
970
971/// Classify a path's new-side kind: the `to_tree` entry for a
972/// state-to-state diff, otherwise the live worktree.
973fn new_side_kind(repo: &Repository, to_tree: Option<&Tree>, path: &str) -> Result<SideKind> {
974    match to_tree {
975        Some(tree) => tree_side_kind(repo, Some(tree), path),
976        None => Ok(worktree_side_kind(&repo.root().join(path))),
977    }
978}
979
980/// Classify a worktree path. `symlink_metadata` does not follow links, so
981/// a symlink (even one pointing at a directory) reports `Symlink`, not
982/// `Dir`. A missing path is `Absent`.
983fn worktree_side_kind(path: &Path) -> SideKind {
984    let Ok(meta) = std::fs::symlink_metadata(path) else {
985        return SideKind::Absent;
986    };
987    if meta.file_type().is_symlink() {
988        SideKind::Symlink
989    } else if meta.is_dir() {
990        SideKind::Dir
991    } else {
992        SideKind::Regular
993    }
994}
995
996/// A `modified` entry whose two sides are different object *kinds* — git
997/// can't represent it as a chmod and `git apply` rejects the attempt.
998fn is_type_change(old: SideKind, new: SideKind) -> bool {
999    use SideKind::{Dir, Regular, Symlink};
1000    matches!(
1001        (old, new),
1002        (Dir, Regular)
1003            | (Dir, Symlink)
1004            | (Regular, Dir)
1005            | (Symlink, Dir)
1006            | (Regular, Symlink)
1007            | (Symlink, Regular)
1008    )
1009}
1010
1011/// Rewrite a `modified` entry that is actually a *type change* into the
1012/// delete-old + add-new pair `git diff` emits, so `git apply` can swap one
1013/// object kind for another instead of attempting a cross-type chmod.
1014///
1015/// Two shapes need this (both verified against `git diff`):
1016/// * **dir ↔ file/symlink** — a tracked directory replaced by a file (or
1017///   the reverse). git emits a deletion of every leaf under the old
1018///   directory plus an add of the new file (or vice versa); a bare
1019///   `old mode`/`new mode` chmod cannot turn a directory into a file
1020///   (cid 3319484717 — the committed-diff side dropped this entirely).
1021/// * **regular ↔ symlink** — `100644`/`100755` ⇄ `120000`. git emits a
1022///   delete of the old object and an add of the new; `git apply` rejects
1023///   the `old mode 100644`/`new mode 120000` chmod form across this
1024///   boundary (cid 3319484727).
1025///
1026/// Shared by the worktree path (`to_tree == None`, new side read from
1027/// disk) and the state-to-state path (`to_tree == Some`, new side read
1028/// from the object store) so the split is byte-identical on both — fixing
1029/// it in only one place would leave committed diffs (`heddle diff HEAD~1
1030/// HEAD --patch`) emitting the form git rejects.
1031///
1032/// The worktree path never sees a *file → dir* `modified` entry here:
1033/// `worktree_modified_type_change` downgrades it to a deletion upstream
1034/// and the directory's new leaves arrive as separate `added` entries from
1035/// status. The state path has no such upstream pass, so both directions
1036/// are handled below.
1037fn expand_type_changes(
1038    repo: &Repository,
1039    from_tree: Option<&Tree>,
1040    to_tree: Option<&Tree>,
1041    changes: Vec<FileChange>,
1042    want_hunks: bool,
1043    unified: usize,
1044) -> Result<Vec<FileChange>> {
1045    let mut output = Vec::with_capacity(changes.len());
1046    for change in changes {
1047        if change.kind != "modified" {
1048            output.push(change);
1049            continue;
1050        }
1051        let old_kind = tree_side_kind(repo, from_tree, &change.path)?;
1052        let new_kind = new_side_kind(repo, to_tree, &change.path)?;
1053        if !is_type_change(old_kind, new_kind) {
1054            output.push(change);
1055            continue;
1056        }
1057
1058        // Delete the old side: every leaf under a directory, else the
1059        // single old object.
1060        if old_kind == SideKind::Dir {
1061            if let Some(from_tree) = from_tree
1062                && let Some(subtree) = dir_subtree_in_tree(repo, from_tree, &change.path)?
1063            {
1064                let mut nested = Vec::new();
1065                collect_subtree_blob_paths(repo, &subtree, &change.path, &mut nested)?;
1066                for nested_path in nested {
1067                    output.push(make_type_change_part(
1068                        repo,
1069                        Some(from_tree),
1070                        to_tree,
1071                        &nested_path,
1072                        DiffKind::Deleted,
1073                        want_hunks,
1074                        unified,
1075                    ));
1076                }
1077            }
1078        } else {
1079            output.push(make_type_change_part(
1080                repo,
1081                from_tree,
1082                to_tree,
1083                &change.path,
1084                DiffKind::Deleted,
1085                want_hunks,
1086                unified,
1087            ));
1088        }
1089
1090        // Add the new side: every leaf under a directory, else the single
1091        // new object. A new-side directory only occurs in the state path
1092        // (the worktree path reclassifies file→dir upstream), so its
1093        // leaves come from `to_tree`.
1094        if new_kind == SideKind::Dir {
1095            if let Some(to_tree) = to_tree
1096                && let Some(subtree) = dir_subtree_in_tree(repo, to_tree, &change.path)?
1097            {
1098                let mut nested = Vec::new();
1099                collect_subtree_blob_paths(repo, &subtree, &change.path, &mut nested)?;
1100                for nested_path in nested {
1101                    output.push(make_type_change_part(
1102                        repo,
1103                        from_tree,
1104                        Some(to_tree),
1105                        &nested_path,
1106                        DiffKind::Added,
1107                        want_hunks,
1108                        unified,
1109                    ));
1110                }
1111            }
1112        } else {
1113            output.push(make_type_change_part(
1114                repo,
1115                from_tree,
1116                to_tree,
1117                &change.path,
1118                DiffKind::Added,
1119                want_hunks,
1120                unified,
1121            ));
1122        }
1123    }
1124    Ok(output)
1125}
1126
1127fn make_type_change_part(
1128    repo: &Repository,
1129    from_tree: Option<&Tree>,
1130    to_tree: Option<&Tree>,
1131    path_str: &str,
1132    diff_kind: DiffKind,
1133    want_hunks: bool,
1134    unified: usize,
1135) -> FileChange {
1136    let kind = diff_kind.to_string();
1137    if !want_hunks {
1138        return make_status_only_change(Some(repo), from_tree, to_tree, path_str, &kind);
1139    }
1140    match to_tree {
1141        Some(to_tree) => build_state_change(
1142            repo, from_tree, to_tree, path_str, &kind, diff_kind, unified,
1143        ),
1144        None => build_worktree_change(repo, from_tree, path_str, &kind, diff_kind, unified),
1145    }
1146}
1147
1148/// State-to-state analogue of `build_worktree_change`: both sides come
1149/// from the object store, so the new-side mode and content are read from
1150/// `to_tree` rather than the live worktree.
1151fn build_state_change(
1152    repo: &Repository,
1153    from_tree: Option<&Tree>,
1154    to_tree: &Tree,
1155    path_str: &str,
1156    kind: &str,
1157    diff_kind: DiffKind,
1158    unified: usize,
1159) -> FileChange {
1160    let (old_mode, mode) = change_file_modes(repo, from_tree, Some(to_tree), path_str, kind);
1161    let (lines, eol, binary) = match get_state_diff(repo, from_tree, to_tree, path_str, &diff_kind)
1162    {
1163        Ok((raw, eol)) => (Some(unified_hunks(raw, unified, &eol)), eol, false),
1164        Err(error) if is_binary_diff_error(&error) => (None, FileEolState::default(), true),
1165        Err(_) => (None, FileEolState::default(), false),
1166    };
1167    let symlink = symlink_change_for_paths(
1168        repo,
1169        from_tree,
1170        Some(to_tree),
1171        kind,
1172        path_str,
1173        path_str,
1174        old_mode,
1175        mode,
1176    );
1177    FileChange {
1178        path: path_str.to_string(),
1179        kind: kind.to_string(),
1180        binary: binary && symlink.is_none(),
1181        lines,
1182        eol,
1183        mode,
1184        old_mode,
1185        symlink,
1186        ..Default::default()
1187    }
1188}
1189
1190/// Resolve `path` to its subtree if it names a directory in `tree`,
1191/// descending component by component. Returns `None` for a missing path or
1192/// a blob/symlink leaf.
1193fn dir_subtree_in_tree(repo: &Repository, tree: &Tree, path: &str) -> Result<Option<Tree>> {
1194    let mut current = tree.clone();
1195    let mut parts = path.split('/').peekable();
1196    while let Some(name) = parts.next() {
1197        let Some(entry) = current.get(name) else {
1198            return Ok(None);
1199        };
1200        if !entry.is_tree() {
1201            return Ok(None);
1202        }
1203        let Some(hash) = entry.tree_hash() else {
1204            return Ok(None);
1205        };
1206        let Some(subtree) = repo.store().get_tree(&hash)? else {
1207            return Ok(None);
1208        };
1209        if parts.peek().is_none() {
1210            return Ok(Some(subtree));
1211        }
1212        current = subtree;
1213    }
1214    Ok(None)
1215}
1216
1217/// Collect every blob/symlink leaf path under `subtree`, prefixed with the
1218/// subtree's path, so a dir→file type change can emit a deletion per file.
1219fn collect_subtree_blob_paths(
1220    repo: &Repository,
1221    subtree: &Tree,
1222    prefix: &str,
1223    out: &mut Vec<String>,
1224) -> Result<()> {
1225    for entry in subtree.entries() {
1226        let child_path = format!("{prefix}/{}", entry.name());
1227        if entry.is_tree() {
1228            if let Some(hash) = entry.tree_hash()
1229                && let Some(nested) = repo.store().get_tree(&hash)?
1230            {
1231                collect_subtree_blob_paths(repo, &nested, &child_path, out)?;
1232            }
1233        } else {
1234            out.push(child_path);
1235        }
1236    }
1237    Ok(())
1238}
1239
1240fn head_from_tree(repo: &Repository) -> Result<Option<Tree>> {
1241    let Some(head_id) = repo.head()? else {
1242        return Ok(None);
1243    };
1244    let Some(state) = repo.store().get_state(&head_id)? else {
1245        return Ok(None);
1246    };
1247    Ok(repo.store().get_tree(&state.tree)?)
1248}
1249
1250/// Compute a state-to-state diff payload without printing.
1251///
1252/// Reuses the same line-rendering pipeline as `cmd_diff`'s state-to-state
1253/// path: object-store lookups for both sides, `diff_blobs` for modified
1254/// files, hunk grouping via `unified_hunks`. The result is the same
1255/// `DiffReport` shape that `cmd_diff` serializes, so callers can embed
1256/// it inside their own JSON payload.
1257///
1258/// Used by `heddle merge --with-diff` to surface the diff that would
1259/// land (or just landed) without a separate `heddle diff` invocation.
1260///
1261/// `semantic` requests the semantic change list in addition to the
1262/// line-level hunks. Building with `--features semantic` is required;
1263/// otherwise this errors out the same way `cmd_diff --semantic` does.
1264pub fn compute_state_diff(
1265    repo: &Repository,
1266    from_change_id: &ChangeId,
1267    to_change_id: &ChangeId,
1268    semantic: bool,
1269    unified: usize,
1270) -> Result<DiffReport> {
1271    let from_state = repo.store().get_state(from_change_id)?;
1272    let from_tree = if let Some(ref state) = from_state {
1273        repo.store().get_tree(&state.tree)?
1274    } else {
1275        None
1276    };
1277
1278    let to_state = require_resolved_state(repo, to_change_id)?;
1279    let to_tree = repo
1280        .store()
1281        .get_tree(&to_state.tree)?
1282        .ok_or_else(|| anyhow!("Tree not found for state {}", to_change_id.short()))?;
1283
1284    let from_hash = from_state
1285        .as_ref()
1286        .map(|s| s.tree)
1287        .unwrap_or_else(|| Tree::new().hash());
1288
1289    let semantic_diff_result: Option<SemanticDiffResult> = if semantic {
1290        Some(run_semantic_diff(repo, &from_hash, &to_state.tree)?)
1291    } else {
1292        None
1293    };
1294
1295    let changes: FileChangeSet = if let Some(ref result) = semantic_diff_result {
1296        result.file_changes.clone()
1297    } else {
1298        repo.diff_trees(&from_hash, &to_state.tree)?
1299    };
1300
1301    let file_changes: Vec<FileChange> = changes
1302        .iter()
1303        .map(|change| {
1304            build_state_change(
1305                repo,
1306                from_tree.as_ref(),
1307                &to_tree,
1308                &change.path,
1309                &change.kind.to_string(),
1310                change.kind,
1311                unified,
1312            )
1313        })
1314        .collect();
1315    let file_changes = sort_changes_by_path(file_changes);
1316    let file_changes = expand_type_changes(
1317        repo,
1318        from_tree.as_ref(),
1319        Some(&to_tree),
1320        file_changes,
1321        true,
1322        unified,
1323    )?;
1324    let file_changes = detect_clear_renames(
1325        repo,
1326        from_tree.as_ref(),
1327        Some(&to_tree),
1328        file_changes,
1329        true,
1330        unified,
1331    )?;
1332
1333    let semantic_changes = semantic_diff_result.map(|r| {
1334        r.changes
1335            .into_iter()
1336            .map(SemanticChangeEntry::from)
1337            .collect()
1338    });
1339
1340    let mut output = DiffReport::new(
1341        Some(from_change_id.short()),
1342        Some(to_change_id.short()),
1343        file_changes,
1344        semantic_changes,
1345        None,
1346        None,
1347    );
1348    populate_patch_text(&mut output);
1349    Ok(output)
1350}
1351
1352/// Compute a diff from an existing state to an in-memory tree.
1353///
1354/// Merge preview uses this for clean 3-way previews: the tree that would
1355/// land has been computed, but no state has been committed yet. The top
1356/// tree is installed in the object store so the existing semantic and
1357/// rename-aware diff pipeline can address it by hash.
1358pub fn compute_tree_diff(
1359    repo: &Repository,
1360    from_change_id: &ChangeId,
1361    to_tree: &Tree,
1362    to_label: impl Into<String>,
1363    semantic: bool,
1364    unified: usize,
1365) -> Result<DiffReport> {
1366    let from_state = repo.store().get_state(from_change_id)?;
1367    let from_tree = if let Some(ref state) = from_state {
1368        repo.store().get_tree(&state.tree)?
1369    } else {
1370        None
1371    };
1372    let from_hash = from_state
1373        .as_ref()
1374        .map(|s| s.tree)
1375        .unwrap_or_else(|| Tree::new().hash());
1376
1377    let to_hash = repo.store().put_tree(to_tree)?;
1378
1379    let semantic_diff_result: Option<SemanticDiffResult> = if semantic {
1380        Some(run_semantic_diff(repo, &from_hash, &to_hash)?)
1381    } else {
1382        None
1383    };
1384
1385    let changes: FileChangeSet = if let Some(ref result) = semantic_diff_result {
1386        result.file_changes.clone()
1387    } else {
1388        repo.diff_trees(&from_hash, &to_hash)?
1389    };
1390
1391    let file_changes: Vec<FileChange> = changes
1392        .iter()
1393        .map(|change| {
1394            build_state_change(
1395                repo,
1396                from_tree.as_ref(),
1397                to_tree,
1398                &change.path,
1399                &change.kind.to_string(),
1400                change.kind,
1401                unified,
1402            )
1403        })
1404        .collect();
1405    let file_changes = sort_changes_by_path(file_changes);
1406    let file_changes = expand_type_changes(
1407        repo,
1408        from_tree.as_ref(),
1409        Some(to_tree),
1410        file_changes,
1411        true,
1412        unified,
1413    )?;
1414    let file_changes = detect_clear_renames(
1415        repo,
1416        from_tree.as_ref(),
1417        Some(to_tree),
1418        file_changes,
1419        true,
1420        unified,
1421    )?;
1422
1423    let semantic_changes = semantic_diff_result.map(|r| {
1424        r.changes
1425            .into_iter()
1426            .map(SemanticChangeEntry::from)
1427            .collect()
1428    });
1429
1430    let mut output = DiffReport::new(
1431        Some(from_change_id.short()),
1432        Some(to_label.into()),
1433        file_changes,
1434        semantic_changes,
1435        None,
1436        None,
1437    );
1438    populate_patch_text(&mut output);
1439    Ok(output)
1440}
1441
1442fn strip_line_hunks(changes: Vec<FileChange>) -> Vec<FileChange> {
1443    changes
1444        .into_iter()
1445        .map(|mut change| {
1446            change.lines = None;
1447            change
1448        })
1449        .collect()
1450}
1451
1452fn unified_hunks(lines: Vec<LineDiff>, context: usize, eol: &FileEolState) -> Vec<LineDiff> {
1453    if lines.is_empty() {
1454        return lines;
1455    }
1456    if !lines.iter().any(|line| line.prefix != " ") {
1457        // No `+`/`-` lines. The only way an all-context diff is still a
1458        // real change is a trailing-newline-only edit (`hello\n` <->
1459        // `hello`): `diff_blobs` strips terminators, so the changed tail
1460        // line collapses to shared context. Synthesize a single tail
1461        // hunk so the renderer can split it and attach the
1462        // `\ No newline at end of file` marker. Otherwise it's a genuine
1463        // no-op — return the lines untouched (no hunk header).
1464        if eol.old_has_final_newline == eol.new_has_final_newline {
1465            return lines;
1466        }
1467        return eol_only_tail_hunk(lines, context);
1468    }
1469
1470    let mut ranges = Vec::<(usize, usize)>::new();
1471    let mut cursor = 0usize;
1472    while cursor < lines.len() {
1473        while cursor < lines.len() && lines[cursor].prefix == " " {
1474            cursor += 1;
1475        }
1476        if cursor >= lines.len() {
1477            break;
1478        }
1479
1480        let start = cursor.saturating_sub(context);
1481        while cursor < lines.len() && lines[cursor].prefix != " " {
1482            cursor += 1;
1483        }
1484        let mut end = (cursor + context).min(lines.len());
1485
1486        while cursor < lines.len() && lines[cursor].prefix == " " && cursor < end {
1487            cursor += 1;
1488        }
1489        while cursor < lines.len() && lines[cursor].prefix != " " {
1490            end = (cursor + 1 + context).min(lines.len());
1491            cursor += 1;
1492        }
1493
1494        if let Some((_, previous_end)) = ranges.last_mut()
1495            && start <= *previous_end
1496        {
1497            *previous_end = end;
1498            continue;
1499        }
1500        ranges.push((start, end));
1501    }
1502
1503    let mut output = Vec::new();
1504    for (start, end) in ranges {
1505        let (old_start, old_len, new_start, new_len) = hunk_span(&lines, start, end);
1506        output.push(LineDiff {
1507            prefix: "@".to_string(),
1508            content: format!("@ -{},{} +{},{} @@", old_start, old_len, new_start, new_len),
1509            old_line: None,
1510            new_line: None,
1511        });
1512        // Emit the hunk body UNTRIMMED. Decoration trimming drops a real
1513        // `+` line, which is a pretty-display nicety only — applying it
1514        // here would desync the body from the `@@` header counts computed
1515        // above (via `hunk_span`) and corrupt the `--patch`/JSON line
1516        // model so `git apply` rejects or mis-reconstructs the file (cid
1517        // 3320364905). The trim now lives in `print_diff` alone, via
1518        // `trim_added_decorations_for_display`.
1519        output.extend_from_slice(&lines[start..end]);
1520    }
1521    output
1522}
1523
1524/// Build a single hunk anchored on the file's last line for a
1525/// trailing-newline-only change. The body is `context` lines plus the
1526/// tail (all shared context); the renderer (`render_patch_hunks`) splits
1527/// the tail into a `-`/`+` pair and attaches the no-newline marker to
1528/// the side that lacks the terminator. Mirrors `git diff`'s hunk for an
1529/// EOL-only edit (e.g. `@@ -2,4 +2,4 @@` for a 5-line file at context 3).
1530fn eol_only_tail_hunk(lines: Vec<LineDiff>, context: usize) -> Vec<LineDiff> {
1531    let end = lines.len();
1532    let start = end.saturating_sub(context + 1);
1533    let (old_start, old_len, new_start, new_len) = hunk_span(&lines, start, end);
1534    let mut output = Vec::with_capacity(end - start + 1);
1535    output.push(LineDiff {
1536        prefix: "@".to_string(),
1537        content: format!("@ -{},{} +{},{} @@", old_start, old_len, new_start, new_len),
1538        old_line: None,
1539        new_line: None,
1540    });
1541    output.extend_from_slice(&lines[start..end]);
1542    output
1543}
1544
1545/// Pretty-display transform: drop a leading added "decoration" line
1546/// (`#[...]`, `///`, `@`, etc.) when an identical context line already
1547/// follows the inserted block, so the diff anchors on the existing item
1548/// rather than showing a duplicated attribute.
1549///
1550/// DISPLAY ONLY. This drops a real `+` line, so it must never reach the
1551/// `--patch`/JSON line model — the dropped line is a genuine change and
1552/// omitting it desyncs the `@@` header counts, corrupting `git apply`
1553/// (cid 3320364905). `unified_hunks` keeps the canonical (untrimmed)
1554/// hunk body; `print_diff` calls this purely for human-facing rendering.
1555///
1556/// Applied per hunk body (segmented on the `@` header lines) so the
1557/// decoration match can never cross a hunk boundary into an unrelated
1558/// context line.
1559pub fn trim_added_decorations_for_display(lines: &[LineDiff]) -> Vec<LineDiff> {
1560    let mut output = Vec::with_capacity(lines.len());
1561    let mut body_start = 0usize;
1562    for (index, line) in lines.iter().enumerate() {
1563        if line.prefix == "@" {
1564            if body_start < index {
1565                output.extend(trim_trailing_added_decorations(&lines[body_start..index]));
1566            }
1567            output.push(line.clone());
1568            body_start = index + 1;
1569        }
1570    }
1571    if body_start < lines.len() {
1572        output.extend(trim_trailing_added_decorations(&lines[body_start..]));
1573    }
1574    output
1575}
1576
1577fn trim_trailing_added_decorations(lines: &[LineDiff]) -> Vec<LineDiff> {
1578    let mut trimmed = Vec::with_capacity(lines.len());
1579    let mut index = 0usize;
1580    while index < lines.len() {
1581        if lines[index].prefix == "+"
1582            && is_visual_decoration_line(&lines[index].content)
1583            && let Some(next_context) = next_context_line(lines, index + 1)
1584            && next_context.content == lines[index].content
1585        {
1586            let added_block_has_code = lines[index + 1..next_context.index]
1587                .iter()
1588                .any(|line| line.prefix == "+" && !is_blank_or_visual_decoration(&line.content));
1589            if added_block_has_code {
1590                index += 1;
1591                continue;
1592            }
1593        }
1594        trimmed.push(lines[index].clone());
1595        index += 1;
1596    }
1597    trimmed
1598}
1599
1600struct IndexedLine<'a> {
1601    index: usize,
1602    content: &'a str,
1603}
1604
1605fn next_context_line(lines: &[LineDiff], start: usize) -> Option<IndexedLine<'_>> {
1606    lines[start..]
1607        .iter()
1608        .enumerate()
1609        .find(|(_, line)| line.prefix == " ")
1610        .map(|(offset, line)| IndexedLine {
1611            index: start + offset,
1612            content: &line.content,
1613        })
1614}
1615
1616fn is_blank_or_visual_decoration(line: &str) -> bool {
1617    line.trim().is_empty() || is_visual_decoration_line(line)
1618}
1619
/// True when the line, ignoring leading whitespace, begins with one of the
/// decoration markers: `#[`, `#![`, `@`, `///` or `//!`.
fn is_visual_decoration_line(line: &str) -> bool {
    const MARKERS: [&str; 5] = ["#[", "#![", "@", "///", "//!"];
    let body = line.trim_start();
    MARKERS.iter().any(|&marker| body.starts_with(marker))
}
1628
1629fn hunk_span(lines: &[LineDiff], start: usize, end: usize) -> (usize, usize, usize, usize) {
1630    let old_before = lines[..start]
1631        .iter()
1632        .filter(|line| line.prefix != "+")
1633        .count();
1634    let new_before = lines[..start]
1635        .iter()
1636        .filter(|line| line.prefix != "-")
1637        .count();
1638    let old_len = lines[start..end]
1639        .iter()
1640        .filter(|line| line.prefix != "+")
1641        .count();
1642    let new_len = lines[start..end]
1643        .iter()
1644        .filter(|line| line.prefix != "-")
1645        .count();
1646
1647    let old_start = if old_len == 0 {
1648        old_before
1649    } else {
1650        old_before + 1
1651    };
1652    let new_start = if new_len == 0 {
1653        new_before
1654    } else {
1655        new_before + 1
1656    };
1657    (old_start, old_len, new_start, new_len)
1658}
1659
1660fn collect_file_context(
1661    repo: &Repository,
1662    state: &State,
1663    changes: &FileChangeSet,
1664) -> Result<Vec<FileContextEntry>> {
1665    let Some(context_root) = &state.context else {
1666        return Ok(Vec::new());
1667    };
1668
1669    let mut entries = Vec::new();
1670    for change in changes {
1671        let target = ContextTarget::file(change.path.clone())?;
1672        let Some(blob) = repo.get_context_blob(context_root, &target)? else {
1673            continue;
1674        };
1675        let annotations = blob
1676            .annotations
1677            .iter()
1678            .filter(|annotation| annotation.status == AnnotationStatus::Active)
1679            .filter_map(|annotation| {
1680                annotation
1681                    .current_revision()
1682                    .map(|revision| ContextSnippet {
1683                        annotation_id: annotation.annotation_id.clone(),
1684                        kind: revision.kind.to_string(),
1685                        content: summarize_context(&revision.content),
1686                        revision_count: annotation.revisions.len(),
1687                    })
1688            })
1689            .collect::<Vec<_>>();
1690        if !annotations.is_empty() {
1691            entries.push(FileContextEntry {
1692                path: change.path.clone(),
1693                annotations,
1694            });
1695        }
1696    }
1697    Ok(entries)
1698}
1699
1700fn collect_state_guidance(repo: &Repository, state: &State) -> Result<Vec<ContextSnippet>> {
1701    let Some(context_root) = &state.context else {
1702        return Ok(Vec::new());
1703    };
1704    let target = ContextTarget::state(state.change_id);
1705    let Some(blob) = repo.get_context_blob(context_root, &target)? else {
1706        return Ok(Vec::new());
1707    };
1708    Ok(blob
1709        .annotations
1710        .iter()
1711        .filter(|annotation| annotation.status == AnnotationStatus::Active)
1712        .filter_map(|annotation| {
1713            annotation
1714                .current_revision()
1715                .map(|revision| ContextSnippet {
1716                    annotation_id: annotation.annotation_id.clone(),
1717                    kind: revision.kind.to_string(),
1718                    content: summarize_context(&revision.content),
1719                    revision_count: annotation.revisions.len(),
1720                })
1721        })
1722        .collect())
1723}
1724
/// Reduce annotation content to a single display line.
///
/// Picks the first line that is not blank (whitespace-only lines are
/// skipped, but the chosen line is returned untrimmed). A line of up to 88
/// characters is returned as-is; a longer one is cut to its first 85
/// characters followed by `...`. Lengths are counted in `char`s, so a
/// multi-byte character is never split.
fn summarize_context(content: &str) -> String {
    const MAX_CHARS: usize = 88;
    const KEPT_CHARS: usize = 85;

    let Some(line) = content.lines().find(|line| !line.trim().is_empty()) else {
        return String::new();
    };

    // A character at index `MAX_CHARS` exists only when the line is longer
    // than the limit.
    if line.chars().nth(MAX_CHARS).is_none() {
        return line.to_string();
    }

    let mut summary: String = line.chars().take(KEPT_CHARS).collect();
    summary.push_str("...");
    summary
}
1740
1741fn get_worktree_diff(
1742    repo: &Repository,
1743    from_tree: Option<&Tree>,
1744    path: &str,
1745    kind: &DiffKind,
1746) -> Result<(Vec<LineDiff>, FileEolState)> {
1747    let worktree_path = repo.root().join(path);
1748
1749    match kind {
1750        DiffKind::Added => {
1751            let new_blob = read_worktree_blob_for_diff(&worktree_path)?;
1752            let eol = eol_for_added(&new_blob);
1753            Ok((number_lines(blob_lines(&new_blob, "+")?), eol))
1754        }
1755        DiffKind::Deleted => {
1756            // `find_blob_in_tree` walks the path component by component;
1757            // a root-only `tree.get(path)` misses nested deletions like
1758            // `src/nested/file.txt` and would drop the deletion hunk.
1759            if let Some(tree) = from_tree
1760                && let Some(blob) = find_blob_in_tree(repo, tree, path)?
1761            {
1762                let eol = eol_for_deleted(&blob);
1763                return Ok((number_lines(blob_lines(&blob, "-")?), eol));
1764            }
1765            Ok((vec![], FileEolState::default()))
1766        }
1767        DiffKind::Modified => {
1768            let new_blob = read_worktree_blob_for_diff(&worktree_path)?;
1769
1770            if let Some(tree) = from_tree
1771                && let Some(old_blob) = find_blob_in_tree(repo, tree, path)?
1772            {
1773                return modified_blob_hunks(&old_blob, &new_blob);
1774            }
1775
1776            let eol = eol_for_added(&new_blob);
1777            Ok((number_lines(blob_lines(&new_blob, "+")?), eol))
1778        }
1779        DiffKind::Unchanged => Ok((Vec::new(), FileEolState::default())),
1780    }
1781}
1782
1783/// A tracked file replaced by a directory (`foo` → `foo/bar`) surfaces in
1784/// heddle's worktree status as a `modified` path whose worktree side is
1785/// now a directory. `git diff` represents that as a *deletion* of the file
1786/// (the directory's new files arrive as separate `added` entries), so we
1787/// reclassify the modify to a deletion: otherwise `read_worktree_blob_for_diff`
1788/// fails reading the directory, the change collapses to `lines: None`, and
1789/// the renderer drops it — leaving `git apply` unable to create `foo/bar`
1790/// over the still-present `foo`. Returns the effective `(kind, DiffKind)`.
1791///
1792/// Classification goes through `worktree_side_kind` (`symlink_metadata`, no
1793/// link following), so only a *real* directory triggers the downgrade. A
1794/// regular file replaced by a symlink *pointing at* a directory reports
1795/// `Symlink`, stays a `modified` entry, and is split into delete+add by
1796/// `expand_type_changes` — `Path::is_dir()` would have followed the link,
1797/// misread it as a directory, and dropped the `120000` add (cid 3320033195).
1798fn worktree_modified_type_change(
1799    repo_root: &Path,
1800    path: &str,
1801    diff_kind: DiffKind,
1802) -> Option<(&'static str, DiffKind)> {
1803    if matches!(diff_kind, DiffKind::Modified)
1804        && worktree_side_kind(&repo_root.join(path)) == SideKind::Dir
1805    {
1806        Some(("deleted", DiffKind::Deleted))
1807    } else {
1808        None
1809    }
1810}
1811
1812fn read_worktree_blob_for_diff(path: &std::path::Path) -> Result<Blob> {
1813    let metadata = std::fs::symlink_metadata(path)?;
1814    if metadata.file_type().is_symlink() {
1815        let target = std::fs::read_link(path)?;
1816        return Ok(Blob::new(objects::util::symlink_target_bytes(&target)));
1817    }
1818    Ok(Blob::new(std::fs::read(path)?))
1819}
1820
1821fn is_symlink_mode(mode: Option<FileMode>) -> bool {
1822    matches!(mode, Some(FileMode::Symlink))
1823}
1824
1825/// Whether each side of a change is a symlink, resolved per `kind`. The mode
1826/// fields' meaning is kind-dependent: an `added`/`deleted` change carries the
1827/// present side's mode in `mode` (with `old_mode == None` even for a delete,
1828/// where `mode` is the *deleted* file's mode — see `change_file_modes`),
1829/// while a `modified`/`renamed` change carries `old_mode` + `mode` per side.
1830/// Reading `old_mode`/`mode` blindly would miss a deleted symlink (whose
1831/// old-side mode lives in `mode`, not `old_mode`).
1832fn symlink_sides(kind: &str, old_mode: Option<FileMode>, mode: Option<FileMode>) -> (bool, bool) {
1833    match kind {
1834        "added" => (false, is_symlink_mode(mode)),
1835        "deleted" => (is_symlink_mode(mode), false),
1836        _ => (is_symlink_mode(old_mode), is_symlink_mode(mode)),
1837    }
1838}
1839
1840/// The single byte-preserving extraction of symlink target content for one
1841/// change. A symlink's git blob *is* its raw target bytes, so the renderer
1842/// reconstructs the patch hunk from these directly — never through
1843/// `content_str()`/`diff_blobs` (which require UTF-8) and never as a
1844/// placeholder-binary stanza (which `git apply` rejects for a `120000`
1845/// entry). A side's bytes are taken only when that side's mode is a symlink:
1846/// `old`/`new` mirror the change's two sides (an add has no old side, a
1847/// delete no new side, a target-edit/rename both). Returns `None` when
1848/// neither side is a symlink, leaving the change to render as ordinary text.
1849fn make_symlink_change(old: Option<Vec<u8>>, new: Option<Vec<u8>>) -> Option<SymlinkChange> {
1850    (old.is_some() || new.is_some()).then_some(SymlinkChange { old, new })
1851}
1852
1853/// Build the symlink content from blobs already in hand (the plain-Git path,
1854/// which loads both sides up front). `blob.content()` is the raw target bytes
1855/// for a symlink entry, so no lossy conversion ever occurs.
1856fn symlink_change_from_blobs(
1857    kind: &str,
1858    old_blob: Option<&Blob>,
1859    old_mode: Option<FileMode>,
1860    new_blob: Option<&Blob>,
1861    mode: Option<FileMode>,
1862) -> Option<SymlinkChange> {
1863    let (old_is_link, new_is_link) = symlink_sides(kind, old_mode, mode);
1864    let old = old_is_link
1865        .then(|| old_blob.map(|blob| blob.content().to_vec()))
1866        .flatten();
1867    let new = new_is_link
1868        .then(|| new_blob.map(|blob| blob.content().to_vec()))
1869        .flatten();
1870    make_symlink_change(old, new)
1871}
1872
1873/// Build the symlink content for a heddle-overlay change by loading each
1874/// side's blob through the same loaders the hunk path uses
1875/// (`blob_from_tree` for a tree side, `new_blob_for_rename` for the new side,
1876/// which reads the live worktree via `read_worktree_blob_for_diff` when
1877/// `to_tree` is `None`). `to_tree == None` means the new side is the live
1878/// worktree. `old_path`/`new_path` differ only for a rename.
1879#[allow(clippy::too_many_arguments)]
1880fn symlink_change_for_paths(
1881    repo: &Repository,
1882    from_tree: Option<&Tree>,
1883    to_tree: Option<&Tree>,
1884    kind: &str,
1885    old_path: &str,
1886    new_path: &str,
1887    old_mode: Option<FileMode>,
1888    mode: Option<FileMode>,
1889) -> Option<SymlinkChange> {
1890    let (old_is_link, new_is_link) = symlink_sides(kind, old_mode, mode);
1891    let old = old_is_link
1892        .then(|| blob_from_tree(repo, from_tree, old_path).ok().flatten())
1893        .flatten()
1894        .map(|blob| blob.content().to_vec());
1895    let new = new_is_link
1896        .then(|| new_blob_for_rename(repo, to_tree, new_path).ok().flatten())
1897        .flatten()
1898        .map(|blob| blob.content().to_vec());
1899    make_symlink_change(old, new)
1900}
1901fn detect_clear_renames(
1902    repo: &Repository,
1903    from_tree: Option<&Tree>,
1904    to_tree: Option<&Tree>,
1905    changes: Vec<FileChange>,
1906    include_lines: bool,
1907    unified: usize,
1908) -> Result<Vec<FileChange>> {
1909    let deleted = changes
1910        .iter()
1911        .filter(|change| change.kind == "deleted")
1912        .map(|change| change.path.as_str())
1913        .collect::<Vec<_>>();
1914    let added = changes
1915        .iter()
1916        .filter(|change| change.kind == "added")
1917        .map(|change| change.path.as_str())
1918        .collect::<Vec<_>>();
1919    if deleted.is_empty() || added.is_empty() {
1920        return Ok(changes);
1921    }
1922
1923    // Snapshot each side's git mode so a candidate can be rejected when the
1924    // deleted and added sides differ in git *type class* (regular vs
1925    // symlink). git never renames across a type boundary: `git apply`
1926    // rejects a `rename from/to` whose `old mode`/`new mode` cross S_IFMT
1927    // (e.g. `100644` → `120000`). Such a pair must stay a delete + add,
1928    // which the cross-path delete/add rendering already round-trips. A
1929    // regular↔executable move stays *within* the regular class, so it is
1930    // intentionally still collapsible — git emits it as a rename with an
1931    // `old mode`/`new mode` pair that `git apply` accepts.
1932    let deleted_side_modes = changes
1933        .iter()
1934        .filter(|change| change.kind == "deleted")
1935        .map(|change| (change.path.as_str(), change.mode))
1936        .collect::<std::collections::BTreeMap<&str, Option<FileMode>>>();
1937    let added_side_modes = changes
1938        .iter()
1939        .filter(|change| change.kind == "added")
1940        .map(|change| (change.path.as_str(), change.mode))
1941        .collect::<std::collections::BTreeMap<&str, Option<FileMode>>>();
1942
1943    let mut candidates = Vec::new();
1944    for old_path in &deleted {
1945        let Some(old_blob) = blob_from_tree(repo, from_tree, old_path)? else {
1946            continue;
1947        };
1948        for new_path in &added {
1949            // A delete + add at the *same* path is a type change
1950            // (regular ↔ symlink), not a rename — `expand_type_changes`
1951            // emits both halves and collapsing them back into a
1952            // `foo → foo` rename would drop the type swap.
1953            if old_path == new_path {
1954                continue;
1955            }
1956            // A cross-*type* move (regular ↔ symlink) at different paths is
1957            // never a rename either: collapsing it would emit a rename
1958            // header carrying a mismatched `old mode`/`new mode`, which
1959            // `git apply` rejects. Leave the pair as a separate delete +
1960            // add. (Regular↔executable stays compatible — see the
1961            // mode-snapshot comment above.)
1962            if !rename_mode_compatible(
1963                deleted_side_modes.get(old_path).copied().flatten(),
1964                added_side_modes.get(new_path).copied().flatten(),
1965            ) {
1966                continue;
1967            }
1968            let Some(new_blob) = new_blob_for_rename(repo, to_tree, new_path)? else {
1969                continue;
1970            };
1971            let score = rename_similarity(&old_blob, &new_blob);
1972            if score >= 0.75 {
1973                candidates.push((score, (*old_path).to_string(), (*new_path).to_string()));
1974            }
1975        }
1976    }
1977
1978    candidates.sort_by(|left, right| {
1979        right
1980            .0
1981            .total_cmp(&left.0)
1982            .then_with(|| left.1.cmp(&right.1))
1983            .then_with(|| left.2.cmp(&right.2))
1984    });
1985
1986    let mut used_old = BTreeSet::new();
1987    let mut used_new = BTreeSet::new();
1988    let mut renames: Vec<(String, String, f64)> = Vec::new();
1989    for (score, old_path, new_path) in candidates {
1990        if used_old.insert(old_path.clone()) && used_new.insert(new_path.clone()) {
1991            renames.push((old_path, new_path, score));
1992        }
1993    }
1994    if renames.is_empty() {
1995        return Ok(changes);
1996    }
1997
1998    let rename_by_new = renames
1999        .iter()
2000        .map(|(old_path, new_path, score)| (new_path.as_str(), (old_path.as_str(), *score)))
2001        .collect::<std::collections::BTreeMap<_, _>>();
2002    let removed_old = renames
2003        .iter()
2004        .map(|(old_path, _, _)| old_path.as_str())
2005        .collect::<BTreeSet<_>>();
2006    // The deleted entry (whose `mode` carries the rename's *old-side*
2007    // mode) is dropped below, so snapshot old-side modes keyed by path
2008    // first. A rename paired with a chmod/type change (`old.sh` -> `new.sh`
2009    // made executable) needs both modes on the collapsed `renamed` change
2010    // so the renderer can emit `old mode`/`new mode`.
2011    let deleted_modes = changes
2012        .iter()
2013        .filter(|change| change.kind == "deleted")
2014        .map(|change| (change.path.clone(), change.mode))
2015        .collect::<std::collections::BTreeMap<String, Option<FileMode>>>();
2016
2017    let mut output = Vec::with_capacity(changes.len() - renames.len());
2018    for mut change in changes {
2019        if change.kind == "deleted" && removed_old.contains(change.path.as_str()) {
2020            continue;
2021        }
2022        if change.kind == "added"
2023            && let Some((old_path, score)) = rename_by_new.get(change.path.as_str()).copied()
2024        {
2025            let (lines, eol) = if include_lines {
2026                match rename_lines(repo, from_tree, to_tree, old_path, &change.path, unified) {
2027                    Ok(Some((lines, eol))) => (Some(lines), eol),
2028                    Ok(None) => (None, FileEolState::default()),
2029                    Err(error) if is_binary_diff_error(&error) => {
2030                        change.binary = true;
2031                        (None, FileEolState::default())
2032                    }
2033                    Err(error) => return Err(error),
2034                }
2035            } else {
2036                (None, FileEolState::default())
2037            };
2038            change.kind = "renamed".to_string();
2039            change.old_path = Some(old_path.to_string());
2040            change.similarity_score = Some(score);
2041            change.lines = lines;
2042            change.eol = eol;
2043            // `change.mode` already holds the added (new) side mode; pull
2044            // the deleted (old) side mode off the snapshot so a rename+chmod
2045            // surfaces both modes in the patch headers.
2046            change.old_mode = deleted_modes.get(old_path).copied().flatten();
2047            // A symlink↔symlink rename (the only symlink move that collapses;
2048            // `rename_mode_compatible` keeps regular↔symlink as delete+add)
2049            // must carry byte-preserving target content so the renderer emits
2050            // a target-bytes hunk for a non-UTF-8 link instead of a binary
2051            // marker. Load both sides through the same loaders the rename
2052            // similarity used.
2053            change.symlink = symlink_change_for_paths(
2054                repo,
2055                from_tree,
2056                to_tree,
2057                "renamed",
2058                old_path,
2059                &change.path,
2060                change.old_mode,
2061                change.mode,
2062            );
2063            if change.symlink.is_some() {
2064                change.binary = false;
2065            }
2066            // The original `added` carried a stat-path tally that
2067            // counted the file as a pure insertion; after we collapse
2068            // the (added, deleted) pair into one rename, those line
2069            // counts double-count the move. Drop them so DiffStats
2070            // falls back to walking the (possibly None) `lines`
2071            // payload chosen above.
2072            change.line_counts = None;
2073        }
2074        output.push(change);
2075    }
2076    Ok(output)
2077}
2078
2079fn rename_lines(
2080    repo: &Repository,
2081    from_tree: Option<&Tree>,
2082    to_tree: Option<&Tree>,
2083    old_path: &str,
2084    new_path: &str,
2085    unified: usize,
2086) -> Result<Option<(Vec<LineDiff>, FileEolState)>> {
2087    let Some(old_blob) = blob_from_tree(repo, from_tree, old_path)? else {
2088        return Ok(None);
2089    };
2090    let Some(new_blob) = new_blob_for_rename(repo, to_tree, new_path)? else {
2091        return Ok(None);
2092    };
2093    ensure_text_diffable(&old_blob)?;
2094    ensure_text_diffable(&new_blob)?;
2095    let eol = eol_for_modified(&old_blob, &new_blob);
2096    let diff = diff_blobs(&old_blob, &new_blob);
2097    let lines = diff
2098        .iter()
2099        .map(|line| LineDiff::new(line.prefix(), line.content()))
2100        .collect();
2101    Ok(Some((
2102        unified_hunks(number_lines(lines), unified, &eol),
2103        eol,
2104    )))
2105}
2106
2107fn blob_from_tree(repo: &Repository, tree: Option<&Tree>, path: &str) -> Result<Option<Blob>> {
2108    let Some(tree) = tree else {
2109        return Ok(None);
2110    };
2111    find_blob_in_tree(repo, tree, path)
2112}
2113
2114fn new_blob_for_rename(
2115    repo: &Repository,
2116    to_tree: Option<&Tree>,
2117    path: &str,
2118) -> Result<Option<Blob>> {
2119    if let Some(tree) = to_tree {
2120        return find_blob_in_tree(repo, tree, path);
2121    }
2122
2123    // Rename similarity must compare the bytes git would store as the blob,
2124    // per entry type: a regular file → its content, a symlink → its target
2125    // *path* bytes. `read_worktree_blob_for_diff` branches on the entry type
2126    // (`read_link` for symlinks, `read` for files) — a blind `std::fs::read`
2127    // here would *follow* a symlink and score the dereferenced target file's
2128    // content, collapsing a symlink move into a wrong-target rename whose
2129    // patch leaves the old link target after `git apply` (cid 3322115749).
2130    let worktree_path = repo.root().join(path);
2131    match std::fs::symlink_metadata(&worktree_path) {
2132        Ok(_) => Ok(Some(read_worktree_blob_for_diff(&worktree_path)?)),
2133        Err(error) if error.kind() == std::io::ErrorKind::NotFound => Ok(None),
2134        Err(error) => Err(error.into()),
2135    }
2136}
2137
2138/// Whether a delete + add can be collapsed into a single `renamed` change
2139/// given the two sides' git file modes. git only renames *within* one
2140/// S_IFMT type class: regular files (`100644`) and executables (`100755`)
2141/// share the regular-file type, so a move between them renders as a rename
2142/// with an `old mode`/`new mode` pair that `git apply` accepts; a symlink
2143/// (`120000`) is a distinct type, so a regular↔symlink move is never a
2144/// rename — `git apply` rejects a `rename from/to` whose `new mode
2145/// (120000)` doesn't match its `old mode (100644)`. A missing mode falls
2146/// back to the regular-file default the renderer also assumes.
2147fn rename_mode_compatible(old: Option<FileMode>, new: Option<FileMode>) -> bool {
2148    let is_symlink = |mode: Option<FileMode>| matches!(mode, Some(FileMode::Symlink));
2149    is_symlink(old) == is_symlink(new)
2150}
2151
2152fn rename_similarity(old_blob: &Blob, new_blob: &Blob) -> f64 {
2153    if old_blob.content() == new_blob.content() {
2154        return 1.0;
2155    }
2156    let (Some(old_text), Some(new_text)) = (old_blob.content_str(), new_blob.content_str()) else {
2157        return 0.0;
2158    };
2159    if old_text.chars().any(is_terminal_hostile_control)
2160        || new_text.chars().any(is_terminal_hostile_control)
2161    {
2162        return 0.0;
2163    }
2164    let old_lines = old_text.lines().collect::<Vec<_>>();
2165    let new_lines = new_text.lines().collect::<Vec<_>>();
2166    if old_lines.is_empty() || new_lines.is_empty() {
2167        return 0.0;
2168    }
2169    let shared = lcs_len(&old_lines, &new_lines);
2170    (shared * 2) as f64 / (old_lines.len() + new_lines.len()) as f64
2171}
2172
/// Length of the longest common subsequence of two line lists.
///
/// Uses the two-row dynamic-programming table: `prev_row[c]` holds the LCS
/// length of the lines processed so far against the first `c` lines of
/// `right`. Column 0 is never written, so it stays 0 in both rows.
fn lcs_len(left: &[&str], right: &[&str]) -> usize {
    let width = right.len() + 1;
    let mut prev_row = vec![0usize; width];
    let mut row = vec![0usize; width];

    for &left_line in left {
        for (col, &right_line) in right.iter().enumerate() {
            row[col + 1] = if left_line == right_line {
                prev_row[col] + 1
            } else {
                prev_row[col + 1].max(row[col])
            };
        }
        // The finished row becomes the reference for the next line; every
        // cell past column 0 of the stale row is overwritten before it is
        // read again.
        std::mem::swap(&mut prev_row, &mut row);
    }

    prev_row[width - 1]
}
2189
2190/// Render line-level diff for a path between two stored states.
2191///
2192/// Sister of `get_worktree_diff`, but every blob is loaded from the
2193/// heddle object store via `find_blob_in_tree` rather than from the
2194/// live filesystem — which is why this can run from anywhere (not just
2195/// the current worktree) and why it Just Works for `heddle diff
2196/// <thread-a> <thread-b>`.
2197///
2198/// Returns the same `Vec<LineDiff>` shape `print_diff` already knows
2199/// how to render, so the only renderer change for state-to-state diffs
2200/// is "stop falling through to the binary-file catch-all."
2201fn get_state_diff(
2202    repo: &Repository,
2203    from_tree: Option<&Tree>,
2204    to_tree: &Tree,
2205    path: &str,
2206    kind: &DiffKind,
2207) -> Result<(Vec<LineDiff>, FileEolState)> {
2208    match kind {
2209        DiffKind::Added => {
2210            let Some(new_blob) = find_blob_in_tree(repo, to_tree, path)? else {
2211                return Ok((Vec::new(), FileEolState::default()));
2212            };
2213            let eol = eol_for_added(&new_blob);
2214            Ok((number_lines(blob_lines(&new_blob, "+")?), eol))
2215        }
2216        DiffKind::Deleted => {
2217            let Some(tree) = from_tree else {
2218                return Ok((Vec::new(), FileEolState::default()));
2219            };
2220            let Some(old_blob) = find_blob_in_tree(repo, tree, path)? else {
2221                return Ok((Vec::new(), FileEolState::default()));
2222            };
2223            let eol = eol_for_deleted(&old_blob);
2224            Ok((number_lines(blob_lines(&old_blob, "-")?), eol))
2225        }
2226        DiffKind::Modified => {
2227            let Some(new_blob) = find_blob_in_tree(repo, to_tree, path)? else {
2228                return Ok((Vec::new(), FileEolState::default()));
2229            };
2230            if let Some(tree) = from_tree
2231                && let Some(old_blob) = find_blob_in_tree(repo, tree, path)?
2232            {
2233                return modified_blob_hunks(&old_blob, &new_blob);
2234            }
2235            // No corresponding blob in `from_tree` — render as all-new.
2236            let eol = eol_for_added(&new_blob);
2237            Ok((number_lines(blob_lines(&new_blob, "+")?), eol))
2238        }
2239        DiffKind::Unchanged => Ok((Vec::new(), FileEolState::default())),
2240    }
2241}
2242
2243/// Trailing-newline state for a one-sided change (added or deleted).
2244/// The absent side is reported as "has newline" so the patch renderer
2245/// never tries to emit a marker for content that doesn't exist.
2246fn eol_for_added(new_blob: &Blob) -> FileEolState {
2247    let (new_eol, new_count) = blob_eol_meta(new_blob);
2248    FileEolState {
2249        old_has_final_newline: true,
2250        new_has_final_newline: new_eol,
2251        old_line_count: 0,
2252        new_line_count: new_count,
2253    }
2254}
2255
2256fn eol_for_deleted(old_blob: &Blob) -> FileEolState {
2257    let (old_eol, old_count) = blob_eol_meta(old_blob);
2258    FileEolState {
2259        old_has_final_newline: old_eol,
2260        new_has_final_newline: true,
2261        old_line_count: old_count,
2262        new_line_count: 0,
2263    }
2264}
2265
2266fn eol_for_modified(old_blob: &Blob, new_blob: &Blob) -> FileEolState {
2267    let (old_eol, old_count) = blob_eol_meta(old_blob);
2268    let (new_eol, new_count) = blob_eol_meta(new_blob);
2269    FileEolState {
2270        old_has_final_newline: old_eol,
2271        new_has_final_newline: new_eol,
2272        old_line_count: old_count,
2273        new_line_count: new_count,
2274    }
2275}
2276
2277/// `diff_blobs` strips line terminators before the renderer sees the
2278/// hunks, so the per-side trailing-newline state has to come from the
2279/// raw blob bytes. Empty blobs are treated as "no marker needed":
2280/// there's nothing to lack a newline.
2281fn blob_eol_meta(blob: &Blob) -> (bool, usize) {
2282    let content = blob.content();
2283    if content.is_empty() {
2284        return (true, 0);
2285    }
2286    let has_eol = content.ends_with(b"\n");
2287    let line_count = blob
2288        .content_str()
2289        .map(|text| text.lines().count())
2290        .unwrap_or(0);
2291    (has_eol, line_count)
2292}
2293
2294fn blob_lines(blob: &Blob, prefix: &str) -> Result<Vec<LineDiff>> {
2295    let text = text_diff_content(blob)?;
2296    Ok(text
2297        .lines()
2298        .map(|line| LineDiff::new(prefix, line))
2299        .collect())
2300}
2301
2302/// Compute the `(lines, eol)` for a `modified` pair of blobs, applying the
2303/// identical-content short-circuit shared by every diff-rendering path.
2304///
2305/// When the two blobs carry identical bytes the change is a pure mode flip
2306/// (chmod / exec-bit), even on a binary file: returning an empty body routes
2307/// the renderer through the `old mode`/`new mode` header instead of the
2308/// binary-refusal branch, so a binary chmod-only round-trips through `git
2309/// apply` rather than emitting a placeholder binary patch git rejects.
2310///
2311/// Both heddle-backed paths (`get_worktree_diff`, `get_state_diff`) and the
2312/// plain-Git fast path (`compute_plain_git_hunks`) call this, so the
2313/// short-circuit + text-diff decision lives in exactly one place — a binary
2314/// chmod-only behaves identically regardless of backend (cid 3320033191).
2315fn modified_blob_hunks(old: &Blob, new: &Blob) -> Result<(Vec<LineDiff>, FileEolState)> {
2316    if old.content() == new.content() {
2317        return Ok((Vec::new(), FileEolState::default()));
2318    }
2319    ensure_text_diffable(old)?;
2320    ensure_text_diffable(new)?;
2321    let eol = eol_for_modified(old, new);
2322    let diff = diff_blobs(old, new);
2323    let lines = diff
2324        .iter()
2325        .map(|l| LineDiff::new(l.prefix(), l.content()))
2326        .collect();
2327    Ok((number_lines(lines), eol))
2328}
2329
2330fn ensure_text_diffable(blob: &Blob) -> Result<()> {
2331    text_diff_content(blob).map(|_| ())
2332}
2333
2334fn text_diff_content(blob: &Blob) -> Result<&str> {
2335    let Some(text) = blob.content_str() else {
2336        return Err(anyhow!(BINARY_DIFF_ERROR));
2337    };
2338    if text.chars().any(is_terminal_hostile_control) {
2339        return Err(anyhow!(BINARY_DIFF_ERROR));
2340    }
2341    Ok(text)
2342}
2343
2344fn is_binary_diff_error(error: &anyhow::Error) -> bool {
2345    error.to_string() == BINARY_DIFF_ERROR
2346}
2347
/// Whether `ch` is a control character other than newline or tab.
///
/// Note that a carriage return (`'\r'`) counts as hostile.
fn is_terminal_hostile_control(ch: char) -> bool {
    match ch {
        '\n' | '\t' => false,
        other => other.is_control(),
    }
}
2351
2352fn number_lines(lines: Vec<LineDiff>) -> Vec<LineDiff> {
2353    let mut old_line = 1usize;
2354    let mut new_line = 1usize;
2355
2356    lines
2357        .into_iter()
2358        .map(|line| {
2359            let old = if line.prefix != "+" {
2360                let current = Some(old_line);
2361                old_line += 1;
2362                current
2363            } else {
2364                None
2365            };
2366            let new = if line.prefix != "-" {
2367                let current = Some(new_line);
2368                new_line += 1;
2369                current
2370            } else {
2371                None
2372            };
2373            LineDiff::with_lines(line.prefix, line.content, old, new)
2374        })
2375        .collect()
2376}
2377
2378fn find_blob_in_tree(repo: &Repository, tree: &Tree, path: &str) -> Result<Option<Blob>> {
2379    match find_entry_in_tree(repo, tree, path)? {
2380        Some(entry) => match entry.content_hash() {
2381            Some(hash) if entry.is_blob() || entry.is_symlink() => {
2382                Ok(Some(repo.require_blob(&hash)?))
2383            }
2384            _ => Ok(None),
2385        },
2386        None => Ok(None),
2387    }
2388}
2389
2390/// Resolve a path to its `TreeEntry`, descending through subtrees.
2391///
2392/// `Tree::get` binary-searches a single tree's direct children only, so
2393/// a nested path like `src/nested/file.txt` must be walked component by
2394/// component — a root-level `tree.get("src/nested/file.txt")` always
2395/// misses. Returns the entry for a blob or symlink leaf; `None` for a
2396/// missing path or a directory leaf.
2397fn find_entry_in_tree(repo: &Repository, tree: &Tree, path: &str) -> Result<Option<TreeEntry>> {
2398    let parts: Vec<&str> = path.split('/').collect();
2399    find_entry_recursive(repo, tree, &parts)
2400}
2401
2402fn find_entry_recursive(
2403    repo: &Repository,
2404    tree: &Tree,
2405    parts: &[&str],
2406) -> Result<Option<TreeEntry>> {
2407    if parts.is_empty() {
2408        return Ok(None);
2409    }
2410
2411    let name = parts[0];
2412    let entry = match tree.get(name) {
2413        Some(e) => e,
2414        None => return Ok(None),
2415    };
2416
2417    if parts.len() == 1 {
2418        if entry.is_blob() || entry.entry_type() == EntryType::Symlink || entry.is_gitlink() {
2419            return Ok(Some(entry.clone()));
2420        }
2421    } else if entry.is_tree()
2422        && let Some(hash) = entry.tree_hash()
2423        && let Some(subtree) = repo.store().get_tree(&hash)?
2424    {
2425        return find_entry_recursive(repo, &subtree, &parts[1..]);
2426    }
2427
2428    Ok(None)
2429}
2430
2431/// Resolve a worktree path's git file mode for patch headers. A symlink
2432/// reports `120000`; a regular file with any executable bit set reports
2433/// `100755`; everything else `100644`. Read failures fall back to `None`
2434/// (the renderer then emits the regular-file default).
2435fn worktree_file_mode(path: &Path) -> Option<FileMode> {
2436    let metadata = std::fs::symlink_metadata(path).ok()?;
2437    if metadata.file_type().is_symlink() {
2438        return Some(FileMode::Symlink);
2439    }
2440    #[cfg(unix)]
2441    {
2442        use std::os::unix::fs::PermissionsExt;
2443        if metadata.permissions().mode() & 0o111 != 0 {
2444            return Some(FileMode::Executable);
2445        }
2446    }
2447    Some(FileMode::Normal)
2448}
2449
2450/// Resolve the `(old_mode, mode)` pair the patch renderer stamps on a
2451/// change. `mode` is the field the renderer reads for `new file mode`
2452/// (adds) / `deleted file mode` (deletes); `old_mode` pairs with it on a
2453/// `modified` change so a chmod surfaces as `old mode`/`new mode`.
2454///
2455/// * **added** — `(None, new-side mode)`: the `to_tree` entry for a
2456///   state-to-state diff, otherwise the live worktree.
2457/// * **deleted** — `(None, old-side mode)`: the `from_tree` entry's mode
2458///   carried in `mode` for the `deleted file mode` header.
2459/// * **modified** — `(old-side mode, new-side mode)`: `from_tree` entry
2460///   vs. the `to_tree` entry (state diff) or live worktree.
2461/// * anything else — `(None, None)`.
2462fn change_file_modes(
2463    repo: &Repository,
2464    from_tree: Option<&Tree>,
2465    to_tree: Option<&Tree>,
2466    path: &str,
2467    kind: &str,
2468) -> (Option<FileMode>, Option<FileMode>) {
2469    let old_side = || {
2470        from_tree
2471            .and_then(|tree| find_entry_in_tree(repo, tree, path).ok().flatten())
2472            .map(|entry| entry.mode())
2473    };
2474    let new_side = || match to_tree {
2475        Some(tree) => find_entry_in_tree(repo, tree, path)
2476            .ok()
2477            .flatten()
2478            .map(|entry| entry.mode()),
2479        None => worktree_file_mode(&repo.root().join(path)),
2480    };
2481    match kind {
2482        "added" => (None, new_side()),
2483        "deleted" => (None, old_side()),
2484        "modified" => (old_side(), new_side()),
2485        _ => (None, None),
2486    }
2487}
2488
2489#[cfg(test)]
2490mod tests {
2491    use super::{
2492        DiffStats, FileChange, FileEolState, LineCounts, LineDiff, change_line_counts,
2493        summarize_context, unified_hunks,
2494    };
2495
2496    fn stat_change(kind: &str, counts: LineCounts) -> FileChange {
2497        FileChange {
2498            path: "notes.txt".to_string(),
2499            kind: kind.to_string(),
2500            line_counts: Some(counts),
2501            ..Default::default()
2502        }
2503    }
2504
2505    /// The stat-only branch is supposed to count once and then drop
2506    /// the hunk vector. `DiffStats` must read the pre-computed tally
2507    /// off the FileChange so a 10MB diff renders as
2508    /// "1 files changed, 1 additions, 0 modifications" even though
2509    /// `lines` is `None`. Regressing this re-introduces the cheap-
2510    /// branch behaviour that treated the file like name-only.
2511    #[test]
2512    fn diff_stats_reads_line_counts_when_hunks_dropped() {
2513        let changes = vec![stat_change(
2514            "modified",
2515            LineCounts {
2516                added: 1,
2517                modified: 0,
2518                deleted: 0,
2519            },
2520        )];
2521
2522        let stats = DiffStats::from_changes(&changes, None);
2523
2524        assert_eq!(stats.files_changed, 1);
2525        assert_eq!(stats.additions, 1);
2526        assert_eq!(stats.modifications, 0);
2527        assert_eq!(stats.deletions, 0);
2528        assert_eq!(stats.renames, 0);
2529    }
2530
2531    /// The file-level kind fallback must not fire when a stat-path
2532    /// FileChange has an empty `line_counts` payload — empty means
2533    /// "we counted and there were no eligible lines" (the binary or
2534    /// empty-diff case), not "we never counted".
2535    #[test]
2536    fn diff_stats_treats_zero_line_counts_as_authoritative() {
2537        let changes = vec![stat_change(
2538            "modified",
2539            LineCounts {
2540                added: 0,
2541                modified: 0,
2542                deleted: 0,
2543            },
2544        )];
2545
2546        let stats = DiffStats::from_changes(&changes, None);
2547
2548        assert_eq!(stats.modifications, 0);
2549        assert_eq!(stats.additions, 0);
2550        assert_eq!(stats.deletions, 0);
2551    }
2552
2553    /// Sanity-check the underlying counter so the stat closure that
2554    /// feeds `line_counts` produces matching output.
2555    #[test]
2556    fn change_line_counts_pairs_modified_lines() {
2557        let lines = vec![
2558            LineDiff::with_lines("-", "alpha", Some(1), None),
2559            LineDiff::with_lines("+", "alpha-changed", None, Some(1)),
2560            LineDiff::with_lines("+", "fresh", None, Some(2)),
2561        ];
2562        let counts = change_line_counts(Some(&lines));
2563        assert_eq!(counts.modified, 1);
2564        assert_eq!(counts.added, 1);
2565        assert_eq!(counts.deleted, 0);
2566    }
2567
2568    /// The canonical hunk body (the one `--patch`/JSON consume) must keep
2569    /// every real `+` line, including a leading `+#[test]` decoration that
2570    /// duplicates a following context line. Dropping it here desyncs the
2571    /// `@@` header counts and corrupts `git apply` (cid 3320364905) — the
2572    /// trim is now a display-only transform, not a property of the model.
2573    #[test]
2574    fn unified_hunks_keeps_added_decoration_in_canonical_body() {
2575        let lines = vec![
2576            LineDiff::with_lines("+", "#[test]", None, Some(1)),
2577            LineDiff::with_lines("+", "fn added() {}", None, Some(2)),
2578            LineDiff::with_lines(" ", "#[test]", Some(1), Some(3)),
2579            LineDiff::with_lines(" ", "fn existing() {}", Some(2), Some(4)),
2580        ];
2581
2582        let hunk = unified_hunks(lines, 3, &FileEolState::default());
2583
2584        let header = hunk
2585            .iter()
2586            .find(|line| line.prefix == "@")
2587            .expect("hunk should carry an `@@` header");
2588        // Two added (`+`) lines + two context lines on the new side → +4.
2589        assert_eq!(
2590            header.content, "@ -1,2 +1,4 @@",
2591            "header counts must match the untrimmed body: {hunk:?}"
2592        );
2593        assert!(
2594            hunk.iter()
2595                .any(|line| line.prefix == "+" && line.content == "#[test]"),
2596            "added decoration line must survive in the canonical body: {hunk:?}"
2597        );
2598        assert!(
2599            hunk.iter()
2600                .any(|line| line.prefix == "+" && line.content == "fn added() {}"),
2601            "added function body should remain: {hunk:?}"
2602        );
2603    }
2604
2605    /// The display transform DOES trim the leading `+#[test]` so the
2606    /// pretty diff anchors on the existing item — but only the body lines
2607    /// move; the `@@` header (untrimmed counts) is preserved verbatim.
2608    #[test]
2609    fn display_trim_drops_added_decoration_but_keeps_header() {
2610        use super::trim_added_decorations_for_display;
2611
2612        let lines = vec![
2613            LineDiff::with_lines("+", "#[test]", None, Some(1)),
2614            LineDiff::with_lines("+", "fn added() {}", None, Some(2)),
2615            LineDiff::with_lines(" ", "#[test]", Some(1), Some(3)),
2616            LineDiff::with_lines(" ", "fn existing() {}", Some(2), Some(4)),
2617        ];
2618        let hunk = unified_hunks(lines, 3, &FileEolState::default());
2619
2620        let display = trim_added_decorations_for_display(&hunk);
2621
2622        assert!(
2623            display
2624                .iter()
2625                .filter(|line| line.content == "#[test]")
2626                .all(|line| line.prefix == " "),
2627            "display trim should let existing context own the decoration: {display:?}"
2628        );
2629        assert!(
2630            display
2631                .iter()
2632                .any(|line| line.prefix == "+" && line.content == "fn added() {}"),
2633            "added function body should remain after display trim: {display:?}"
2634        );
2635        assert_eq!(
2636            display
2637                .iter()
2638                .find(|line| line.prefix == "@")
2639                .map(|l| l.content.as_str()),
2640            Some("@ -1,2 +1,4 @@"),
2641            "display trim must not rewrite the `@@` header: {display:?}"
2642        );
2643    }
2644
2645    /// Byte-index truncation at 85 panicked when a multi-byte code point straddled
2646    /// that offset (HEDDLE-DR-7 / #879). Summaries must truncate on char boundaries.
2647    #[test]
2648    fn summarize_context_truncates_on_char_boundary_not_byte_index() {
2649        let first_line = format!("{}中中", "a".repeat(83));
2650        assert!(first_line.len() > 88);
2651        assert!(!first_line.is_char_boundary(85));
2652
2653        let summary = summarize_context(&format!("{first_line}\nsecond line"));
2654        assert_eq!(summary, first_line);
2655    }
2656
2657    #[test]
2658    fn summarize_context_char_cap_truncates_multibyte_line() {
2659        let first_line = format!("{}中中中", "a".repeat(86));
2660        assert!(first_line.chars().count() > 88);
2661
2662        let summary = summarize_context(&first_line);
2663        let expected = format!("{}...", "a".repeat(85));
2664        assert_eq!(summary, expected);
2665    }
2666
2667    #[test]
2668    fn summarize_context_ascii_truncation_unchanged() {
2669        let line = "b".repeat(90);
2670        let summary = summarize_context(&line);
2671        assert_eq!(summary, format!("{}...", "b".repeat(85)));
2672    }
2673
2674    /// Characterization: core::diff maps minimal-policy not-found failures to
2675    /// [`RecoveryDetails::state_not_found`], not plain strings.
2676    #[test]
2677    fn minimal_resolve_failure_maps_to_recovery_state_not_found() {
2678        use objects::{RecoveryDetails, error::HeddleError};
2679        use repo::{
2680            ResolvePolicy, StateResolveError, StateResolveFailure, resolve_state_for_command,
2681        };
2682        use tempfile::TempDir;
2683
2684        let temp = TempDir::new().unwrap();
2685        let repo = repo::Repository::init_default(temp.path()).unwrap();
2686        std::fs::write(temp.path().join("a.txt"), "a").unwrap();
2687        repo.snapshot(Some("seed".into()), None).unwrap();
2688
2689        let err =
2690            resolve_state_for_command(&repo, "hd-zzzzzzzzzzzz", ResolvePolicy::minimal())
2691                .unwrap_err();
2692        let mapped = match err {
2693            StateResolveError::Failure(StateResolveFailure::NotFound { spec }) => {
2694                HeddleError::recovery(RecoveryDetails::state_not_found(spec))
2695            }
2696            other => panic!("expected not-found failure, got {other:?}"),
2697        };
2698        assert!(matches!(mapped, HeddleError::Recovery(_)));
2699        assert!(
2700            mapped.to_string().contains("State not found"),
2701            "unexpected message: {mapped}"
2702        );
2703    }
2704}