Skip to main content

grit_lib/
diff.rs

1//! Diff machinery — compare trees, index entries, and working tree files.
2//!
3//! # Overview
4//!
5//! This module provides the core diffing infrastructure shared by `diff`,
6//! `diff-index`, `status`, `log`, `show`, `commit`, and `merge`.
7//!
8//! ## Levels of comparison
9//!
10//! 1. **Tree-to-tree** — compare two tree objects (e.g. for `log`/`show`).
11//! 2. **Tree-to-index** — compare a tree (usually HEAD) against the index
12//!    (staged changes, used by `diff --cached` and `status`).
13//! 3. **Index-to-worktree** — compare index against the working directory
14//!    (unstaged changes, used by `diff` and `status`).
15//!
16//! ## Content diff
17//!
18//! Line-level diffing uses the Myers algorithm via the `similar` crate.
19//! Output formats: unified patch, raw (`:old-mode new-mode ...`), stat,
20//! numstat.
21
22use std::fs;
23use std::os::unix::fs::MetadataExt;
24use std::path::{Path, PathBuf};
25
26use crate::error::{Error, Result};
27use crate::index::{Index, IndexEntry};
28use crate::objects::{parse_tree, ObjectId, ObjectKind, TreeEntry};
29use crate::odb::Odb;
30use crate::userdiff::FuncnameMatcher;
31
32/// The kind of change between two sides of a diff.
33#[derive(Debug, Clone, Copy, PartialEq, Eq)]
34pub enum DiffStatus {
35    /// File was added.
36    Added,
37    /// File was deleted.
38    Deleted,
39    /// File was modified (content or mode change).
40    Modified,
41    /// File was renamed (with optional content change).
42    Renamed,
43    /// File was copied.
44    Copied,
45    /// File type changed (e.g. regular → symlink).
46    TypeChanged,
47    /// Unmerged (conflict).
48    Unmerged,
49}
50
51impl DiffStatus {
52    /// Single-character status letter used in raw diff output.
53    #[must_use]
54    pub fn letter(&self) -> char {
55        match self {
56            Self::Added => 'A',
57            Self::Deleted => 'D',
58            Self::Modified => 'M',
59            Self::Renamed => 'R',
60            Self::Copied => 'C',
61            Self::TypeChanged => 'T',
62            Self::Unmerged => 'U',
63        }
64    }
65}
66
67/// A single diff entry representing one changed path.
68#[derive(Debug, Clone)]
69pub struct DiffEntry {
70    /// The status of this change.
71    pub status: DiffStatus,
72    /// Path in the "old" side (None for Added).
73    pub old_path: Option<String>,
74    /// Path in the "new" side (None for Deleted).
75    pub new_path: Option<String>,
76    /// Old file mode (as octal string, e.g. "100644").
77    pub old_mode: String,
78    /// New file mode.
79    pub new_mode: String,
80    /// Old object ID (zero OID for Added).
81    pub old_oid: ObjectId,
82    /// New object ID (zero OID for Deleted).
83    pub new_oid: ObjectId,
84    /// Similarity score (0–100) for renames/copies.
85    pub score: Option<u32>,
86}
87
88impl DiffEntry {
89    /// The primary path for display (new_path for adds, old_path for deletes).
90    #[must_use]
91    pub fn path(&self) -> &str {
92        self.new_path
93            .as_deref()
94            .or(self.old_path.as_deref())
95            .unwrap_or("")
96    }
97
98    /// Return a human-oriented path display for this entry.
99    ///
100    /// For renames and copies this returns `old -> new`; for all other entry
101    /// kinds this returns the primary path.
102    #[must_use]
103    pub fn display_path(&self) -> String {
104        match self.status {
105            DiffStatus::Renamed | DiffStatus::Copied => {
106                let old = self.old_path.as_deref().unwrap_or("");
107                let new = self.new_path.as_deref().unwrap_or("");
108                if old.is_empty() || new.is_empty() {
109                    self.path().to_owned()
110                } else {
111                    format!("{old} -> {new}")
112                }
113            }
114            _ => self.path().to_owned(),
115        }
116    }
117}
118
119/// The zero (null) object ID used for "no object" in diff output.
120pub const ZERO_OID: &str = "0000000000000000000000000000000000000000";
121
122/// Return the zero ObjectId.
123#[must_use]
124pub fn zero_oid() -> ObjectId {
125    ObjectId::from_bytes(&[0u8; 20]).unwrap_or_else(|_| {
126        // This should never fail since we pass exactly 20 bytes
127        panic!("internal error: failed to create zero OID");
128    })
129}
130
131/// Return the ObjectId for the empty blob object.
132#[must_use]
133pub fn empty_blob_oid() -> ObjectId {
134    ObjectId::from_hex("e69de29bb2d1d6434b8b29ae775ad8c2e48c5391").unwrap_or_else(|_| {
135        // This should never fail since the object ID literal is valid.
136        panic!("internal error: failed to create empty blob OID");
137    })
138}
139
140// ── Tree-to-tree diff ───────────────────────────────────────────────
141
142/// Compare two trees and return the list of changed entries.
143///
144/// # Parameters
145///
146/// - `odb` — object database to read tree objects from.
147/// - `old_tree_oid` — OID of the old tree (or `None` for comparison against empty).
148/// - `new_tree_oid` — OID of the new tree (or `None` for comparison against empty).
149/// - `prefix` — path prefix for nested tree recursion (empty string for root).
150///
151/// # Errors
152///
153/// Returns errors from object database reads.
154pub fn diff_trees(
155    odb: &Odb,
156    old_tree_oid: Option<&ObjectId>,
157    new_tree_oid: Option<&ObjectId>,
158    prefix: &str,
159) -> Result<Vec<DiffEntry>> {
160    diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, false)
161}
162
163/// Like `diff_trees` but with `show_trees` flag: when true, emit entries for
164/// tree objects themselves in addition to their recursive contents (the `-t`
165/// flag of `diff-tree`).
166pub fn diff_trees_show_tree_entries(
167    odb: &Odb,
168    old_tree_oid: Option<&ObjectId>,
169    new_tree_oid: Option<&ObjectId>,
170    prefix: &str,
171) -> Result<Vec<DiffEntry>> {
172    diff_trees_opts(odb, old_tree_oid, new_tree_oid, prefix, true)
173}
174
175fn diff_trees_opts(
176    odb: &Odb,
177    old_tree_oid: Option<&ObjectId>,
178    new_tree_oid: Option<&ObjectId>,
179    prefix: &str,
180    show_trees: bool,
181) -> Result<Vec<DiffEntry>> {
182    let old_entries = match old_tree_oid {
183        Some(oid) => read_tree(odb, oid)?,
184        None => Vec::new(),
185    };
186    let new_entries = match new_tree_oid {
187        Some(oid) => read_tree(odb, oid)?,
188        None => Vec::new(),
189    };
190
191    let mut result = Vec::new();
192    diff_tree_entries_opts(
193        odb,
194        &old_entries,
195        &new_entries,
196        prefix,
197        show_trees,
198        &mut result,
199    )?;
200    Ok(result)
201}
202
203/// Read and parse a tree object from the ODB.
204fn read_tree(odb: &Odb, oid: &ObjectId) -> Result<Vec<TreeEntry>> {
205    let obj = odb.read(oid)?;
206    if obj.kind != ObjectKind::Tree {
207        return Err(Error::CorruptObject(format!(
208            "expected tree, got {}",
209            obj.kind.as_str()
210        )));
211    }
212    parse_tree(&obj.data)
213}
214
215/// Compare two sorted lists of tree entries, recursing into subtrees.
216fn diff_tree_entries_opts(
217    odb: &Odb,
218    old: &[TreeEntry],
219    new: &[TreeEntry],
220    prefix: &str,
221    show_trees: bool,
222    result: &mut Vec<DiffEntry>,
223) -> Result<()> {
224    let mut oi = 0;
225    let mut ni = 0;
226
227    while oi < old.len() || ni < new.len() {
228        match (old.get(oi), new.get(ni)) {
229            (Some(o), Some(n)) => {
230                let cmp = crate::objects::tree_entry_cmp(
231                    &o.name,
232                    is_tree_mode(o.mode),
233                    &n.name,
234                    is_tree_mode(n.mode),
235                );
236                match cmp {
237                    std::cmp::Ordering::Less => {
238                        // Old entry not in new → deleted
239                        emit_deleted_opts(odb, o, prefix, show_trees, result)?;
240                        oi += 1;
241                    }
242                    std::cmp::Ordering::Greater => {
243                        // New entry not in old → added
244                        emit_added_opts(odb, n, prefix, show_trees, result)?;
245                        ni += 1;
246                    }
247                    std::cmp::Ordering::Equal => {
248                        // Both present — check for changes
249                        if o.oid != n.oid || o.mode != n.mode {
250                            let name_str = String::from_utf8_lossy(&o.name);
251                            let path = format_path(prefix, &name_str);
252                            if is_tree_mode(o.mode) && is_tree_mode(n.mode) {
253                                // Both are trees
254                                if show_trees {
255                                    result.push(DiffEntry {
256                                        status: DiffStatus::Modified,
257                                        old_path: Some(path.clone()),
258                                        new_path: Some(path.clone()),
259                                        old_mode: format_mode(o.mode),
260                                        new_mode: format_mode(n.mode),
261                                        old_oid: o.oid,
262                                        new_oid: n.oid,
263                                        score: None,
264                                    });
265                                }
266                                // Recurse
267                                let nested = diff_trees_opts(
268                                    odb,
269                                    Some(&o.oid),
270                                    Some(&n.oid),
271                                    &path,
272                                    show_trees,
273                                )?;
274                                result.extend(nested);
275                            } else if is_tree_mode(o.mode) && !is_tree_mode(n.mode) {
276                                // Tree → blob: delete tree contents, add blob
277                                emit_deleted_opts(odb, o, prefix, show_trees, result)?;
278                                emit_added_opts(odb, n, prefix, show_trees, result)?;
279                            } else if !is_tree_mode(o.mode) && is_tree_mode(n.mode) {
280                                // Blob → tree: delete blob, add tree contents
281                                emit_deleted_opts(odb, o, prefix, show_trees, result)?;
282                                emit_added_opts(odb, n, prefix, show_trees, result)?;
283                            } else {
284                                // Both blobs — modified.
285                                // A mode-only change (e.g. chmod) is Modified.
286                                // TypeChanged is only for actual type changes (blob ↔ symlink).
287                                let old_type = o.mode & 0o170000;
288                                let new_type = n.mode & 0o170000;
289                                result.push(DiffEntry {
290                                    status: if old_type != new_type {
291                                        DiffStatus::TypeChanged
292                                    } else {
293                                        DiffStatus::Modified
294                                    },
295                                    old_path: Some(path.clone()),
296                                    new_path: Some(path),
297                                    old_mode: format_mode(o.mode),
298                                    new_mode: format_mode(n.mode),
299                                    old_oid: o.oid,
300                                    new_oid: n.oid,
301                                    score: None,
302                                });
303                            }
304                        }
305                        oi += 1;
306                        ni += 1;
307                    }
308                }
309            }
310            (Some(o), None) => {
311                emit_deleted_opts(odb, o, prefix, show_trees, result)?;
312                oi += 1;
313            }
314            (None, Some(n)) => {
315                emit_added_opts(odb, n, prefix, show_trees, result)?;
316                ni += 1;
317            }
318            (None, None) => break,
319        }
320    }
321
322    Ok(())
323}
324
325fn emit_deleted_opts(
326    odb: &Odb,
327    entry: &TreeEntry,
328    prefix: &str,
329    show_trees: bool,
330    result: &mut Vec<DiffEntry>,
331) -> Result<()> {
332    let name_str = String::from_utf8_lossy(&entry.name);
333    let path = format_path(prefix, &name_str);
334    if is_tree_mode(entry.mode) {
335        if show_trees {
336            result.push(DiffEntry {
337                status: DiffStatus::Deleted,
338                old_path: Some(path.clone()),
339                new_path: None,
340                old_mode: format_mode(entry.mode),
341                new_mode: "000000".to_owned(),
342                old_oid: entry.oid,
343                new_oid: zero_oid(),
344                score: None,
345            });
346        }
347        // Recurse into deleted tree
348        let nested = diff_trees_opts(odb, Some(&entry.oid), None, &path, show_trees)?;
349        result.extend(nested);
350    } else {
351        result.push(DiffEntry {
352            status: DiffStatus::Deleted,
353            old_path: Some(path.clone()),
354            new_path: None,
355            old_mode: format_mode(entry.mode),
356            new_mode: "000000".to_owned(),
357            old_oid: entry.oid,
358            new_oid: zero_oid(),
359            score: None,
360        });
361    }
362    Ok(())
363}
364
365fn emit_added_opts(
366    odb: &Odb,
367    entry: &TreeEntry,
368    prefix: &str,
369    show_trees: bool,
370    result: &mut Vec<DiffEntry>,
371) -> Result<()> {
372    let name_str = String::from_utf8_lossy(&entry.name);
373    let path = format_path(prefix, &name_str);
374    if is_tree_mode(entry.mode) {
375        if show_trees {
376            result.push(DiffEntry {
377                status: DiffStatus::Added,
378                old_path: None,
379                new_path: Some(path.clone()),
380                old_mode: "000000".to_owned(),
381                new_mode: format_mode(entry.mode),
382                old_oid: zero_oid(),
383                new_oid: entry.oid,
384                score: None,
385            });
386        }
387        // Recurse into added tree
388        let nested = diff_trees_opts(odb, None, Some(&entry.oid), &path, show_trees)?;
389        result.extend(nested);
390    } else {
391        result.push(DiffEntry {
392            status: DiffStatus::Added,
393            old_path: None,
394            new_path: Some(path),
395            old_mode: "000000".to_owned(),
396            new_mode: format_mode(entry.mode),
397            old_oid: zero_oid(),
398            new_oid: entry.oid,
399            score: None,
400        });
401    }
402    Ok(())
403}
404
405// ── Index-to-tree diff (staged changes) ─────────────────────────────
406
407/// Compare the index against a tree (usually HEAD's tree).
408///
409/// This shows "staged" changes — what would be committed.
410///
411/// # Parameters
412///
413/// - `odb` — object database.
414/// - `index` — the current index.
415/// - `tree_oid` — the tree to compare against (e.g. HEAD's tree), or `None`
416///   for comparison against an empty tree (initial commit).
417///
418/// # Errors
419///
420/// Returns errors from ODB reads.
421pub fn diff_index_to_tree(
422    odb: &Odb,
423    index: &Index,
424    tree_oid: Option<&ObjectId>,
425) -> Result<Vec<DiffEntry>> {
426    // Flatten the tree into a sorted list of (path, mode, oid)
427    let tree_entries = match tree_oid {
428        Some(oid) => flatten_tree(odb, oid, "")?,
429        None => Vec::new(),
430    };
431
432    // Build maps keyed by path
433    let mut tree_map: std::collections::BTreeMap<&str, &FlatEntry> =
434        std::collections::BTreeMap::new();
435    for entry in &tree_entries {
436        tree_map.insert(&entry.path, entry);
437    }
438
439    let mut result = Vec::new();
440    let mut stage0_paths = std::collections::BTreeSet::new();
441    let mut unmerged_modes: std::collections::BTreeMap<String, (u8, u32)> =
442        std::collections::BTreeMap::new();
443
444    // Check index entries against tree
445    for ie in &index.entries {
446        let path = String::from_utf8_lossy(&ie.path).to_string();
447        if ie.stage() != 0 {
448            let rank = match ie.stage() {
449                2 => 0u8,
450                3 => 1u8,
451                1 => 2u8,
452                _ => 3u8,
453            };
454            match unmerged_modes.get(&path) {
455                Some((existing_rank, _)) if *existing_rank <= rank => {}
456                _ => {
457                    unmerged_modes.insert(path, (rank, ie.mode));
458                }
459            }
460            continue;
461        }
462        stage0_paths.insert(path.clone());
463        match tree_map.remove(path.as_str()) {
464            Some(te) => {
465                // Present in both — check for differences
466                if te.oid != ie.oid || te.mode != ie.mode {
467                    result.push(DiffEntry {
468                        status: DiffStatus::Modified,
469                        old_path: Some(path.clone()),
470                        new_path: Some(path),
471                        old_mode: format_mode(te.mode),
472                        new_mode: format_mode(ie.mode),
473                        old_oid: te.oid,
474                        new_oid: ie.oid,
475                        score: None,
476                    });
477                }
478            }
479            None => {
480                // In index but not tree → added
481                result.push(DiffEntry {
482                    status: DiffStatus::Added,
483                    old_path: None,
484                    new_path: Some(path),
485                    old_mode: "000000".to_owned(),
486                    new_mode: format_mode(ie.mode),
487                    old_oid: zero_oid(),
488                    new_oid: ie.oid,
489                    score: None,
490                });
491            }
492        }
493    }
494
495    for (path, (_, mode)) in &unmerged_modes {
496        if stage0_paths.contains(path) {
497            continue;
498        }
499        tree_map.remove(path.as_str());
500        result.push(DiffEntry {
501            status: DiffStatus::Unmerged,
502            old_path: Some(path.clone()),
503            new_path: Some(path.clone()),
504            old_mode: "000000".to_owned(),
505            new_mode: format_mode(*mode),
506            old_oid: zero_oid(),
507            new_oid: zero_oid(),
508            score: None,
509        });
510    }
511
512    // Remaining tree entries not in index → deleted
513    for (path, te) in tree_map {
514        result.push(DiffEntry {
515            status: DiffStatus::Deleted,
516            old_path: Some(path.to_owned()),
517            new_path: None,
518            old_mode: format_mode(te.mode),
519            new_mode: "000000".to_owned(),
520            old_oid: te.oid,
521            new_oid: zero_oid(),
522            score: None,
523        });
524    }
525
526    result.sort_by(|a, b| a.path().cmp(b.path()));
527    Ok(result)
528}
529
530// ── Index-to-worktree diff (unstaged changes) ───────────────────────
531
532/// Compare the index against the working tree.
533///
534/// This shows "unstaged" changes — modifications not yet staged.
535///
536/// # Parameters
537///
538/// - `odb` — object database (for hashing worktree files).
539/// - `index` — the current index.
540/// - `work_tree` — path to the working tree root.
541///
542/// # Errors
543///
544/// Returns errors from I/O or hashing.
545pub fn diff_index_to_worktree(
546    odb: &Odb,
547    index: &Index,
548    work_tree: &Path,
549) -> Result<Vec<DiffEntry>> {
550    use crate::config::ConfigSet;
551    use crate::crlf;
552
553    let git_dir = work_tree.join(".git");
554    let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
555    let conv = crlf::ConversionConfig::from_config(&config);
556    let attrs = crlf::load_gitattributes(work_tree);
557
558    let mut result = Vec::new();
559    let mut unmerged_base: std::collections::BTreeMap<String, (u8, &IndexEntry)> =
560        std::collections::BTreeMap::new();
561
562    for ie in &index.entries {
563        if ie.stage() != 0 {
564            let path = String::from_utf8_lossy(&ie.path).to_string();
565            let rank = match ie.stage() {
566                2 => 0u8,
567                3 => 1u8,
568                1 => 2u8,
569                _ => 3u8,
570            };
571            match unmerged_base.get(&path) {
572                Some((existing_rank, _)) if *existing_rank <= rank => {}
573                _ => {
574                    unmerged_base.insert(path, (rank, ie));
575                }
576            }
577            continue;
578        }
579        // Use str slice directly to avoid allocation for path joining;
580        // only allocate String if we need it for DiffEntry output.
581        let path_str_ref = std::str::from_utf8(&ie.path).unwrap_or("");
582        let is_intent_to_add = ie.intent_to_add();
583
584        // Gitlink entries (submodules) are directories — compare HEAD commit.
585        if ie.mode == 0o160000 {
586            let sub_dir = work_tree.join(path_str_ref);
587            let sub_head_oid = read_submodule_head(&sub_dir);
588            if sub_head_oid.as_ref() != Some(&ie.oid) {
589                let path_owned = path_str_ref.to_owned();
590                let new_oid = sub_head_oid.unwrap_or_else(zero_oid);
591                result.push(DiffEntry {
592                    status: DiffStatus::Modified,
593                    old_path: Some(path_owned.clone()),
594                    new_path: Some(path_owned),
595                    old_mode: format_mode(ie.mode),
596                    new_mode: format_mode(ie.mode),
597                    old_oid: ie.oid,
598                    new_oid,
599                    score: None,
600                });
601            }
602            continue;
603        }
604
605        let file_path = work_tree.join(path_str_ref);
606
607        if is_intent_to_add {
608            match fs::symlink_metadata(&file_path) {
609                Ok(meta) => {
610                    let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, &config);
611                    let worktree_oid = hash_worktree_file(
612                        odb,
613                        &file_path,
614                        &meta,
615                        &conv,
616                        &file_attrs,
617                        path_str_ref,
618                    )?;
619                    let worktree_mode = mode_from_metadata(&meta);
620                    result.push(DiffEntry {
621                        status: DiffStatus::Added,
622                        old_path: None,
623                        new_path: Some(path_str_ref.to_owned()),
624                        old_mode: "000000".to_owned(),
625                        new_mode: format_mode(worktree_mode),
626                        old_oid: zero_oid(),
627                        new_oid: worktree_oid,
628                        score: None,
629                    });
630                }
631                Err(e)
632                    if e.kind() == std::io::ErrorKind::NotFound
633                        || e.raw_os_error() == Some(20) /* ENOTDIR */ =>
634                {
635                    result.push(DiffEntry {
636                        status: DiffStatus::Deleted,
637                        old_path: Some(path_str_ref.to_owned()),
638                        new_path: None,
639                        old_mode: format_mode(ie.mode),
640                        new_mode: "000000".to_owned(),
641                        old_oid: empty_blob_oid(),
642                        new_oid: zero_oid(),
643                        score: None,
644                    });
645                }
646                Err(e) => return Err(Error::Io(e)),
647            }
648            continue;
649        }
650
651        // If any parent component of the path is a symlink, the file is effectively
652        // deleted from the working tree (a symlink replaced a directory).
653        if has_symlink_in_path(work_tree, path_str_ref) {
654            result.push(DiffEntry {
655                status: DiffStatus::Deleted,
656                old_path: Some(path_str_ref.to_owned()),
657                new_path: None,
658                old_mode: format_mode(ie.mode),
659                new_mode: "000000".to_owned(),
660                old_oid: ie.oid,
661                new_oid: zero_oid(),
662                score: None,
663            });
664            continue;
665        }
666
667        match fs::symlink_metadata(&file_path) {
668            Ok(meta) if meta.is_dir() => {
669                // A directory exists where the index expects a file.
670                // Treat as a type change: the indexed file is effectively deleted.
671                result.push(DiffEntry {
672                    status: DiffStatus::Deleted,
673                    old_path: Some(path_str_ref.to_owned()),
674                    new_path: None,
675                    old_mode: format_mode(ie.mode),
676                    new_mode: String::new(),
677                    old_oid: ie.oid,
678                    new_oid: zero_oid(),
679                    score: None,
680                });
681            }
682            Ok(meta) => {
683                // Check if the file has changed using stat data first
684                if stat_matches(ie, &meta) {
685                    // Stat data matches content-wise, but also check mode.
686                    // The index mode might have been changed via --chmod=+x.
687                    let worktree_mode = mode_from_metadata(&meta);
688                    if worktree_mode == ie.mode {
689                        continue; // Fast path: stat+mode match, assume unchanged
690                    }
691                    // Mode differs — emit a mode-only change entry.
692                    let path_owned = path_str_ref.to_owned();
693                    result.push(DiffEntry {
694                        status: DiffStatus::Modified,
695                        old_path: Some(path_owned.clone()),
696                        new_path: Some(path_owned),
697                        old_mode: format_mode(ie.mode),
698                        new_mode: format_mode(worktree_mode),
699                        old_oid: ie.oid,
700                        new_oid: ie.oid,
701                        score: None,
702                    });
703                    continue;
704                }
705
706                // Stat differs — hash the file to check actual content
707                let file_attrs = crlf::get_file_attrs(&attrs, path_str_ref, &config);
708                let worktree_oid = hash_worktree_file(odb, &file_path, &meta, &conv, &file_attrs, path_str_ref)?;
709                let worktree_mode = mode_from_metadata(&meta);
710
711                // If clean conversion disagrees with the index but raw bytes match the
712                // blob (e.g. mixed line endings committed with autocrlf off), Git reports
713                // no diff (t0020: touch + git diff --exit-code).
714                let mut eff_oid = worktree_oid;
715                if eff_oid != ie.oid {
716                    if let Ok(raw) = fs::read(&file_path) {
717                        let raw_oid = Odb::hash_object_data(ObjectKind::Blob, &raw);
718                        if raw_oid == ie.oid {
719                            eff_oid = ie.oid;
720                        }
721                    }
722                }
723
724                if eff_oid != ie.oid || worktree_mode != ie.mode {
725                    let path_owned = path_str_ref.to_owned();
726                    result.push(DiffEntry {
727                        status: DiffStatus::Modified,
728                        old_path: Some(path_owned.clone()),
729                        new_path: Some(path_owned),
730                        old_mode: format_mode(ie.mode),
731                        new_mode: format_mode(worktree_mode),
732                        old_oid: ie.oid,
733                        new_oid: eff_oid,
734                    score: None,
735                    });
736                }
737            }
738            Err(e) if e.kind() == std::io::ErrorKind::NotFound
739                || e.raw_os_error() == Some(20) /* ENOTDIR */ => {
740                // File deleted from working tree (or parent replaced by a file)
741                result.push(DiffEntry {
742                    status: DiffStatus::Deleted,
743                    old_path: Some(path_str_ref.to_owned()),
744                    new_path: None,
745                    old_mode: format_mode(ie.mode),
746                    new_mode: "000000".to_owned(),
747                    old_oid: ie.oid,
748                    new_oid: zero_oid(),
749                    score: None,
750                });
751            }
752            Err(e) => return Err(Error::Io(e)),
753        }
754    }
755
756    for (path, (_, base_entry)) in unmerged_base {
757        let file_path = work_tree.join(&path);
758        let wt_meta = match fs::symlink_metadata(&file_path) {
759            Ok(meta) => Some(meta),
760            Err(e)
761                if e.kind() == std::io::ErrorKind::NotFound
762                    || e.raw_os_error() == Some(20) /* ENOTDIR */ =>
763            {
764                None
765            }
766            Err(e) => return Err(Error::Io(e)),
767        };
768
769        let new_mode = wt_meta.as_ref().map_or_else(
770            || "000000".to_owned(),
771            |meta| format_mode(mode_from_metadata(meta)),
772        );
773        result.push(DiffEntry {
774            status: DiffStatus::Unmerged,
775            old_path: Some(path.clone()),
776            new_path: Some(path.clone()),
777            old_mode: "000000".to_owned(),
778            new_mode,
779            old_oid: zero_oid(),
780            new_oid: zero_oid(),
781            score: None,
782        });
783
784        if let Some(meta) = wt_meta {
785            let file_attrs = crlf::get_file_attrs(&attrs, &path, &config);
786            let wt_oid = hash_worktree_file(odb, &file_path, &meta, &conv, &file_attrs, &path)?;
787            let wt_mode = mode_from_metadata(&meta);
788            if wt_oid != base_entry.oid || wt_mode != base_entry.mode {
789                result.push(DiffEntry {
790                    status: DiffStatus::Modified,
791                    old_path: Some(path.clone()),
792                    new_path: Some(path),
793                    old_mode: format_mode(base_entry.mode),
794                    new_mode: format_mode(wt_mode),
795                    old_oid: base_entry.oid,
796                    new_oid: wt_oid,
797                    score: None,
798                });
799            }
800        }
801    }
802
803    Ok(result)
804}
805
806/// Quick stat check: does the index entry's cached stat data match the file?
807pub fn stat_matches(ie: &IndexEntry, meta: &fs::Metadata) -> bool {
808    // Compare size
809    if meta.len() as u32 != ie.size {
810        return false;
811    }
812    // Compare mtime (seconds + nanoseconds)
813    if meta.mtime() as u32 != ie.mtime_sec {
814        return false;
815    }
816    if meta.mtime_nsec() as u32 != ie.mtime_nsec {
817        return false;
818    }
819    // Compare ctime (seconds + nanoseconds)
820    if meta.ctime() as u32 != ie.ctime_sec {
821        return false;
822    }
823    if meta.ctime_nsec() as u32 != ie.ctime_nsec {
824        return false;
825    }
826    // Compare inode and device
827    if meta.ino() as u32 != ie.ino {
828        return false;
829    }
830    if meta.dev() as u32 != ie.dev {
831        return false;
832    }
833    true
834}
835
836/// Hash a working tree file as a blob to get its OID.
837/// Check if any parent component of `rel_path` (relative to `work_tree`) is a symlink.
838fn has_symlink_in_path(work_tree: &Path, rel_path: &str) -> bool {
839    let mut check = work_tree.to_path_buf();
840    let components: Vec<&str> = rel_path.split('/').collect();
841    // Check all components except the last one (which is the file itself)
842    for component in &components[..components.len().saturating_sub(1)] {
843        check.push(component);
844        match fs::symlink_metadata(&check) {
845            Ok(meta) if meta.file_type().is_symlink() => return true,
846            _ => {}
847        }
848    }
849    false
850}
851
852fn hash_worktree_file(
853    _odb: &Odb,
854    path: &Path,
855    meta: &fs::Metadata,
856    conv: &crate::crlf::ConversionConfig,
857    file_attrs: &crate::crlf::FileAttrs,
858    rel_path: &str,
859) -> Result<ObjectId> {
860    let data = if meta.file_type().is_symlink() {
861        // For symlinks, hash the target path
862        let target = fs::read_link(path)?;
863        target.to_string_lossy().into_owned().into_bytes()
864    } else {
865        let raw = fs::read(path)?;
866        // Apply clean conversion (CRLF→LF) so hash matches index blob
867        crate::crlf::convert_to_git(&raw, rel_path, conv, file_attrs).unwrap_or(raw)
868    };
869
870    Ok(Odb::hash_object_data(ObjectKind::Blob, &data))
871}
872
873/// Derive a Git file mode from filesystem metadata.
874fn mode_from_metadata(meta: &fs::Metadata) -> u32 {
875    if meta.file_type().is_symlink() {
876        0o120000
877    } else if meta.mode() & 0o111 != 0 {
878        0o100755
879    } else {
880        0o100644
881    }
882}
883
884/// Compare a tree against the working tree.
885///
886/// Shows changes from `tree_oid` to the current working directory state.
887/// Files tracked in the index but not in the tree are shown as Added.
888/// Files in the tree but missing from the working tree are shown as Deleted.
889///
890/// # Parameters
891///
892/// - `odb` — object database.
893/// - `tree_oid` — the tree to compare against (`None` for empty tree).
894/// - `work_tree` — path to the working tree root.
895/// - `index` — current index (used to discover new tracked files not in tree).
896///
897/// # Errors
898///
899/// Returns errors from ODB reads or I/O.
900pub fn diff_tree_to_worktree(
901    odb: &Odb,
902    tree_oid: Option<&ObjectId>,
903    work_tree: &Path,
904    index: &Index,
905) -> Result<Vec<DiffEntry>> {
906    use crate::config::ConfigSet;
907    use crate::crlf;
908
909    let git_dir = work_tree.join(".git");
910    let config = ConfigSet::load(Some(&git_dir), true).unwrap_or_else(|_| ConfigSet::new());
911    let conv = crlf::ConversionConfig::from_config(&config);
912    let attrs = crlf::load_gitattributes(work_tree);
913
914    // Flatten the tree into a BTreeMap keyed by path
915    let tree_flat = match tree_oid {
916        Some(oid) => flatten_tree(odb, oid, "")?,
917        None => Vec::new(),
918    };
919    let tree_map: std::collections::BTreeMap<String, &FlatEntry> =
920        tree_flat.iter().map(|e| (e.path.clone(), e)).collect();
921
922    // Build index lookup: path → &IndexEntry (stage 0 only)
923    let mut index_entries: std::collections::BTreeMap<&[u8], &IndexEntry> =
924        std::collections::BTreeMap::new();
925    let mut index_paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
926    for ie in &index.entries {
927        if ie.stage() != 0 {
928            continue;
929        }
930        let path = String::from_utf8_lossy(&ie.path).to_string();
931        index_entries.insert(&ie.path, ie);
932        index_paths.insert(path);
933    }
934
935    // Union of tree paths + index paths
936    let mut all_paths: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
937    all_paths.extend(tree_map.keys().cloned());
938    all_paths.extend(index_paths.iter().cloned());
939
940    let mut result = Vec::new();
941
942    for path in &all_paths {
943        let tree_entry = tree_map.get(path.as_str());
944
945        // Gitlink entries (submodules) — compare HEAD commit, not file content.
946        let is_gitlink = tree_entry.is_some_and(|te| te.mode == 0o160000)
947            || index_entries
948                .get(path.as_bytes())
949                .is_some_and(|ie| ie.mode == 0o160000);
950        if is_gitlink {
951            if let Some(te) = tree_entry {
952                let sub_dir = work_tree.join(path);
953                let sub_head = read_submodule_head(&sub_dir);
954                if sub_head.as_ref() != Some(&te.oid) {
955                    let new_oid = sub_head.unwrap_or_else(zero_oid);
956                    result.push(DiffEntry {
957                        status: DiffStatus::Modified,
958                        old_path: Some(path.clone()),
959                        new_path: Some(path.clone()),
960                        old_mode: format_mode(te.mode),
961                        new_mode: format_mode(te.mode),
962                        old_oid: te.oid,
963                        new_oid,
964                        score: None,
965                    });
966                }
967            }
968            continue;
969        }
970
971        let file_path = work_tree.join(path);
972
973        let wt_meta = match fs::symlink_metadata(&file_path) {
974            Ok(m) => Some(m),
975            Err(e) if e.kind() == std::io::ErrorKind::NotFound => None,
976            Err(e) => return Err(Error::Io(e)),
977        };
978
979        match (tree_entry, wt_meta) {
980            (Some(te), Some(ref meta)) => {
981                // Fast path: if the index entry matches the tree entry AND
982                // stat cache matches, the file is unchanged — skip hashing.
983                if let Some(ie) = index_entries.get(path.as_bytes()) {
984                    if ie.oid == te.oid && ie.mode == te.mode && stat_matches(ie, meta) {
985                        continue;
986                    }
987                }
988
989                // Stat or content differs — hash the file
990                let file_attrs = crlf::get_file_attrs(&attrs, path, &config);
991                let wt_oid = hash_worktree_file(odb, &file_path, meta, &conv, &file_attrs, path)?;
992                let wt_mode = mode_from_metadata(meta);
993                if wt_oid != te.oid || wt_mode != te.mode {
994                    result.push(DiffEntry {
995                        status: DiffStatus::Modified,
996                        old_path: Some(path.clone()),
997                        new_path: Some(path.clone()),
998                        old_mode: format_mode(te.mode),
999                        new_mode: format_mode(wt_mode),
1000                        old_oid: te.oid,
1001                        new_oid: wt_oid,
1002                        score: None,
1003                    });
1004                }
1005            }
1006            (Some(te), None) => {
1007                // In tree but missing from worktree
1008                result.push(DiffEntry {
1009                    status: DiffStatus::Deleted,
1010                    old_path: Some(path.clone()),
1011                    new_path: None,
1012                    old_mode: format_mode(te.mode),
1013                    new_mode: "000000".to_owned(),
1014                    old_oid: te.oid,
1015                    new_oid: zero_oid(),
1016                    score: None,
1017                });
1018            }
1019            (None, Some(ref meta)) => {
1020                // In index but not in tree, and exists in worktree
1021                let file_attrs = crlf::get_file_attrs(&attrs, path, &config);
1022                let wt_oid = hash_worktree_file(odb, &file_path, meta, &conv, &file_attrs, path)?;
1023                let wt_mode = mode_from_metadata(meta);
1024                result.push(DiffEntry {
1025                    status: DiffStatus::Added,
1026                    old_path: None,
1027                    new_path: Some(path.clone()),
1028                    old_mode: "000000".to_owned(),
1029                    new_mode: format_mode(wt_mode),
1030                    old_oid: zero_oid(),
1031                    new_oid: wt_oid,
1032                    score: None,
1033                });
1034            }
1035            (None, None) => {
1036                // Tracked in index but neither in tree nor worktree — skip
1037            }
1038        }
1039    }
1040
1041    result.sort_by(|a, b| a.path().cmp(b.path()));
1042    Ok(result)
1043}
1044
1045// ── Rename detection ────────────────────────────────────────────────
1046
1047/// Detect renames by pairing Deleted and Added entries with similar content.
1048///
1049/// `threshold` is the minimum similarity percentage (0–100) for a pair to
1050/// be considered a rename (Git's default is 50%).  The function reads blob
1051/// content from the ODB to compute a line-level similarity score.
1052///
1053/// Exact-OID matches are always 100% similar regardless of content.
1054pub fn detect_renames(odb: &Odb, entries: Vec<DiffEntry>, threshold: u32) -> Vec<DiffEntry> {
1055    // Split entries into deleted, added, and others.
1056    let mut deleted: Vec<DiffEntry> = Vec::new();
1057    let mut added: Vec<DiffEntry> = Vec::new();
1058    let mut others: Vec<DiffEntry> = Vec::new();
1059
1060    for entry in entries {
1061        match entry.status {
1062            DiffStatus::Deleted => deleted.push(entry),
1063            DiffStatus::Added => added.push(entry),
1064            _ => others.push(entry),
1065        }
1066    }
1067
1068    if deleted.is_empty() || added.is_empty() {
1069        // Nothing to pair — return original order.
1070        let mut result = others;
1071        result.extend(deleted);
1072        result.extend(added);
1073        result.sort_by(|a, b| a.path().cmp(b.path()));
1074        return result;
1075    }
1076
1077    // Read content for all deleted blobs.
1078    let deleted_contents: Vec<Option<Vec<u8>>> = deleted
1079        .iter()
1080        .map(|d| odb.read(&d.old_oid).ok().map(|obj| obj.data))
1081        .collect();
1082
1083    // Read content for all added blobs.
1084    let added_contents: Vec<Option<Vec<u8>>> = added
1085        .iter()
1086        .map(|a| odb.read(&a.new_oid).ok().map(|obj| obj.data))
1087        .collect();
1088
1089    // Build a matrix of similarity scores and find the best pairings.
1090    // We use a greedy approach: pick the highest-scoring pair first.
1091    let mut scores: Vec<(u32, usize, usize)> = Vec::new();
1092
1093    for (di, del) in deleted.iter().enumerate() {
1094        for (ai, add) in added.iter().enumerate() {
1095            // Exact OID match → 100%
1096            if del.old_oid == add.new_oid {
1097                scores.push((100, di, ai));
1098                continue;
1099            }
1100
1101            let score = match (&deleted_contents[di], &added_contents[ai]) {
1102                (Some(old_data), Some(new_data)) => compute_similarity(old_data, new_data),
1103                _ => 0,
1104            };
1105
1106            if score >= threshold {
1107                scores.push((score, di, ai));
1108            }
1109        }
1110    }
1111
1112    // Sort: prefer same-basename pairs first, then by score descending.
1113    // This matches Git's behavior where basename matches are checked first.
1114    scores.sort_by(|a, b| {
1115        let a_same = same_basename(&deleted[a.1], &added[a.2]);
1116        let b_same = same_basename(&deleted[b.1], &added[b.2]);
1117        b_same.cmp(&a_same).then_with(|| b.0.cmp(&a.0))
1118    });
1119
1120    let mut used_deleted = vec![false; deleted.len()];
1121    let mut used_added = vec![false; added.len()];
1122    let mut renames: Vec<DiffEntry> = Vec::new();
1123
1124    for (score, di, ai) in &scores {
1125        if used_deleted[*di] || used_added[*ai] {
1126            continue;
1127        }
1128        used_deleted[*di] = true;
1129        used_added[*ai] = true;
1130
1131        let del = &deleted[*di];
1132        let add = &added[*ai];
1133
1134        renames.push(DiffEntry {
1135            status: DiffStatus::Renamed,
1136            old_path: del.old_path.clone(),
1137            new_path: add.new_path.clone(),
1138            old_mode: del.old_mode.clone(),
1139            new_mode: add.new_mode.clone(),
1140            old_oid: del.old_oid,
1141            new_oid: add.new_oid,
1142            score: Some(*score),
1143        });
1144    }
1145
1146    // Collect unmatched entries.
1147    let mut result = others;
1148    result.extend(renames);
1149    for (i, entry) in deleted.into_iter().enumerate() {
1150        if !used_deleted[i] {
1151            result.push(entry);
1152        }
1153    }
1154    for (i, entry) in added.into_iter().enumerate() {
1155        if !used_added[i] {
1156            result.push(entry);
1157        }
1158    }
1159
1160    result.sort_by(|a, b| a.path().cmp(b.path()));
1161    result
1162}
1163
1164/// Detect copies among diff entries.
1165///
1166/// This first runs rename detection (pairing Deleted+Added), then for any
1167/// remaining Added entries, looks for copy sources.
1168///
1169/// - `find_copies_harder` = false: only Modified entries are copy source candidates.
1170/// - `find_copies_harder` = true: also examine unmodified files from `source_tree_entries`.
1171///
1172/// `source_tree_entries` should be a list of (path, mode, oid) from the source tree;
1173/// used when `find_copies_harder` is true to consider unmodified files as copy sources.
1174pub fn detect_copies(
1175    odb: &Odb,
1176    entries: Vec<DiffEntry>,
1177    threshold: u32,
1178    find_copies_harder: bool,
1179    source_tree_entries: &[(String, String, ObjectId)],
1180) -> Vec<DiffEntry> {
1181    use std::collections::{HashMap, HashSet};
1182
1183    // Separate entries by status.
1184    let mut deleted: Vec<DiffEntry> = Vec::new();
1185    let mut added: Vec<DiffEntry> = Vec::new();
1186    let mut others: Vec<DiffEntry> = Vec::new();
1187
1188    for entry in entries {
1189        match entry.status {
1190            DiffStatus::Deleted => deleted.push(entry),
1191            DiffStatus::Added => added.push(entry),
1192            _ => others.push(entry),
1193        }
1194    }
1195
1196    if added.is_empty() {
1197        let mut result = others;
1198        result.extend(deleted);
1199        result.sort_by(|a, b| a.path().cmp(b.path()));
1200        return result;
1201    }
1202
1203    // Build source candidates: deleted files, modified files, and optionally tree entries.
1204    // Track which sources are from deleted files (can become renames).
1205    let mut sources: Vec<(String, ObjectId, bool)> = Vec::new(); // (path, oid, is_deleted)
1206    let mut deleted_source_idx: HashMap<String, usize> = HashMap::new();
1207
1208    for entry in &deleted {
1209        if let Some(ref path) = entry.old_path {
1210            deleted_source_idx.insert(path.clone(), sources.len());
1211            sources.push((path.clone(), entry.old_oid, true));
1212        }
1213    }
1214
1215    // Modified files are always candidates for -C.
1216    for entry in &others {
1217        if entry.status == DiffStatus::Modified {
1218            if let Some(ref old_path) = entry.old_path {
1219                if !sources.iter().any(|(p, _, _)| p == old_path) {
1220                    sources.push((old_path.clone(), entry.old_oid, false));
1221                }
1222            }
1223        }
1224    }
1225
1226    // With find_copies_harder, add all source tree entries.
1227    if find_copies_harder {
1228        for (path, _mode, oid) in source_tree_entries {
1229            if !sources.iter().any(|(p, _, _)| p == path) {
1230                sources.push((path.clone(), *oid, false));
1231            }
1232        }
1233    }
1234
1235    if sources.is_empty() {
1236        let mut result = others;
1237        result.extend(deleted);
1238        result.extend(added);
1239        result.sort_by(|a, b| a.path().cmp(b.path()));
1240        return result;
1241    }
1242
1243    // Read content for sources.
1244    let source_contents: Vec<Option<Vec<u8>>> = sources
1245        .iter()
1246        .map(|(_, oid, _)| odb.read(oid).ok().map(|obj| obj.data))
1247        .collect();
1248
1249    // Read content for added blobs.
1250    let added_contents: Vec<Option<Vec<u8>>> = added
1251        .iter()
1252        .map(|a| odb.read(&a.new_oid).ok().map(|obj| obj.data))
1253        .collect();
1254
1255    // Build score matrix: (score, source_idx, added_idx)
1256    let mut scores: Vec<(u32, usize, usize)> = Vec::new();
1257    for (si, (_, src_oid, _)) in sources.iter().enumerate() {
1258        for (ai, add) in added.iter().enumerate() {
1259            if *src_oid == add.new_oid {
1260                scores.push((100, si, ai));
1261                continue;
1262            }
1263            let score = match (&source_contents[si], &added_contents[ai]) {
1264                (Some(old_data), Some(new_data)) => compute_similarity(old_data, new_data),
1265                _ => 0,
1266            };
1267            if score >= threshold {
1268                scores.push((score, si, ai));
1269            }
1270        }
1271    }
1272
1273    // Sort by score descending.
1274    scores.sort_by(|a, b| b.0.cmp(&a.0));
1275
1276    // Build source->added mappings, each added file assigned to best source.
1277    let mut used_added = vec![false; added.len()];
1278    let mut source_to_added: HashMap<usize, Vec<(usize, u32)>> = HashMap::new();
1279    for &(score, si, ai) in &scores {
1280        if used_added[ai] {
1281            continue;
1282        }
1283        used_added[ai] = true;
1284        source_to_added.entry(si).or_default().push((ai, score));
1285    }
1286
1287    // Determine which become Rename vs Copy.
1288    // For each deleted source, at most one assignment becomes a Rename;
1289    // the rest become Copies. Pick the last alphabetically as rename target.
1290    let mut used_added2 = vec![false; added.len()];
1291    let mut result_entries: Vec<DiffEntry> = Vec::new();
1292
1293    // Track which deleted sources got a rename.
1294    let mut renamed_deleted: HashSet<usize> = HashSet::new();
1295
1296    // For each deleted source, pick one assignment as Rename, rest as Copy.
1297    for (&si, assignments_for_src) in &source_to_added {
1298        let (_, _, is_deleted) = &sources[si];
1299        if *is_deleted && !assignments_for_src.is_empty() {
1300            // Pick the last one (by path) as the rename target.
1301            // Git tends to pick the rename as the last alphabetically.
1302            let rename_ai = assignments_for_src
1303                .iter()
1304                .max_by_key(|(ai, _score)| added[*ai].path().to_string())
1305                .map(|(ai, _)| *ai);
1306
1307            for &(ai, score) in assignments_for_src {
1308                let (ref src_path, _, _) = sources[si];
1309                let add = &added[ai];
1310                let src_mode = source_tree_entries
1311                    .iter()
1312                    .find(|(p, _, _)| p == src_path)
1313                    .map(|(_, m, _)| m.clone())
1314                    .unwrap_or_else(|| add.old_mode.clone());
1315
1316                let is_rename = Some(ai) == rename_ai;
1317                result_entries.push(DiffEntry {
1318                    status: if is_rename {
1319                        DiffStatus::Renamed
1320                    } else {
1321                        DiffStatus::Copied
1322                    },
1323                    old_path: Some(src_path.clone()),
1324                    new_path: add.new_path.clone(),
1325                    old_mode: src_mode,
1326                    new_mode: add.new_mode.clone(),
1327                    old_oid: sources[si].1,
1328                    new_oid: add.new_oid,
1329                    score: Some(score),
1330                });
1331                used_added2[ai] = true;
1332            }
1333            renamed_deleted.insert(si);
1334        } else {
1335            // Non-deleted source: all assignments are copies.
1336            for &(ai, score) in assignments_for_src {
1337                let (ref src_path, _, _) = sources[si];
1338                let add = &added[ai];
1339                let src_mode = source_tree_entries
1340                    .iter()
1341                    .find(|(p, _, _)| p == src_path)
1342                    .map(|(_, m, _)| m.clone())
1343                    .unwrap_or_else(|| add.old_mode.clone());
1344
1345                result_entries.push(DiffEntry {
1346                    status: DiffStatus::Copied,
1347                    old_path: Some(src_path.clone()),
1348                    new_path: add.new_path.clone(),
1349                    old_mode: src_mode,
1350                    new_mode: add.new_mode.clone(),
1351                    old_oid: sources[si].1,
1352                    new_oid: add.new_oid,
1353                    score: Some(score),
1354                });
1355                used_added2[ai] = true;
1356            }
1357        }
1358    }
1359
1360    // Keep deleted entries that weren't consumed by a rename.
1361    for entry in deleted.into_iter() {
1362        if let Some(ref path) = entry.old_path {
1363            if let Some(&si) = deleted_source_idx.get(path) {
1364                if renamed_deleted.contains(&si) {
1365                    // This deletion was consumed by a rename; skip it.
1366                    continue;
1367                }
1368            }
1369        }
1370        result_entries.push(entry);
1371    }
1372
1373    let mut result = others;
1374    result.extend(result_entries);
1375    // Keep unmatched added entries.
1376    for (i, entry) in added.into_iter().enumerate() {
1377        if !used_added2[i] {
1378            result.push(entry);
1379        }
1380    }
1381
1382    result.sort_by(|a, b| a.path().cmp(b.path()));
1383    result
1384}
1385
1386/// Format a rename pair using Git's compact path format.
1387///
1388/// Examples:
1389/// - `a/b/c` → `c/b/a` → `a/b/c => c/b/a`
1390/// - `c/b/a` → `c/d/e` → `c/{b/a => d/e}`
1391/// - `c/d/e` → `d/e` → `{c/d => d}/e`
1392/// - `d/e` → `d/f/e` → `d/{ => f}/e`
1393pub fn format_rename_path(old: &str, new: &str) -> String {
1394    let ob = old.as_bytes();
1395    let nb = new.as_bytes();
1396
1397    // Find common prefix length, snapped to '/' boundary.
1398    let pfx = {
1399        let mut last_sep = 0usize;
1400        let min_len = ob.len().min(nb.len());
1401        for i in 0..min_len {
1402            if ob[i] != nb[i] {
1403                break;
1404            }
1405            if ob[i] == b'/' {
1406                last_sep = i + 1;
1407            }
1408        }
1409        last_sep
1410    };
1411
1412    // Find common suffix length, snapped to '/' boundary.
1413    let mut sfx = {
1414        let mut last_sep = 0usize;
1415        let min_len = ob.len().min(nb.len());
1416        for i in 0..min_len {
1417            let oi = ob.len() - 1 - i;
1418            let ni = nb.len() - 1 - i;
1419            if ob[oi] != nb[ni] {
1420                break;
1421            }
1422            if ob[oi] == b'/' {
1423                last_sep = i + 1;
1424            }
1425        }
1426        last_sep
1427    };
1428
1429    // Suffix starts at this position in each string.
1430    let mut sfx_at_old = ob.len() - sfx;
1431    let mut sfx_at_new = nb.len() - sfx;
1432
1433    // If prefix and suffix overlap in both strings (both middles empty),
1434    // reduce the suffix so that at least the longer string has a non-empty middle.
1435    while pfx > sfx_at_old && pfx > sfx_at_new && sfx > 0 {
1436        // Reduce suffix by snapping to the next smaller '/' boundary.
1437        let suffix_bytes = &ob[sfx_at_old..];
1438        let mut new_sfx = 0;
1439        // Find the next '/' after sfx_at_old (i.e., reduce suffix).
1440        for i in 1..suffix_bytes.len() {
1441            if suffix_bytes[i] == b'/' {
1442                new_sfx = sfx - i;
1443                break;
1444            }
1445        }
1446        if new_sfx == 0 || new_sfx >= sfx {
1447            sfx_at_old = ob.len();
1448            sfx_at_new = nb.len();
1449            break;
1450        }
1451        sfx = new_sfx;
1452        sfx_at_old = ob.len() - sfx;
1453        sfx_at_new = nb.len() - sfx;
1454    }
1455
1456    // When prefix and suffix overlap in the shorter string, they share
1457    // the '/' boundary character. In the output format, the shared '/'
1458    // appears in both positions (e.g. "d/{ => f}/e" for d/e → d/f/e).
1459    // Compute the middle parts. When prefix and suffix overlap in a
1460    // string, the middle for that string is empty. The shared '/' shows
1461    // in both prefix (trailing) and suffix (leading) positions.
1462    let prefix = &old[..pfx];
1463    let suffix = &old[sfx_at_old..];
1464    let old_mid = if pfx <= sfx_at_old {
1465        &old[pfx..sfx_at_old]
1466    } else {
1467        ""
1468    };
1469    let new_mid = if pfx <= sfx_at_new {
1470        &new[pfx..sfx_at_new]
1471    } else {
1472        ""
1473    };
1474
1475    if prefix.is_empty() && suffix.is_empty() {
1476        return format!("{old} => {new}");
1477    }
1478
1479    format!("{prefix}{{{old_mid} => {new_mid}}}{suffix}")
1480}
1481
1482/// Check if two entries share the same filename (basename).
1483fn same_basename(del: &DiffEntry, add: &DiffEntry) -> bool {
1484    let old = del.old_path.as_deref().unwrap_or("");
1485    let new = add.new_path.as_deref().unwrap_or("");
1486    let old_base = old.rsplit('/').next().unwrap_or(old);
1487    let new_base = new.rsplit('/').next().unwrap_or(new);
1488    old_base == new_base && !old_base.is_empty()
1489}
1490
1491/// Compute a similarity percentage (0–100) between two byte slices.
1492///
1493/// Uses Git's approach: count the bytes that are "shared" (appear in
1494/// equal lines), then compute `score = shared_bytes * 2 * 100 / (src_size + dst_size)`.
1495fn compute_similarity(old: &[u8], new: &[u8]) -> u32 {
1496    // Normalize CRLF → LF before comparing so that files differing
1497    // only in line endings are detected as renames.
1498    let old_norm = crate::crlf::crlf_to_lf(old);
1499    let new_norm = crate::crlf::crlf_to_lf(new);
1500
1501    let src_size = old_norm.len();
1502    let dst_size = new_norm.len();
1503
1504    if src_size == 0 && dst_size == 0 {
1505        return 100;
1506    }
1507    let total = src_size + dst_size;
1508    if total == 0 {
1509        return 100;
1510    }
1511
1512    // Use line-level diff to find shared content, then count bytes.
1513    use similar::{ChangeTag, TextDiff};
1514    let old_str = String::from_utf8_lossy(&old_norm);
1515    let new_str = String::from_utf8_lossy(&new_norm);
1516    let diff = TextDiff::from_lines(&old_str as &str, &new_str as &str);
1517
1518    let mut shared_bytes = 0usize;
1519    for change in diff.iter_all_changes() {
1520        if change.tag() == ChangeTag::Equal {
1521            // Count bytes in the matching line (including newline).
1522            shared_bytes += change.value().len();
1523        }
1524    }
1525
1526    // Git: score = copied * MAX_SCORE / max(src_size, dst_size)
1527    // We normalize to 0-100.
1528    let max_size = src_size.max(dst_size);
1529
1530    ((shared_bytes * 100) / max_size).min(100) as u32
1531}
1532
1533/// Compute rename/copy similarity percentage (0–100) between two byte slices.
1534///
1535/// This uses the same scoring logic as internal rename detection.
1536#[must_use]
1537pub fn rename_similarity_score(old: &[u8], new: &[u8]) -> u32 {
1538    compute_similarity(old, new)
1539}
1540
1541// ── Output formatting ───────────────────────────────────────────────
1542
1543/// Format a diff entry in Git's raw diff format.
1544///
1545/// Example: `:100644 100644 abc1234... def5678... M\tfile.txt`
1546pub fn format_raw(entry: &DiffEntry) -> String {
1547    let path = match entry.status {
1548        DiffStatus::Renamed | DiffStatus::Copied => {
1549            format!(
1550                "{}\t{}",
1551                entry.old_path.as_deref().unwrap_or(""),
1552                entry.new_path.as_deref().unwrap_or("")
1553            )
1554        }
1555        _ => entry.path().to_owned(),
1556    };
1557
1558    let status_str = match (entry.status, entry.score) {
1559        (DiffStatus::Renamed, Some(s)) => format!("R{:03}", s),
1560        (DiffStatus::Copied, Some(s)) => format!("C{:03}", s),
1561        _ => entry.status.letter().to_string(),
1562    };
1563
1564    format!(
1565        ":{} {} {} {} {}\t{}",
1566        entry.old_mode, entry.new_mode, entry.old_oid, entry.new_oid, status_str, path
1567    )
1568}
1569
1570/// Format a diff entry with abbreviated OIDs.
1571pub fn format_raw_abbrev(entry: &DiffEntry, abbrev_len: usize) -> String {
1572    let ellipsis = if std::env::var("GIT_PRINT_SHA1_ELLIPSIS").ok().as_deref() == Some("yes") {
1573        "..."
1574    } else {
1575        ""
1576    };
1577    let old_hex = format!("{}", entry.old_oid);
1578    let new_hex = format!("{}", entry.new_oid);
1579    let old_abbrev = &old_hex[..abbrev_len.min(old_hex.len())];
1580    let new_abbrev = &new_hex[..abbrev_len.min(new_hex.len())];
1581
1582    let path = entry.path();
1583
1584    format!(
1585        ":{} {} {}{} {}{} {}\t{}",
1586        entry.old_mode,
1587        entry.new_mode,
1588        old_abbrev,
1589        ellipsis,
1590        new_abbrev,
1591        ellipsis,
1592        entry.status.letter(),
1593        path
1594    )
1595}
1596
1597/// Generate a unified diff patch for two blobs.
1598///
1599/// # Parameters
1600///
1601/// - `old_content` — the old file content (empty for added files).
1602/// - `new_content` — the new file content (empty for deleted files).
1603/// - `old_path` — display path for the old side.
1604/// - `new_path` — display path for the new side.
1605/// - `context_lines` — number of context lines around changes (default: 3).
1606///
1607/// # Returns
1608///
1609/// The unified diff as a string.
1610pub fn unified_diff(
1611    old_content: &str,
1612    new_content: &str,
1613    old_path: &str,
1614    new_path: &str,
1615    context_lines: usize,
1616) -> String {
1617    unified_diff_with_prefix(
1618        old_content,
1619        new_content,
1620        old_path,
1621        new_path,
1622        context_lines,
1623        "a/",
1624        "b/",
1625    )
1626}
1627
1628/// Same as `unified_diff` but with configurable source/destination prefixes.
1629pub fn unified_diff_with_prefix(
1630    old_content: &str,
1631    new_content: &str,
1632    old_path: &str,
1633    new_path: &str,
1634    context_lines: usize,
1635    src_prefix: &str,
1636    dst_prefix: &str,
1637) -> String {
1638    unified_diff_with_prefix_and_funcname(
1639        old_content,
1640        new_content,
1641        old_path,
1642        new_path,
1643        context_lines,
1644        src_prefix,
1645        dst_prefix,
1646        None,
1647    )
1648}
1649
1650/// Same as [`unified_diff_with_prefix`] with optional custom hunk-header
1651/// function-name matching.
1652pub fn unified_diff_with_prefix_and_funcname(
1653    old_content: &str,
1654    new_content: &str,
1655    old_path: &str,
1656    new_path: &str,
1657    context_lines: usize,
1658    src_prefix: &str,
1659    dst_prefix: &str,
1660    funcname_matcher: Option<&FuncnameMatcher>,
1661) -> String {
1662    unified_diff_with_prefix_and_funcname_and_algorithm(
1663        old_content,
1664        new_content,
1665        old_path,
1666        new_path,
1667        context_lines,
1668        src_prefix,
1669        dst_prefix,
1670        funcname_matcher,
1671        similar::Algorithm::Myers,
1672    )
1673}
1674
1675/// Same as [`unified_diff_with_prefix_and_funcname`] but allows callers to
1676/// choose the line diff algorithm used for hunk generation.
1677pub fn unified_diff_with_prefix_and_funcname_and_algorithm(
1678    old_content: &str,
1679    new_content: &str,
1680    old_path: &str,
1681    new_path: &str,
1682    context_lines: usize,
1683    src_prefix: &str,
1684    dst_prefix: &str,
1685    funcname_matcher: Option<&FuncnameMatcher>,
1686    algorithm: similar::Algorithm,
1687) -> String {
1688    use similar::TextDiff;
1689
1690    let diff = TextDiff::configure()
1691        .algorithm(algorithm)
1692        .diff_lines(old_content, new_content);
1693
1694    let mut output = String::new();
1695    if old_path == "/dev/null" {
1696        output.push_str("--- /dev/null\n");
1697    } else {
1698        output.push_str(&format!("--- {src_prefix}{old_path}\n"));
1699    }
1700    if new_path == "/dev/null" {
1701        output.push_str("+++ /dev/null\n");
1702    } else {
1703        output.push_str(&format!("+++ {dst_prefix}{new_path}\n"));
1704    }
1705
1706    let old_lines: Vec<&str> = old_content.lines().collect();
1707
1708    for hunk in diff
1709        .unified_diff()
1710        .context_radius(context_lines)
1711        .iter_hunks()
1712    {
1713        let hunk_str = format!("{hunk}");
1714        // The similar crate outputs @@ -a,b +c,d @@\n but Git adds
1715        // function context after the closing @@. Extract the hunk header
1716        // and add function context.
1717        if let Some(first_newline) = hunk_str.find('\n') {
1718            let header_line = &hunk_str[..first_newline];
1719            let rest = &hunk_str[first_newline..];
1720
1721            // Parse the old start line from the @@ header
1722            if let Some(func_ctx) =
1723                extract_function_context(header_line, &old_lines, funcname_matcher)
1724            {
1725                output.push_str(header_line);
1726                output.push(' ');
1727                output.push_str(&func_ctx);
1728                output.push_str(rest);
1729            } else {
1730                output.push_str(&hunk_str);
1731            }
1732        } else {
1733            output.push_str(&hunk_str);
1734        }
1735    }
1736
1737    output
1738}
1739
1740/// Compute a unified diff with anchored lines.
1741///
1742/// Anchored lines that appear exactly once in both old and new content are
1743/// forced to match, splitting the diff into segments around those anchor points.
1744/// This produces diffs where the anchored text stays as context and surrounding
1745/// lines are shown as additions/removals.
1746pub fn anchored_unified_diff(
1747    old_content: &str,
1748    new_content: &str,
1749    old_path: &str,
1750    new_path: &str,
1751    context_lines: usize,
1752    anchors: &[String],
1753) -> String {
1754    use similar::TextDiff;
1755
1756    let old_lines: Vec<&str> = old_content.lines().collect();
1757    let new_lines: Vec<&str> = new_content.lines().collect();
1758
1759    // Find anchored lines that appear exactly once in both old and new
1760    let mut anchor_pairs: Vec<(usize, usize)> = Vec::new(); // (old_idx, new_idx)
1761
1762    for anchor in anchors {
1763        let anchor_str = anchor.as_str();
1764
1765        // Count occurrences in old
1766        let old_positions: Vec<usize> = old_lines
1767            .iter()
1768            .enumerate()
1769            .filter(|(_, l)| l.trim_end() == anchor_str)
1770            .map(|(i, _)| i)
1771            .collect();
1772
1773        // Count occurrences in new
1774        let new_positions: Vec<usize> = new_lines
1775            .iter()
1776            .enumerate()
1777            .filter(|(_, l)| l.trim_end() == anchor_str)
1778            .map(|(i, _)| i)
1779            .collect();
1780
1781        // Only anchor if unique in both
1782        if old_positions.len() == 1 && new_positions.len() == 1 {
1783            anchor_pairs.push((old_positions[0], new_positions[0]));
1784        }
1785    }
1786
1787    // If no valid anchors, fall back to normal diff
1788    if anchor_pairs.is_empty() {
1789        return unified_diff(old_content, new_content, old_path, new_path, context_lines);
1790    }
1791
1792    // Sort anchor pairs by their position in the old file
1793    anchor_pairs.sort_by_key(|&(old_idx, _)| old_idx);
1794
1795    // Filter to only keep pairs where new positions are also increasing
1796    // (longest increasing subsequence of new positions)
1797    let mut filtered: Vec<(usize, usize)> = Vec::new();
1798    for &pair in &anchor_pairs {
1799        if filtered.is_empty() || pair.1 > filtered.last().unwrap().1 {
1800            filtered.push(pair);
1801        }
1802    }
1803    let anchor_pairs = filtered;
1804
1805    // Build a modified version of old/new where we diff segments between anchors.
1806    // We'll construct the diff by processing segments:
1807    // - Before first anchor
1808    // - Between consecutive anchors
1809    // - After last anchor
1810    // Each anchor line itself is a fixed context match.
1811
1812    // Collect all diff operations
1813    struct DiffOp {
1814        tag: char, // ' ', '+', '-'
1815        line: String,
1816    }
1817
1818    let mut ops: Vec<DiffOp> = Vec::new();
1819    let mut old_pos = 0usize;
1820    let mut new_pos = 0usize;
1821
1822    for &(old_anchor, new_anchor) in &anchor_pairs {
1823        // Diff the segment before this anchor
1824        let old_segment: Vec<&str> = old_lines[old_pos..old_anchor].to_vec();
1825        let new_segment: Vec<&str> = new_lines[new_pos..new_anchor].to_vec();
1826
1827        let old_seg_text = old_segment.join("\n");
1828        let new_seg_text = new_segment.join("\n");
1829
1830        if !old_seg_text.is_empty() || !new_seg_text.is_empty() {
1831            let old_seg_input = if old_seg_text.is_empty() {
1832                String::new()
1833            } else {
1834                format!("{}\n", old_seg_text)
1835            };
1836            let new_seg_input = if new_seg_text.is_empty() {
1837                String::new()
1838            } else {
1839                format!("{}\n", new_seg_text)
1840            };
1841            let seg_diff = TextDiff::from_lines(&old_seg_input, &new_seg_input);
1842            for change in seg_diff.iter_all_changes() {
1843                let tag = match change.tag() {
1844                    similar::ChangeTag::Equal => ' ',
1845                    similar::ChangeTag::Delete => '-',
1846                    similar::ChangeTag::Insert => '+',
1847                };
1848                ops.push(DiffOp {
1849                    tag,
1850                    line: change.value().trim_end_matches('\n').to_string(),
1851                });
1852            }
1853        }
1854
1855        // The anchor line itself is always context
1856        ops.push(DiffOp {
1857            tag: ' ',
1858            line: old_lines[old_anchor].to_string(),
1859        });
1860
1861        old_pos = old_anchor + 1;
1862        new_pos = new_anchor + 1;
1863    }
1864
1865    // Diff the remaining segment after the last anchor
1866    let old_segment: Vec<&str> = old_lines[old_pos..].to_vec();
1867    let new_segment: Vec<&str> = new_lines[new_pos..].to_vec();
1868    let old_seg_text = old_segment.join("\n");
1869    let new_seg_text = new_segment.join("\n");
1870
1871    if !old_seg_text.is_empty() || !new_seg_text.is_empty() {
1872        let old_seg_input = if old_seg_text.is_empty() {
1873            String::new()
1874        } else {
1875            format!("{}\n", old_seg_text)
1876        };
1877        let new_seg_input = if new_seg_text.is_empty() {
1878            String::new()
1879        } else {
1880            format!("{}\n", new_seg_text)
1881        };
1882        let seg_diff = TextDiff::from_lines(&old_seg_input, &new_seg_input);
1883        for change in seg_diff.iter_all_changes() {
1884            let tag = match change.tag() {
1885                similar::ChangeTag::Equal => ' ',
1886                similar::ChangeTag::Delete => '-',
1887                similar::ChangeTag::Insert => '+',
1888            };
1889            ops.push(DiffOp {
1890                tag,
1891                line: change.value().trim_end_matches('\n').to_string(),
1892            });
1893        }
1894    }
1895
1896    // Now format as unified diff with hunks
1897    let mut output = String::new();
1898    if old_path == "/dev/null" {
1899        output.push_str("--- /dev/null\n");
1900    } else {
1901        output.push_str(&format!("--- a/{old_path}\n"));
1902    }
1903    if new_path == "/dev/null" {
1904        output.push_str("+++ /dev/null\n");
1905    } else {
1906        output.push_str(&format!("+++ b/{new_path}\n"));
1907    }
1908
1909    // Group ops into hunks with context
1910    let total_ops = ops.len();
1911    if total_ops == 0 {
1912        return output;
1913    }
1914
1915    // Find ranges of changes
1916    let mut hunks: Vec<(usize, usize)> = Vec::new(); // (start, end) indices into ops
1917    let mut i = 0;
1918    while i < total_ops {
1919        if ops[i].tag != ' ' {
1920            let start = i.saturating_sub(context_lines);
1921            let mut end = i;
1922            // Extend to include consecutive changes and their context
1923            while end < total_ops {
1924                if ops[end].tag != ' ' {
1925                    end += 1;
1926                    continue;
1927                }
1928                // Check if there's another change within context_lines
1929                let mut next_change = end;
1930                while next_change < total_ops && ops[next_change].tag == ' ' {
1931                    next_change += 1;
1932                }
1933                if next_change < total_ops && next_change - end <= context_lines * 2 {
1934                    end = next_change + 1;
1935                } else {
1936                    end = (end + context_lines).min(total_ops);
1937                    break;
1938                }
1939            }
1940            // Merge with previous hunk if overlapping
1941            if let Some(last) = hunks.last_mut() {
1942                if start <= last.1 {
1943                    last.1 = end;
1944                } else {
1945                    hunks.push((start, end));
1946                }
1947            } else {
1948                hunks.push((start, end));
1949            }
1950            i = end;
1951        } else {
1952            i += 1;
1953        }
1954    }
1955
1956    // Output each hunk
1957    for (start, end) in hunks {
1958        // Count old/new lines in this hunk
1959        let mut old_start = 1usize;
1960        let mut new_start = 1usize;
1961        // Calculate line numbers by counting ops before this hunk
1962        for op in &ops[..start] {
1963            match op.tag {
1964                ' ' => {
1965                    old_start += 1;
1966                    new_start += 1;
1967                }
1968                '-' => {
1969                    old_start += 1;
1970                }
1971                '+' => {
1972                    new_start += 1;
1973                }
1974                _ => {}
1975            }
1976        }
1977        let mut old_count = 0usize;
1978        let mut new_count = 0usize;
1979        for op in &ops[start..end] {
1980            match op.tag {
1981                ' ' => {
1982                    old_count += 1;
1983                    new_count += 1;
1984                }
1985                '-' => {
1986                    old_count += 1;
1987                }
1988                '+' => {
1989                    new_count += 1;
1990                }
1991                _ => {}
1992            }
1993        }
1994
1995        output.push_str(&format!(
1996            "@@ -{},{} +{},{} @@\n",
1997            old_start, old_count, new_start, new_count
1998        ));
1999        for op in &ops[start..end] {
2000            output.push(op.tag);
2001            output.push_str(&op.line);
2002            output.push('\n');
2003        }
2004    }
2005
2006    output
2007}
2008
2009/// Extract function context for a hunk header.
2010///
2011/// Given a hunk header like `@@ -8,7 +8,7 @@`, find the last line
2012/// before line 8 in the old content that looks like a function header
2013/// (starts with a non-whitespace character, like Git's default).
2014fn extract_function_context(
2015    header: &str,
2016    old_lines: &[&str],
2017    funcname_matcher: Option<&FuncnameMatcher>,
2018) -> Option<String> {
2019    // Parse the old start line number from "@@ -<start>,<count> ..."
2020    let at_pos = header.find("-")?;
2021    let rest = &header[at_pos + 1..];
2022    let comma_or_space = rest.find([',', ' '])?;
2023    let start_str = &rest[..comma_or_space];
2024    let start_line: usize = start_str.parse().ok()?;
2025
2026    if start_line <= 1 {
2027        return None;
2028    }
2029
2030    // Look backwards from the line before the hunk start for a line that
2031    // starts with a non-whitespace character (Git's default funcname pattern).
2032    // start_line is 1-indexed, so the hunk starts at old_lines[start_line-1].
2033    // We want to look at lines before that: old_lines[0..start_line-1].
2034    let search_end = (start_line - 1).min(old_lines.len());
2035    let truncate = |text: &str| {
2036        if text.len() > 80 {
2037            let mut end = 80;
2038            while end > 0 && !text.is_char_boundary(end) {
2039                end -= 1;
2040            }
2041            text[..end].to_owned()
2042        } else {
2043            text.to_owned()
2044        }
2045    };
2046
2047    for i in (0..search_end).rev() {
2048        let line = old_lines[i];
2049        if line.is_empty() {
2050            continue;
2051        }
2052        if let Some(matcher) = funcname_matcher {
2053            if let Some(matched) = matcher.match_line(line) {
2054                return Some(truncate(&matched));
2055            }
2056            continue;
2057        }
2058
2059        let first = line.as_bytes()[0];
2060        if first.is_ascii_alphabetic() || first == b'_' || first == b'$' {
2061            return Some(truncate(line.trim_end_matches(char::is_whitespace)));
2062        }
2063    }
2064    None
2065}
2066
2067/// Generate diff stat output (file name + insertions/deletions).
2068///
2069/// Returns a single line like: ` file.txt | 5 ++---`
2070pub fn format_stat_line(
2071    path: &str,
2072    insertions: usize,
2073    deletions: usize,
2074    max_path_len: usize,
2075) -> String {
2076    format_stat_line_width(path, insertions, deletions, max_path_len, 0)
2077}
2078
2079pub fn format_stat_line_width(
2080    path: &str,
2081    insertions: usize,
2082    deletions: usize,
2083    max_path_len: usize,
2084    count_width: usize,
2085) -> String {
2086    let total = insertions + deletions;
2087    let plus = "+".repeat(insertions.min(50));
2088    let minus = "-".repeat(deletions.min(50));
2089    let cw = if count_width > 0 {
2090        count_width
2091    } else {
2092        format!("{}", total).len()
2093    };
2094    let bar = format!("{}{}", plus, minus);
2095    if bar.is_empty() {
2096        format!(
2097            " {:<width$} | {:>cw$}",
2098            path,
2099            total,
2100            width = max_path_len,
2101            cw = cw
2102        )
2103    } else {
2104        format!(
2105            " {:<width$} | {:>cw$} {}",
2106            path,
2107            total,
2108            bar,
2109            width = max_path_len,
2110            cw = cw
2111        )
2112    }
2113}
2114
2115/// Count insertions and deletions between two strings.
2116///
2117/// Returns `(insertions, deletions)`.
2118pub fn count_changes(old_content: &str, new_content: &str) -> (usize, usize) {
2119    use similar::{ChangeTag, TextDiff};
2120
2121    let diff = TextDiff::from_lines(old_content, new_content);
2122    let mut ins = 0;
2123    let mut del = 0;
2124
2125    for change in diff.iter_all_changes() {
2126        match change.tag() {
2127            ChangeTag::Insert => ins += 1,
2128            ChangeTag::Delete => del += 1,
2129            ChangeTag::Equal => {}
2130        }
2131    }
2132
2133    (ins, del)
2134}
2135
2136// ── Helpers ─────────────────────────────────────────────────────────
2137
2138/// Flatten a tree object recursively into a sorted list of (path, mode, oid).
2139struct FlatEntry {
2140    path: String,
2141    mode: u32,
2142    oid: ObjectId,
2143}
2144
2145fn flatten_tree(odb: &Odb, tree_oid: &ObjectId, prefix: &str) -> Result<Vec<FlatEntry>> {
2146    let entries = read_tree(odb, tree_oid)?;
2147    let mut result = Vec::new();
2148
2149    for entry in entries {
2150        let name_str = String::from_utf8_lossy(&entry.name);
2151        let path = format_path(prefix, &name_str);
2152        if is_tree_mode(entry.mode) {
2153            let nested = flatten_tree(odb, &entry.oid, &path)?;
2154            result.extend(nested);
2155        } else {
2156            result.push(FlatEntry {
2157                path,
2158                mode: entry.mode,
2159                oid: entry.oid,
2160            });
2161        }
2162    }
2163
2164    Ok(result)
2165}
2166
2167/// Whether a mode represents a tree (directory).
2168fn is_tree_mode(mode: u32) -> bool {
2169    mode == 0o040000
2170}
2171
2172/// Build a path with an optional prefix.
2173fn format_path(prefix: &str, name: &str) -> String {
2174    if prefix.is_empty() {
2175        name.to_owned()
2176    } else {
2177        format!("{prefix}/{name}")
2178    }
2179}
2180
2181/// Format a numeric mode as a zero-padded octal string.
2182fn format_mode(mode: u32) -> String {
2183    format!("{mode:06o}")
2184}
2185
2186/// Read the HEAD commit OID from a submodule directory.
2187/// Returns `None` if the submodule dir doesn't exist or has no HEAD.
2188fn read_submodule_head(sub_dir: &Path) -> Option<ObjectId> {
2189    // Also handle gitfile: .git is a file containing "gitdir: <path>"
2190    let git_dir = if sub_dir.join(".git").is_file() {
2191        let content = fs::read_to_string(sub_dir.join(".git")).ok()?;
2192        let gitdir = content
2193            .lines()
2194            .find_map(|l| l.strip_prefix("gitdir: "))?
2195            .trim()
2196            .to_owned();
2197        if Path::new(&gitdir).is_absolute() {
2198            PathBuf::from(gitdir)
2199        } else {
2200            sub_dir.join(gitdir)
2201        }
2202    } else if sub_dir.join(".git").is_dir() {
2203        sub_dir.join(".git")
2204    } else {
2205        return None;
2206    };
2207    let head_content = fs::read_to_string(git_dir.join("HEAD")).ok()?;
2208    let head_content = head_content.trim();
2209    if let Some(refname) = head_content.strip_prefix("ref: ") {
2210        // Symbolic ref — resolve it
2211        let ref_path = git_dir.join(refname);
2212        let oid_hex = fs::read_to_string(&ref_path).ok()?;
2213        ObjectId::from_hex(oid_hex.trim()).ok()
2214    } else {
2215        // Detached HEAD — direct OID
2216        ObjectId::from_hex(head_content).ok()
2217    }
2218}