Skip to main content

sley_worktree/
lib.rs

1use sley_config::GitConfig;
2use sley_core::{
3    BString, GitError, MissingObjectContext, MissingObjectKind, ObjectFormat, ObjectId, RepoPath,
4    Result,
5};
6use sley_index::{BorrowedIndex, CacheTree, Index, IndexEntry, IndexEntryRef, Stage};
7use sley_object::{Commit, EncodedObject, ObjectType, Tree, TreeEntry, tree_entry_object_type};
8use sley_odb::{FileObjectDatabase, ObjectPresenceChecker, ObjectReader, ObjectWriter};
9use sley_refs::{FileRefStore, RefTarget, RefUpdate, ReflogEntry, branch_ref_name};
10use std::borrow::Cow;
11use std::cell::RefCell;
12use std::cmp::Ordering;
13use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
14use std::io::Write;
15use std::path::{Path, PathBuf};
16use std::process::{Command, Stdio};
17use std::sync::{Mutex, OnceLock};
18use std::time::UNIX_EPOCH;
19use std::{env, fs};
20
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub enum WorktreeStatus {
23    Clean,
24    Modified(RepoPath),
25    Added(RepoPath),
26    Deleted(RepoPath),
27    Untracked(RepoPath),
28}
29
30pub trait WorktreeScanner {
31    fn status(&self) -> Result<Vec<WorktreeStatus>>;
32}
33
/// A sparse-checkout definition: the raw pattern list plus the sparse-index
/// toggle.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SparseCheckout {
    /// Pattern lines as raw bytes, one entry per pattern. How they are
    /// interpreted is selected by [`SparseCheckoutMode`].
    pub patterns: Vec<Vec<u8>>,
    /// Sparse-index flag. NOTE(review): presumably mirrors git's
    /// `index.sparse` setting — confirm against the callers.
    pub sparse_index: bool,
}
39
/// How the patterns of a [`SparseCheckout`] are matched when working out which
/// index paths are "in cone", i.e. kept in the worktree.
///
/// * [`SparseCheckoutMode::Auto`] (the default) looks at the patterns and
///   picks cone matching when every one of them fits the cone grammar
///   described below; otherwise it falls back to full matching.
/// * [`SparseCheckoutMode::Full`] treats each pattern as a `.gitignore` line:
///   `*`, `?`, `**`, character classes, a leading `/` anchor, a trailing `/`
///   for directories only, and `!` negation are all honoured. A path is
///   *included* when the last pattern matching it is not negated — upstream
///   Git's non-cone `core.sparseCheckout` behaviour.
/// * [`SparseCheckoutMode::Cone`] accepts only the restricted directory-prefix
///   form Git writes for `core.sparseCheckoutCone`: the literal `/*`
///   (top-level files), the recursive-parent guard `!/*/`, anchored directory
///   patterns such as `/dir/` (everything below `dir/`), and the parent guards
///   `/dir/*` and `!/dir/*/`. Matching is by prefix only, so glob
///   metacharacters are taken literally.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum SparseCheckoutMode {
    /// Choose between cone and full matching by inspecting the patterns.
    #[default]
    Auto,
    /// Always use `.gitignore`-style pattern matching.
    Full,
    /// Always use prefix-based cone matching.
    Cone,
}
64
/// Outcome of applying a sparse-checkout definition, split by what happened to
/// each affected path.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ApplySparseResult {
    /// In-cone paths whose worktree file was (re)materialized.
    pub materialized: Vec<Vec<u8>>,
    /// Out-of-cone paths that were removed from the worktree; the index entry
    /// of each now carries the skip-worktree bit.
    pub skipped: Vec<Vec<u8>>,
    /// Out-of-cone paths whose worktree file did *not* match the index. To
    /// avoid data loss (as git does) the file was left in place and the
    /// skip-worktree bit left clear. Callers report these through git's
    /// "The following paths are not up to date …" warning. Sorted by path.
    pub not_up_to_date: Vec<Vec<u8>>,
}
78
79#[derive(Debug, Clone, PartialEq, Eq)]
80pub struct UpdateIndexResult {
81    pub entries: usize,
82    pub updated: Vec<ObjectId>,
83}
84
/// One action reported for a tracked path; the payload is the path as raw
/// bytes.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum AddUpdateTrackedAction {
    /// An `add` action for the carried path.
    Add(Vec<u8>),
    /// A `remove` action for the carried path.
    Remove(Vec<u8>),
}
90
91#[derive(Debug, Clone, PartialEq, Eq)]
92pub enum AddExactTrackedPathResult {
93    Handled(Option<AddUpdateTrackedAction>),
94    Unsupported,
95}
96
97#[derive(Debug, Clone, PartialEq, Eq)]
98pub struct CacheInfoEntry {
99    pub mode: u32,
100    pub oid: ObjectId,
101    pub path: Vec<u8>,
102    pub stage: u16,
103}
104
105#[derive(Debug, Clone, PartialEq, Eq)]
106pub enum IndexInfoRecord {
107    Add(CacheInfoEntry),
108    Remove { path: Vec<u8> },
109}
110
/// Options for callers (the `git add` family) that apply a single, uniform
/// mode to every path in a batch.
///
/// `add`, `remove`, `force_remove`, `info_only`, and `chmod` together describe
/// that uniform mode. `ignore_skip_worktree_entries` is different in kind: it
/// is a switch for the whole invocation, and is not positional in git either.
///
/// `git update-index <flag> <path>...` does NOT derive its per-path mode from
/// this struct. It constructs [`UpdateIndexPath`] values itself, each holding
/// the sticky mode that was in effect when that path was parsed — see
/// [`UpdateIndexPath`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct UpdateIndexOptions {
    /// Uniform `--add` flag.
    pub add: bool,
    /// Uniform `--remove` flag.
    pub remove: bool,
    /// Uniform `--force-remove` flag.
    pub force_remove: bool,
    /// Uniform `--chmod` state; `None` when no chmod is requested.
    pub chmod: Option<bool>,
    /// Uniform `--info-only` flag.
    pub info_only: bool,
    /// Whole-invocation toggle; not part of the per-path mode.
    pub ignore_skip_worktree_entries: bool,
}
128
129impl UpdateIndexOptions {
130    /// The uniform per-path mode this batch applies to every path.
131    fn path_mode(&self) -> UpdateIndexPathMode {
132        UpdateIndexPathMode {
133            add: self.add,
134            remove: self.remove,
135            force_remove: self.force_remove,
136            info_only: self.info_only,
137            chmod: self.chmod,
138        }
139    }
140}
141
/// The mode flags that were active when one positional `update-index` path
/// was seen on the command line.
///
/// git's `update-index` walks argv left-to-right using `parse_options_step`
/// (`PARSE_OPT_STOP_AT_NON_OPTION`). The flags `--add`, `--remove`,
/// `--force-remove`, `--info-only`, and `--chmod` update sticky global state,
/// and every non-option path is passed to `update_one()` with the state in
/// effect *at that moment*. `--add foo --force-remove bar` therefore ADDs
/// `foo` and FORCE-REMOVEs `bar`: the flags are positional, not global. This
/// type is the snapshot of that state taken per path, used instead of one
/// batch-wide `UpdateIndexOptions` for all paths.
///
/// `--chmod=(+|-)x` is sticky in the same way (`--chmod=+x A --chmod=-x B`
/// makes A executable and B non-executable). Under `--verbose` each path
/// prints its own action (`add '<p>'`, `remove '<p>'`, `chmod (+|-)x '<p>'`)
/// inline, in command-line order — hence the mode has to accompany the path.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct UpdateIndexPathMode {
    /// `--add` was in effect.
    pub add: bool,
    /// `--remove` was in effect.
    pub remove: bool,
    /// `--force-remove` was in effect.
    pub force_remove: bool,
    /// `--info-only` was in effect.
    pub info_only: bool,
    /// `Some(true)` for `--chmod=+x`, `Some(false)` for `--chmod=-x`, `None`
    /// when no `--chmod` was in effect.
    pub chmod: Option<bool>,
}
168
169#[derive(Debug, Clone)]
170pub struct UpdateIndexPath {
171    pub path: PathBuf,
172    pub mode: UpdateIndexPathMode,
173}
174
/// Options for writing a tree; the default is `missing_ok = false` with no
/// prefix.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct WriteTreeOptions {
    /// NOTE(review): presumably git's `write-tree --missing-ok` (skip the
    /// check that referenced objects exist) — confirm against the implementation.
    pub missing_ok: bool,
    /// Optional path prefix as raw bytes.
    /// NOTE(review): presumably git's `write-tree --prefix=<dir>/` — confirm.
    pub prefix: Option<Vec<u8>>,
}
180
181#[derive(Debug, Clone, PartialEq, Eq)]
182pub struct ShortStatusEntry {
183    pub index: u8,
184    pub worktree: u8,
185    pub path: Vec<u8>,
186    pub head_mode: Option<u32>,
187    pub index_mode: Option<u32>,
188    pub worktree_mode: Option<u32>,
189    pub head_oid: Option<ObjectId>,
190    pub index_oid: Option<ObjectId>,
191    /// For a tracked gitlink (submodule) path: how the submodule's working
192    /// state differs from the staged gitlink. `None` for ordinary paths.
193    pub submodule: Option<SubmoduleStatus>,
194}
195
/// Submodule-specific detail attached to a status entry. It mirrors the trio
/// in upstream's `wt_status_change_data`: `new_submodule_commits` together
/// with the `DIRTY_SUBMODULE_MODIFIED` and `DIRTY_SUBMODULE_UNTRACKED` bits.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct SubmoduleStatus {
    /// The HEAD checked out in the submodule is not the staged gitlink oid.
    pub new_commits: bool,
    /// Tracked files in the submodule have staged or unstaged changes.
    pub modified_content: bool,
    /// The submodule contains untracked files.
    pub untracked_content: bool,
}

impl SubmoduleStatus {
    /// Returns `true` when at least one of the three flags is set.
    pub fn any(&self) -> bool {
        [
            self.new_commits,
            self.modified_content,
            self.untracked_content,
        ]
        .contains(&true)
    }
}
214
/// Submodule dirt-mask bit: tracked files in the submodule have staged or
/// unstaged changes (upstream `DIRTY_SUBMODULE_MODIFIED`).
pub const DIRTY_SUBMODULE_MODIFIED: u8 = 1 << 0;
/// Submodule dirt-mask bit: the submodule contains untracked files (upstream
/// `DIRTY_SUBMODULE_UNTRACKED`).
pub const DIRTY_SUBMODULE_UNTRACKED: u8 = 1 << 1;
221
222/// Inspect the working state of the submodule whose worktree is at `sub_root`
223/// and report its dirt mask: [`DIRTY_SUBMODULE_MODIFIED`] for staged/unstaged
224/// changes to tracked files, [`DIRTY_SUBMODULE_UNTRACKED`] for untracked
225/// files. Returns 0 for a clean submodule — and for a directory that is not a
226/// populated repository at all (upstream treats an unpopulated gitlink as
227/// always unchanged). The native equivalent of upstream's
228/// `is_submodule_modified()` (which runs `git status --porcelain=2` inside the
229/// submodule and classifies `?` lines as untracked, everything else as
230/// modified).
231pub fn submodule_dirt(sub_root: &Path) -> u8 {
232    let Some(git_dir) = sley_diff_merge::gitlink_git_dir(sub_root) else {
233        return 0;
234    };
235    let Ok(config) = sley_config::read_repo_config(&git_dir, None) else {
236        return 0;
237    };
238    let Ok(format) = config.repository_object_format() else {
239        return 0;
240    };
241    let Ok(entries) = short_status_with_options(
242        sub_root,
243        &git_dir,
244        format,
245        ShortStatusOptions {
246            include_ignored: false,
247            ignored_mode: StatusIgnoredMode::Traditional,
248            untracked_mode: StatusUntrackedMode::Normal,
249        },
250    ) else {
251        return 0;
252    };
253    let mut dirt = 0;
254    for entry in entries {
255        if entry.index == b'?' && entry.worktree == b'?' {
256            dirt |= DIRTY_SUBMODULE_UNTRACKED;
257        } else {
258            dirt |= DIRTY_SUBMODULE_MODIFIED;
259        }
260    }
261    dirt
262}
263
/// Untracked-files mode for status; defaults to [`StatusUntrackedMode::All`].
///
/// NOTE(review): the variants presumably correspond to git's
/// `--untracked-files=all|normal|no` — confirm against the status code.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum StatusUntrackedMode {
    /// The default mode.
    #[default]
    All,
    /// The `Normal` mode.
    Normal,
    /// The `None` mode.
    None,
}
271
/// Ignored-files mode for status; defaults to
/// [`StatusIgnoredMode::Traditional`].
///
/// NOTE(review): the variants presumably correspond to git's
/// `--ignored=traditional|matching` — confirm against the status code.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum StatusIgnoredMode {
    /// The default mode.
    #[default]
    Traditional,
    /// The `Matching` mode.
    Matching,
}
278
279#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
280pub struct ShortStatusOptions {
281    pub include_ignored: bool,
282    pub ignored_mode: StatusIgnoredMode,
283    pub untracked_mode: StatusUntrackedMode,
284}
285
/// How one tracked path in the worktree compares with the index/tree entry
/// that is expected there.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WorktreeEntryState {
    /// Present in the worktree with the expected mode and object id.
    Clean,
    /// Present, but something differs from the expected entry: its type, its
    /// mode, its filtered content, its symlink target, or its gitlink HEAD.
    Modified,
    /// Absent from the worktree — either the path itself or one of its
    /// parent directories is missing.
    Deleted,
}
298
/// Durability switches for an atomic metadata write; both default to `false`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct AtomicMetadataWriteOptions {
    /// Request an fsync of the written file.
    pub fsync_file: bool,
    /// Request an fsync of the containing directory.
    pub fsync_dir: bool,
}
304
/// What an atomic metadata write reports back about the file it wrote.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct AtomicMetadataWriteResult {
    /// Path of the written file.
    pub path: PathBuf,
    /// Length in bytes.
    pub len: u64,
    /// Modification time as a pair, when available.
    /// NOTE(review): presumably `(seconds, nanoseconds)` like the other mtime
    /// pairs in this file — confirm against the writer.
    pub mtime: Option<(u64, u64)>,
}
311
312/// Stage-0 index stat data that can prove a worktree path clean without
313/// re-reading and re-hashing it.
314///
315/// This is the public carrier for sley's racy-git shortcut. Callers that already
316/// parsed `.git/index` can build a probe from the matching [`IndexEntry`] and
317/// the index file's mtime, then pass it to [`worktree_entry_state`] or
318/// [`worktree_entry_state_by_git_path`]. The probe is trusted only when its path,
319/// mode, and object id match the expected entry and the cached stat is not
320/// racily clean; otherwise the helper falls back to the same content hashing
321/// path used by [`short_status_with_options`].
322#[derive(Debug, Clone, PartialEq, Eq)]
323pub struct IndexStatProbe {
324    entry: IndexEntry,
325    index_mtime: Option<(u64, u64)>,
326}
327
328/// Reusable stage-0 index stat probes for many worktree paths.
329///
330/// Prefer this over repeated [`IndexStatProbe::from_repository_index`] calls
331/// when an embedder needs to verify many paths. It parses `.git/index` once,
332/// records the index file mtime used for racy-git checks, and serves cheap
333/// per-path probes from memory.
334#[derive(Debug, Clone, PartialEq, Eq, Default)]
335pub struct IndexStatProbeCache {
336    entries: HashMap<Vec<u8>, IndexEntry>,
337    index_mtime: Option<(u64, u64)>,
338}
339
340impl IndexStatProbe {
341    /// Build a probe from a parsed stage-0 index entry and the index file's mtime
342    /// split as `(seconds, nanoseconds)`.
343    pub fn from_index_entry(entry: IndexEntry, index_mtime: Option<(u64, u64)>) -> Self {
344        Self { entry, index_mtime }
345    }
346
347    /// Build a probe from a parsed index entry and the path of the index file on
348    /// disk, using that file's mtime as the racy-clean reference timestamp.
349    pub fn from_index_entry_and_index_path(
350        entry: IndexEntry,
351        index_path: impl AsRef<Path>,
352    ) -> Self {
353        let index_mtime = fs::metadata(index_path.as_ref())
354            .ok()
355            .and_then(|metadata| file_mtime_parts(&metadata));
356        Self { entry, index_mtime }
357    }
358
359    /// Read this repository's index and return a probe for `git_path` when a
360    /// stage-0 entry exists.
361    ///
362    /// For repeated lookups prefer [`IndexStatProbeCache::from_repository_index`]
363    /// and [`IndexStatProbeCache::probe_for_git_path`]. This one-shot helper
364    /// keeps a small process-local cache for back-to-back calls against an
365    /// unchanged index, but the explicit cache makes ownership and invalidation
366    /// clearer for high-volume embedders.
367    pub fn from_repository_index(
368        git_dir: impl AsRef<Path>,
369        format: ObjectFormat,
370        git_path: &[u8],
371    ) -> Result<Option<Self>> {
372        let index_path = repository_index_path(git_dir);
373        cached_repository_index_stat_probe(&index_path, format, git_path)
374    }
375
376    /// The parsed index entry this probe was built from.
377    pub fn entry(&self) -> &IndexEntry {
378        &self.entry
379    }
380
381    /// The index file mtime used as the racy-clean reference timestamp.
382    pub fn index_mtime(&self) -> Option<(u64, u64)> {
383        self.index_mtime
384    }
385
386    fn stat_cache_for(
387        &self,
388        git_path: &[u8],
389        expected_oid: &ObjectId,
390        expected_mode: u32,
391    ) -> Option<IndexStatCache> {
392        if index_entry_stage(&self.entry) != 0
393            || self.entry.path.as_bytes() != git_path
394            || self.entry.oid != *expected_oid
395            || self.entry.mode != expected_mode
396        {
397            return None;
398        }
399        let mut entries = HashMap::new();
400        entries.insert(git_path.to_vec(), self.entry.clone());
401        Some(IndexStatCache {
402            entries,
403            index_mtime: self.index_mtime,
404        })
405    }
406}
407
408impl IndexStatProbeCache {
409    /// Build a reusable probe cache from an already parsed index and index-file
410    /// mtime.
411    pub fn from_index(index: &Index, index_mtime: Option<(u64, u64)>) -> Self {
412        Self {
413            entries: stage0_index_entries(index),
414            index_mtime,
415        }
416    }
417
418    /// Read this repository's index once and build reusable stat probes.
419    ///
420    /// A missing index returns an empty cache, matching the one-shot helper's
421    /// `Ok(None)` result for every path.
422    pub fn from_repository_index(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Result<Self> {
423        let index_path = repository_index_path(git_dir);
424        read_index_stat_probe_cache(&index_path, format)
425    }
426
427    /// Return a per-path probe for a stage-0 entry, if present.
428    pub fn probe_for_git_path(&self, git_path: &[u8]) -> Option<IndexStatProbe> {
429        self.entries
430            .get(git_path)
431            .cloned()
432            .map(|entry| IndexStatProbe {
433                entry,
434                index_mtime: self.index_mtime,
435            })
436    }
437
438    /// Whether this cache has a stage-0 entry for `git_path`.
439    pub fn contains_git_path(&self, git_path: &[u8]) -> bool {
440        self.entries.contains_key(git_path)
441    }
442
443    /// Number of stage-0 entries in the cache.
444    pub fn len(&self) -> usize {
445        self.entries.len()
446    }
447
448    /// Whether the cache has no stage-0 entries.
449    pub fn is_empty(&self) -> bool {
450        self.entries.is_empty()
451    }
452
453    /// The index file mtime used as the racy-clean reference timestamp.
454    pub fn index_mtime(&self) -> Option<(u64, u64)> {
455        self.index_mtime
456    }
457}
458
459#[derive(Clone)]
460struct CachedRepositoryIndexStatProbes {
461    index_path: PathBuf,
462    format: ObjectFormat,
463    len: u64,
464    mtime: Option<(u64, u64)>,
465    probes: IndexStatProbeCache,
466}
467
468static REPOSITORY_INDEX_STAT_PROBES: OnceLock<Mutex<Option<CachedRepositoryIndexStatProbes>>> =
469    OnceLock::new();
470
471fn cached_repository_index_stat_probe(
472    index_path: &Path,
473    format: ObjectFormat,
474    git_path: &[u8],
475) -> Result<Option<IndexStatProbe>> {
476    let metadata = match fs::metadata(index_path) {
477        Ok(metadata) => metadata,
478        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
479            if let Some(cache) = REPOSITORY_INDEX_STAT_PROBES.get()
480                && let Ok(mut guard) = cache.lock()
481            {
482                *guard = None;
483            }
484            return Ok(None);
485        }
486        Err(err) => return Err(err.into()),
487    };
488    let len = metadata.len();
489    let mtime = file_mtime_parts(&metadata);
490    let cache = REPOSITORY_INDEX_STAT_PROBES.get_or_init(|| Mutex::new(None));
491    if let Ok(guard) = cache.lock()
492        && let Some(cached) = guard.as_ref()
493        && cached.index_path == index_path
494        && cached.format == format
495        && cached.len == len
496        && cached.mtime == mtime
497    {
498        return Ok(cached.probes.probe_for_git_path(git_path));
499    }
500
501    let probes = read_index_stat_probe_cache_with_metadata(index_path, format, mtime)?;
502    let probe = probes.probe_for_git_path(git_path);
503    if let Ok(mut guard) = cache.lock() {
504        *guard = Some(CachedRepositoryIndexStatProbes {
505            index_path: index_path.to_path_buf(),
506            format,
507            len,
508            mtime,
509            probes: probes.clone(),
510        });
511    }
512    Ok(probe)
513}
514
515fn read_index_stat_probe_cache(
516    index_path: &Path,
517    format: ObjectFormat,
518) -> Result<IndexStatProbeCache> {
519    let metadata = match fs::metadata(index_path) {
520        Ok(metadata) => metadata,
521        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
522            return Ok(IndexStatProbeCache::default());
523        }
524        Err(err) => return Err(err.into()),
525    };
526    read_index_stat_probe_cache_with_metadata(index_path, format, file_mtime_parts(&metadata))
527}
528
529fn read_index_stat_probe_cache_with_metadata(
530    index_path: &Path,
531    format: ObjectFormat,
532    index_mtime: Option<(u64, u64)>,
533) -> Result<IndexStatProbeCache> {
534    let bytes = fs::read(index_path)?;
535    let index = Index::parse(&bytes, format)?;
536    Ok(IndexStatProbeCache::from_index(&index, index_mtime))
537}
538
539fn stage0_index_entries(index: &Index) -> HashMap<Vec<u8>, IndexEntry> {
540    let mut entries = HashMap::new();
541    for entry in &index.entries {
542        if index_entry_stage(entry) == 0 {
543            entries.insert(entry.path.as_bytes().to_vec(), entry.clone());
544        }
545    }
546    entries
547}
548
549#[derive(Debug, Clone, PartialEq, Eq)]
550pub struct CheckoutResult {
551    pub branch: String,
552    pub oid: ObjectId,
553    pub files: usize,
554}
555
/// Summary of a restore.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RestoreResult {
    /// Count of restored items.
    pub restored: usize,
}
560
/// Summary of a remove.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RemoveResult {
    /// Removed paths, each as raw bytes.
    pub removed: Vec<Vec<u8>>,
}
565
566#[derive(Debug, Clone, PartialEq, Eq)]
567pub struct MoveResult {
568    pub source: Vec<u8>,
569    pub destination: Vec<u8>,
570    pub skipped: bool,
571    pub fatal: Option<String>,
572    pub details: Vec<MoveDetail>,
573}
574
/// One source/destination pair within a move, with its skipped flag.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MoveDetail {
    /// Source path as raw bytes.
    pub source: Vec<u8>,
    /// Destination path as raw bytes.
    pub destination: Vec<u8>,
    /// Whether this pair was skipped.
    pub skipped: bool,
}
581
/// Location of the index file for the repository at `git_dir`.
///
/// When the `GIT_INDEX_FILE` environment variable is set, its value is
/// returned as-is; otherwise the result is `<git_dir>/index`.
pub fn repository_index_path(git_dir: impl AsRef<Path>) -> PathBuf {
    match env::var_os("GIT_INDEX_FILE") {
        Some(overridden) => PathBuf::from(overridden),
        None => git_dir.as_ref().join("index"),
    }
}
587
588pub fn read_repository_index(
589    git_dir: impl AsRef<Path>,
590    format: ObjectFormat,
591) -> Result<Option<Index>> {
592    let index_path = repository_index_path(git_dir);
593    if !index_path.exists() {
594        return Ok(None);
595    }
596    Ok(Some(Index::parse(&fs::read(index_path)?, format)?))
597}
598
599fn empty_index() -> Index {
600    Index {
601        version: 2,
602        entries: Vec::new(),
603        extensions: Vec::new(),
604        checksum: None,
605    }
606}
607
608/// Resolve the working-tree root for a repository identified by its git
609/// directory, returning `Ok(None)` for a bare repository.
610///
611/// This is the repository-intrinsic worktree resolution (it does *not* consult
612/// `GIT_WORK_TREE`/`GIT_DIR` or CLI overrides — those are the caller's job):
613///
614/// 0. if `core.bare` is true the repository is bare and `Ok(None)` is returned
615///    immediately — `core.bare` takes precedence, so a bare repo ignores
616///    `core.worktree` and the `.git`-parent fallback;
617/// 1. otherwise, a `core.worktree` setting in `<git_dir>/config` (absolute, or
618///    relative to the git directory), canonicalised;
619/// 2. otherwise, for a linked worktree (a git directory that has both a
620///    `commondir` and a `gitdir` administrative file), the directory containing
621///    the worktree's `.git` link, canonicalised;
622/// 3. otherwise, when the git directory is a `.git` directory, its parent (the
623///    ordinary non-bare layout) — returned verbatim, not canonicalised;
624/// 4. otherwise the repository is bare and `Ok(None)` is returned.
625///
626/// `Ok(None)` means specifically "bare" (case 0 or case 4). A [`GitError::Io`] is
627/// returned if a path that should exist cannot be canonicalised, and a
628/// [`GitError::InvalidPath`] if a `.git` directory has no parent (a malformed
629/// layout).
630pub fn worktree_root_for_git_dir(git_dir: &Path) -> Result<Option<PathBuf>> {
631    if let Ok(config) = sley_config::read_repo_config(git_dir, None) {
632        // A bare repository has no working tree, and `core.bare` takes precedence:
633        // a bare repo ignores `core.worktree`. Check it before any worktree
634        // resolution so a bare `.git`-named directory does not fall through to the
635        // "parent of .git" case below.
636        if config.get_bool("core", None, "bare") == Some(true) {
637            return Ok(None);
638        }
639        if let Some(worktree) = config.get("core", None, "worktree") {
640            let worktree = PathBuf::from(worktree);
641            let worktree = if worktree.is_absolute() {
642                worktree
643            } else {
644                git_dir.join(worktree)
645            };
646            return fs::canonicalize(worktree)
647                .map(Some)
648                .map_err(|err| GitError::Io(err.to_string()));
649        }
650    }
651    if git_dir.join("commondir").is_file() {
652        let gitdir_file = git_dir.join("gitdir");
653        if gitdir_file.is_file() {
654            let value = fs::read_to_string(&gitdir_file)?;
655            let worktree_git_file = resolve_worktree_admin_path(git_dir, value.trim());
656            if let Some(worktree) = worktree_git_file.parent() {
657                return fs::canonicalize(worktree)
658                    .map(Some)
659                    .map_err(|err| GitError::Io(err.to_string()));
660            }
661        }
662    }
663    if git_dir.file_name().and_then(|name| name.to_str()) != Some(".git") {
664        return Ok(None);
665    }
666    git_dir
667        .parent()
668        .map(Path::to_path_buf)
669        .map(Some)
670        .ok_or_else(|| GitError::InvalidPath("git dir has no parent worktree".into()))
671}
672
/// Interpret `value`, a path taken from an administrative file inside a git
/// directory (for instance the `gitdir` link of a linked worktree).
///
/// An absolute `value` is returned unchanged; a relative one is resolved
/// against `admin_dir`.
fn resolve_worktree_admin_path(admin_dir: &Path, value: &str) -> PathBuf {
    let candidate = Path::new(value);
    match candidate.is_absolute() {
        true => candidate.to_path_buf(),
        false => admin_dir.join(candidate),
    }
}
684
/// Reports whether the repository at `git_dir` is shallow, meaning a `shallow`
/// file (which records grafted commit boundaries, as produced by
/// `git clone --depth`) is present in the git directory.
pub fn is_shallow_repository(git_dir: &Path) -> bool {
    let shallow_file = git_dir.join("shallow");
    fs::metadata(shallow_file).is_ok()
}
690
/// Flags controlling a remove operation.
///
/// NOTE(review): the field names line up with `git rm`'s `-r`, `--cached`,
/// `--force`, `--dry-run`, and `--ignore-unmatch`; confirm the exact
/// semantics against the remove implementation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct RemoveOptions {
    /// Recursive flag.
    pub recursive: bool,
    /// Cached flag.
    pub cached: bool,
    /// Force flag.
    pub force: bool,
    /// Dry-run flag.
    pub dry_run: bool,
    /// Ignore-unmatch flag.
    pub ignore_unmatch: bool,
}
699
/// Flags controlling a move operation.
///
/// NOTE(review): the field names line up with `git mv`'s `-f`, `-n`, and
/// `-k`; confirm the exact semantics against the move implementation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct MoveOptions {
    /// Force flag.
    pub force: bool,
    /// Dry-run flag.
    pub dry_run: bool,
    /// Skip-errors flag.
    pub skip_errors: bool,
}
706
707impl ShortStatusEntry {
708    pub fn line(&self) -> String {
709        format!(
710            "{}{} {}",
711            self.index as char,
712            self.worktree as char,
713            String::from_utf8_lossy(&self.path)
714        )
715    }
716}
717
718pub fn add_paths_to_index(
719    worktree_root: impl AsRef<Path>,
720    git_dir: impl AsRef<Path>,
721    format: ObjectFormat,
722    paths: &[PathBuf],
723) -> Result<UpdateIndexResult> {
724    update_index_paths(
725        worktree_root,
726        git_dir,
727        format,
728        paths,
729        UpdateIndexOptions {
730            add: true,
731            remove: false,
732            force_remove: false,
733            chmod: None,
734            info_only: false,
735            ignore_skip_worktree_entries: false,
736        },
737    )
738}
739
740pub fn update_index_paths(
741    worktree_root: impl AsRef<Path>,
742    git_dir: impl AsRef<Path>,
743    format: ObjectFormat,
744    paths: &[PathBuf],
745    options: UpdateIndexOptions,
746) -> Result<UpdateIndexResult> {
747    let git_dir = git_dir.as_ref();
748    let index = read_repository_index(git_dir, format)?.unwrap_or_else(empty_index);
749    update_index_paths_with_index(worktree_root, git_dir, format, index, paths, options)
750}
751
752pub fn update_index_paths_with_index(
753    worktree_root: impl AsRef<Path>,
754    git_dir: impl AsRef<Path>,
755    format: ObjectFormat,
756    index: Index,
757    paths: &[PathBuf],
758    options: UpdateIndexOptions,
759) -> Result<UpdateIndexResult> {
760    let ordered = ordered_paths_from_plain(paths, options);
761    update_index_paths_impl(
762        worktree_root.as_ref(),
763        git_dir.as_ref(),
764        format,
765        index,
766        &ordered,
767        options,
768        None,
769        false,
770    )
771}
772
773/// Stamp a single uniform mode (from a batch-wide [`UpdateIndexOptions`]) onto
774/// every path. Used by the `git add`-style callers that genuinely apply one
775/// mode to all paths; the positional `git update-index <flag> <path>...` path
776/// instead snapshots a distinct mode per path in the CLI parse walk.
777fn ordered_paths_from_plain(paths: &[PathBuf], options: UpdateIndexOptions) -> Vec<UpdateIndexPath> {
778    let mode = options.path_mode();
779    paths
780        .iter()
781        .map(|path| UpdateIndexPath {
782            path: path.clone(),
783            mode,
784        })
785        .collect()
786}
787
788/// Stage an ordered list of paths, each carrying its own `--chmod` state, and
789/// (under `verbose`) print the `add`/`remove`/`chmod` action lines inline in
790/// command-line order. This is the entry point `git update-index <path>...`
791/// uses so that `--chmod=+x A --chmod=-x B --verbose` produces the interleaved
792/// `add 'A'` / `chmod +x 'A'` / `add 'B'` / `chmod -x 'B'` output git emits.
793pub fn update_index_ordered_paths_filtered(
794    worktree_root: impl AsRef<Path>,
795    git_dir: impl AsRef<Path>,
796    format: ObjectFormat,
797    paths: &[UpdateIndexPath],
798    options: UpdateIndexOptions,
799    config: &GitConfig,
800    verbose: bool,
801) -> Result<UpdateIndexResult> {
802    let git_dir = git_dir.as_ref();
803    let index = read_repository_index(git_dir, format)?.unwrap_or_else(empty_index);
804    update_index_ordered_paths_filtered_with_index(
805        worktree_root,
806        git_dir,
807        format,
808        index,
809        paths,
810        options,
811        config,
812        verbose,
813    )
814}
815
816pub fn update_index_ordered_paths_filtered_with_index(
817    worktree_root: impl AsRef<Path>,
818    git_dir: impl AsRef<Path>,
819    format: ObjectFormat,
820    index: Index,
821    paths: &[UpdateIndexPath],
822    options: UpdateIndexOptions,
823    config: &GitConfig,
824    verbose: bool,
825) -> Result<UpdateIndexResult> {
826    update_index_paths_impl(
827        worktree_root.as_ref(),
828        git_dir.as_ref(),
829        format,
830        index,
831        paths,
832        options,
833        Some(config),
834        verbose,
835    )
836}
837
838/// Like [`add_paths_to_index`], but runs the configured content filters
839/// (`core.autocrlf`/`text`/`eol` EOL conversion and `filter.<name>.clean`
840/// drivers) on each file's contents before hashing it into the object store.
841///
842/// `config` is the repository config used to resolve the filters; pass the
843/// parsed `<git_dir>/config` (the orchestrator typically already has this).
844pub fn add_paths_to_index_filtered(
845    worktree_root: impl AsRef<Path>,
846    git_dir: impl AsRef<Path>,
847    format: ObjectFormat,
848    paths: &[PathBuf],
849    config: &GitConfig,
850) -> Result<UpdateIndexResult> {
851    update_index_paths_filtered(
852        worktree_root,
853        git_dir,
854        format,
855        paths,
856        UpdateIndexOptions {
857            add: true,
858            remove: false,
859            force_remove: false,
860            chmod: None,
861            info_only: false,
862            ignore_skip_worktree_entries: false,
863        },
864        config,
865    )
866}
867
868/// Like [`update_index_paths`], but applies the clean-side content filters (see
869/// [`apply_clean_filter`]) to file contents before they are hashed/written.
870pub fn update_index_paths_filtered(
871    worktree_root: impl AsRef<Path>,
872    git_dir: impl AsRef<Path>,
873    format: ObjectFormat,
874    paths: &[PathBuf],
875    options: UpdateIndexOptions,
876    config: &GitConfig,
877) -> Result<UpdateIndexResult> {
878    let git_dir = git_dir.as_ref();
879    let index = read_repository_index(git_dir, format)?.unwrap_or_else(empty_index);
880    update_index_paths_filtered_with_index(
881        worktree_root,
882        git_dir,
883        format,
884        index,
885        paths,
886        options,
887        config,
888    )
889}
890
891pub fn update_index_paths_filtered_with_index(
892    worktree_root: impl AsRef<Path>,
893    git_dir: impl AsRef<Path>,
894    format: ObjectFormat,
895    index: Index,
896    paths: &[PathBuf],
897    options: UpdateIndexOptions,
898    config: &GitConfig,
899) -> Result<UpdateIndexResult> {
900    let ordered = ordered_paths_from_plain(paths, options);
901    update_index_paths_impl(
902        worktree_root.as_ref(),
903        git_dir.as_ref(),
904        format,
905        index,
906        &ordered,
907        options,
908        Some(config),
909        false,
910    )
911}
912
913pub fn add_update_all_tracked_filtered(
914    worktree_root: impl AsRef<Path>,
915    git_dir: impl AsRef<Path>,
916    format: ObjectFormat,
917    clean_config: &GitConfig,
918) -> Result<Vec<AddUpdateTrackedAction>> {
919    let worktree_root = worktree_root.as_ref();
920    let git_dir = git_dir.as_ref();
921    let index_path = repository_index_path(git_dir);
922    if !index_path.exists() {
923        return Ok(Vec::new());
924    }
925    let mut index = Index::parse(&fs::read(&index_path)?, format)?;
926    let index_mtime = fs::metadata(&index_path)
927        .ok()
928        .and_then(|metadata| file_mtime_parts(&metadata));
929    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
930    let prechecks = tracked_only_non_clean_prechecks_parallel(worktree_root, &index, &stat_cache)?;
931    if prechecks.is_empty() {
932        return Ok(Vec::new());
933    }
934
935    let pending = prechecks
936        .into_iter()
937        .map(|precheck| match precheck {
938            TrackedOnlyPrecheck::Deleted(idx) => {
939                (precheck, index.entries[idx].path.as_bytes().to_vec())
940            }
941            TrackedOnlyPrecheck::Slow(idx) => {
942                (precheck, index.entries[idx].path.as_bytes().to_vec())
943            }
944        })
945        .collect::<Vec<_>>();
946    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
947    let mut actions = Vec::new();
948    let mut index_dirty = false;
949    let mut clean_filter = None;
950    for (precheck, path) in pending {
951        match precheck {
952            TrackedOnlyPrecheck::Deleted(_) => {
953                if remove_index_entries_with_path(&mut index.entries, &path) {
954                    actions.push(AddUpdateTrackedAction::Remove(path));
955                    index_dirty = true;
956                }
957            }
958            TrackedOnlyPrecheck::Slow(_) => {
959                let (action, dirty) = add_update_tracked_path(
960                    worktree_root,
961                    git_dir,
962                    format,
963                    Some(clean_config),
964                    &odb,
965                    &stat_cache,
966                    &mut clean_filter,
967                    &mut index,
968                    &path,
969                )?;
970                index_dirty |= dirty;
971                if let Some(action) = action {
972                    actions.push(action);
973                }
974            }
975        }
976    }
977
978    if index_dirty {
979        normalize_index_version_for_extended_flags(&mut index);
980        index.extensions = index_extensions_without_cache_tree(&index.extensions);
981        fs::write(index_path, index.write(format)?)?;
982    }
983    Ok(actions)
984}
985
986pub fn add_exact_tracked_path_from_disk(
987    worktree_root: impl AsRef<Path>,
988    git_dir: impl AsRef<Path>,
989    format: ObjectFormat,
990    git_path: &[u8],
991    ignore_removal: bool,
992    config_parameters_env: Option<&str>,
993) -> Result<AddExactTrackedPathResult> {
994    let worktree_root = worktree_root.as_ref();
995    let git_dir = git_dir.as_ref();
996    let index_path = repository_index_path(git_dir);
997    let index_metadata = match fs::metadata(&index_path) {
998        Ok(metadata) => metadata,
999        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
1000            return Ok(AddExactTrackedPathResult::Unsupported);
1001        }
1002        Err(err) => return Err(err.into()),
1003    };
1004    let mut index_bytes = fs::read(&index_path)?;
1005    let Some(raw) = raw_exact_index_entry(&index_bytes, format, git_path)? else {
1006        return Ok(AddExactTrackedPathResult::Unsupported);
1007    };
1008    if !raw_exact_entry_can_patch(&raw, git_path) {
1009        return Ok(AddExactTrackedPathResult::Unsupported);
1010    }
1011    if !raw_index_extensions_are_filterable(&index_bytes, raw.entries_end, raw.checksum_offset) {
1012        return Ok(AddExactTrackedPathResult::Unsupported);
1013    }
1014
1015    let entry = raw.entry.clone();
1016    if entry.stage() != Stage::Normal || index_entry_skip_worktree(&entry) || entry.mode == 0o160000
1017    {
1018        return Ok(AddExactTrackedPathResult::Unsupported);
1019    }
1020    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
1021    let metadata = match fs::symlink_metadata(&absolute) {
1022        Ok(metadata) => metadata,
1023        Err(err)
1024            if matches!(
1025                err.kind(),
1026                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
1027            ) =>
1028        {
1029            return Ok(if ignore_removal {
1030                AddExactTrackedPathResult::Handled(None)
1031            } else {
1032                AddExactTrackedPathResult::Unsupported
1033            });
1034        }
1035        Err(err) => return Err(err.into()),
1036    };
1037    let file_type = metadata.file_type();
1038    if metadata.is_dir() || !(file_type.is_file() || file_type.is_symlink()) {
1039        return Ok(AddExactTrackedPathResult::Unsupported);
1040    }
1041    let index_mtime = file_mtime_parts(&index_metadata);
1042    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
1043    if stat_cache.reuse_index_entry(&entry, &metadata).is_some() {
1044        return Ok(AddExactTrackedPathResult::Handled(None));
1045    }
1046
1047    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
1048    let is_symlink = file_type.is_symlink();
1049    let body = if is_symlink {
1050        symlink_target_bytes(&absolute)?
1051    } else {
1052        let body = fs::read(&absolute)?;
1053        // Resolve the effective config WITH command-line `-c` / `--config-env`
1054        // overrides folded in (e.g. upstream t0027's `git -c core.autocrlf=true
1055        // add`); the plain repo-config reader would drop them and the fast path
1056        // would convert/warn against the wrong EOL policy.
1057        let config = sley_config::read_repo_config(git_dir, config_parameters_env)
1058            .unwrap_or_default();
1059        let mut clean_filter = None;
1060        let clean_filter =
1061            tracked_only_clean_filter_with_config(&mut clean_filter, worktree_root, &config);
1062        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
1063        let checks =
1064            clean_filter
1065                .matcher
1066                .attributes_for_path(git_path, &clean_filter.requested, false);
1067        // git's index update folds in `global_conv_flags_eol`, so `git add`
1068        // emits the `core.safecrlf` round-trip warning (default: warn). The
1069        // current index blob (`entry.oid`) drives the auto-crlf
1070        // `has_crlf_in_index` decision. Mirror the slow `add_update_tracked_path`
1071        // path here so the exact-patch fast path does not silently drop the
1072        // warning (upstream t0020 'safecrlf: print warning only once').
1073        let conv_flags = ConvFlags::from_config(&clean_filter.config);
1074        let index_blob = match conv_flags {
1075            ConvFlags::Off => SafeCrlfIndexBlob::None,
1076            _ => SafeCrlfIndexBlob::Lookup {
1077                odb: &odb,
1078                oid: entry.oid,
1079            },
1080        };
1081        apply_clean_filter_with_attributes_cow_safecrlf(
1082            &clean_filter.config,
1083            &checks,
1084            git_path,
1085            &body,
1086            conv_flags,
1087            index_blob,
1088        )?
1089        .into_owned()
1090    };
1091    let object = EncodedObject::new(ObjectType::Blob, body);
1092    let oid = object.object_id(format)?;
1093    if oid != entry.oid {
1094        odb.write_object(object)?;
1095    }
1096
1097    let mut updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
1098    if is_symlink {
1099        updated_entry.mode = 0o120000;
1100    }
1101    if updated_entry == entry {
1102        return Ok(AddExactTrackedPathResult::Handled(None));
1103    }
1104    if !raw_updated_entry_can_patch(&entry, &updated_entry, git_path) {
1105        return Ok(AddExactTrackedPathResult::Unsupported);
1106    }
1107    patch_raw_index_entry(&mut index_bytes, format, &raw, &updated_entry)?;
1108    fs::write(index_path, index_bytes)?;
1109    let changed = updated_entry.oid != entry.oid || updated_entry.mode != entry.mode;
1110    Ok(AddExactTrackedPathResult::Handled(
1111        changed.then(|| AddUpdateTrackedAction::Add(git_path.to_vec())),
1112    ))
1113}
1114
1115pub fn add_exact_tracked_path_with_index(
1116    worktree_root: impl AsRef<Path>,
1117    git_dir: impl AsRef<Path>,
1118    format: ObjectFormat,
1119    mut index: Index,
1120    git_path: &[u8],
1121) -> Result<Option<AddUpdateTrackedAction>> {
1122    let worktree_root = worktree_root.as_ref();
1123    let git_dir = git_dir.as_ref();
1124    let range = index_entries_path_range(&index.entries, git_path);
1125    if range.len() != 1 {
1126        return Ok(None);
1127    }
1128    let entry = &index.entries[range.start];
1129    if entry.stage() != Stage::Normal || index_entry_skip_worktree(entry) {
1130        return Ok(None);
1131    }
1132    let index_path = repository_index_path(git_dir);
1133    let index_mtime = fs::metadata(&index_path)
1134        .ok()
1135        .and_then(|metadata| file_mtime_parts(&metadata));
1136    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
1137    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
1138    let mut clean_filter = None;
1139    let (action, dirty) = add_update_tracked_path(
1140        worktree_root,
1141        git_dir,
1142        format,
1143        None,
1144        &odb,
1145        &stat_cache,
1146        &mut clean_filter,
1147        &mut index,
1148        git_path,
1149    )?;
1150    if dirty {
1151        normalize_index_version_for_extended_flags(&mut index);
1152        index.extensions = index_extensions_without_cache_tree(&index.extensions);
1153        fs::write(index_path, index.write(format)?)?;
1154    }
1155    Ok(action)
1156}
1157
1158struct RawExactIndexEntry {
1159    version: u32,
1160    entry: IndexEntry,
1161    entry_start: usize,
1162    entries_end: usize,
1163    checksum_offset: usize,
1164}
1165
1166fn raw_exact_index_entry(
1167    bytes: &[u8],
1168    format: ObjectFormat,
1169    git_path: &[u8],
1170) -> Result<Option<RawExactIndexEntry>> {
1171    let hash_len = format.raw_len();
1172    if bytes.len() < 12 + hash_len {
1173        return Err(GitError::InvalidFormat("index header too short".into()));
1174    }
1175    let checksum_offset = bytes.len() - hash_len;
1176    let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
1177    let expected_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
1178    if actual_checksum != expected_checksum {
1179        return Err(GitError::InvalidFormat(format!(
1180            "index checksum mismatch: expected {expected_checksum}, got {actual_checksum}"
1181        )));
1182    }
1183    if &bytes[..4] != b"DIRC" {
1184        return Err(GitError::InvalidFormat("missing DIRC signature".into()));
1185    }
1186    let version = u32_from_be(&bytes[4..8]);
1187    if !(2..=3).contains(&version) {
1188        return Ok(None);
1189    }
1190    let count = u32_from_be(&bytes[8..12]) as usize;
1191    let mut offset = 12;
1192    let mut found = None;
1193    for _ in 0..count {
1194        let entry_header_len = 40 + hash_len + 2;
1195        if checksum_offset.saturating_sub(offset) < entry_header_len {
1196            return Err(GitError::InvalidFormat("truncated index entry".into()));
1197        }
1198        let start = offset;
1199        let oid_start = offset + 40;
1200        let oid_end = oid_start + hash_len;
1201        let flags = u16_from_be(&bytes[oid_end..oid_end + 2]);
1202        offset = oid_end + 2;
1203        let flags_extended = if flags & INDEX_FLAG_EXTENDED != 0 {
1204            if checksum_offset.saturating_sub(offset) < 2 {
1205                return Err(GitError::InvalidFormat(
1206                    "truncated index extended flags".into(),
1207                ));
1208            }
1209            let flags_extended = u16_from_be(&bytes[offset..offset + 2]);
1210            offset += 2;
1211            flags_extended
1212        } else {
1213            0
1214        };
1215        let path_start = offset;
1216        while bytes.get(offset).copied() != Some(0) {
1217            offset += 1;
1218            if offset >= checksum_offset {
1219                return Err(GitError::InvalidFormat("unterminated index path".into()));
1220            }
1221        }
1222        let path = &bytes[path_start..offset];
1223        offset += 1;
1224        while (offset - start) % 8 != 0 {
1225            offset += 1;
1226            if offset > checksum_offset {
1227                return Err(GitError::InvalidFormat("truncated index padding".into()));
1228            }
1229        }
1230        if path == git_path {
1231            if found.is_some() {
1232                return Ok(None);
1233            }
1234            let oid = ObjectId::from_raw(format, &bytes[oid_start..oid_end])?;
1235            found = Some(RawExactIndexEntry {
1236                version,
1237                entry: IndexEntry {
1238                    ctime_seconds: u32_from_be(&bytes[start..start + 4]),
1239                    ctime_nanoseconds: u32_from_be(&bytes[start + 4..start + 8]),
1240                    mtime_seconds: u32_from_be(&bytes[start + 8..start + 12]),
1241                    mtime_nanoseconds: u32_from_be(&bytes[start + 12..start + 16]),
1242                    dev: u32_from_be(&bytes[start + 16..start + 20]),
1243                    ino: u32_from_be(&bytes[start + 20..start + 24]),
1244                    mode: u32_from_be(&bytes[start + 24..start + 28]),
1245                    uid: u32_from_be(&bytes[start + 28..start + 32]),
1246                    gid: u32_from_be(&bytes[start + 32..start + 36]),
1247                    size: u32_from_be(&bytes[start + 36..start + 40]),
1248                    oid,
1249                    flags,
1250                    flags_extended,
1251                    path: BString::from(path),
1252                },
1253                entry_start: start,
1254                entries_end: 0,
1255                checksum_offset,
1256            });
1257        } else if found.is_none() && path > git_path {
1258            return Ok(None);
1259        }
1260    }
1261    if let Some(mut found) = found {
1262        found.entries_end = offset;
1263        Ok(Some(found))
1264    } else {
1265        Ok(None)
1266    }
1267}
1268
1269fn raw_exact_entry_can_patch(raw: &RawExactIndexEntry, git_path: &[u8]) -> bool {
1270    raw.version == 2
1271        && raw.entry.flags_extended == 0
1272        && raw.entry.flags & INDEX_FLAG_EXTENDED == 0
1273        && raw.entry.flags == index_flags(git_path.len(), 0)
1274        && raw.entry.path.as_bytes() == git_path
1275}
1276
1277fn raw_updated_entry_can_patch(
1278    previous: &IndexEntry,
1279    updated: &IndexEntry,
1280    git_path: &[u8],
1281) -> bool {
1282    updated.path.as_bytes() == git_path
1283        && updated.flags_extended == 0
1284        && updated.flags & INDEX_FLAG_EXTENDED == 0
1285        && updated.flags == previous.flags
1286}
1287
1288fn raw_index_extensions_are_filterable(bytes: &[u8], entries_end: usize, checksum_offset: usize) -> bool {
1289    let mut offset = entries_end;
1290    while offset < checksum_offset {
1291        if checksum_offset.saturating_sub(offset) < 8 {
1292            return false;
1293        }
1294        let size = u32_from_be(&bytes[offset + 4..offset + 8]) as usize;
1295        let Some(end) = offset.checked_add(8).and_then(|offset| offset.checked_add(size)) else {
1296            return false;
1297        };
1298        if end > checksum_offset {
1299            return false;
1300        }
1301        offset = end;
1302    }
1303    true
1304}
1305
1306fn patch_raw_index_entry(
1307    bytes: &mut Vec<u8>,
1308    format: ObjectFormat,
1309    raw: &RawExactIndexEntry,
1310    entry: &IndexEntry,
1311) -> Result<()> {
1312    let hash_len = format.raw_len();
1313    let start = raw.entry_start;
1314    bytes[start..start + 4].copy_from_slice(&entry.ctime_seconds.to_be_bytes());
1315    bytes[start + 4..start + 8].copy_from_slice(&entry.ctime_nanoseconds.to_be_bytes());
1316    bytes[start + 8..start + 12].copy_from_slice(&entry.mtime_seconds.to_be_bytes());
1317    bytes[start + 12..start + 16].copy_from_slice(&entry.mtime_nanoseconds.to_be_bytes());
1318    bytes[start + 16..start + 20].copy_from_slice(&entry.dev.to_be_bytes());
1319    bytes[start + 20..start + 24].copy_from_slice(&entry.ino.to_be_bytes());
1320    bytes[start + 24..start + 28].copy_from_slice(&entry.mode.to_be_bytes());
1321    bytes[start + 28..start + 32].copy_from_slice(&entry.uid.to_be_bytes());
1322    bytes[start + 32..start + 36].copy_from_slice(&entry.gid.to_be_bytes());
1323    bytes[start + 36..start + 40].copy_from_slice(&entry.size.to_be_bytes());
1324    bytes[start + 40..start + 40 + hash_len].copy_from_slice(entry.oid.as_bytes());
1325    bytes[start + 40 + hash_len..start + 40 + hash_len + 2]
1326        .copy_from_slice(&entry.flags.to_be_bytes());
1327
1328    let mut extension_offset = raw.entries_end;
1329    let mut removed_cache_tree = false;
1330    let mut rewritten = Vec::new();
1331    while extension_offset < raw.checksum_offset {
1332        let signature = &bytes[extension_offset..extension_offset + 4];
1333        let size = u32_from_be(&bytes[extension_offset + 4..extension_offset + 8]) as usize;
1334        let end = extension_offset + 8 + size;
1335        if signature == b"TREE" {
1336            removed_cache_tree = true;
1337        } else {
1338            rewritten.extend_from_slice(&bytes[extension_offset..end]);
1339        }
1340        extension_offset = end;
1341    }
1342
1343    if removed_cache_tree {
1344        bytes.truncate(raw.entries_end);
1345        bytes.extend_from_slice(&rewritten);
1346        let checksum = sley_core::digest_bytes(format, bytes)?;
1347        bytes.extend_from_slice(checksum.as_bytes());
1348    } else {
1349        let checksum = sley_core::digest_bytes(format, &bytes[..raw.checksum_offset])?;
1350        bytes[raw.checksum_offset..raw.checksum_offset + hash_len]
1351            .copy_from_slice(checksum.as_bytes());
1352    }
1353    Ok(())
1354}
1355
/// Decodes the first four bytes of `bytes` as a big-endian `u32`.
///
/// Panics when fewer than four bytes are supplied; extra bytes are ignored.
fn u32_from_be(bytes: &[u8]) -> u32 {
    let mut word = [0u8; 4];
    word.copy_from_slice(&bytes[..4]);
    u32::from_be_bytes(word)
}
1359
/// Decodes the first two bytes of `bytes` as a big-endian `u16`.
///
/// Panics when fewer than two bytes are supplied; extra bytes are ignored.
fn u16_from_be(bytes: &[u8]) -> u16 {
    let mut half = [0u8; 2];
    half.copy_from_slice(&bytes[..2]);
    u16::from_be_bytes(half)
}
1363
1364fn add_update_tracked_path(
1365    worktree_root: &Path,
1366    git_dir: &Path,
1367    format: ObjectFormat,
1368    clean_config: Option<&GitConfig>,
1369    odb: &FileObjectDatabase,
1370    stat_cache: &IndexStatCache,
1371    clean_filter: &mut Option<TrackedOnlyCleanFilter>,
1372    index: &mut Index,
1373    git_path: &[u8],
1374) -> Result<(Option<AddUpdateTrackedAction>, bool)> {
1375    let range = index_entries_path_range(&index.entries, git_path);
1376    if range.is_empty() {
1377        return Ok((None, false));
1378    }
1379    let entry = index.entries[range.start].clone();
1380    if entry.stage() != Stage::Normal {
1381        return Ok((None, false));
1382    }
1383    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
1384    let metadata = match fs::symlink_metadata(&absolute) {
1385        Ok(metadata) => metadata,
1386        Err(err)
1387            if matches!(
1388                err.kind(),
1389                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
1390            ) =>
1391        {
1392            if remove_index_entries_with_path(&mut index.entries, git_path) {
1393                return Ok((
1394                    Some(AddUpdateTrackedAction::Remove(git_path.to_vec())),
1395                    true,
1396                ));
1397            }
1398            return Ok((None, false));
1399        }
1400        Err(err) => return Err(err.into()),
1401    };
1402    if metadata.is_dir() {
1403        if entry.mode != 0o160000 {
1404            return Ok((None, false));
1405        }
1406        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(entry.oid);
1407        let mut updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
1408        updated_entry.mode = 0o160000;
1409        let changed = updated_entry.oid != entry.oid || updated_entry.mode != entry.mode;
1410        if updated_entry != entry {
1411            replace_index_entries_with_entry(&mut index.entries, updated_entry);
1412            return Ok((
1413                changed.then(|| AddUpdateTrackedAction::Add(git_path.to_vec())),
1414                true,
1415            ));
1416        }
1417        return Ok((None, false));
1418    }
1419    if !(metadata.is_file() || metadata.file_type().is_symlink()) {
1420        return Ok((None, false));
1421    }
1422    if stat_cache.reuse_index_entry(&entry, &metadata).is_some() {
1423        return Ok((None, false));
1424    }
1425
1426    let is_symlink = metadata.file_type().is_symlink();
1427    let body = if is_symlink {
1428        symlink_target_bytes(&absolute)?
1429    } else {
1430        let body = fs::read(&absolute)?;
1431        let clean_filter = match clean_config {
1432            Some(config) => {
1433                tracked_only_clean_filter_with_config(clean_filter, worktree_root, config)
1434            }
1435            None => tracked_only_clean_filter(clean_filter, worktree_root, git_dir),
1436        };
1437        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
1438        let checks =
1439            clean_filter
1440                .matcher
1441                .attributes_for_path(git_path, &clean_filter.requested, false);
1442        // git's `add -u` index update folds in `global_conv_flags_eol`, so emit
1443        // the `core.safecrlf` round-trip warning (default: warn). The current
1444        // index blob (`entry.oid`) drives the auto-crlf `has_crlf_in_index`
1445        // decision.
1446        let conv_flags = ConvFlags::from_config(&clean_filter.config);
1447        let index_blob = match conv_flags {
1448            ConvFlags::Off => SafeCrlfIndexBlob::None,
1449            _ => SafeCrlfIndexBlob::Lookup {
1450                odb,
1451                oid: entry.oid,
1452            },
1453        };
1454        apply_clean_filter_with_attributes_cow_safecrlf(
1455            &clean_filter.config,
1456            &checks,
1457            git_path,
1458            &body,
1459            conv_flags,
1460            index_blob,
1461        )?
1462        .into_owned()
1463    };
1464    let object = EncodedObject::new(ObjectType::Blob, body);
1465    let oid = object.object_id(format)?;
1466    if oid != entry.oid {
1467        odb.write_object(object)?;
1468    }
1469    let mut updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
1470    if is_symlink {
1471        updated_entry.mode = 0o120000;
1472    }
1473    let changed = updated_entry.oid != entry.oid || updated_entry.mode != entry.mode;
1474    if updated_entry != entry {
1475        replace_index_entries_with_entry(&mut index.entries, updated_entry);
1476        return Ok((
1477            changed.then(|| AddUpdateTrackedAction::Add(git_path.to_vec())),
1478            true,
1479        ));
1480    }
1481    Ok((None, false))
1482}
1483
1484enum UpdateIndexCleanFilter {
1485    Full(AttributeMatcher),
1486    PathLocal,
1487}
1488
1489fn index_entries_path_range(entries: &[IndexEntry], path: &[u8]) -> std::ops::Range<usize> {
1490    let mut start = match entries.binary_search_by(|entry| entry.path.as_bytes().cmp(path)) {
1491        Ok(index) => index,
1492        Err(insert) => return insert..insert,
1493    };
1494    while start > 0 && entries[start - 1].path.as_bytes() == path {
1495        start -= 1;
1496    }
1497    let mut end = start;
1498    while end < entries.len() && entries[end].path.as_bytes() == path {
1499        end += 1;
1500    }
1501    start..end
1502}
1503
1504fn remove_index_entries_with_path(entries: &mut Vec<IndexEntry>, path: &[u8]) -> bool {
1505    let range = index_entries_path_range(entries, path);
1506    if range.is_empty() {
1507        return false;
1508    }
1509    entries.drain(range);
1510    true
1511}
1512
1513/// Remove every index entry whose path lives *under* `name/` (a strict
1514/// directory-prefix collision). Mirrors git's `has_file_name`
1515/// (read-cache.c): when a *file* entry `a/b` is being added, any entry
1516/// `a/b/...` already in the index would produce a tree that records `a/b`
1517/// both as a blob and as a tree — `write-tree` would emit a malformed tree.
1518/// Entries are sorted by path, so the conflicting children form a contiguous
1519/// run immediately after `name`'s insertion point.
1520fn remove_index_entries_under_dir(entries: &mut Vec<IndexEntry>, name: &[u8]) {
1521    let start = match entries.binary_search_by(|entry| entry.path.as_bytes().cmp(name)) {
1522        Ok(found) => found + 1,
1523        Err(insert) => insert,
1524    };
1525    let mut end = start;
1526    while end < entries.len() {
1527        let candidate = entries[end].path.as_bytes();
1528        // `candidate` is under `name/` iff it is strictly longer, shares the
1529        // `name` prefix, and the next byte is the path separator.
1530        if candidate.len() > name.len()
1531            && candidate[name.len()] == b'/'
1532            && candidate[..name.len()] == *name
1533        {
1534            end += 1;
1535        } else {
1536            break;
1537        }
1538    }
1539    if end > start {
1540        entries.drain(start..end);
1541    }
1542}
1543
1544/// Remove any *file* entry that is a strict directory-prefix of `name` (e.g.
1545/// when adding `a/b/c`, drop a file entry `a/b` or `a`). Mirrors git's
1546/// `has_dir_name` (read-cache.c): such an entry would make the resulting tree
1547/// record the prefix both as a blob and as the directory containing `name`.
1548/// We walk every parent directory of `name`, longest first; the moment a
1549/// real subdirectory already exists at a prefix, no shorter prefix can
1550/// conflict, so we stop early (git's "already matches the sub-directory"
1551/// trivial optimization).
1552fn remove_index_dir_name_conflicts(entries: &mut Vec<IndexEntry>, name: &[u8]) {
1553    let mut slash = name.len();
1554    // Walk back over each '/' (longest parent dir first) until the path has no
1555    // more components.
1556    while let Some(pos) = name[..slash].iter().rposition(|&byte| byte == b'/') {
1557        slash = pos;
1558        let prefix = &name[..slash];
1559        match entries.binary_search_by(|entry| entry.path.as_bytes().cmp(prefix)) {
1560            Ok(found) => {
1561                // A file entry sits exactly at this directory prefix — drop it.
1562                entries.remove(found);
1563            }
1564            Err(insert) => {
1565                // No file at `prefix`. If a child `prefix/...` already exists,
1566                // the directory is established and nothing at this prefix (or
1567                // any shorter one) can conflict; stop.
1568                if insert < entries.len() {
1569                    let candidate = entries[insert].path.as_bytes();
1570                    if candidate.len() > prefix.len()
1571                        && candidate[prefix.len()] == b'/'
1572                        && candidate[..prefix.len()] == *prefix
1573                    {
1574                        break;
1575                    }
1576                }
1577            }
1578        }
1579    }
1580}
1581
1582fn replace_index_entries_with_entry(entries: &mut Vec<IndexEntry>, entry: IndexEntry) {
1583    let path = entry.path.as_bytes().to_vec();
1584    // Enforce directory/file replacement *before* computing the insert
1585    // position: git's `add_index_entry_with_check` removes the conflicting
1586    // entries, then recomputes where the new entry lands. Adding the entry
1587    // as a file drops any `path/...` children; adding it drops any file that
1588    // is a directory-prefix of `path`. Skipping this leaves a D/F-corrupt
1589    // index that `write-tree` turns into a malformed tree.
1590    remove_index_entries_under_dir(entries, &path);
1591    remove_index_dir_name_conflicts(entries, &path);
1592    let range = index_entries_path_range(entries, &path);
1593    if range.is_empty() {
1594        entries.insert(range.start, entry);
1595    } else {
1596        entries.splice(range, [entry]);
1597    }
1598}
1599
1600fn update_index_paths_impl(
1601    worktree_root: &Path,
1602    git_dir: &Path,
1603    format: ObjectFormat,
1604    mut index: Index,
1605    paths: &[UpdateIndexPath],
1606    options: UpdateIndexOptions,
1607    clean_config: Option<&GitConfig>,
1608    verbose: bool,
1609) -> Result<UpdateIndexResult> {
1610    let index_path = repository_index_path(git_dir);
1611    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
1612    // For small batches, read only each path's `.gitattributes` chain; a
1613    // whole-worktree matcher can dominate `add -u` when only a few files are
1614    // dirty in a huge checkout. Large batches still amortize the full matcher.
1615    let clean_filter = match clean_config {
1616        Some(_) if paths.len() >= 64 => Some(UpdateIndexCleanFilter::Full(
1617            AttributeMatcher::from_worktree_root(worktree_root)?,
1618        )),
1619        Some(_) => Some(UpdateIndexCleanFilter::PathLocal),
1620        None => None,
1621    };
1622    // git's index-update path (object-file.c `get_conv_flags`) folds in
1623    // `global_conv_flags_eol`, so `git add`/`commit` emit the `core.safecrlf`
1624    // round-trip warning (default: warn). It only applies when content filters
1625    // run at all (i.e. when we have a config).
1626    let conv_flags = clean_config.map_or(ConvFlags::Off, ConvFlags::from_config);
1627    let requested_filter_attrs = filter_attribute_names();
1628    let mut updated = Vec::new();
1629    let mut reports: Vec<String> = Vec::new();
1630    for update_path in paths {
1631        let path = &update_path.path;
1632        // Each path carries the sticky mode that was in effect when it was
1633        // parsed on the command line (git processes argv left-to-right). Read
1634        // the action from the path's own mode, NOT a batch-wide flag, so
1635        // `--add foo --force-remove bar` adds foo and force-removes bar.
1636        let path_mode = update_path.mode;
1637        let path_chmod = path_mode.chmod;
1638        let absolute = if path.is_absolute() {
1639            path.clone()
1640        } else {
1641            worktree_root.join(path)
1642        };
1643        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
1644            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
1645        })?;
1646        let git_path = git_path_bytes(relative)?;
1647        if path_mode.force_remove {
1648            remove_index_entries_with_path(&mut index.entries, &git_path);
1649            // git's update_one() reports `remove` for a --force-remove path.
1650            reports.push(format!("remove '{}'", String::from_utf8_lossy(&git_path)));
1651            continue;
1652        }
1653        let existing_range = index_entries_path_range(&index.entries, &git_path);
1654        if index.entries[existing_range.clone()]
1655            .iter()
1656            .any(index_entry_skip_worktree)
1657        {
1658            if path_mode.remove && !options.ignore_skip_worktree_entries {
1659                index.entries.drain(existing_range);
1660            }
1661            continue;
1662        }
1663        // lstat (not stat): a symlink must be inspected as the link itself, never
1664        // followed to its target. `Path::exists`/`fs::metadata` both stat through
1665        // the link, which makes a symlink-to-directory look like a directory
1666        // (fs::read then fails with "Is a directory") and a symlink-to-file get
1667        // staged with the target's content + a regular-file mode. git stages a
1668        // symlink as mode 120000 whose blob is the link target string, regardless
1669        // of what (if anything) the target resolves to.
1670        let symlink_metadata = match fs::symlink_metadata(&absolute) {
1671            Ok(metadata) => Some(metadata),
1672            // ENOTDIR (a leading path component is now a file, e.g. staging the
1673            // stale `a/b/c` entry after `a/b` became a regular file in a D/F
1674            // flip) means the path no longer exists as a file — git's lstat
1675            // returns ENOTDIR here and treats it exactly like ENOENT. Fold both
1676            // into the "missing" arm so the `--remove` path drops the stale
1677            // entry instead of aborting the whole add with an I/O error.
1678            Err(err)
1679                if matches!(
1680                    err.kind(),
1681                    std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
1682                ) =>
1683            {
1684                None
1685            }
1686            Err(err) => return Err(err.into()),
1687        };
1688        let Some(metadata) = symlink_metadata else {
1689            if path_mode.remove {
1690                remove_index_entries_with_path(&mut index.entries, &git_path);
1691                // git's update_one() unconditionally reports `add '<path>'`
1692                // after process_path(), even when the missing file was removed
1693                // from the index via the `--remove` (not --force-remove) path.
1694                reports.push(format!("add '{}'", String::from_utf8_lossy(&git_path)));
1695                continue;
1696            }
1697            print_update_index_path_error(&git_path, "does not exist and --remove not passed");
1698            return Err(GitError::Exit(128));
1699        };
1700        if !path_mode.add && index_entries_path_range(&index.entries, &git_path).is_empty() {
1701            print_update_index_path_error(
1702                &git_path,
1703                "cannot add to the index - missing --add option?",
1704            );
1705            return Err(GitError::Exit(128));
1706        }
1707        if metadata.is_dir() {
1708            // A directory is stageable only as a gitlink: when it is an
1709            // embedded repository with a commit checked out, git records a
1710            // mode-160000 entry whose oid is that commit (no object is
1711            // written). Otherwise it errors — with upstream's exact messages
1712            // for the embedded-repo-without-commit and plain-directory cases
1713            // (object-file.c index_path / builtin/update-index.c
1714            // process_directory).
1715            let display = String::from_utf8_lossy(&git_path).into_owned();
1716            let has_dot_git = absolute.join(".git").exists();
1717            let Some(head_oid) = sley_diff_merge::gitlink_head_oid(&absolute, format) else {
1718                if has_dot_git {
1719                    eprintln!("error: '{display}' does not have a commit checked out");
1720                } else {
1721                    eprintln!("error: {display}: is a directory - add files inside instead");
1722                }
1723                eprintln!("fatal: Unable to process path {display}");
1724                return Err(GitError::Exit(128));
1725            };
1726            if path_chmod.is_some() {
1727                eprintln!(
1728                    "fatal: git update-index: cannot chmod {}x '{display}'",
1729                    if path_chmod == Some(true) { '+' } else { '-' },
1730                );
1731                return Err(GitError::Exit(128));
1732            }
1733            let mut entry = index_entry_from_metadata(git_path.clone(), head_oid, &metadata);
1734            entry.mode = 0o160000;
1735            reports.push(format!("add '{display}'"));
1736            replace_index_entries_with_entry(&mut index.entries, entry);
1737            updated.push(head_oid);
1738            continue;
1739        }
1740        let is_symlink = metadata.file_type().is_symlink();
1741        let body = if is_symlink {
1742            // The blob is the raw link target bytes; clean filters never apply to
1743            // a symlink (git treats it as binary content, not a text path).
1744            symlink_target_bytes(&absolute)?
1745        } else {
1746            let body = fs::read(&absolute)?;
1747            // The safecrlf auto-crlf decision needs the path's *current* index
1748            // blob (git's `has_crlf_in_index`); the stage-0 entry, if any, has it.
1749            let index_blob = match conv_flags {
1750                ConvFlags::Off => SafeCrlfIndexBlob::None,
1751                _ => stage0_oid_in_range(&index.entries, existing_range.clone())
1752                    .map_or(SafeCrlfIndexBlob::None, |oid| SafeCrlfIndexBlob::Lookup {
1753                        odb: &odb,
1754                        oid,
1755                    }),
1756            };
1757            match (clean_config, &clean_filter) {
1758                (Some(config), Some(UpdateIndexCleanFilter::Full(matcher))) => {
1759                    // Identical to `apply_clean_filter`, but reuses the batch's
1760                    // matcher instead of rebuilding it (and re-walking the tree)
1761                    // for this path.
1762                    let checks =
1763                        matcher.attributes_for_path(&git_path, &requested_filter_attrs, false);
1764                    apply_clean_filter_with_attributes_cow_safecrlf(
1765                        config, &checks, &git_path, &body, conv_flags, index_blob,
1766                    )?
1767                    .into_owned()
1768                }
1769                (Some(config), Some(UpdateIndexCleanFilter::PathLocal)) => {
1770                    let checks = filter_attribute_checks(worktree_root, &git_path)?;
1771                    apply_clean_filter_with_attributes_cow_safecrlf(
1772                        config, &checks, &git_path, &body, conv_flags, index_blob,
1773                    )?
1774                    .into_owned()
1775                }
1776                _ => body,
1777            }
1778        };
1779        let object = EncodedObject::new(ObjectType::Blob, body);
1780        let oid = if path_mode.info_only {
1781            object.object_id(format)?
1782        } else {
1783            odb.write_object(object)?
1784        };
1785        let mut entry = index_entry_from_metadata(git_path.clone(), oid, &metadata);
1786        if is_symlink {
1787            entry.mode = 0o120000;
1788        }
1789        // git's update_one() reports `add` for every staged path (whether the
1790        // entry is new or an update), then chmod_path() reports the chmod after.
1791        reports.push(format!("add '{}'", String::from_utf8_lossy(&git_path)));
1792        if let Some(executable) = path_chmod {
1793            // git's chmod_path() refuses to flip the executable bit on anything
1794            // that is not a regular file (a symlink/gitlink has no such bit). It
1795            // writes the blob first, then errors with this exact message and
1796            // leaves the index untouched.
1797            if is_symlink {
1798                eprintln!(
1799                    "fatal: git update-index: cannot chmod {}x '{}'",
1800                    if executable { '+' } else { '-' },
1801                    String::from_utf8_lossy(&git_path)
1802                );
1803                return Err(GitError::Exit(128));
1804            }
1805            entry.mode = if executable { 0o100755 } else { 0o100644 };
1806            reports.push(format!(
1807                "chmod {}x '{}'",
1808                if executable { '+' } else { '-' },
1809                String::from_utf8_lossy(&git_path)
1810            ));
1811        }
1812        replace_index_entries_with_entry(&mut index.entries, entry);
1813        updated.push(oid);
1814    }
1815    normalize_index_version_for_extended_flags(&mut index);
1816    index.extensions = index_extensions_without_cache_tree(&index.extensions);
1817    fs::write(index_path, index.write(format)?)?;
1818    if verbose {
1819        let mut stdout = std::io::stdout().lock();
1820        for line in &reports {
1821            writeln!(stdout, "{line}")?;
1822        }
1823        stdout.flush()?;
1824    }
1825    Ok(UpdateIndexResult {
1826        entries: index.entries.len(),
1827        updated,
1828    })
1829}
1830
1831pub fn refresh_index_paths(
1832    worktree_root: impl AsRef<Path>,
1833    git_dir: impl AsRef<Path>,
1834    format: ObjectFormat,
1835    paths: &[PathBuf],
1836    quiet: bool,
1837    ignore_missing: bool,
1838    really_refresh: bool,
1839) -> Result<UpdateIndexResult> {
1840    let worktree_root = worktree_root.as_ref();
1841    let git_dir = git_dir.as_ref();
1842    let index_path = repository_index_path(git_dir);
1843    if !index_path.exists() {
1844        return Ok(UpdateIndexResult {
1845            entries: 0,
1846            updated: Vec::new(),
1847        });
1848    }
1849    let mut index = Index::parse(&fs::read(&index_path)?, format)?;
1850    // git's `update-index --refresh` trusts the cached stat: a stage-0 entry
1851    // whose size+mtime still match the worktree file (and is not racily clean) is
1852    // known unchanged, so its content is NOT re-read or re-hashed
1853    // (read-cache.c `refresh_cache_ent` → `ie_match_stat`). Without this shortcut
1854    // sley re-hashed every tracked file on every refresh — the 3.2x slowdown in
1855    // sley#27. We build the cache from the same parsed index + the index file's
1856    // own mtime (the racy-clean reference) so no extra parse is needed.
1857    let index_mtime = fs::metadata(&index_path)
1858        .ok()
1859        .and_then(|metadata| file_mtime_parts(&metadata));
1860    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
1861    let selected_paths = paths
1862        .iter()
1863        .map(|path| {
1864            let absolute = if path.is_absolute() {
1865                path.clone()
1866            } else {
1867                worktree_root.join(path)
1868            };
1869            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
1870                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
1871            })?;
1872            git_path_bytes(relative)
1873        })
1874        .collect::<Result<Vec<_>>>()?;
1875    let selected_paths = selected_paths.into_iter().collect::<BTreeSet<_>>();
1876    if selected_paths.is_empty()
1877        && !really_refresh
1878        && !index
1879            .entries
1880            .iter()
1881            .any(|entry| entry.flags & INDEX_FLAG_ASSUME_UNCHANGED != 0)
1882    {
1883        return refresh_all_index_paths_parallel(
1884            worktree_root,
1885            &index_path,
1886            format,
1887            index,
1888            stat_cache,
1889            quiet,
1890            ignore_missing,
1891        );
1892    }
1893    let mut needs_update = false;
1894    let mut index_dirty = false;
1895    for entry in &mut index.entries {
1896        if index_entry_stage(entry) != 0 {
1897            continue;
1898        }
1899        let selected_for_update =
1900            !selected_paths.is_empty() && selected_paths.contains(entry.path.as_bytes());
1901        if entry.flags & INDEX_FLAG_ASSUME_UNCHANGED != 0 {
1902            if !really_refresh {
1903                continue;
1904            }
1905            entry.flags &= !INDEX_FLAG_ASSUME_UNCHANGED;
1906            index_dirty = true;
1907        }
1908        let absolute = worktree_root.join(repo_path_to_os_path(entry.path.as_bytes())?);
1909        let Ok(metadata) = fs::metadata(&absolute) else {
1910            if ignore_missing {
1911                continue;
1912            }
1913            if !quiet {
1914                print_update_index_needs_update(entry.path.as_bytes());
1915            }
1916            needs_update = true;
1917            continue;
1918        };
1919        if !metadata.is_file() {
1920            if !quiet {
1921                print_update_index_needs_update(entry.path.as_bytes());
1922            }
1923            needs_update = true;
1924            continue;
1925        }
1926        // Stat shortcut: when the cached stat proves the file is unchanged since
1927        // it was staged, its content hashes to the cached oid by construction
1928        // (see `IndexStatCache`'s safety invariant). Skip the read+hash and just
1929        // refresh the stat fields from current metadata — byte-identical to the
1930        // clean arm below, since the oid stamped is the cached one and the
1931        // metadata is the same one that re-stamp would read.
1932        if stat_cache
1933            .reuse_index_entry(entry, &metadata)
1934            .is_some()
1935        {
1936            continue;
1937        }
1938        let body = fs::read(&absolute)?;
1939        let object = EncodedObject::new(ObjectType::Blob, body);
1940        let oid = object.object_id(format)?;
1941        if oid != entry.oid || file_mode(&metadata) != entry.mode {
1942            if !quiet {
1943                print_update_index_needs_update(entry.path.as_bytes());
1944            }
1945            needs_update = true;
1946            if selected_for_update {
1947                let updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
1948                if updated_entry != *entry {
1949                    *entry = updated_entry;
1950                    index_dirty = true;
1951                }
1952            }
1953            continue;
1954        }
1955        let updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
1956        if updated_entry != *entry {
1957            *entry = updated_entry;
1958            index_dirty = true;
1959        }
1960    }
1961    if index_dirty {
1962        fs::write(&index_path, index.write(format)?)?;
1963    }
1964    if needs_update && !quiet {
1965        return Err(GitError::Exit(1));
1966    }
1967    Ok(UpdateIndexResult {
1968        entries: index.entries.len(),
1969        updated: Vec::new(),
1970    })
1971}
1972
1973fn refresh_all_index_paths_parallel(
1974    worktree_root: &Path,
1975    index_path: &Path,
1976    format: ObjectFormat,
1977    mut index: Index,
1978    stat_cache: IndexStatCache,
1979    quiet: bool,
1980    ignore_missing: bool,
1981) -> Result<UpdateIndexResult> {
1982    let prechecks = tracked_only_non_clean_prechecks_parallel(worktree_root, &index, &stat_cache)?;
1983    let mut needs_update = false;
1984    let mut index_dirty = false;
1985    for precheck in prechecks {
1986        match precheck {
1987            TrackedOnlyPrecheck::Deleted(idx) => {
1988                if ignore_missing {
1989                    continue;
1990                }
1991                if !quiet {
1992                    print_update_index_needs_update(index.entries[idx].path.as_bytes());
1993                }
1994                needs_update = true;
1995            }
1996            TrackedOnlyPrecheck::Slow(idx) => {
1997                let entry = &mut index.entries[idx];
1998                let path = entry.path.as_bytes().to_vec();
1999                let absolute = worktree_root.join(repo_path_to_os_path(&path)?);
2000                let Ok(metadata) = fs::metadata(&absolute) else {
2001                    if ignore_missing {
2002                        continue;
2003                    }
2004                    if !quiet {
2005                        print_update_index_needs_update(&path);
2006                    }
2007                    needs_update = true;
2008                    continue;
2009                };
2010                if !metadata.is_file() {
2011                    if !quiet {
2012                        print_update_index_needs_update(&path);
2013                    }
2014                    needs_update = true;
2015                    continue;
2016                }
2017                if stat_cache.reuse_index_entry(entry, &metadata).is_some() {
2018                    continue;
2019                }
2020                let body = fs::read(&absolute)?;
2021                let object = EncodedObject::new(ObjectType::Blob, body);
2022                let oid = object.object_id(format)?;
2023                if oid != entry.oid || file_mode(&metadata) != entry.mode {
2024                    if !quiet {
2025                        print_update_index_needs_update(&path);
2026                    }
2027                    needs_update = true;
2028                    continue;
2029                }
2030                let updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
2031                if updated_entry != *entry {
2032                    *entry = updated_entry;
2033                    index_dirty = true;
2034                }
2035            }
2036        }
2037    }
2038    if index_dirty {
2039        fs::write(index_path, index.write(format)?)?;
2040    }
2041    if needs_update && !quiet {
2042        return Err(GitError::Exit(1));
2043    }
2044    Ok(UpdateIndexResult {
2045        entries: index.entries.len(),
2046        updated: Vec::new(),
2047    })
2048}
2049
2050pub fn update_index_again(
2051    worktree_root: impl AsRef<Path>,
2052    git_dir: impl AsRef<Path>,
2053    format: ObjectFormat,
2054    paths: &[PathBuf],
2055    options: UpdateIndexOptions,
2056) -> Result<UpdateIndexResult> {
2057    let worktree_root = worktree_root.as_ref();
2058    let git_dir = git_dir.as_ref();
2059    let index_path = repository_index_path(git_dir);
2060    if !index_path.exists() {
2061        return Ok(UpdateIndexResult {
2062            entries: 0,
2063            updated: Vec::new(),
2064        });
2065    }
2066    let index = Index::parse(&fs::read(&index_path)?, format)?;
2067    let db = FileObjectDatabase::from_git_dir(git_dir, format);
2068    let head_entries = head_tree_entries(git_dir, format, &db)?;
2069    let selected_paths = selected_git_paths(worktree_root, paths)?;
2070    let mut again_paths = Vec::new();
2071    for entry in &index.entries {
2072        if index_entry_stage(entry) != 0 {
2073            continue;
2074        }
2075        if !selected_paths.is_empty() && !git_path_selected(entry.path.as_bytes(), &selected_paths)
2076        {
2077            continue;
2078        }
2079        let differs_from_head = match head_entries.get(entry.path.as_bytes()) {
2080            Some(head_entry) => head_entry.oid != entry.oid || head_entry.mode != entry.mode,
2081            None => true,
2082        };
2083        if differs_from_head {
2084            again_paths.push(worktree_root.join(repo_path_to_os_path(entry.path.as_bytes())?));
2085        }
2086    }
2087    if again_paths.is_empty() {
2088        return Ok(UpdateIndexResult {
2089            entries: index.entries.len(),
2090            updated: Vec::new(),
2091        });
2092    }
2093    update_index_paths(worktree_root, git_dir, format, &again_paths, options)
2094}
2095
2096pub fn set_index_assume_unchanged_paths(
2097    worktree_root: impl AsRef<Path>,
2098    git_dir: impl AsRef<Path>,
2099    format: ObjectFormat,
2100    paths: &[PathBuf],
2101    assume_unchanged: bool,
2102) -> Result<UpdateIndexResult> {
2103    let worktree_root = worktree_root.as_ref();
2104    let git_dir = git_dir.as_ref();
2105    let index_path = repository_index_path(git_dir);
2106    let mut index = if index_path.exists() {
2107        Index::parse(&fs::read(&index_path)?, format)?
2108    } else {
2109        Index {
2110            version: 2,
2111            entries: Vec::new(),
2112            extensions: Vec::new(),
2113            checksum: None,
2114        }
2115    };
2116    let selected_paths = paths
2117        .iter()
2118        .map(|path| {
2119            let absolute = if path.is_absolute() {
2120                path.clone()
2121            } else {
2122                worktree_root.join(path)
2123            };
2124            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2125                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2126            })?;
2127            git_path_bytes(relative)
2128        })
2129        .collect::<Result<Vec<_>>>()?;
2130    for path in selected_paths {
2131        if let Some(entry) = index.entries.iter_mut().find(|entry| entry.path == path) {
2132            if assume_unchanged {
2133                entry.flags |= INDEX_FLAG_ASSUME_UNCHANGED;
2134            } else {
2135                entry.flags &= !INDEX_FLAG_ASSUME_UNCHANGED;
2136            }
2137        }
2138    }
2139    normalize_index_version_for_extended_flags(&mut index);
2140    fs::write(index_path, index.write(format)?)?;
2141    Ok(UpdateIndexResult {
2142        entries: index.entries.len(),
2143        updated: Vec::new(),
2144    })
2145}
2146
2147fn selected_git_paths(worktree_root: &Path, paths: &[PathBuf]) -> Result<BTreeSet<Vec<u8>>> {
2148    paths
2149        .iter()
2150        .map(|path| {
2151            let absolute = if path.is_absolute() {
2152                path.clone()
2153            } else {
2154                worktree_root.join(path)
2155            };
2156            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2157                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2158            })?;
2159            git_path_bytes(relative)
2160        })
2161        .collect()
2162}
2163
2164fn git_path_selected(path: &[u8], selected_paths: &BTreeSet<Vec<u8>>) -> bool {
2165    selected_paths
2166        .iter()
2167        .any(|selected| path == selected || index_entry_is_under_path(path, selected))
2168}
2169
2170pub fn set_index_skip_worktree_paths(
2171    worktree_root: impl AsRef<Path>,
2172    git_dir: impl AsRef<Path>,
2173    format: ObjectFormat,
2174    paths: &[PathBuf],
2175    skip_worktree: bool,
2176) -> Result<UpdateIndexResult> {
2177    let worktree_root = worktree_root.as_ref();
2178    let git_dir = git_dir.as_ref();
2179    let index_path = repository_index_path(git_dir);
2180    let mut index = if index_path.exists() {
2181        Index::parse(&fs::read(&index_path)?, format)?
2182    } else {
2183        Index {
2184            version: 2,
2185            entries: Vec::new(),
2186            extensions: Vec::new(),
2187            checksum: None,
2188        }
2189    };
2190    let selected_paths = paths
2191        .iter()
2192        .map(|path| {
2193            let absolute = if path.is_absolute() {
2194                path.clone()
2195            } else {
2196                worktree_root.join(path)
2197            };
2198            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2199                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2200            })?;
2201            git_path_bytes(relative)
2202        })
2203        .collect::<Result<Vec<_>>>()?;
2204    for path in selected_paths {
2205        if let Some(entry) = index.entries.iter_mut().find(|entry| entry.path == path) {
2206            if skip_worktree {
2207                entry.flags |= INDEX_FLAG_EXTENDED;
2208                entry.flags_extended |= INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
2209            } else {
2210                entry.flags_extended &= !INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
2211                if entry.flags_extended == 0 {
2212                    entry.flags &= !INDEX_FLAG_EXTENDED;
2213                }
2214            }
2215        }
2216    }
2217    normalize_index_version_for_extended_flags(&mut index);
2218    fs::write(index_path, index.write(format)?)?;
2219    Ok(UpdateIndexResult {
2220        entries: index.entries.len(),
2221        updated: Vec::new(),
2222    })
2223}
2224
2225pub fn set_index_fsmonitor_valid_paths(
2226    worktree_root: impl AsRef<Path>,
2227    git_dir: impl AsRef<Path>,
2228    format: ObjectFormat,
2229    paths: &[PathBuf],
2230    _fsmonitor_valid: bool,
2231) -> Result<UpdateIndexResult> {
2232    let worktree_root = worktree_root.as_ref();
2233    let git_dir = git_dir.as_ref();
2234    let index_path = repository_index_path(git_dir);
2235    let index = if index_path.exists() {
2236        Index::parse(&fs::read(&index_path)?, format)?
2237    } else {
2238        Index {
2239            version: 2,
2240            entries: Vec::new(),
2241            extensions: Vec::new(),
2242            checksum: None,
2243        }
2244    };
2245    let selected_paths = paths
2246        .iter()
2247        .map(|path| {
2248            let absolute = if path.is_absolute() {
2249                path.clone()
2250            } else {
2251                worktree_root.join(path)
2252            };
2253            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2254                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2255            })?;
2256            git_path_bytes(relative)
2257        })
2258        .collect::<Result<Vec<_>>>()?;
2259    for path in selected_paths {
2260        if !index.entries.iter().any(|entry| entry.path == path) {
2261            eprintln!(
2262                "fatal: Unable to mark file {}",
2263                String::from_utf8_lossy(&path)
2264            );
2265            return Err(GitError::Exit(128));
2266        }
2267    }
2268    Ok(UpdateIndexResult {
2269        entries: index.entries.len(),
2270        updated: Vec::new(),
2271    })
2272}
2273
2274pub fn set_index_version(
2275    git_dir: impl AsRef<Path>,
2276    format: ObjectFormat,
2277    version: u32,
2278    verbose: bool,
2279) -> Result<UpdateIndexResult> {
2280    if !matches!(version, 2..=4) {
2281        return Err(GitError::Unsupported(format!(
2282            "update-index currently supports --index-version 2, 3, or 4, got {version}"
2283        )));
2284    }
2285    let git_dir = git_dir.as_ref();
2286    let index_path = repository_index_path(git_dir);
2287    let mut index = if index_path.exists() {
2288        Index::parse(&fs::read(&index_path)?, format)?
2289    } else {
2290        Index {
2291            version: 2,
2292            entries: Vec::new(),
2293            extensions: Vec::new(),
2294            checksum: None,
2295        }
2296    };
2297    // git reports the transition unconditionally under --verbose, even when the
2298    // requested version equals the current one ("was 4, set to 4").
2299    let previous = index.version;
2300    if verbose {
2301        println!("index-version: was {previous}, set to {version}");
2302    }
2303    index.version = version;
2304    normalize_index_version_for_extended_flags(&mut index);
2305    fs::write(index_path, index.write(format)?)?;
2306    Ok(UpdateIndexResult {
2307        entries: index.entries.len(),
2308        updated: Vec::new(),
2309    })
2310}
2311
2312pub fn force_write_index(
2313    git_dir: impl AsRef<Path>,
2314    format: ObjectFormat,
2315) -> Result<UpdateIndexResult> {
2316    let git_dir = git_dir.as_ref();
2317    let index_path = repository_index_path(git_dir);
2318    let mut index = if index_path.exists() {
2319        Index::parse(&fs::read(&index_path)?, format)?
2320    } else {
2321        Index {
2322            version: 2,
2323            entries: Vec::new(),
2324            extensions: Vec::new(),
2325            checksum: None,
2326        }
2327    };
2328    normalize_index_version_for_extended_flags(&mut index);
2329    fs::write(index_path, index.write(format)?)?;
2330    Ok(UpdateIndexResult {
2331        entries: index.entries.len(),
2332        updated: Vec::new(),
2333    })
2334}
2335
/// Returns a copy of the raw index extension area with every `TREE`
/// extension removed.
///
/// `extensions` is read as a sequence of records, each a 4-byte signature, a
/// big-endian 32-bit payload size, and that many payload bytes. Records other
/// than `TREE` are copied through unchanged and in order.
///
/// If the data is malformed (a truncated header, or a declared size that runs
/// past the end of the buffer) an empty vector is returned, discarding all
/// extensions.
fn index_extensions_without_cache_tree(extensions: &[u8]) -> Vec<u8> {
    const HEADER_LEN: usize = 8;
    let mut filtered = Vec::with_capacity(extensions.len());
    let mut rest = extensions;
    while !rest.is_empty() {
        if rest.len() < HEADER_LEN {
            return Vec::new();
        }
        let (header, after_header) = rest.split_at(HEADER_LEN);
        let declared = u32::from_be_bytes([header[4], header[5], header[6], header[7]]);
        // Compare the declared size against the bytes that remain instead of
        // computing `offset + 8 + size`, which could overflow `usize` on
        // 32-bit targets when a corrupt record declares a huge size.
        let Ok(size) = usize::try_from(declared) else {
            return Vec::new();
        };
        if size > after_header.len() {
            return Vec::new();
        }
        let (payload, tail) = after_header.split_at(size);
        if &header[..4] != b"TREE" {
            filtered.extend_from_slice(header);
            filtered.extend_from_slice(payload);
        }
        rest = tail;
    }
    filtered
}
2361
2362pub fn update_index_cacheinfo(
2363    git_dir: impl AsRef<Path>,
2364    format: ObjectFormat,
2365    entries: &[CacheInfoEntry],
2366    add: bool,
2367    verbose: bool,
2368) -> Result<UpdateIndexResult> {
2369    let git_dir = git_dir.as_ref();
2370    let index_path = repository_index_path(git_dir);
2371    let mut index = if index_path.exists() {
2372        Index::parse(&fs::read(&index_path)?, format)?
2373    } else {
2374        Index {
2375            version: 2,
2376            entries: Vec::new(),
2377            extensions: Vec::new(),
2378            checksum: None,
2379        }
2380    };
2381    let mut updated = Vec::new();
2382    let mut reports: Vec<String> = Vec::new();
2383    for cacheinfo in entries {
2384        if !add
2385            && !index
2386                .entries
2387                .iter()
2388                .any(|existing| existing.path == cacheinfo.path)
2389        {
2390            let path = String::from_utf8_lossy(&cacheinfo.path);
2391            eprintln!("error: {path}: cannot add to the index - missing --add option?");
2392            eprintln!("fatal: git update-index: --cacheinfo cannot add {path}");
2393            return Err(GitError::Exit(128));
2394        }
2395        let flags = index_flags(cacheinfo.path.len(), cacheinfo.stage);
2396        let entry = IndexEntry {
2397            ctime_seconds: 0,
2398            ctime_nanoseconds: 0,
2399            mtime_seconds: 0,
2400            mtime_nanoseconds: 0,
2401            dev: 0,
2402            ino: 0,
2403            mode: cacheinfo.mode,
2404            uid: 0,
2405            gid: 0,
2406            size: 0,
2407            oid: cacheinfo.oid,
2408            flags,
2409            flags_extended: 0,
2410            path: BString::from(cacheinfo.path.as_slice()),
2411        };
2412        index.entries.retain(|existing| {
2413            existing.path != cacheinfo.path || index_entry_stage(existing) != cacheinfo.stage
2414        });
2415        index.entries.push(entry);
2416        updated.push(cacheinfo.oid);
2417        // git's add_cacheinfo() calls report("add '%s'") *after* the entry is
2418        // staged, regardless of whether the subsequent index write succeeds.
2419        reports.push(format!(
2420            "add '{}'",
2421            String::from_utf8_lossy(&cacheinfo.path)
2422        ));
2423    }
2424    index
2425        .entries
2426        .sort_by(|left, right| left.path.cmp(&right.path));
2427    // git refuses to write an index entry whose object id is the null oid:
2428    // do_write_index() emits `error: cache entry has null sha1: <path>` and
2429    // returns nonzero, leaving the on-disk index untouched. The verbose `add`
2430    // line has already been printed by then.
2431    let null_entry = index.entries.iter().find(|entry| entry.oid.is_null());
2432    if let Some(entry) = null_entry {
2433        if verbose {
2434            flush_update_index_reports(&reports)?;
2435        }
2436        eprintln!(
2437            "error: cache entry has null sha1: {}",
2438            String::from_utf8_lossy(&entry.path)
2439        );
2440        return Err(GitError::Exit(128));
2441    }
2442    fs::write(index_path, index.write(format)?)?;
2443    if verbose {
2444        flush_update_index_reports(&reports)?;
2445    }
2446    Ok(UpdateIndexResult {
2447        entries: index.entries.len(),
2448        updated,
2449    })
2450}
2451
2452fn flush_update_index_reports(reports: &[String]) -> Result<()> {
2453    let mut stdout = std::io::stdout().lock();
2454    for line in reports {
2455        writeln!(stdout, "{line}")?;
2456    }
2457    stdout.flush()?;
2458    Ok(())
2459}
2460
2461pub fn update_index_index_info(
2462    git_dir: impl AsRef<Path>,
2463    format: ObjectFormat,
2464    records: &[IndexInfoRecord],
2465) -> Result<UpdateIndexResult> {
2466    let git_dir = git_dir.as_ref();
2467    let index_path = repository_index_path(git_dir);
2468    let mut index = if index_path.exists() {
2469        Index::parse(&fs::read(&index_path)?, format)?
2470    } else {
2471        Index {
2472            version: 2,
2473            entries: Vec::new(),
2474            extensions: Vec::new(),
2475            checksum: None,
2476        }
2477    };
2478    let mut updated = Vec::new();
2479    for record in records {
2480        match record {
2481            IndexInfoRecord::Remove { path } => {
2482                index.entries.retain(|existing| existing.path != *path);
2483            }
2484            IndexInfoRecord::Add(cacheinfo) => {
2485                let flags = index_flags(cacheinfo.path.len(), cacheinfo.stage);
2486                let entry = IndexEntry {
2487                    ctime_seconds: 0,
2488                    ctime_nanoseconds: 0,
2489                    mtime_seconds: 0,
2490                    mtime_nanoseconds: 0,
2491                    dev: 0,
2492                    ino: 0,
2493                    mode: cacheinfo.mode,
2494                    uid: 0,
2495                    gid: 0,
2496                    size: 0,
2497                    oid: cacheinfo.oid,
2498                    flags,
2499                    flags_extended: 0,
2500                    path: BString::from(cacheinfo.path.as_slice()),
2501                };
2502                if cacheinfo.stage == 0 {
2503                    index
2504                        .entries
2505                        .retain(|existing| existing.path != cacheinfo.path);
2506                } else {
2507                    index.entries.retain(|existing| {
2508                        existing.path != cacheinfo.path
2509                            || index_entry_stage(existing) != cacheinfo.stage
2510                    });
2511                }
2512                index.entries.push(entry);
2513                updated.push(cacheinfo.oid);
2514            }
2515        }
2516    }
2517    index.entries.sort_by(|left, right| {
2518        left.path
2519            .cmp(&right.path)
2520            .then_with(|| index_entry_stage(left).cmp(&index_entry_stage(right)))
2521    });
2522    fs::write(index_path, index.write(format)?)?;
2523    Ok(UpdateIndexResult {
2524        entries: index.entries.len(),
2525        updated,
2526    })
2527}
2528
/// Pack an index entry's 16-bit flags word from its path length and stage.
///
/// The low 12 bits hold the path length, saturated at `0xfff`; bits 12-13 hold
/// the two low bits of `stage`. The two top bits are left clear.
fn index_flags(path_len: usize, stage: u16) -> u16 {
    const NAME_LENGTH_MAX: usize = 0x0fff;
    const STAGE_SHIFT: u32 = 12;
    const STAGE_MASK: u16 = 0b11;

    let name_bits = if path_len < NAME_LENGTH_MAX {
        path_len as u16
    } else {
        NAME_LENGTH_MAX as u16
    };
    let stage_bits = (stage & STAGE_MASK) << STAGE_SHIFT;
    stage_bits | name_bits
}
2532
/// Top bit (`0x8000`) of an entry's `flags` word; going by its name, the
/// "assume unchanged" marker.
const INDEX_FLAG_ASSUME_UNCHANGED: u16 = 1 << 15;
/// Bit (`0x4000`) of an entry's `flags` word that is tested together with
/// `flags_extended` to decide whether extended flags are present.
const INDEX_FLAG_EXTENDED: u16 = 1 << 14;
/// Bit (`0x4000`) of an entry's `flags_extended` word marking the entry as
/// skip-worktree.
const INDEX_EXTENDED_FLAG_SKIP_WORKTREE: u16 = 1 << 14;
2536
2537fn normalize_index_version_for_extended_flags(index: &mut Index) {
2538    let has_extended_flags = index
2539        .entries
2540        .iter()
2541        .any(|entry| entry.flags & INDEX_FLAG_EXTENDED != 0 || entry.flags_extended != 0);
2542    if has_extended_flags && index.version < 3 {
2543        index.version = 3;
2544    } else if !has_extended_flags && index.version == 3 {
2545        index.version = 2;
2546    }
2547}
2548
2549fn index_entry_stage(entry: &IndexEntry) -> u16 {
2550    (entry.flags >> 12) & 0x3
2551}
2552
2553/// The oid of the stage-0 entry in `range` (the path's currently-tracked blob),
2554/// if any. Used by the safecrlf check to fetch `has_crlf_in_index`.
2555fn stage0_oid_in_range(
2556    entries: &[IndexEntry],
2557    range: std::ops::Range<usize>,
2558) -> Option<ObjectId> {
2559    entries[range]
2560        .iter()
2561        .find(|entry| index_entry_stage(entry) == 0)
2562        .map(|entry| entry.oid)
2563}
2564
2565fn index_entry_skip_worktree(entry: &IndexEntry) -> bool {
2566    entry.flags & INDEX_FLAG_EXTENDED != 0
2567        && entry.flags_extended & INDEX_EXTENDED_FLAG_SKIP_WORKTREE != 0
2568}
2569
/// Print the two-line stderr diagnostic for a path that could not be
/// processed: an `error:` line carrying `message`, followed by a `fatal:` line.
///
/// The path is rendered lossily, so bytes that are not valid UTF-8 show up as
/// replacement characters.
fn print_update_index_path_error(path: &[u8], message: &str) {
    let shown = String::from_utf8_lossy(path);
    eprintln!("error: {}: {}", shown, message);
    eprintln!("fatal: Unable to process path {}", shown);
}
2575
/// Print `<path>: needs update` to stdout, rendering the path lossily as UTF-8.
fn print_update_index_needs_update(path: &[u8]) {
    println!("{}: needs update", String::from_utf8_lossy(path));
}
2580
2581pub fn write_tree_from_index(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Result<ObjectId> {
2582    write_tree_from_index_with_options(git_dir, format, WriteTreeOptions::default())
2583}
2584
2585pub fn write_tree_from_index_with_odb(
2586    git_dir: impl AsRef<Path>,
2587    format: ObjectFormat,
2588    odb: &FileObjectDatabase,
2589) -> Result<ObjectId> {
2590    write_tree_from_index_with_options_and_odb(
2591        git_dir.as_ref(),
2592        format,
2593        WriteTreeOptions::default(),
2594        odb,
2595    )
2596}
2597
2598pub fn write_tree_from_index_with_options(
2599    git_dir: impl AsRef<Path>,
2600    format: ObjectFormat,
2601    options: WriteTreeOptions,
2602) -> Result<ObjectId> {
2603    let git_dir = git_dir.as_ref();
2604    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
2605    write_tree_from_index_with_options_and_odb(git_dir, format, options, &odb)
2606}
2607
2608fn write_tree_from_index_with_options_and_odb(
2609    git_dir: &Path,
2610    format: ObjectFormat,
2611    options: WriteTreeOptions,
2612    odb: &FileObjectDatabase,
2613) -> Result<ObjectId> {
2614    let index_path = repository_index_path(git_dir);
2615    // A repository with no index file yet (fresh init, nothing staged) is an
2616    // empty index: `git write-tree` / `git commit --allow-empty` produce the
2617    // empty tree rather than erroring.
2618    let index_bytes = match fs::read(&index_path) {
2619        Ok(bytes) => bytes,
2620        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
2621            let mut checker = odb.presence_checker();
2622            let empty: &[WriteTreeEntry<'_>] = &[];
2623            return write_tree_entries_stream(
2624                empty,
2625                b"",
2626                None,
2627                odb,
2628                &mut checker,
2629                options.missing_ok,
2630            );
2631        }
2632        Err(err) => return Err(err.into()),
2633    };
2634    let mut checker = odb.presence_checker();
2635    match BorrowedIndex::parse(&index_bytes, format) {
2636        Ok(index) => write_tree_from_borrowed_index(&index, format, &options, odb, &mut checker),
2637        Err(GitError::Unsupported(_)) => {
2638            let index = Index::parse(&index_bytes, format)?;
2639            write_tree_from_owned_index(&index, format, &options, odb, &mut checker)
2640        }
2641        Err(err) => Err(err),
2642    }
2643}
2644
2645fn write_tree_from_borrowed_index(
2646    index: &BorrowedIndex<'_>,
2647    format: ObjectFormat,
2648    options: &WriteTreeOptions,
2649    odb: &FileObjectDatabase,
2650    checker: &mut ObjectPresenceChecker,
2651) -> Result<ObjectId> {
2652    let cache_tree = if options.prefix.is_none() {
2653        index.cache_tree(format).ok().flatten()
2654    } else {
2655        None
2656    };
2657    if options.prefix.is_none() && !index.entries.iter().any(|entry| entry.is_intent_to_add()) {
2658        return write_tree_entries_stream(
2659            &index.entries,
2660            b"",
2661            cache_tree.as_ref(),
2662            odb,
2663            checker,
2664            options.missing_ok,
2665        );
2666    }
2667    // intent-to-add entries (`git add -N`, `git reset -N`) are placeholders that do
2668    // NOT belong in a written tree — git's cache_tree_update skips CE_INTENT_TO_ADD.
2669    // Drop them before building, so `write-tree` succeeds and the tree omits them
2670    // (their empty-blob oid is also typically absent from the odb).
2671    let entries = write_tree_entries_for_prefix(
2672        index
2673            .entries
2674            .iter()
2675            .filter(|entry| !entry.is_intent_to_add()),
2676        options.prefix.as_deref(),
2677    )?;
2678    write_tree_entries_stream(
2679        &entries,
2680        b"",
2681        cache_tree.as_ref(),
2682        odb,
2683        checker,
2684        options.missing_ok,
2685    )
2686}
2687
2688fn write_tree_from_owned_index(
2689    index: &Index,
2690    format: ObjectFormat,
2691    options: &WriteTreeOptions,
2692    odb: &FileObjectDatabase,
2693    checker: &mut ObjectPresenceChecker,
2694) -> Result<ObjectId> {
2695    let cache_tree = if options.prefix.is_none() {
2696        index.cache_tree(format).ok().flatten()
2697    } else {
2698        None
2699    };
2700    if options.prefix.is_none() && !index.entries.iter().any(|entry| entry.is_intent_to_add()) {
2701        return write_tree_entries_stream(
2702            &index.entries,
2703            b"",
2704            cache_tree.as_ref(),
2705            odb,
2706            checker,
2707            options.missing_ok,
2708        );
2709    }
2710    let entries = write_tree_entries_for_prefix(
2711        index
2712            .entries
2713            .iter()
2714            .filter(|entry| !entry.is_intent_to_add()),
2715        options.prefix.as_deref(),
2716    )?;
2717    write_tree_entries_stream(
2718        &entries,
2719        b"",
2720        cache_tree.as_ref(),
2721        odb,
2722        checker,
2723        options.missing_ok,
2724    )
2725}
2726
2727#[derive(Clone, Copy)]
2728struct WriteTreeEntry<'a> {
2729    path: &'a [u8],
2730    mode: u32,
2731    oid: ObjectId,
2732}
2733
2734trait WriteTreeIndexEntry {
2735    fn write_tree_path(&self) -> &[u8];
2736    fn write_tree_mode(&self) -> u32;
2737    fn write_tree_oid(&self) -> ObjectId;
2738}
2739
2740impl WriteTreeIndexEntry for IndexEntry {
2741    fn write_tree_path(&self) -> &[u8] {
2742        self.path.as_bytes()
2743    }
2744
2745    fn write_tree_mode(&self) -> u32 {
2746        self.mode
2747    }
2748
2749    fn write_tree_oid(&self) -> ObjectId {
2750        self.oid
2751    }
2752}
2753
2754impl WriteTreeIndexEntry for IndexEntryRef<'_> {
2755    fn write_tree_path(&self) -> &[u8] {
2756        self.path
2757    }
2758
2759    fn write_tree_mode(&self) -> u32 {
2760        self.mode
2761    }
2762
2763    fn write_tree_oid(&self) -> ObjectId {
2764        self.oid
2765    }
2766}
2767
2768impl WriteTreeIndexEntry for WriteTreeEntry<'_> {
2769    fn write_tree_path(&self) -> &[u8] {
2770        self.path
2771    }
2772
2773    fn write_tree_mode(&self) -> u32 {
2774        self.mode
2775    }
2776
2777    fn write_tree_oid(&self) -> ObjectId {
2778        self.oid
2779    }
2780}
2781
2782fn write_tree_entries_for_prefix<'a, E>(
2783    entries: impl IntoIterator<Item = &'a E>,
2784    prefix: Option<&[u8]>,
2785) -> Result<Vec<WriteTreeEntry<'a>>>
2786where
2787    E: WriteTreeIndexEntry + 'a,
2788{
2789    let Some(prefix) = prefix else {
2790        return Ok(entries
2791            .into_iter()
2792            .map(|entry| WriteTreeEntry {
2793                path: entry.write_tree_path(),
2794                mode: entry.write_tree_mode(),
2795                oid: entry.write_tree_oid(),
2796            })
2797            .collect());
2798    };
2799    let trimmed_len = prefix
2800        .iter()
2801        .rposition(|byte| *byte != b'/')
2802        .map(|idx| idx + 1)
2803        .unwrap_or(0);
2804    let trimmed = &prefix[..trimmed_len];
2805    if trimmed.is_empty() {
2806        return Ok(entries
2807            .into_iter()
2808            .map(|entry| WriteTreeEntry {
2809                path: entry.write_tree_path(),
2810                mode: entry.write_tree_mode(),
2811                oid: entry.write_tree_oid(),
2812            })
2813            .collect());
2814    }
2815    let mut prefixed = Vec::new();
2816    for entry in entries {
2817        let Some(remainder) = entry.write_tree_path().strip_prefix(trimmed) else {
2818            continue;
2819        };
2820        let Some(stripped) = remainder.strip_prefix(b"/") else {
2821            continue;
2822        };
2823        if stripped.is_empty() {
2824            continue;
2825        }
2826        prefixed.push(WriteTreeEntry {
2827            path: stripped,
2828            mode: entry.write_tree_mode(),
2829            oid: entry.write_tree_oid(),
2830        });
2831    }
2832    if prefixed.is_empty() {
2833        eprintln!(
2834            "fatal: git-write-tree: prefix {} not found",
2835            String::from_utf8_lossy(prefix)
2836        );
2837        return Err(GitError::Exit(128));
2838    }
2839    Ok(prefixed)
2840}
2841
2842fn write_tree_entries_stream<E>(
2843    entries: &[E],
2844    prefix: &[u8],
2845    cache_tree: Option<&CacheTree>,
2846    odb: &FileObjectDatabase,
2847    checker: &mut ObjectPresenceChecker,
2848    missing_ok: bool,
2849) -> Result<ObjectId>
2850where
2851    E: WriteTreeIndexEntry,
2852{
2853    if let Some(oid) = valid_cache_tree_oid(cache_tree, entries.len()) {
2854        return Ok(oid);
2855    }
2856
2857    let mut tree_entries = Vec::new();
2858    let mut index = 0usize;
2859    while index < entries.len() {
2860        let entry = &entries[index];
2861        let path = entry.write_tree_path();
2862        let Some(remainder) = path.strip_prefix(prefix) else {
2863            return Err(GitError::InvalidPath(format!(
2864                "invalid index path {}",
2865                String::from_utf8_lossy(path)
2866            )));
2867        };
2868        if remainder.is_empty() || remainder[0] == b'/' {
2869            return Err(GitError::InvalidPath(format!(
2870                "invalid index path {}",
2871                String::from_utf8_lossy(path)
2872            )));
2873        }
2874
2875        if let Some(slash) = remainder.iter().position(|byte| *byte == b'/') {
2876            let name = &remainder[..slash];
2877            if name.is_empty() {
2878                return Err(GitError::InvalidPath(format!(
2879                    "invalid index path {}",
2880                    String::from_utf8_lossy(path)
2881                )));
2882            }
2883            let start = index;
2884            let child_cache = cache_tree.and_then(|tree| {
2885                tree.subtrees
2886                    .iter()
2887                    .find(|child| child.name.as_slice() == name)
2888                    .map(|child| &child.tree)
2889            });
2890            if let Some(cached_count) = valid_cache_tree_entry_count(child_cache) {
2891                let end = start.saturating_add(cached_count);
2892                if cached_count > 0
2893                    && end <= entries.len()
2894                    && same_tree_component(entries[end - 1].write_tree_path(), prefix, name)?
2895                    && (end == entries.len()
2896                        || !same_tree_component(entries[end].write_tree_path(), prefix, name)?)
2897                {
2898                    index = end;
2899                } else {
2900                    index += 1;
2901                    while index < entries.len()
2902                        && same_tree_component(entries[index].write_tree_path(), prefix, name)?
2903                    {
2904                        index += 1;
2905                    }
2906                }
2907            } else {
2908                index += 1;
2909                while index < entries.len()
2910                    && same_tree_component(entries[index].write_tree_path(), prefix, name)?
2911                {
2912                    index += 1;
2913                }
2914            }
2915            if let Some(oid) = valid_cache_tree_oid(child_cache, index - start) {
2916                tree_entries.push(TreeEntry {
2917                    mode: 0o040000,
2918                    name: BString::from(name),
2919                    oid,
2920                });
2921                continue;
2922            }
2923            let mut child_prefix = Vec::with_capacity(prefix.len() + name.len() + 1);
2924            child_prefix.extend_from_slice(prefix);
2925            child_prefix.extend_from_slice(name);
2926            child_prefix.push(b'/');
2927            let oid = write_tree_entries_stream(
2928                &entries[start..index],
2929                &child_prefix,
2930                child_cache,
2931                odb,
2932                checker,
2933                missing_ok,
2934            )?;
2935            tree_entries.push(TreeEntry {
2936                mode: 0o040000,
2937                name: BString::from(name),
2938                oid,
2939            });
2940            continue;
2941        }
2942
2943        let mode = entry.write_tree_mode();
2944        let oid = entry.write_tree_oid();
2945        if !missing_ok && mode != 0o160000 && !checker.contains(&oid)? {
2946            eprintln!(
2947                "error: invalid object {:o} {} for '{}'",
2948                mode,
2949                oid,
2950                String::from_utf8_lossy(path)
2951            );
2952            eprintln!("fatal: git-write-tree: error building trees");
2953            return Err(GitError::Exit(128));
2954        }
2955        tree_entries.push(TreeEntry {
2956            mode,
2957            name: BString::from(remainder),
2958            oid,
2959        });
2960        index += 1;
2961    }
2962
2963    tree_entries.sort_by(|left, right| {
2964        git_tree_entry_cmp(
2965            left.name.as_bytes(),
2966            left.mode,
2967            right.name.as_bytes(),
2968            right.mode,
2969        )
2970    });
2971    odb.write_object(EncodedObject::new(
2972        ObjectType::Tree,
2973        Tree {
2974            entries: tree_entries,
2975        }
2976        .write(),
2977    ))
2978}
2979
2980fn valid_cache_tree_oid(tree: Option<&CacheTree>, entry_count: usize) -> Option<ObjectId> {
2981    let tree = tree?;
2982    if valid_cache_tree_entry_count(Some(tree))? != entry_count {
2983        return None;
2984    }
2985    tree.oid
2986}
2987
2988fn valid_cache_tree_entry_count(tree: Option<&CacheTree>) -> Option<usize> {
2989    let tree = tree?;
2990    if tree.entry_count < 0 || tree.oid.is_none() {
2991        return None;
2992    }
2993    Some(tree.entry_count as usize)
2994}
2995
2996fn same_tree_component(path: &[u8], prefix: &[u8], name: &[u8]) -> Result<bool> {
2997    let Some(remainder) = path.strip_prefix(prefix) else {
2998        return Err(GitError::InvalidPath(format!(
2999            "invalid index path {}",
3000            String::from_utf8_lossy(path)
3001        )));
3002    };
3003    Ok(remainder.starts_with(name) && remainder.get(name.len()) == Some(&b'/'))
3004}
3005
3006pub fn short_status(
3007    worktree_root: impl AsRef<Path>,
3008    git_dir: impl AsRef<Path>,
3009    format: ObjectFormat,
3010) -> Result<Vec<ShortStatusEntry>> {
3011    short_status_with_options(
3012        worktree_root,
3013        git_dir,
3014        format,
3015        ShortStatusOptions::default(),
3016    )
3017}
3018
3019/// Compare one expected tracked entry to the worktree path named by `path`.
3020///
3021/// `path` is repository-relative and uses the platform path representation. For
3022/// callers that already carry git's byte path form, use
3023/// [`worktree_entry_state_by_git_path`].
3024pub fn worktree_entry_state(
3025    worktree_root: impl AsRef<Path>,
3026    git_dir: impl AsRef<Path>,
3027    format: ObjectFormat,
3028    path: impl AsRef<Path>,
3029    expected_oid: &ObjectId,
3030    expected_mode: u32,
3031    index_probe: Option<&IndexStatProbe>,
3032) -> Result<WorktreeEntryState> {
3033    let path = path.as_ref();
3034    if path.is_absolute() {
3035        return Err(GitError::InvalidPath(format!(
3036            "worktree entry path {} is absolute",
3037            path.display()
3038        )));
3039    }
3040    let git_path = git_path_bytes(path)?;
3041    worktree_entry_state_by_git_path(
3042        worktree_root,
3043        git_dir,
3044        format,
3045        &git_path,
3046        expected_oid,
3047        expected_mode,
3048        index_probe,
3049    )
3050}
3051
3052/// Compare one expected tracked entry to the worktree path named by a
3053/// repository-relative git path (`/` separators, raw bytes).
3054///
3055/// The comparison uses the same clean-filter, symlink-target, gitlink, and
3056/// racy-clean stat shortcut rules as [`short_status_with_options`].
3057pub fn worktree_entry_state_by_git_path(
3058    worktree_root: impl AsRef<Path>,
3059    git_dir: impl AsRef<Path>,
3060    format: ObjectFormat,
3061    git_path: &[u8],
3062    expected_oid: &ObjectId,
3063    expected_mode: u32,
3064    index_probe: Option<&IndexStatProbe>,
3065) -> Result<WorktreeEntryState> {
3066    let worktree_root = worktree_root.as_ref();
3067    let git_dir = git_dir.as_ref();
3068    let stat_cache =
3069        index_probe.and_then(|probe| probe.stat_cache_for(git_path, expected_oid, expected_mode));
3070    let Some(worktree_entry) = worktree_entry_for_git_path(
3071        worktree_root,
3072        git_dir,
3073        format,
3074        git_path,
3075        expected_oid,
3076        expected_mode,
3077        stat_cache.as_ref(),
3078    )?
3079    else {
3080        return Ok(WorktreeEntryState::Deleted);
3081    };
3082    if worktree_entry.mode == expected_mode && worktree_entry.oid == *expected_oid {
3083        Ok(WorktreeEntryState::Clean)
3084    } else {
3085        Ok(WorktreeEntryState::Modified)
3086    }
3087}
3088
3089pub fn short_status_with_options(
3090    worktree_root: impl AsRef<Path>,
3091    git_dir: impl AsRef<Path>,
3092    format: ObjectFormat,
3093    options: ShortStatusOptions,
3094) -> Result<Vec<ShortStatusEntry>> {
3095    let worktree_root = worktree_root.as_ref();
3096    let git_dir = git_dir.as_ref();
3097    let db = FileObjectDatabase::from_git_dir(git_dir, format);
3098    if !options.include_ignored
3099        && let Some(entries) = short_status_borrowed_head_matches_index_if_possible(
3100            worktree_root,
3101            git_dir,
3102            format,
3103            &db,
3104            options.untracked_mode,
3105        )?
3106    {
3107        return Ok(entries);
3108    }
3109    // Parse the index once: the stat cache lets the worktree walk skip
3110    // re-hashing files whose stat proves they are unchanged since staging
3111    // (git's racy-git shortcut). When HEAD matches the index, the status
3112    // comparison can stream directly from the parsed index and avoid building a
3113    // second path-sorted copy of every tracked entry.
3114    let (parsed_index, stat_cache, head_matches_index) =
3115        read_index_with_stat_cache(git_dir, format, &db)?;
3116    if head_matches_index && !options.include_ignored {
3117        let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
3118        let entries = short_status_tracked_only(
3119            worktree_root,
3120            git_dir,
3121            format,
3122            &db,
3123            &parsed_index,
3124            &stat_cache,
3125            true,
3126            options.untracked_mode,
3127        );
3128        let mut entries = entries?;
3129        let untracked_paths = status_untracked_paths_from_index(
3130            worktree_root,
3131            git_dir,
3132            &parsed_index,
3133            &stat_cache,
3134            &mut ignores,
3135            options.untracked_mode,
3136        )?;
3137        for path in untracked_paths {
3138            entries.push(ShortStatusEntry {
3139                index: b'?',
3140                worktree: b'?',
3141                path,
3142                head_mode: None,
3143                index_mode: None,
3144                worktree_mode: None,
3145                head_oid: None,
3146                index_oid: None,
3147                submodule: None,
3148            });
3149        }
3150        entries.sort_by(|left, right| {
3151            status_sort_category(left)
3152                .cmp(&status_sort_category(right))
3153                .then_with(|| left.path.cmp(&right.path))
3154        });
3155        return Ok(entries);
3156    }
3157    let index = index_entries_from_index(parsed_index);
3158    let head = if head_matches_index {
3159        None
3160    } else {
3161        Some(head_tree_entries(git_dir, format, &db)?)
3162    };
3163    let tracked_paths = if options.untracked_mode == StatusUntrackedMode::None {
3164        Some(index.keys().cloned().collect::<BTreeSet<_>>())
3165    } else {
3166        None
3167    };
3168    let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
3169    let (worktree, submodule_dirt_map, tracked_presence) =
3170        status_worktree_entries_with_submodule_dirt(
3171            worktree_root,
3172            git_dir,
3173            format,
3174            &stat_cache,
3175            tracked_paths.as_ref(),
3176            Some(&mut ignores),
3177        )?;
3178    let mut entries = Vec::new();
3179    if head_matches_index {
3180        collect_status_entries_head_matches_index(
3181            &index,
3182            &worktree,
3183            &tracked_presence,
3184            &submodule_dirt_map,
3185            options.untracked_mode,
3186            &mut entries,
3187        );
3188    } else if let Some(head) = head.as_ref() {
3189        collect_status_entries_with_head(
3190            StatusComparisonInputs {
3191                head,
3192                index: &index,
3193                worktree: &worktree,
3194                tracked_presence: &tracked_presence,
3195                submodule_dirt_map: &submodule_dirt_map,
3196                ignores: &ignores,
3197            },
3198            options.untracked_mode,
3199            &mut entries,
3200        );
3201    }
3202    if options.include_ignored {
3203        let ignored_paths = ignored_untracked_paths(worktree_root, git_dir, &index, &ignores, true)?;
3204        let ignored_paths: Vec<Vec<u8>> = match options.ignored_mode {
3205            StatusIgnoredMode::Matching => ignored_paths,
3206            StatusIgnoredMode::Traditional => {
3207                let mut rolled = BTreeSet::new();
3208                for path in ignored_paths {
3209                    let path = untracked_normal_rollup_path(&path, &index, &ignores);
3210                    if ignored_traditional_path_is_empty_directory(worktree_root, &path)? {
3211                        continue;
3212                    }
3213                    rolled.insert(path);
3214                }
3215                rolled.into_iter().collect()
3216            }
3217        };
3218        for path in ignored_paths {
3219            entries.push(ShortStatusEntry {
3220                index: b'!',
3221                worktree: b'!',
3222                path,
3223                head_mode: None,
3224                index_mode: None,
3225                worktree_mode: None,
3226                head_oid: None,
3227                index_oid: None,
3228                submodule: None,
3229            });
3230        }
3231    }
3232    let untracked_paths: Vec<Vec<u8>> = match options.untracked_mode {
3233        StatusUntrackedMode::All => worktree
3234            .keys()
3235            .filter(|path| !index.contains_key(*path) && !ignores.is_ignored(path, false))
3236            .cloned()
3237            .collect(),
3238        StatusUntrackedMode::Normal => {
3239            normal_untracked_paths_from_worktree(&worktree, &index, &ignores)
3240        }
3241        StatusUntrackedMode::None => Vec::new(),
3242    };
3243    for path in untracked_paths {
3244        entries.push(ShortStatusEntry {
3245            index: b'?',
3246            worktree: b'?',
3247            path,
3248            head_mode: None,
3249            index_mode: None,
3250            worktree_mode: None,
3251            head_oid: None,
3252            index_oid: None,
3253            submodule: None,
3254        });
3255    }
3256    entries.sort_by(|left, right| {
3257        status_sort_category(left)
3258            .cmp(&status_sort_category(right))
3259            .then_with(|| left.path.cmp(&right.path))
3260    });
3261    Ok(entries)
3262}
3263
3264fn collect_status_entries_head_matches_index(
3265    index: &BTreeMap<Vec<u8>, TrackedEntry>,
3266    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
3267    tracked_presence: &HashSet<Vec<u8>>,
3268    submodule_dirt_map: &BTreeMap<Vec<u8>, u8>,
3269    untracked_mode: StatusUntrackedMode,
3270    entries: &mut Vec<ShortStatusEntry>,
3271) {
3272    for (path, index_entry) in index {
3273        let worktree_entry = worktree.get(path);
3274        let worktree_present =
3275            worktree_entry.is_some() || tracked_presence.contains(path.as_slice());
3276        let submodule = status_submodule_from_entries(
3277            path,
3278            index_entry,
3279            worktree_entry,
3280            submodule_dirt_map,
3281            untracked_mode,
3282        );
3283        let worktree_code = match worktree_entry {
3284            None if !worktree_present => b'D',
3285            Some(worktree_entry) if worktree_entry != index_entry => b'M',
3286            _ if submodule.is_some_and(|sub| sub.any()) => b'M',
3287            _ => b' ',
3288        };
3289        if worktree_code != b' ' {
3290            entries.push(ShortStatusEntry {
3291                index: b' ',
3292                worktree: worktree_code,
3293                path: path.clone(),
3294                head_mode: Some(index_entry.mode),
3295                index_mode: Some(index_entry.mode),
3296                worktree_mode: status_worktree_mode(
3297                    Some(index_entry),
3298                    worktree_entry,
3299                    worktree_present,
3300                ),
3301                head_oid: Some(index_entry.oid),
3302                index_oid: Some(index_entry.oid),
3303                submodule: submodule.filter(|sub| sub.any()),
3304            });
3305        }
3306    }
3307}
3308
3309struct StatusComparisonInputs<'a> {
3310    head: &'a BTreeMap<Vec<u8>, TrackedEntry>,
3311    index: &'a BTreeMap<Vec<u8>, TrackedEntry>,
3312    worktree: &'a BTreeMap<Vec<u8>, TrackedEntry>,
3313    tracked_presence: &'a HashSet<Vec<u8>>,
3314    submodule_dirt_map: &'a BTreeMap<Vec<u8>, u8>,
3315    ignores: &'a IgnoreMatcher,
3316}
3317
3318fn collect_status_entries_with_head(
3319    inputs: StatusComparisonInputs<'_>,
3320    untracked_mode: StatusUntrackedMode,
3321    entries: &mut Vec<ShortStatusEntry>,
3322) {
3323    let mut paths = BTreeSet::new();
3324    paths.extend(inputs.head.keys().cloned());
3325    paths.extend(inputs.index.keys().cloned());
3326    paths.extend(
3327        inputs
3328            .worktree
3329            .keys()
3330            .filter(|path| inputs.index.contains_key(*path))
3331            .cloned(),
3332    );
3333
3334    for path in paths {
3335        let head_entry = inputs.head.get(&path);
3336        let index_entry = inputs.index.get(&path);
3337        let worktree_entry = inputs.worktree.get(&path);
3338        let worktree_present =
3339            worktree_entry.is_some() || inputs.tracked_presence.contains(path.as_slice());
3340        if head_entry.is_none()
3341            && index_entry.is_none()
3342            && worktree_entry.is_some()
3343            && inputs.ignores.is_ignored(&path, false)
3344        {
3345            continue;
3346        }
3347        let submodule = match index_entry {
3348            Some(index_entry) => status_submodule_from_entries(
3349                &path,
3350                index_entry,
3351                worktree_entry,
3352                inputs.submodule_dirt_map,
3353                untracked_mode,
3354            ),
3355            None => None,
3356        };
3357        let (index_code, worktree_code) =
3358            if head_entry.is_none() && index_entry.is_none() && worktree_entry.is_some() {
3359                (b'?', b'?')
3360            } else {
3361                let index_code = match (head_entry, index_entry) {
3362                    (None, Some(_)) => b'A',
3363                    (Some(_), None) => b'D',
3364                    (Some(left), Some(right)) if left != right => b'M',
3365                    _ => b' ',
3366                };
3367                let worktree_code = match (index_entry, worktree_entry) {
3368                    (None, Some(_)) => b'?',
3369                    (Some(_), None) if !worktree_present => b'D',
3370                    (Some(left), Some(right)) if left != right => b'M',
3371                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
3372                    _ => b' ',
3373                };
3374                (index_code, worktree_code)
3375            };
3376        if index_code != b' ' || worktree_code != b' ' {
3377            entries.push(ShortStatusEntry {
3378                index: index_code,
3379                worktree: worktree_code,
3380                path,
3381                head_mode: head_entry.map(|entry| entry.mode),
3382                index_mode: index_entry.map(|entry| entry.mode),
3383                worktree_mode: status_worktree_mode(index_entry, worktree_entry, worktree_present),
3384                head_oid: head_entry.map(|entry| entry.oid),
3385                index_oid: index_entry.map(|entry| entry.oid),
3386                submodule: submodule.filter(|sub| sub.any()),
3387            });
3388        }
3389    }
3390}
3391
3392fn status_worktree_mode(
3393    index_entry: Option<&TrackedEntry>,
3394    worktree_entry: Option<&TrackedEntry>,
3395    worktree_present: bool,
3396) -> Option<u32> {
3397    worktree_entry.map(|entry| entry.mode).or_else(|| {
3398        worktree_present
3399            .then(|| index_entry.map(|entry| entry.mode))
3400            .flatten()
3401    })
3402}
3403
3404fn status_submodule_from_entries(
3405    path: &[u8],
3406    index_entry: &TrackedEntry,
3407    worktree_entry: Option<&TrackedEntry>,
3408    submodule_dirt_map: &BTreeMap<Vec<u8>, u8>,
3409    untracked_mode: StatusUntrackedMode,
3410) -> Option<SubmoduleStatus> {
3411    let worktree_entry = worktree_entry?;
3412    if index_entry.mode != 0o160000 || worktree_entry.mode != 0o160000 {
3413        return None;
3414    }
3415    let dirt = submodule_dirt_map.get(path).copied().unwrap_or(0);
3416    Some(SubmoduleStatus {
3417        new_commits: index_entry.oid != worktree_entry.oid,
3418        modified_content: dirt & DIRTY_SUBMODULE_MODIFIED != 0,
3419        untracked_content: dirt & DIRTY_SUBMODULE_UNTRACKED != 0
3420            && !matches!(untracked_mode, StatusUntrackedMode::None),
3421    })
3422}
3423
3424fn short_status_tracked_only(
3425    worktree_root: &Path,
3426    git_dir: &Path,
3427    format: ObjectFormat,
3428    db: &FileObjectDatabase,
3429    index: &Index,
3430    stat_cache: &IndexStatCache,
3431    head_matches_index: bool,
3432    untracked_mode: StatusUntrackedMode,
3433) -> Result<Vec<ShortStatusEntry>> {
3434    let normal_entry_count = index
3435        .entries
3436        .iter()
3437        .filter(|entry| entry.stage() == Stage::Normal)
3438        .count();
3439    if head_matches_index && normal_entry_count >= 512 {
3440        return short_status_tracked_only_head_matches_index_parallel(
3441            worktree_root,
3442            git_dir,
3443            format,
3444            index,
3445            stat_cache,
3446            untracked_mode,
3447        );
3448    }
3449    let head = if head_matches_index {
3450        None
3451    } else {
3452        Some(head_tree_entries(git_dir, format, db)?)
3453    };
3454    if !head_matches_index && normal_entry_count >= 512 {
3455        if let Some(head) = head.as_ref() {
3456            return short_status_tracked_only_with_head_parallel(
3457                worktree_root,
3458                git_dir,
3459                format,
3460                index,
3461                stat_cache,
3462                head,
3463                untracked_mode,
3464            );
3465        }
3466    }
3467    let mut clean_filter = None;
3468    let mut entries = Vec::new();
3469    for entry in index
3470        .entries
3471        .iter()
3472        .filter(|entry| entry.stage() == Stage::Normal)
3473    {
3474        let path = entry.path.as_bytes();
3475        let index_entry = TrackedEntry {
3476            mode: entry.mode,
3477            oid: entry.oid,
3478        };
3479        let head_entry = if head_matches_index {
3480            Some(&index_entry)
3481        } else {
3482            head.as_ref().and_then(|head| head.get(path))
3483        };
3484        let worktree_entry = worktree_entry_for_index_entry_with_attributes(
3485            worktree_root,
3486            git_dir,
3487            format,
3488            entry,
3489            stat_cache,
3490            &mut clean_filter,
3491        )?;
3492        let submodule = tracked_only_submodule_status(
3493            worktree_root,
3494            path,
3495            &index_entry,
3496            worktree_entry.as_ref(),
3497            untracked_mode,
3498        )?;
3499        let index_code = match head_entry {
3500            None => b'A',
3501            Some(head_entry) if *head_entry != index_entry => b'M',
3502            _ => b' ',
3503        };
3504        let worktree_code = match worktree_entry.as_ref() {
3505            None => b'D',
3506            Some(worktree_entry) if *worktree_entry != index_entry => b'M',
3507            _ if submodule.is_some_and(|sub| sub.any()) => b'M',
3508            _ => b' ',
3509        };
3510        if index_code != b' ' || worktree_code != b' ' {
3511            entries.push(ShortStatusEntry {
3512                index: index_code,
3513                worktree: worktree_code,
3514                path: path.to_vec(),
3515                head_mode: head_entry.map(|entry| entry.mode),
3516                index_mode: Some(index_entry.mode),
3517                worktree_mode: worktree_entry.as_ref().map(|entry| entry.mode),
3518                head_oid: head_entry.map(|entry| entry.oid),
3519                index_oid: Some(index_entry.oid),
3520                submodule: submodule.filter(|sub| sub.any()),
3521            });
3522        }
3523    }
3524    if let Some(head) = head.as_ref() {
3525        let index_paths = index
3526            .entries
3527            .iter()
3528            .filter(|entry| entry.stage() == Stage::Normal)
3529            .map(|entry| entry.path.as_bytes().to_vec())
3530            .collect::<HashSet<_>>();
3531        for (path, head_entry) in head {
3532            if index_paths.contains(path.as_slice()) {
3533                continue;
3534            }
3535            entries.push(ShortStatusEntry {
3536                index: b'D',
3537                worktree: b' ',
3538                path: path.clone(),
3539                head_mode: Some(head_entry.mode),
3540                index_mode: None,
3541                worktree_mode: None,
3542                head_oid: Some(head_entry.oid),
3543                index_oid: None,
3544                submodule: None,
3545            });
3546        }
3547    }
3548    entries.sort_by(|left, right| {
3549        status_sort_category(left)
3550            .cmp(&status_sort_category(right))
3551            .then_with(|| left.path.cmp(&right.path))
3552    });
3553    Ok(entries)
3554}
3555
3556fn short_status_borrowed_head_matches_index_if_possible(
3557    worktree_root: &Path,
3558    git_dir: &Path,
3559    format: ObjectFormat,
3560    db: &FileObjectDatabase,
3561    untracked_mode: StatusUntrackedMode,
3562) -> Result<Option<Vec<ShortStatusEntry>>> {
3563    let index_path = repository_index_path(git_dir);
3564    let index_metadata = match fs::metadata(&index_path) {
3565        Ok(metadata) => metadata,
3566        Err(err)
3567            if err.kind() == std::io::ErrorKind::NotFound
3568                && matches!(untracked_mode, StatusUntrackedMode::None) =>
3569        {
3570            return Ok(Some(Vec::new()));
3571        }
3572        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
3573        Err(err) => return Err(err.into()),
3574    };
3575    let index_bytes = fs::read(&index_path)?;
3576    let borrowed = match BorrowedIndex::parse(&index_bytes, format) {
3577        Ok(index) => index,
3578        Err(GitError::Unsupported(_)) => return Ok(None),
3579        Err(err) => return Err(err),
3580    };
3581    let Some(head_tree_oid) = resolve_head_tree_oid(git_dir, format, db)? else {
3582        return Ok(None);
3583    };
3584    let stage0_entry_count = borrowed
3585        .entries
3586        .iter()
3587        .filter(|entry| entry.stage() == Stage::Normal)
3588        .count();
3589    if !head_matches_borrowed_index_from_cache_tree(
3590        &borrowed,
3591        format,
3592        &head_tree_oid,
3593        stage0_entry_count,
3594    )? {
3595        return Ok(None);
3596    }
3597
3598    let index_mtime = file_mtime_parts(&index_metadata);
3599    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
3600    let mut entries = short_status_borrowed_tracked_only_head_matches_index_parallel(
3601        worktree_root,
3602        git_dir,
3603        format,
3604        &borrowed,
3605        &stat_cache,
3606        untracked_mode,
3607    )?;
3608    if !matches!(untracked_mode, StatusUntrackedMode::None) {
3609        let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
3610        let untracked_paths = status_untracked_paths_from_borrowed_index(
3611            worktree_root,
3612            git_dir,
3613            &borrowed,
3614            &mut ignores,
3615            untracked_mode,
3616        )?;
3617        for path in untracked_paths {
3618            entries.push(ShortStatusEntry {
3619                index: b'?',
3620                worktree: b'?',
3621                path,
3622                head_mode: None,
3623                index_mode: None,
3624                worktree_mode: None,
3625                head_oid: None,
3626                index_oid: None,
3627                submodule: None,
3628            });
3629        }
3630        entries.sort_by(|left, right| {
3631            status_sort_category(left)
3632                .cmp(&status_sort_category(right))
3633                .then_with(|| left.path.cmp(&right.path))
3634        });
3635    }
3636    Ok(Some(entries))
3637}
3638
/// A stage-0 index entry that the stat precheck could not declare clean.
///
/// The payload is the entry's position in the index's `entries` list.
#[derive(Clone, Copy, Debug)]
enum TrackedOnlyPrecheck {
    /// The path is missing from the worktree.
    Deleted(usize),
    /// The entry needs the full worktree comparison.
    Slow(usize),
}
3644
/// Verdict of a single stat precheck on one index entry.
#[derive(Debug)]
enum TrackedOnlyPrecheckOutcome {
    /// The stat cache accepted the entry; it needs no further work.
    Clean,
    /// The path does not exist in the worktree.
    Deleted,
    /// The stat data was inconclusive; the entry needs the full comparison.
    Slow,
}
3651
3652fn short_status_tracked_only_head_matches_index_parallel(
3653    worktree_root: &Path,
3654    git_dir: &Path,
3655    format: ObjectFormat,
3656    index: &Index,
3657    stat_cache: &IndexStatCache,
3658    untracked_mode: StatusUntrackedMode,
3659) -> Result<Vec<ShortStatusEntry>> {
3660    let prechecks = tracked_only_non_clean_prechecks_parallel(worktree_root, index, stat_cache)?;
3661
3662    let mut clean_filter = None;
3663    let mut entries = Vec::new();
3664    for precheck in prechecks {
3665        match precheck {
3666            TrackedOnlyPrecheck::Deleted(idx) => {
3667                let entry = &index.entries[idx];
3668                let path = entry.path.as_bytes();
3669                entries.push(ShortStatusEntry {
3670                    index: b' ',
3671                    worktree: b'D',
3672                    path: path.to_vec(),
3673                    head_mode: Some(entry.mode),
3674                    index_mode: Some(entry.mode),
3675                    worktree_mode: None,
3676                    head_oid: Some(entry.oid),
3677                    index_oid: Some(entry.oid),
3678                    submodule: None,
3679                });
3680            }
3681            TrackedOnlyPrecheck::Slow(idx) => {
3682                let entry = &index.entries[idx];
3683                let path = entry.path.as_bytes();
3684                let index_entry = TrackedEntry {
3685                    mode: entry.mode,
3686                    oid: entry.oid,
3687                };
3688                let worktree_entry = worktree_entry_for_index_entry_with_attributes(
3689                    worktree_root,
3690                    git_dir,
3691                    format,
3692                    entry,
3693                    stat_cache,
3694                    &mut clean_filter,
3695                )?;
3696                let submodule = tracked_only_submodule_status(
3697                    worktree_root,
3698                    path,
3699                    &index_entry,
3700                    worktree_entry.as_ref(),
3701                    untracked_mode,
3702                )?;
3703                let worktree_code = match worktree_entry.as_ref() {
3704                    None => b'D',
3705                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
3706                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
3707                    _ => b' ',
3708                };
3709                if worktree_code != b' ' {
3710                    entries.push(ShortStatusEntry {
3711                        index: b' ',
3712                        worktree: worktree_code,
3713                        path: path.to_vec(),
3714                        head_mode: Some(index_entry.mode),
3715                        index_mode: Some(index_entry.mode),
3716                        worktree_mode: worktree_entry.as_ref().map(|entry| entry.mode),
3717                        head_oid: Some(index_entry.oid),
3718                        index_oid: Some(index_entry.oid),
3719                        submodule: submodule.filter(|sub| sub.any()),
3720                    });
3721                }
3722            }
3723        }
3724    }
3725    entries.sort_by(|left, right| {
3726        status_sort_category(left)
3727            .cmp(&status_sort_category(right))
3728            .then_with(|| left.path.cmp(&right.path))
3729    });
3730    Ok(entries)
3731}
3732
3733fn short_status_borrowed_tracked_only_head_matches_index_parallel(
3734    worktree_root: &Path,
3735    git_dir: &Path,
3736    format: ObjectFormat,
3737    index: &BorrowedIndex<'_>,
3738    stat_cache: &IndexStatCache,
3739    untracked_mode: StatusUntrackedMode,
3740) -> Result<Vec<ShortStatusEntry>> {
3741    let prechecks =
3742        tracked_only_borrowed_non_clean_prechecks_parallel(worktree_root, index, stat_cache)?;
3743
3744    let mut clean_filter = None;
3745    let mut entries = Vec::new();
3746    for precheck in prechecks {
3747        match precheck {
3748            TrackedOnlyPrecheck::Deleted(idx) => {
3749                let entry = &index.entries[idx];
3750                entries.push(ShortStatusEntry {
3751                    index: b' ',
3752                    worktree: b'D',
3753                    path: entry.path.to_vec(),
3754                    head_mode: Some(entry.mode),
3755                    index_mode: Some(entry.mode),
3756                    worktree_mode: None,
3757                    head_oid: Some(entry.oid),
3758                    index_oid: Some(entry.oid),
3759                    submodule: None,
3760                });
3761            }
3762            TrackedOnlyPrecheck::Slow(idx) => {
3763                let entry = &index.entries[idx];
3764                let index_entry = TrackedEntry {
3765                    mode: entry.mode,
3766                    oid: entry.oid,
3767                };
3768                let worktree_entry = worktree_entry_for_index_entry_ref_with_attributes(
3769                    worktree_root,
3770                    git_dir,
3771                    format,
3772                    entry,
3773                    stat_cache,
3774                    &mut clean_filter,
3775                )?;
3776                let submodule = tracked_only_submodule_status(
3777                    worktree_root,
3778                    entry.path,
3779                    &index_entry,
3780                    worktree_entry.as_ref(),
3781                    untracked_mode,
3782                )?;
3783                let worktree_code = match worktree_entry.as_ref() {
3784                    None => b'D',
3785                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
3786                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
3787                    _ => b' ',
3788                };
3789                if worktree_code != b' ' {
3790                    entries.push(ShortStatusEntry {
3791                        index: b' ',
3792                        worktree: worktree_code,
3793                        path: entry.path.to_vec(),
3794                        head_mode: Some(index_entry.mode),
3795                        index_mode: Some(index_entry.mode),
3796                        worktree_mode: worktree_entry.as_ref().map(|entry| entry.mode),
3797                        head_oid: Some(index_entry.oid),
3798                        index_oid: Some(index_entry.oid),
3799                        submodule: submodule.filter(|sub| sub.any()),
3800                    });
3801                }
3802            }
3803        }
3804    }
3805    entries.sort_by(|left, right| {
3806        status_sort_category(left)
3807            .cmp(&status_sort_category(right))
3808            .then_with(|| left.path.cmp(&right.path))
3809    });
3810    Ok(entries)
3811}
3812
3813fn short_status_tracked_only_with_head_parallel(
3814    worktree_root: &Path,
3815    git_dir: &Path,
3816    format: ObjectFormat,
3817    index: &Index,
3818    stat_cache: &IndexStatCache,
3819    head: &BTreeMap<Vec<u8>, TrackedEntry>,
3820    untracked_mode: StatusUntrackedMode,
3821) -> Result<Vec<ShortStatusEntry>> {
3822    let prechecks = tracked_only_non_clean_prechecks_parallel(worktree_root, index, stat_cache)?;
3823    let mut precheck_cursor = 0usize;
3824    let mut clean_filter = None;
3825    let mut entries = Vec::new();
3826
3827    for (idx, entry) in index.entries.iter().enumerate() {
3828        if entry.stage() != Stage::Normal {
3829            continue;
3830        }
3831        let path = entry.path.as_bytes();
3832        let index_entry = TrackedEntry {
3833            mode: entry.mode,
3834            oid: entry.oid,
3835        };
3836        let head_entry = head.get(path);
3837        let index_code = match head_entry {
3838            None => b'A',
3839            Some(head_entry) if *head_entry != index_entry => b'M',
3840            _ => b' ',
3841        };
3842        let precheck = prechecks
3843            .get(precheck_cursor)
3844            .copied()
3845            .and_then(|precheck| {
3846                if tracked_only_precheck_index(precheck) == idx {
3847                    precheck_cursor += 1;
3848                    Some(precheck)
3849                } else {
3850                    None
3851                }
3852            });
3853        let (worktree_code, worktree_mode, submodule) = match precheck {
3854            None => (b' ', Some(index_entry.mode), None),
3855            Some(TrackedOnlyPrecheck::Deleted(_)) => (b'D', None, None),
3856            Some(TrackedOnlyPrecheck::Slow(_)) => {
3857                let worktree_entry = worktree_entry_for_index_entry_with_attributes(
3858                    worktree_root,
3859                    git_dir,
3860                    format,
3861                    entry,
3862                    stat_cache,
3863                    &mut clean_filter,
3864                )?;
3865                let submodule = tracked_only_submodule_status(
3866                    worktree_root,
3867                    path,
3868                    &index_entry,
3869                    worktree_entry.as_ref(),
3870                    untracked_mode,
3871                )?;
3872                let worktree_code = match worktree_entry.as_ref() {
3873                    None => b'D',
3874                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
3875                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
3876                    _ => b' ',
3877                };
3878                (
3879                    worktree_code,
3880                    worktree_entry.as_ref().map(|entry| entry.mode),
3881                    submodule.filter(|sub| sub.any()),
3882                )
3883            }
3884        };
3885        if index_code != b' ' || worktree_code != b' ' {
3886            entries.push(ShortStatusEntry {
3887                index: index_code,
3888                worktree: worktree_code,
3889                path: path.to_vec(),
3890                head_mode: head_entry.map(|entry| entry.mode),
3891                index_mode: Some(index_entry.mode),
3892                worktree_mode,
3893                head_oid: head_entry.map(|entry| entry.oid),
3894                index_oid: Some(index_entry.oid),
3895                submodule,
3896            });
3897        }
3898    }
3899
3900    let index_paths = index
3901        .entries
3902        .iter()
3903        .filter(|entry| entry.stage() == Stage::Normal)
3904        .map(|entry| entry.path.as_bytes().to_vec())
3905        .collect::<HashSet<_>>();
3906    for (path, head_entry) in head {
3907        if index_paths.contains(path.as_slice()) {
3908            continue;
3909        }
3910        entries.push(ShortStatusEntry {
3911            index: b'D',
3912            worktree: b' ',
3913            path: path.clone(),
3914            head_mode: Some(head_entry.mode),
3915            index_mode: None,
3916            worktree_mode: None,
3917            head_oid: Some(head_entry.oid),
3918            index_oid: None,
3919            submodule: None,
3920        });
3921    }
3922    entries.sort_by(|left, right| {
3923        status_sort_category(left)
3924            .cmp(&status_sort_category(right))
3925            .then_with(|| left.path.cmp(&right.path))
3926    });
3927    Ok(entries)
3928}
3929
3930fn tracked_only_precheck_index(precheck: TrackedOnlyPrecheck) -> usize {
3931    match precheck {
3932        TrackedOnlyPrecheck::Deleted(idx) | TrackedOnlyPrecheck::Slow(idx) => idx,
3933    }
3934}
3935
3936fn tracked_only_non_clean_prechecks_parallel(
3937    worktree_root: &Path,
3938    index: &Index,
3939    stat_cache: &IndexStatCache,
3940) -> Result<Vec<TrackedOnlyPrecheck>> {
3941    let normal_indices = index
3942        .entries
3943        .iter()
3944        .enumerate()
3945        .filter_map(|(idx, entry)| (entry.stage() == Stage::Normal).then_some(idx))
3946        .collect::<Vec<_>>();
3947    if normal_indices.is_empty() {
3948        return Ok(Vec::new());
3949    }
3950    let max_workers = std::thread::available_parallelism()
3951        .map(|count| count.get())
3952        .unwrap_or(1)
3953        .min(16);
3954    let worker_count = max_workers.min(normal_indices.len().div_ceil(512)).max(1);
3955    if worker_count == 1 {
3956        let mut prechecks = Vec::new();
3957        let mut absolute = PathBuf::new();
3958        for idx in normal_indices {
3959            let entry = &index.entries[idx];
3960            match tracked_only_stat_precheck(worktree_root, entry, stat_cache, &mut absolute)? {
3961                TrackedOnlyPrecheckOutcome::Clean => {}
3962                TrackedOnlyPrecheckOutcome::Deleted => {
3963                    prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
3964                }
3965                TrackedOnlyPrecheckOutcome::Slow => {
3966                    prechecks.push(TrackedOnlyPrecheck::Slow(idx));
3967                }
3968            }
3969        }
3970        return Ok(prechecks);
3971    }
3972    let chunk_size = normal_indices.len().div_ceil(worker_count);
3973    let mut prechecks = std::thread::scope(|scope| -> Result<Vec<TrackedOnlyPrecheck>> {
3974        let mut handles = Vec::new();
3975        for chunk in normal_indices.chunks(chunk_size) {
3976            handles.push(scope.spawn(move || -> Result<Vec<TrackedOnlyPrecheck>> {
3977                let mut prechecks = Vec::new();
3978                let mut absolute = PathBuf::new();
3979                for &idx in chunk {
3980                    let entry = &index.entries[idx];
3981                    match tracked_only_stat_precheck(
3982                        worktree_root,
3983                        entry,
3984                        stat_cache,
3985                        &mut absolute,
3986                    )? {
3987                        TrackedOnlyPrecheckOutcome::Clean => {}
3988                        TrackedOnlyPrecheckOutcome::Deleted => {
3989                            prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
3990                        }
3991                        TrackedOnlyPrecheckOutcome::Slow => {
3992                            prechecks.push(TrackedOnlyPrecheck::Slow(idx));
3993                        }
3994                    }
3995                }
3996                Ok(prechecks)
3997            }));
3998        }
3999        let mut prechecks = Vec::new();
4000        for handle in handles {
4001            let mut chunk = handle
4002                .join()
4003                .map_err(|_| GitError::Command("status worker panicked".into()))??;
4004            prechecks.append(&mut chunk);
4005        }
4006        Ok(prechecks)
4007    })?;
4008    prechecks.sort_by_key(|precheck| match precheck {
4009        TrackedOnlyPrecheck::Deleted(idx) | TrackedOnlyPrecheck::Slow(idx) => *idx,
4010    });
4011    Ok(prechecks)
4012}
4013
4014fn tracked_only_borrowed_non_clean_prechecks_parallel(
4015    worktree_root: &Path,
4016    index: &BorrowedIndex<'_>,
4017    stat_cache: &IndexStatCache,
4018) -> Result<Vec<TrackedOnlyPrecheck>> {
4019    let normal_indices = index
4020        .entries
4021        .iter()
4022        .enumerate()
4023        .filter_map(|(idx, entry)| (entry.stage() == Stage::Normal).then_some(idx))
4024        .collect::<Vec<_>>();
4025    if normal_indices.is_empty() {
4026        return Ok(Vec::new());
4027    }
4028    let max_workers = std::thread::available_parallelism()
4029        .map(|count| count.get())
4030        .unwrap_or(1)
4031        .min(16);
4032    let worker_count = max_workers.min(normal_indices.len().div_ceil(512)).max(1);
4033    if worker_count == 1 {
4034        let mut prechecks = Vec::new();
4035        let mut absolute = PathBuf::new();
4036        for idx in normal_indices {
4037            let entry = &index.entries[idx];
4038            match tracked_only_borrowed_stat_precheck(
4039                worktree_root,
4040                entry,
4041                stat_cache,
4042                &mut absolute,
4043            )? {
4044                TrackedOnlyPrecheckOutcome::Clean => {}
4045                TrackedOnlyPrecheckOutcome::Deleted => {
4046                    prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
4047                }
4048                TrackedOnlyPrecheckOutcome::Slow => {
4049                    prechecks.push(TrackedOnlyPrecheck::Slow(idx));
4050                }
4051            }
4052        }
4053        return Ok(prechecks);
4054    }
4055    let chunk_size = normal_indices.len().div_ceil(worker_count);
4056    let mut prechecks = std::thread::scope(|scope| -> Result<Vec<TrackedOnlyPrecheck>> {
4057        let mut handles = Vec::new();
4058        for chunk in normal_indices.chunks(chunk_size) {
4059            handles.push(scope.spawn(move || -> Result<Vec<TrackedOnlyPrecheck>> {
4060                let mut prechecks = Vec::new();
4061                let mut absolute = PathBuf::new();
4062                for &idx in chunk {
4063                    let entry = &index.entries[idx];
4064                    match tracked_only_borrowed_stat_precheck(
4065                        worktree_root,
4066                        entry,
4067                        stat_cache,
4068                        &mut absolute,
4069                    )? {
4070                        TrackedOnlyPrecheckOutcome::Clean => {}
4071                        TrackedOnlyPrecheckOutcome::Deleted => {
4072                            prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
4073                        }
4074                        TrackedOnlyPrecheckOutcome::Slow => {
4075                            prechecks.push(TrackedOnlyPrecheck::Slow(idx));
4076                        }
4077                    }
4078                }
4079                Ok(prechecks)
4080            }));
4081        }
4082        let mut prechecks = Vec::new();
4083        for handle in handles {
4084            let mut chunk = handle
4085                .join()
4086                .map_err(|_| GitError::Command("status worker panicked".into()))??;
4087            prechecks.append(&mut chunk);
4088        }
4089        Ok(prechecks)
4090    })?;
4091    prechecks.sort_by_key(|precheck| match precheck {
4092        TrackedOnlyPrecheck::Deleted(idx) | TrackedOnlyPrecheck::Slow(idx) => *idx,
4093    });
4094    Ok(prechecks)
4095}
4096
4097fn tracked_only_stat_precheck(
4098    worktree_root: &Path,
4099    index_entry: &IndexEntry,
4100    stat_cache: &IndexStatCache,
4101    absolute: &mut PathBuf,
4102) -> Result<TrackedOnlyPrecheckOutcome> {
4103    if index_entry.mode == 0o160000 {
4104        return Ok(TrackedOnlyPrecheckOutcome::Slow);
4105    }
4106    let git_path = index_entry.path.as_bytes();
4107    set_worktree_path_from_repo_path(worktree_root, git_path, absolute)?;
4108    let metadata = match fs::symlink_metadata(&absolute) {
4109        Ok(metadata) => metadata,
4110        Err(err)
4111            if matches!(
4112                err.kind(),
4113                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
4114            ) =>
4115        {
4116            return Ok(TrackedOnlyPrecheckOutcome::Deleted);
4117        }
4118        Err(err) => return Err(err.into()),
4119    };
4120    let file_type = metadata.file_type();
4121    if file_type.is_dir() || !(file_type.is_file() || file_type.is_symlink()) {
4122        return Ok(TrackedOnlyPrecheckOutcome::Slow);
4123    }
4124    if stat_cache
4125        .reuse_index_entry(index_entry, &metadata)
4126        .is_some()
4127    {
4128        Ok(TrackedOnlyPrecheckOutcome::Clean)
4129    } else {
4130        Ok(TrackedOnlyPrecheckOutcome::Slow)
4131    }
4132}
4133
4134fn tracked_only_borrowed_stat_precheck(
4135    worktree_root: &Path,
4136    index_entry: &IndexEntryRef<'_>,
4137    stat_cache: &IndexStatCache,
4138    absolute: &mut PathBuf,
4139) -> Result<TrackedOnlyPrecheckOutcome> {
4140    if index_entry.mode == 0o160000 {
4141        return Ok(TrackedOnlyPrecheckOutcome::Slow);
4142    }
4143    set_worktree_path_from_repo_path(worktree_root, index_entry.path, absolute)?;
4144    let metadata = match fs::symlink_metadata(&absolute) {
4145        Ok(metadata) => metadata,
4146        Err(err)
4147            if matches!(
4148                err.kind(),
4149                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
4150            ) =>
4151        {
4152            return Ok(TrackedOnlyPrecheckOutcome::Deleted);
4153        }
4154        Err(err) => return Err(err.into()),
4155    };
4156    let file_type = metadata.file_type();
4157    if file_type.is_dir() || !(file_type.is_file() || file_type.is_symlink()) {
4158        return Ok(TrackedOnlyPrecheckOutcome::Slow);
4159    }
4160    if stat_cache
4161        .reuse_index_entry_ref(index_entry, &metadata)
4162        .is_some()
4163    {
4164        Ok(TrackedOnlyPrecheckOutcome::Clean)
4165    } else {
4166        Ok(TrackedOnlyPrecheckOutcome::Slow)
4167    }
4168}
4169
4170fn set_worktree_path_from_repo_path(
4171    worktree_root: &Path,
4172    git_path: &[u8],
4173    out: &mut PathBuf,
4174) -> Result<()> {
4175    out.clear();
4176    out.push(worktree_root);
4177    push_repo_path(out, git_path)
4178}
4179
4180#[cfg(unix)]
4181fn push_repo_path(out: &mut PathBuf, path: &[u8]) -> Result<()> {
4182    use std::os::unix::ffi::OsStrExt;
4183
4184    out.push(Path::new(std::ffi::OsStr::from_bytes(path)));
4185    Ok(())
4186}
4187
/// Appends the repository path `path` to `out` (non-Unix platforms).
///
/// The bytes must be valid UTF-8; each `/`-separated component is pushed
/// separately so the platform's own separator is used.
///
/// # Errors
///
/// Returns `GitError::InvalidPath` when `path` is not valid UTF-8.
#[cfg(not(unix))]
fn push_repo_path(out: &mut PathBuf, path: &[u8]) -> Result<()> {
    let Ok(text) = std::str::from_utf8(path) else {
        return Err(GitError::InvalidPath("index path is not utf8".into()));
    };
    text.split('/').for_each(|component| out.push(component));
    Ok(())
}
4197
4198fn tracked_only_submodule_status(
4199    worktree_root: &Path,
4200    path: &[u8],
4201    index_entry: &TrackedEntry,
4202    worktree_entry: Option<&TrackedEntry>,
4203    untracked_mode: StatusUntrackedMode,
4204) -> Result<Option<SubmoduleStatus>> {
4205    let Some(worktree_entry) = worktree_entry else {
4206        return Ok(None);
4207    };
4208    if index_entry.mode != 0o160000 || worktree_entry.mode != 0o160000 {
4209        return Ok(None);
4210    }
4211    let absolute = worktree_root.join(repo_path_to_os_path(path)?);
4212    let dirt = if absolute.is_dir() {
4213        submodule_dirt(&absolute)
4214    } else {
4215        0
4216    };
4217    Ok(Some(SubmoduleStatus {
4218        new_commits: index_entry.oid != worktree_entry.oid,
4219        modified_content: dirt & DIRTY_SUBMODULE_MODIFIED != 0,
4220        untracked_content: dirt & DIRTY_SUBMODULE_UNTRACKED != 0
4221            && !matches!(untracked_mode, StatusUntrackedMode::None),
4222    }))
4223}
4224
4225fn status_sort_category(entry: &ShortStatusEntry) -> u8 {
4226    match (entry.index, entry.worktree) {
4227        (b'?', b'?') => 1,
4228        (b'!', b'!') => 2,
4229        _ => 0,
4230    }
4231}
4232
4233pub fn untracked_paths(
4234    worktree_root: impl AsRef<Path>,
4235    git_dir: impl AsRef<Path>,
4236    format: ObjectFormat,
4237) -> Result<Vec<Vec<u8>>> {
4238    untracked_paths_with_options(
4239        worktree_root,
4240        git_dir,
4241        format,
4242        UntrackedPathOptions::default(),
4243    )
4244}
4245
/// Pathspec filter for untracked collection. Mirrors git `ls-files` pathspec
/// semantics: literal paths, recursive directory prefixes, and fnmatch globs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UntrackedPathspecFilter {
    /// Pathspec bytes. An empty path matches every candidate.
    pub path: Vec<u8>,
    /// When set, candidates strictly below `path/` also match.
    pub recursive: bool,
    /// When set, `path` is additionally tried as a wildmatch pattern.
    pub is_glob: bool,
}
4254
/// Options controlling how untracked paths are collected by
/// `untracked_paths_with_options`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct UntrackedPathOptions {
    /// Use the directory walk, which can report a wholly-untracked directory
    /// as a single `dir/` entry instead of listing its files.
    pub directory: bool,
    /// With `directory`, only roll up a directory that contains a file.
    pub no_empty_directory: bool,
    /// With `directory`, descend into (rather than roll up) a directory that
    /// contains ignored entries.
    pub preserve_ignored_directories: bool,
    /// Passed to `IgnoreMatcher::from_sources`; presumably enables the
    /// standard ignore sources — confirm against that constructor.
    pub exclude_standard: bool,
    /// Return the result of `ignored_untracked_paths` instead of the
    /// untracked listing.
    pub ignored_only: bool,
    /// Extra exclude patterns handed to the ignore matcher.
    pub exclude_patterns: Vec<Vec<u8>>,
    /// Per-directory exclude settings handed to the ignore matcher
    /// (presumably file names looked up in each directory — TODO confirm).
    pub exclude_per_directory: Vec<String>,
    /// Pathspecs consulted by the directory walk; empty means no filtering.
    pub pathspecs: Vec<UntrackedPathspecFilter>,
}
4266
4267// The wildmatch engine and the single-item pathspec matcher now live in the
4268// shared `sley-pathspec` crate. Re-export them so existing `sley-worktree`
4269// callers (and the t3070 `ls-files` path) keep their public surface unchanged.
4270pub use sley_pathspec::{
4271    PathspecMatchMagic, WM_CASEFOLD, WM_PATHNAME, pathspec_is_glob, pathspec_item_matches,
4272    wildmatch,
4273};
4274
4275/// Whether `path` matches an `ls-files` pathspec (literal, directory prefix, or glob).
4276pub fn untracked_pathspec_matches(spec: &UntrackedPathspecFilter, path: &[u8]) -> bool {
4277    if spec.path.is_empty() {
4278        return true;
4279    }
4280    let path_no_slash = path.strip_suffix(b"/").unwrap_or(path);
4281    if path == spec.path.as_slice() || path_no_slash == spec.path.as_slice() {
4282        return true;
4283    }
4284    if spec.recursive
4285        && let Some(rest) = path
4286            .strip_prefix(spec.path.as_slice())
4287            .and_then(|rest| rest.strip_prefix(b"/"))
4288        && !rest.is_empty()
4289    {
4290        return true;
4291    }
4292    if spec.is_glob {
4293        return untracked_wildmatch(&spec.path, path)
4294            || untracked_wildmatch(&spec.path, path_no_slash);
4295    }
4296    false
4297}
4298
4299/// Whether a directory walk must descend into `parent` to satisfy active pathspecs.
4300pub fn untracked_pathspec_needs_descent(parent: &[u8], specs: &[UntrackedPathspecFilter]) -> bool {
4301    if specs.is_empty() {
4302        return false;
4303    }
4304    let parent_prefix = if parent.is_empty() {
4305        Vec::new()
4306    } else {
4307        let mut prefix = parent.to_vec();
4308        prefix.push(b'/');
4309        prefix
4310    };
4311    for spec in specs {
4312        if !parent.is_empty()
4313            && spec.path.starts_with(&parent_prefix)
4314            && spec.path.as_slice() != parent
4315        {
4316            return true;
4317        }
4318        if spec.is_glob && glob_pathspec_may_match_under(&spec.path, parent) {
4319            return true;
4320        }
4321        if spec.recursive
4322            && !parent.is_empty()
4323            && parent.starts_with(spec.path.as_slice())
4324            && parent != spec.path.as_slice()
4325        {
4326            return true;
4327        }
4328    }
4329    false
4330}
4331
4332/// Whether some pathspec selects the directory `git_path` *as a whole* (so an
4333/// untracked directory can roll up to `dir/` under `--directory`), as opposed to
4334/// only matching something strictly below it (which forces descent). A
4335/// directory-prefix pathspec covering the directory, an exact directory match, or
4336/// a glob matching the directory's own name all count; a deeper glob such as
4337/// `dir/*.c` or an exact file path inside the directory does not.
4338fn untracked_pathspec_selects_directory(
4339    specs: &[UntrackedPathspecFilter],
4340    git_path: &[u8],
4341) -> bool {
4342    specs
4343        .iter()
4344        .any(|spec| untracked_pathspec_matches(spec, git_path))
4345}
4346
4347fn glob_pathspec_may_match_under(pattern: &[u8], dir: &[u8]) -> bool {
4348    let literal_prefix = literal_prefix_before_glob(pattern);
4349    if literal_prefix.is_empty() {
4350        return true;
4351    }
4352    if dir.is_empty() {
4353        return true;
4354    }
4355    let mut dir_prefix = dir.to_vec();
4356    dir_prefix.push(b'/');
4357    if literal_prefix.starts_with(&dir_prefix) {
4358        return true;
4359    }
4360    if dir_prefix.starts_with(&literal_prefix) {
4361        return true;
4362    }
4363    literal_prefix
4364        .strip_suffix(b"/")
4365        .is_some_and(|prefix| prefix == dir)
4366}
4367
/// Returns the leading bytes of `pattern` that precede the first glob-special
/// byte.
///
/// Glob-special bytes are `*`, `?`, `[` and the escape character `\`. The
/// backslash must end the literal prefix as well: in a pattern such as
/// `foo\*/bar*` the bytes `foo\` are not literal path text (the pattern
/// matches the directory `foo*`), so treating them as a literal prefix would
/// make prefix comparisons against real directory names fail.
///
/// A pattern without any glob-special byte is returned unchanged; an empty
/// pattern yields an empty prefix.
fn literal_prefix_before_glob(pattern: &[u8]) -> Vec<u8> {
    pattern
        .iter()
        .copied()
        .take_while(|&byte| !matches!(byte, b'*' | b'?' | b'[' | b'\\'))
        .collect()
}
4378
/// Records `git_path` in `paths` as a directory entry, i.e. with exactly one
/// trailing `/` appended when the path does not already end in one.
fn insert_untracked_directory(paths: &mut BTreeSet<Vec<u8>>, git_path: &[u8]) {
    let needs_slash = !git_path.ends_with(b"/");
    let mut entry = Vec::with_capacity(git_path.len() + usize::from(needs_slash));
    entry.extend_from_slice(git_path);
    if needs_slash {
        entry.push(b'/');
    }
    paths.insert(entry);
}
4386
4387/// fnmatch-style glob where `*` and `?` match any byte including `/`.
4388fn untracked_wildmatch(pattern: &[u8], text: &[u8]) -> bool {
4389    // Untracked-walk pathspec globs match with PATHMATCH semantics (`*` crosses
4390    // `/`), matching git's default (non-GLOB-magic) pathspec behavior.
4391    wildmatch(pattern, text, 0)
4392}
4393
/// Description of the ignore pattern that decided a path, as returned by
/// `standard_ignore_match`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IgnoreMatch {
    /// Where the pattern came from (presumably the ignore file's path —
    /// TODO confirm against `IgnorePattern::to_match`).
    pub source: Vec<u8>,
    /// Line number of the pattern within `source` (base not visible here).
    pub line_number: usize,
    /// The pattern text that matched.
    pub pattern: Vec<u8>,
    /// Whether the match ignores the path (presumably `false` for a negated
    /// `!` pattern — TODO confirm).
    pub ignored: bool,
}
4401
/// State of a single attribute for a path.
///
/// NOTE(review): the variants appear to mirror gitattributes' `attr`,
/// `-attr` and `attr=value` forms — confirm against the attribute parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AttributeState {
    /// The attribute is set.
    Set,
    /// The attribute is explicitly unset.
    Unset,
    /// The attribute carries the given value bytes.
    Value(Vec<u8>),
}
4408
/// Result of looking up one attribute for a path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AttributeCheck {
    /// Name of the attribute that was looked up.
    pub attribute: Vec<u8>,
    /// Resolved state; `None` presumably means the attribute is unspecified
    /// for the path — TODO confirm against `AttributeMatcher`.
    pub state: Option<AttributeState>,
}
4414
4415pub fn untracked_paths_with_options(
4416    worktree_root: impl AsRef<Path>,
4417    git_dir: impl AsRef<Path>,
4418    format: ObjectFormat,
4419    options: UntrackedPathOptions,
4420) -> Result<Vec<Vec<u8>>> {
4421    let worktree_root = worktree_root.as_ref();
4422    let git_dir = git_dir.as_ref();
4423    let db = FileObjectDatabase::from_git_dir(git_dir, format);
4424    let (index, stat_cache, _) = read_index_entries_with_stat_cache(git_dir, format, &db)?;
4425    let ignores = IgnoreMatcher::from_sources(
4426        worktree_root,
4427        options.exclude_standard,
4428        &options.exclude_patterns,
4429        &options.exclude_per_directory,
4430    )?;
4431    if options.ignored_only {
4432        return ignored_untracked_paths(
4433            worktree_root,
4434            git_dir,
4435            &index,
4436            &ignores,
4437            options.directory,
4438        );
4439    }
4440    if options.directory {
4441        let mut paths = BTreeSet::new();
4442        collect_untracked_directory_paths(
4443            worktree_root,
4444            git_dir,
4445            worktree_root,
4446            &index,
4447            &ignores,
4448            &options,
4449            &mut paths,
4450        )?;
4451        return Ok(paths.into_iter().collect());
4452    }
4453    let worktree = worktree_entries_with_stat_cache(
4454        worktree_root,
4455        git_dir,
4456        format,
4457        Some(&stat_cache),
4458        None,
4459        None,
4460    )?;
4461    Ok(ls_files_untracked_paths_from_worktree(
4462        &worktree, &index, &ignores,
4463    ))
4464}
4465
4466/// Untracked paths for `ls-files --others` (without `--directory`): every
4467/// untracked file is listed individually, except embedded-repository boundaries
4468/// which are emitted as `dir/` to match git's non-submodule `.git` handling.
4469fn ls_files_untracked_paths_from_worktree(
4470    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
4471    index: &BTreeMap<Vec<u8>, TrackedEntry>,
4472    ignores: &IgnoreMatcher,
4473) -> Vec<Vec<u8>> {
4474    let mut paths = BTreeSet::new();
4475    for (path, entry) in worktree {
4476        if index.contains_key(path) || ignores.is_ignored(path, false) {
4477            continue;
4478        }
4479        if entry.mode == 0o040000 && entry.oid.is_null() {
4480            insert_untracked_directory(&mut paths, path);
4481            continue;
4482        }
4483        paths.insert(path.clone());
4484    }
4485    paths.into_iter().collect()
4486}
4487
4488pub fn path_matches_standard_ignore(
4489    worktree_root: impl AsRef<Path>,
4490    path: &[u8],
4491    is_dir: bool,
4492) -> Result<bool> {
4493    path_matches_ignore(worktree_root, path, is_dir, true, &[])
4494}
4495
4496pub fn standard_ignore_match(
4497    worktree_root: impl AsRef<Path>,
4498    path: &[u8],
4499    is_dir: bool,
4500) -> Result<Option<IgnoreMatch>> {
4501    let ignores = IgnoreMatcher::from_worktree_root(worktree_root.as_ref())?;
4502    Ok(ignores.match_for(path, is_dir).map(IgnorePattern::to_match))
4503}
4504
4505pub fn standard_attributes_for_path(
4506    worktree_root: impl AsRef<Path>,
4507    path: &[u8],
4508    requested: &[Vec<u8>],
4509    all: bool,
4510) -> Result<Vec<AttributeCheck>> {
4511    let matcher = AttributeMatcher::from_worktree_root(worktree_root.as_ref())?;
4512    Ok(matcher.attributes_for_path(path, requested, all))
4513}
4514
/// A reusable matcher for standard worktree attributes (global or
/// `core.attributesFile`, every in-tree `.gitattributes`, and
/// `$GIT_DIR/info/attributes`).
///
/// This is behaviourally identical to [`standard_attributes_for_path`] except
/// the attribute sources are read once and reused for each path.
pub struct StandardAttributeMatcher {
    /// Underlying matcher, built once by `from_worktree_root`.
    matcher: AttributeMatcher,
}
4524
4525impl StandardAttributeMatcher {
4526    pub fn from_worktree_root(worktree_root: impl AsRef<Path>) -> Result<Self> {
4527        Ok(Self {
4528            matcher: AttributeMatcher::from_worktree_root(worktree_root.as_ref())?,
4529        })
4530    }
4531
4532    pub fn attributes_for_path(
4533        &self,
4534        path: &[u8],
4535        requested: &[Vec<u8>],
4536        all: bool,
4537    ) -> Vec<AttributeCheck> {
4538        self.matcher.attributes_for_path(path, requested, all)
4539    }
4540}
4541
4542pub fn standard_attributes_for_path_from_tree(
4543    worktree_root: impl AsRef<Path>,
4544    db: &FileObjectDatabase,
4545    format: ObjectFormat,
4546    tree_oid: &ObjectId,
4547    path: &[u8],
4548    requested: &[Vec<u8>],
4549    all: bool,
4550) -> Result<Vec<AttributeCheck>> {
4551    let mut matcher = AttributeMatcher::default();
4552    let worktree_root = worktree_root.as_ref();
4553    if !matcher.read_configured_attributes(worktree_root) {
4554        matcher.read_default_global_attributes();
4555    }
4556    collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
4557    read_attribute_patterns(
4558        worktree_root.join(".git").join("info").join("attributes"),
4559        &mut matcher,
4560        &[],
4561        b".git/info/attributes",
4562    );
4563    Ok(matcher.attributes_for_path(path, requested, all))
4564}
4565
4566pub fn standard_attributes_for_path_from_index(
4567    worktree_root: impl AsRef<Path>,
4568    git_dir: impl AsRef<Path>,
4569    format: ObjectFormat,
4570    path: &[u8],
4571    requested: &[Vec<u8>],
4572    all: bool,
4573) -> Result<Vec<AttributeCheck>> {
4574    let worktree_root = worktree_root.as_ref();
4575    let git_dir = git_dir.as_ref();
4576    let mut matcher = AttributeMatcher::default();
4577    if !matcher.read_configured_attributes(worktree_root) {
4578        matcher.read_default_global_attributes();
4579    }
4580    let db = FileObjectDatabase::from_git_dir(git_dir, format);
4581    collect_attribute_patterns_from_index(git_dir, format, &db, &mut matcher)?;
4582    read_attribute_patterns(
4583        worktree_root.join(".git").join("info").join("attributes"),
4584        &mut matcher,
4585        &[],
4586        b".git/info/attributes",
4587    );
4588    Ok(matcher.attributes_for_path(path, requested, all))
4589}
4590
4591pub fn path_matches_ignore(
4592    worktree_root: impl AsRef<Path>,
4593    path: &[u8],
4594    is_dir: bool,
4595    exclude_standard: bool,
4596    exclude_patterns: &[Vec<u8>],
4597) -> Result<bool> {
4598    path_matches_ignore_with_per_directory(
4599        worktree_root,
4600        path,
4601        is_dir,
4602        exclude_standard,
4603        exclude_patterns,
4604        &[],
4605    )
4606}
4607
4608pub fn path_matches_ignore_with_per_directory(
4609    worktree_root: impl AsRef<Path>,
4610    path: &[u8],
4611    is_dir: bool,
4612    exclude_standard: bool,
4613    exclude_patterns: &[Vec<u8>],
4614    exclude_per_directory: &[String],
4615) -> Result<bool> {
4616    let ignores = IgnoreMatcher::from_sources(
4617        worktree_root.as_ref(),
4618        exclude_standard,
4619        exclude_patterns,
4620        exclude_per_directory,
4621    )?;
4622    Ok(ignores.is_ignored(path, is_dir))
4623}
4624
4625pub fn ignored_index_entries<'a>(
4626    worktree_root: impl AsRef<Path>,
4627    entries: &'a [IndexEntry],
4628    exclude_standard: bool,
4629    exclude_patterns: &[Vec<u8>],
4630    exclude_per_directory: &[String],
4631) -> Result<Vec<&'a IndexEntry>> {
4632    let ignores = IgnoreMatcher::from_sources(
4633        worktree_root.as_ref(),
4634        exclude_standard,
4635        exclude_patterns,
4636        exclude_per_directory,
4637    )?;
4638    Ok(entries
4639        .iter()
4640        .filter(|entry| ignores.is_ignored(entry.path.as_bytes(), false))
4641        .collect())
4642}
4643
/// Recursively walks `dir` for the `--directory` listing and records untracked
/// paths in `paths`.
///
/// * `root` is the worktree root used to derive repository-relative paths.
/// * `git_dir` is never entered or reported.
/// * `index` decides what is tracked; `ignores` filters ignored entries.
/// * `options` supplies the pathspecs and the rollup-related switches.
///
/// A wholly-untracked directory is reported as a single `dir/` entry unless
/// tracked content, a pathspec reaching strictly below it, or preserved
/// ignored entries force a descent. Files are only listed when they are not
/// in the index and match the active pathspecs (if any).
///
/// # Errors
///
/// Propagates directory-read and metadata errors, and fails with
/// `GitError::InvalidPath` if an entry is not located under `root`.
fn collect_untracked_directory_paths(
    root: &Path,
    git_dir: &Path,
    dir: &Path,
    index: &BTreeMap<Vec<u8>, TrackedEntry>,
    ignores: &IgnoreMatcher,
    options: &UntrackedPathOptions,
    paths: &mut BTreeSet<Vec<u8>>,
) -> Result<()> {
    if is_same_path(dir, git_dir) {
        return Ok(());
    }
    // Read the whole directory up front and visit entries in file-name order.
    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
    entries.sort_by_key(|entry| entry.file_name());
    for entry in entries {
        let path = entry.path();
        // Skip repository metadata: `.git` entries, embedded git internals,
        // and the git directory itself.
        if is_dot_git_entry(&path) {
            continue;
        }
        if is_embedded_git_internals(root, &path) {
            continue;
        }
        if is_same_path(&path, git_dir) {
            continue;
        }
        let metadata = entry.metadata()?;
        let relative = path.strip_prefix(root).map_err(|_| {
            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
        })?;
        let git_path = git_path_bytes(relative)?;
        if ignores.is_ignored(&git_path, metadata.is_dir()) {
            continue;
        }
        if metadata.is_dir() {
            // A nested repository is reported as one directory entry and
            // never entered.
            if is_nested_repository_boundary(&path) {
                insert_untracked_directory(paths, &git_path);
                continue;
            }
            let has_tracked_below = index_has_path_under(index, &git_path);
            let needs_descent = untracked_pathspec_needs_descent(&git_path, &options.pathspecs);
            if has_tracked_below {
                // Directories with tracked content cannot be rolled up.
                collect_untracked_directory_paths(
                    root, git_dir, &path, index, ignores, options, paths,
                )?;
            } else if needs_descent {
                // A pathspec reaches into this wholly-untracked directory. Git's
                // `--directory` still rolls it up to `dir/` when a pathspec selects
                // the directory *as a whole* (a directory-prefix that covers it, or
                // a glob matching its name). It descends only when a pathspec
                // targets something strictly below it that does not select the
                // directory itself (e.g. a deeper glob like `dir/*.c` or an exact
                // file path).
                if untracked_pathspec_selects_directory(&options.pathspecs, &git_path) {
                    insert_untracked_directory(paths, &git_path);
                    continue;
                }
                collect_untracked_directory_paths(
                    root, git_dir, &path, index, ignores, options, paths,
                )?;
            } else if options.preserve_ignored_directories
                && directory_has_ignored(&path, root, git_dir, ignores)?
            {
                // Ignored content below must be preserved, so list the
                // untracked entries individually instead of rolling up.
                collect_untracked_directory_paths(
                    root, git_dir, &path, index, ignores, options, paths,
                )?;
            } else if !options.no_empty_directory
                || directory_has_file(&path, root, git_dir, ignores)?
            {
                // Roll up; with `no_empty_directory` only when the directory
                // actually contains a file.
                insert_untracked_directory(paths, &git_path);
            }
        } else if !index.contains_key(&git_path)
            && (metadata.is_file() || metadata.file_type().is_symlink())
            && (options.pathspecs.is_empty()
                || options
                    .pathspecs
                    .iter()
                    .any(|spec| untracked_pathspec_matches(spec, &git_path)))
        {
            // A file reached here was found by descending into its parent
            // directory, which happens only when that directory is not eligible
            // for rollup (it contains tracked content, has ignored entries `-d`
            // must preserve, or a pathspec selects something strictly below it).
            // Git's `--directory` rollup is a directory-level decision made when
            // the whole directory matches; an individually-reached file is always
            // listed individually.
            paths.insert(git_path);
        }
    }
    Ok(())
}
4734
4735fn index_has_path_under(index: &BTreeMap<Vec<u8>, TrackedEntry>, directory: &[u8]) -> bool {
4736    // The index map is sorted, so a single range query finds whether any tracked
4737    // path lives under `directory/` in O(log n) — scanning every key was O(n) per
4738    // untracked directory (quadratic over a deep untracked tree).
4739    let mut prefix = directory.to_vec();
4740    prefix.push(b'/');
4741    index
4742        .range::<[u8], _>((
4743            std::ops::Bound::Included(prefix.as_slice()),
4744            std::ops::Bound::Unbounded,
4745        ))
4746        .next()
4747        .is_some_and(|(path, _)| path.starts_with(&prefix))
4748}
4749
4750/// Derives normal-mode untracked paths (directory rollup) from the worktree map
4751/// produced by the single status walk, avoiding a third filesystem traversal.
4752fn normal_untracked_paths_from_worktree(
4753    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
4754    index: &BTreeMap<Vec<u8>, TrackedEntry>,
4755    ignores: &IgnoreMatcher,
4756) -> Vec<Vec<u8>> {
4757    let mut paths = BTreeSet::new();
4758    for (path, entry) in worktree {
4759        if index.contains_key(path) || ignores.is_ignored(path, false) {
4760            continue;
4761        }
4762        if entry.mode == 0o040000 && entry.oid.is_null() {
4763            insert_untracked_directory(&mut paths, path);
4764            continue;
4765        }
4766        paths.insert(untracked_normal_rollup_path(path, index, ignores));
4767    }
4768    paths.into_iter().collect()
4769}
4770
4771fn status_untracked_paths_from_index(
4772    root: &Path,
4773    git_dir: &Path,
4774    index: &Index,
4775    stat_cache: &IndexStatCache,
4776    ignores: &mut IgnoreMatcher,
4777    untracked_mode: StatusUntrackedMode,
4778) -> Result<Vec<Vec<u8>>> {
4779    if matches!(untracked_mode, StatusUntrackedMode::None) {
4780        return Ok(Vec::new());
4781    }
4782    let mut paths = BTreeSet::new();
4783    let tracked_dirs = stage0_tracked_directories(index);
4784    let mut context = StatusUntrackedWalk {
4785        git_dir,
4786        tracked: stat_cache,
4787        tracked_dirs: &tracked_dirs,
4788        ignores,
4789        untracked_mode,
4790    };
4791    collect_status_untracked_paths(&mut context, root, &[], &mut paths)?;
4792    Ok(paths.into_iter().collect())
4793}
4794
4795fn status_untracked_paths_from_borrowed_index(
4796    root: &Path,
4797    git_dir: &Path,
4798    index: &BorrowedIndex<'_>,
4799    ignores: &mut IgnoreMatcher,
4800    untracked_mode: StatusUntrackedMode,
4801) -> Result<Vec<Vec<u8>>> {
4802    if matches!(untracked_mode, StatusUntrackedMode::None) {
4803        return Ok(Vec::new());
4804    }
4805    let mut paths = BTreeSet::new();
4806    let tracked = BorrowedIndexLookup::new(&index.entries);
4807    let mut context = StatusUntrackedWalk {
4808        git_dir,
4809        tracked: &tracked,
4810        tracked_dirs: &tracked.tracked_dirs,
4811        ignores,
4812        untracked_mode,
4813    };
4814    collect_status_untracked_paths(&mut context, root, &[], &mut paths)?;
4815    Ok(paths.into_iter().collect())
4816}
4817
/// Answers "is this path tracked?" questions for the status untracked walk,
/// independent of how the index is represented.
trait StatusTrackedLookup {
    /// Whether `git_path` is a tracked path.
    fn contains_tracked(&self, git_path: &[u8]) -> bool;
    /// Whether `git_path` is tracked as a gitlink (submodule) entry.
    fn is_tracked_gitlink(&self, git_path: &[u8]) -> bool;
}
4822
4823impl StatusTrackedLookup for IndexStatCache {
4824    fn contains_tracked(&self, git_path: &[u8]) -> bool {
4825        self.contains(git_path)
4826    }
4827
4828    fn is_tracked_gitlink(&self, git_path: &[u8]) -> bool {
4829        self.gitlink_entry(git_path).is_some()
4830    }
4831}
4832
/// Lookup tables over a borrowed index, built once by `BorrowedIndexLookup::new`
/// from the stage-0 (`Stage::Normal`) entries only.
struct BorrowedIndexLookup<'a> {
    /// Paths of all stage-0 entries.
    tracked: HashSet<&'a [u8]>,
    /// Paths of stage-0 entries with gitlink mode (`0o160000`).
    gitlinks: HashSet<&'a [u8]>,
    /// Every proper directory prefix of a stage-0 path (no trailing `/`).
    tracked_dirs: HashSet<&'a [u8]>,
}
4838
4839impl<'a> BorrowedIndexLookup<'a> {
4840    fn new(entries: &'a [IndexEntryRef<'a>]) -> Self {
4841        let mut tracked = HashSet::with_capacity(entries.len());
4842        let mut gitlinks = HashSet::new();
4843        let mut tracked_dirs = HashSet::new();
4844        for entry in entries {
4845            if entry.stage() != Stage::Normal {
4846                continue;
4847            }
4848            let path = entry.path;
4849            tracked.insert(path);
4850            if entry.mode == 0o160000 {
4851                gitlinks.insert(path);
4852            }
4853            for (idx, byte) in path.iter().enumerate() {
4854                if *byte == b'/' && idx > 0 {
4855                    tracked_dirs.insert(&path[..idx]);
4856                }
4857            }
4858        }
4859        Self {
4860            tracked,
4861            gitlinks,
4862            tracked_dirs,
4863        }
4864    }
4865}
4866
4867impl StatusTrackedLookup for BorrowedIndexLookup<'_> {
4868    fn contains_tracked(&self, git_path: &[u8]) -> bool {
4869        self.tracked.contains(git_path)
4870    }
4871
4872    fn is_tracked_gitlink(&self, git_path: &[u8]) -> bool {
4873        self.gitlinks.contains(git_path)
4874    }
4875}
4876
/// Shared state threaded through the recursive status untracked walk.
struct StatusUntrackedWalk<'a, T: StatusTrackedLookup + ?Sized> {
    /// Git directory; never entered or reported by the walk.
    git_dir: &'a Path,
    /// Answers whether a path is tracked / a tracked gitlink.
    tracked: &'a T,
    /// Directories that contain tracked (stage-0) content.
    tracked_dirs: &'a HashSet<&'a [u8]>,
    /// Ignore matcher; its pattern list is extended while a directory is
    /// being walked and truncated back afterwards.
    ignores: &'a mut IgnoreMatcher,
    /// Controls how untracked directories are reported (all files, rolled up,
    /// or not at all).
    untracked_mode: StatusUntrackedMode,
}
4884
/// Recursively walks `dir` and records untracked paths for `status` in `paths`.
///
/// `dir_git_path` is the repository-relative path of `dir` (empty for the
/// worktree root). Files and symlinks are recorded when they are neither
/// tracked nor ignored. Directories are skipped when ignored, when they are
/// the git directory, or when they are tracked gitlinks; otherwise handling
/// depends on `context.untracked_mode`:
///
/// * `All` — descend, except that a nested repository outside any tracked
///   directory is recorded as a single `dir/` entry;
/// * `Normal` — descend only into directories with tracked content; other
///   directories are recorded as `dir/` when they are a nested repository or
///   contain an untracked file;
/// * `None` — directories contribute nothing.
///
/// The ignore pattern list is restored to its entry length before returning,
/// on both success and error.
fn collect_status_untracked_paths<T: StatusTrackedLookup + ?Sized>(
    context: &mut StatusUntrackedWalk<'_, T>,
    dir: &Path,
    dir_git_path: &[u8],
    paths: &mut BTreeSet<Vec<u8>>,
) -> Result<()> {
    if is_same_path(dir, context.git_dir) {
        return Ok(());
    }
    // Remember how many ignore patterns were active so that anything added
    // while reading this directory can be dropped again on the way out.
    let ignore_len = context.ignores.patterns.len();
    let entries = read_dir_entries_with_ignore_patterns(dir, dir_git_path, context.ignores)?;
    // The body runs in a closure so the truncate below executes even when an
    // entry fails with `?`.
    let result = (|| -> Result<()> {
        // Reused buffer: each entry's name is pushed onto the directory path
        // and truncated away again after the entry has been handled.
        let mut git_path = dir_git_path.to_vec();
        for entry in entries {
            let file_name = entry.file_name();
            if file_name == std::ffi::OsStr::new(".git") {
                continue;
            }
            let file_type = entry.file_type()?;
            let is_dir = file_type.is_dir();
            let path_len = git_path_push_component(&mut git_path, &file_name);
            let entry_result = (|| -> Result<()> {
                if file_type.is_file() || file_type.is_symlink() {
                    if !context.tracked.contains_tracked(&git_path)
                        && !context.ignores.is_ignored(&git_path, false)
                    {
                        paths.insert(git_path.clone());
                    }
                    return Ok(());
                } else if is_dir {
                    if context.ignores.is_ignored(&git_path, true) {
                        return Ok(());
                    }
                    let path = entry.path();
                    if is_same_path(&path, context.git_dir) {
                        return Ok(());
                    }
                    // Tracked submodules are handled elsewhere; never walk
                    // into them here.
                    if context.tracked.is_tracked_gitlink(&git_path) {
                        return Ok(());
                    }
                    match context.untracked_mode {
                        StatusUntrackedMode::All => {
                            if !context.tracked_dirs.contains(git_path.as_slice())
                                && is_nested_repository_boundary(&path)
                            {
                                insert_untracked_directory(paths, &git_path);
                            } else {
                                collect_status_untracked_paths(context, &path, &git_path, paths)?;
                            }
                        }
                        StatusUntrackedMode::Normal => {
                            if context.tracked_dirs.contains(git_path.as_slice()) {
                                collect_status_untracked_paths(context, &path, &git_path, paths)?;
                            } else if is_nested_repository_boundary(&path) {
                                insert_untracked_directory(paths, &git_path);
                            } else if status_untracked_directory_has_file(
                                context, &path, &git_path,
                            )? {
                                insert_untracked_directory(paths, &git_path);
                            }
                        }
                        StatusUntrackedMode::None => {}
                    }
                }
                Ok(())
            })();
            // Restore the shared path buffer before propagating any error.
            git_path.truncate(path_len);
            entry_result?;
        }
        Ok(())
    })();
    context.ignores.patterns.truncate(ignore_len);
    result
}
4959
4960fn stage0_tracked_directories(index: &Index) -> HashSet<&[u8]> {
4961    let mut directories = HashSet::new();
4962    for entry in index
4963        .entries
4964        .iter()
4965        .filter(|entry| entry.stage() == Stage::Normal)
4966    {
4967        let path = entry.path.as_bytes();
4968        for (idx, byte) in path.iter().enumerate() {
4969            if *byte == b'/' && idx > 0 {
4970                directories.insert(&path[..idx]);
4971            }
4972        }
4973    }
4974    directories
4975}
4976
4977fn status_untracked_directory_has_file<T: StatusTrackedLookup + ?Sized>(
4978    context: &mut StatusUntrackedWalk<'_, T>,
4979    dir: &Path,
4980    dir_git_path: &[u8],
4981) -> Result<bool> {
4982    if is_same_path(dir, context.git_dir) {
4983        return Ok(false);
4984    }
4985    let ignore_len = context.ignores.patterns.len();
4986    let entries = read_dir_entries_with_ignore_patterns(dir, dir_git_path, context.ignores)?;
4987    let result = (|| -> Result<bool> {
4988        let mut git_path = dir_git_path.to_vec();
4989        for entry in entries {
4990            let file_name = entry.file_name();
4991            if file_name == std::ffi::OsStr::new(".git") {
4992                continue;
4993            }
4994            let file_type = entry.file_type()?;
4995            let is_dir = file_type.is_dir();
4996            let path_len = git_path_push_component(&mut git_path, &file_name);
4997            let entry_result = (|| -> Result<Option<bool>> {
4998                if context.ignores.is_ignored(&git_path, is_dir) {
4999                    return Ok(None);
5000                }
5001                if file_type.is_file() || file_type.is_symlink() {
5002                    return Ok(Some(!context.tracked.contains_tracked(&git_path)));
5003                }
5004                if is_dir {
5005                    let path = entry.path();
5006                    if is_same_path(&path, context.git_dir) {
5007                        return Ok(None);
5008                    }
5009                    if is_nested_repository_boundary(&path) {
5010                        return Ok(Some(true));
5011                    }
5012                    if status_untracked_directory_has_file(context, &path, &git_path)? {
5013                        return Ok(Some(true));
5014                    }
5015                }
5016                Ok(None)
5017            })();
5018            git_path.truncate(path_len);
5019            if let Some(has_file) = entry_result? {
5020                return Ok(has_file);
5021            }
5022        }
5023        Ok(false)
5024    })();
5025    context.ignores.patterns.truncate(ignore_len);
5026    result
5027}
5028
5029fn read_dir_entries_with_ignore_patterns(
5030    dir: &Path,
5031    base: &[u8],
5032    matcher: &mut IgnoreMatcher,
5033) -> Result<Vec<fs::DirEntry>> {
5034    let mut entries = Vec::new();
5035    let mut ignore_path = None;
5036    for entry in fs::read_dir(dir)? {
5037        let entry = entry?;
5038        if entry.file_name() == std::ffi::OsStr::new(".gitignore") {
5039            ignore_path = Some(entry.path());
5040        }
5041        entries.push(entry);
5042    }
5043    if let Some(path) = ignore_path {
5044        let mut source = base.to_vec();
5045        if !source.is_empty() {
5046            source.push(b'/');
5047        }
5048        source.extend_from_slice(b".gitignore");
5049        read_ignore_patterns(path, &mut matcher.patterns, base, &source);
5050    }
5051    Ok(entries)
5052}
5053
5054fn untracked_normal_rollup_path(
5055    file_path: &[u8],
5056    index: &BTreeMap<Vec<u8>, TrackedEntry>,
5057    ignores: &IgnoreMatcher,
5058) -> Vec<u8> {
5059    let segments = file_path
5060        .split(|byte| *byte == b'/')
5061        .filter(|segment| !segment.is_empty())
5062        .collect::<Vec<_>>();
5063    if segments.len() <= 1 {
5064        return file_path.to_vec();
5065    }
5066    let mut prefix = Vec::new();
5067    for segment in &segments[..segments.len() - 1] {
5068        if !prefix.is_empty() {
5069            prefix.push(b'/');
5070        }
5071        prefix.extend_from_slice(segment);
5072        if index_has_path_under(index, &prefix) {
5073            break;
5074        }
5075        if !ignores.is_ignored(&prefix, true) {
5076            let mut directory = prefix;
5077            directory.push(b'/');
5078            return directory;
5079        }
5080    }
5081    file_path.to_vec()
5082}
5083
5084fn directory_has_file(
5085    dir: &Path,
5086    root: &Path,
5087    git_dir: &Path,
5088    ignores: &IgnoreMatcher,
5089) -> Result<bool> {
5090    if is_same_path(dir, git_dir) {
5091        return Ok(false);
5092    }
5093    for entry in fs::read_dir(dir)? {
5094        let entry = entry?;
5095        let path = entry.path();
5096        if is_dot_git_entry(&path) {
5097            continue;
5098        }
5099        if is_embedded_git_internals(root, &path) {
5100            continue;
5101        }
5102        if is_same_path(&path, git_dir) {
5103            continue;
5104        }
5105        let metadata = entry.metadata()?;
5106        let relative = path.strip_prefix(root).map_err(|_| {
5107            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
5108        })?;
5109        let git_path = git_path_bytes(relative)?;
5110        if ignores.is_ignored(&git_path, metadata.is_dir()) {
5111            continue;
5112        }
5113        if metadata.is_file() || metadata.file_type().is_symlink() {
5114            return Ok(true);
5115        }
5116        if metadata.is_dir() {
5117            if is_nested_repository_boundary(&path) {
5118                continue;
5119            }
5120            if directory_has_file(&path, root, git_dir, ignores)? {
5121                return Ok(true);
5122            }
5123        }
5124    }
5125    Ok(false)
5126}
5127
5128fn directory_has_ignored(
5129    dir: &Path,
5130    root: &Path,
5131    git_dir: &Path,
5132    ignores: &IgnoreMatcher,
5133) -> Result<bool> {
5134    if is_same_path(dir, git_dir) {
5135        return Ok(false);
5136    }
5137    for entry in fs::read_dir(dir)? {
5138        let entry = entry?;
5139        let path = entry.path();
5140        if is_dot_git_entry(&path) {
5141            continue;
5142        }
5143        if is_same_path(&path, git_dir) {
5144            continue;
5145        }
5146        let metadata = entry.metadata()?;
5147        let relative = path.strip_prefix(root).map_err(|_| {
5148            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
5149        })?;
5150        let git_path = git_path_bytes(relative)?;
5151        if ignores.is_ignored(&git_path, metadata.is_dir()) {
5152            return Ok(true);
5153        }
5154        if metadata.is_dir() && directory_has_ignored(&path, root, git_dir, ignores)? {
5155            return Ok(true);
5156        }
5157    }
5158    Ok(false)
5159}
5160
5161fn ignored_untracked_paths(
5162    root: &Path,
5163    git_dir: &Path,
5164    index: &BTreeMap<Vec<u8>, TrackedEntry>,
5165    ignores: &IgnoreMatcher,
5166    directory: bool,
5167) -> Result<Vec<Vec<u8>>> {
5168    let mut paths = BTreeSet::new();
5169    let context = IgnoredUntrackedContext {
5170        root,
5171        git_dir,
5172        index,
5173        ignores,
5174        directory,
5175    };
5176    collect_ignored_untracked_paths(&context, root, false, &mut paths)?;
5177    Ok(paths.into_iter().collect())
5178}
5179
5180fn ignored_traditional_path_is_empty_directory(root: &Path, path: &[u8]) -> Result<bool> {
5181    let Some(path) = path.strip_suffix(b"/") else {
5182        return Ok(false);
5183    };
5184    let mut absolute = PathBuf::new();
5185    set_worktree_path_from_repo_path(root, path, &mut absolute)?;
5186    match fs::read_dir(&absolute) {
5187        Ok(mut entries) => Ok(entries.next().is_none()),
5188        Err(err) if err.kind() == std::io::ErrorKind::NotADirectory => Ok(false),
5189        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
5190        Err(err) => Err(err.into()),
5191    }
5192}
5193
/// Read-only state shared by every level of the
/// [`collect_ignored_untracked_paths`] walk.
struct IgnoredUntrackedContext<'a> {
    /// Worktree root; entry paths are made relative to it to obtain git paths.
    root: &'a Path,
    /// The repository's git directory, which the walk never enters.
    git_dir: &'a Path,
    /// Tracked entries keyed by git path.
    index: &'a BTreeMap<Vec<u8>, TrackedEntry>,
    /// Ignore rules consulted for every directory and file.
    ignores: &'a IgnoreMatcher,
    /// When `true`, an ignored directory with nothing tracked beneath it is
    /// reported as one `dir/` entry instead of being descended into.
    directory: bool,
}
5201
5202fn collect_ignored_untracked_paths(
5203    context: &IgnoredUntrackedContext<'_>,
5204    dir: &Path,
5205    parent_ignored: bool,
5206    paths: &mut BTreeSet<Vec<u8>>,
5207) -> Result<()> {
5208    if is_same_path(dir, context.git_dir) {
5209        return Ok(());
5210    }
5211    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
5212    entries.sort_by_key(|entry| entry.file_name());
5213    for entry in entries {
5214        let path = entry.path();
5215        if is_dot_git_entry(&path) {
5216            continue;
5217        }
5218        if is_same_path(&path, context.git_dir) {
5219            continue;
5220        }
5221        let metadata = entry.metadata()?;
5222        let relative = path.strip_prefix(context.root).map_err(|_| {
5223            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
5224        })?;
5225        let git_path = git_path_bytes(relative)?;
5226        if metadata.is_dir() {
5227            let ignored = parent_ignored || context.ignores.is_ignored(&git_path, true);
5228            if ignored && !index_has_path_under(context.index, &git_path) {
5229                if context.directory {
5230                    let mut directory_path = git_path;
5231                    directory_path.push(b'/');
5232                    paths.insert(directory_path);
5233                } else {
5234                    collect_ignored_untracked_paths(context, &path, true, paths)?;
5235                }
5236            } else {
5237                if is_nested_repository_boundary(&path) {
5238                    continue;
5239                }
5240                collect_ignored_untracked_paths(context, &path, ignored, paths)?;
5241            }
5242        } else if !context.index.contains_key(&git_path)
5243            && (metadata.is_file() || metadata.file_type().is_symlink())
5244            && (parent_ignored || context.ignores.is_ignored(&git_path, false))
5245        {
5246            paths.insert(git_path);
5247        }
5248    }
5249    Ok(())
5250}
5251
/// An ordered list of ignore patterns.
///
/// Lookups scan `patterns` from the end, so a pattern added later takes
/// precedence over an earlier one that matches the same path.
#[derive(Debug, Default)]
struct IgnoreMatcher {
    /// Compiled patterns in the order their sources were read.
    patterns: Vec<IgnorePattern>,
}
5256
/// One compiled ignore line together with where it came from.
#[derive(Debug)]
struct IgnorePattern {
    /// Git path of the directory the pattern is relative to; empty when the
    /// pattern applies from the worktree root. Paths outside `base` never match.
    base: Vec<u8>,
    /// Pattern body with the `!` prefix, a leading `/` and a trailing `/`
    /// already removed.
    pattern: Vec<u8>,
    /// The line after CR and trailing-space normalisation, before any prefix or
    /// suffix was stripped; reported back through `to_match`.
    original: Vec<u8>,
    /// Label of the file (or other origin) the pattern was read from.
    source: Vec<u8>,
    /// Line number passed in by the caller when the pattern was pushed.
    line_number: usize,
    /// `true` for `!pattern` lines, which re-include what they match.
    negated: bool,
    /// `true` when the line ended in `/`.
    directory_only: bool,
    /// `true` when the line started with `/` (after any `!`).
    anchored: bool,
    /// `true` when `pattern` itself contains a `/`.
    has_slash: bool,
    /// How `pattern` should be matched against a slash-free segment. Most
    /// `.gitignore` entries are literals or simple `*.ext` / `prefix*` globs, all
    /// of which match without the allocating wildcard DP engine; only genuinely
    /// complex globs fall through to [`wildcard_path_matches`].
    match_kind: MatchKind,
}
5274
/// Shape of an [`IgnorePattern`] body, used to bypass the general wildcard
/// matcher for the common cheap cases. The fast variants only ever describe
/// patterns whose literal part is slash-free.
#[derive(Debug)]
enum MatchKind {
    /// The pattern has no metacharacters and is compared byte for byte.
    Literal,
    /// The pattern is `*X` with a literal, slash-free `X`: match by suffix.
    Suffix,
    /// The pattern is `X*` with a literal, slash-free `X`: match by prefix.
    Prefix,
    /// Everything else goes through [`wildcard_path_matches`].
    Glob,
}

/// Picks the [`MatchKind`] for `pattern`.
///
/// * No `*`, `?`, `[` or `\` at all: [`MatchKind::Literal`] (this includes the
///   empty pattern and literals containing `/`).
/// * Exactly one `*`, at the very start or very end, no other metacharacter,
///   and a non-empty literal remainder without `/`: [`MatchKind::Suffix`] or
///   [`MatchKind::Prefix`].
/// * Anything else: [`MatchKind::Glob`].
fn classify_ignore_pattern(pattern: &[u8]) -> MatchKind {
    let mut star_count = 0usize;
    for byte in pattern {
        match byte {
            // Any of these forces the full engine regardless of the stars.
            b'?' | b'[' | b'\\' => return MatchKind::Glob,
            b'*' => star_count += 1,
            _ => {}
        }
    }
    match (star_count, pattern) {
        (0, _) => MatchKind::Literal,
        (1, [b'*', tail @ ..]) if !tail.is_empty() && !tail.contains(&b'/') => MatchKind::Suffix,
        (1, [head @ .., b'*']) if !head.is_empty() && !head.contains(&b'/') => MatchKind::Prefix,
        _ => MatchKind::Glob,
    }
}
5319
5320impl IgnoreMatcher {
5321    fn from_sources(
5322        root: &Path,
5323        exclude_standard: bool,
5324        patterns: &[Vec<u8>],
5325        per_directory: &[String],
5326    ) -> Result<Self> {
5327        let mut matcher = if exclude_standard {
5328            Self::from_worktree_root(root)?
5329        } else {
5330            Self::default()
5331        };
5332        matcher.extend_patterns(patterns);
5333        matcher.extend_per_directory_patterns(root, per_directory)?;
5334        Ok(matcher)
5335    }
5336
5337    /// Builds only the repository-wide ignore sources — `core.excludesFile` (or the
5338    /// default global) and `$GIT_DIR/info/exclude` — *without* walking the worktree
5339    /// for `.gitignore`. The caller folds each directory's `.gitignore` into the
5340    /// matcher as it descends (see [`read_dir_ignore_patterns`]), so status reads
5341    /// the tree exactly once instead of doing a separate full-tree ignore pass.
5342    fn from_worktree_base(root: &Path) -> Result<Self> {
5343        let mut patterns = Vec::new();
5344        read_ignore_patterns(
5345            root.join(".git").join("info").join("exclude"),
5346            &mut patterns,
5347            &[],
5348            b".git/info/exclude",
5349        );
5350        if !read_core_excludes_file(root, &mut patterns) {
5351            read_default_global_excludes_file(&mut patterns);
5352        }
5353        Ok(Self { patterns })
5354    }
5355
5356    fn from_worktree_root(root: &Path) -> Result<Self> {
5357        let mut patterns = Vec::new();
5358        read_ignore_patterns(
5359            root.join(".git").join("info").join("exclude"),
5360            &mut patterns,
5361            &[],
5362            b".git/info/exclude",
5363        );
5364        if !read_core_excludes_file(root, &mut patterns) {
5365            read_default_global_excludes_file(&mut patterns);
5366        }
5367        collect_per_directory_patterns(root, root, &[String::from(".gitignore")], &mut patterns)?;
5368        Ok(Self { patterns })
5369    }
5370
5371    fn extend_patterns(&mut self, patterns: &[Vec<u8>]) {
5372        for pattern in patterns {
5373            push_ignore_pattern(&mut self.patterns, pattern, &[], &[], 0);
5374        }
5375    }
5376
5377    fn extend_per_directory_patterns(&mut self, root: &Path, names: &[String]) -> Result<()> {
5378        if names.is_empty() {
5379            return Ok(());
5380        }
5381        collect_per_directory_patterns(root, root, names, &mut self.patterns)
5382    }
5383
5384    fn is_ignored(&self, path: &[u8], is_dir: bool) -> bool {
5385        let basename = path.rsplit(|byte| *byte == b'/').next().unwrap_or(path);
5386        for pattern in self.patterns.iter().rev() {
5387            if pattern.matches_with_basename(path, basename, is_dir) {
5388                return !pattern.negated;
5389            }
5390        }
5391        false
5392    }
5393
5394    fn match_for(&self, path: &[u8], is_dir: bool) -> Option<&IgnorePattern> {
5395        let basename = path.rsplit(|byte| *byte == b'/').next().unwrap_or(path);
5396        for pattern in self.patterns.iter().rev() {
5397            if pattern.matches_with_basename(path, basename, is_dir) {
5398                return Some(pattern);
5399            }
5400        }
5401        None
5402    }
5403}
5404
/// Decides whether a worktree path is included by a [`SparseCheckout`].
///
/// In [`SparseCheckoutMode::Full`] the sparse patterns are compiled with the
/// same `.gitignore` grammar used elsewhere in this crate ([`IgnorePattern`]);
/// a path is *in cone* when the last matching pattern is positive. In
/// [`SparseCheckoutMode::Cone`] the patterns are reduced to a set of recursive
/// directory prefixes plus a flag for whether top-level files are kept, and
/// inclusion is decided by literal prefix containment.
#[derive(Debug)]
enum SparseMatcher {
    /// Full pattern matching; `patterns` are kept in file order and the last
    /// one that matches a path decides it.
    Full { patterns: Vec<IgnorePattern> },
    /// Cone matching by directory containment.
    Cone(ConeMatcher),
}
5418
/// Cone-mode sparse specification reduced to plain directory lists.
///
/// The default value includes nothing: no root files and no directories.
#[derive(Debug, Default)]
struct ConeMatcher {
    /// `true` when files directly at the repository root are in cone (`/*`).
    root_files: bool,
    /// Directory prefixes (without leading or trailing `/`) whose entire
    /// subtree is in cone, e.g. `dir1/dir2`.
    recursive_dirs: Vec<Vec<u8>>,
    /// Parent directories that are in cone only for their direct files
    /// (the `/dir/*` guard Git emits so intermediate directories keep their
    /// own files). Stored without leading or trailing `/`.
    parent_dirs: Vec<Vec<u8>>,
}
5431
5432impl SparseMatcher {
5433    fn new(sparse: &SparseCheckout, mode: SparseCheckoutMode) -> Self {
5434        let resolved = match mode {
5435            SparseCheckoutMode::Auto => {
5436                if patterns_are_cone(&sparse.patterns) {
5437                    SparseCheckoutMode::Cone
5438                } else {
5439                    SparseCheckoutMode::Full
5440                }
5441            }
5442            other => other,
5443        };
5444        match resolved {
5445            SparseCheckoutMode::Cone => SparseMatcher::Cone(ConeMatcher::compile(&sparse.patterns)),
5446            // `Auto` has been resolved above; everything else is full matching.
5447            _ => {
5448                let mut patterns = Vec::new();
5449                for pattern in &sparse.patterns {
5450                    push_ignore_pattern(&mut patterns, pattern, &[], b"sparse-checkout", 0);
5451                }
5452                SparseMatcher::Full { patterns }
5453            }
5454        }
5455    }
5456
5457    /// Returns `true` when the given file path should be present in the
5458    /// worktree under this sparse specification.
5459    fn includes_file(&self, path: &[u8]) -> bool {
5460        match self {
5461            SparseMatcher::Full { patterns } => {
5462                let mut included = false;
5463                for pattern in patterns {
5464                    if pattern.matches(path, false) {
5465                        included = !pattern.negated;
5466                    }
5467                }
5468                included
5469            }
5470            SparseMatcher::Cone(cone) => cone.includes_file(path),
5471        }
5472    }
5473}
5474
5475impl ConeMatcher {
5476    fn compile(patterns: &[Vec<u8>]) -> Self {
5477        let mut matcher = ConeMatcher::default();
5478        for raw in patterns {
5479            let line = sparse_clean_line(raw);
5480            if line.is_empty() || line.starts_with(b"#") {
5481                continue;
5482            }
5483            // Negated guards such as `!/*/` and `!/dir/*/` only exist to stop a
5484            // recursive match from pulling in nested directories; the positive
5485            // patterns already capture the cone, so we ignore the negations.
5486            if line.starts_with(b"!") {
5487                continue;
5488            }
5489            if line == b"/*" {
5490                matcher.root_files = true;
5491                continue;
5492            }
5493            // `/dir/` -> recursive subtree.
5494            if let Some(rest) = line.strip_prefix(b"/")
5495                && let Some(dir) = rest.strip_suffix(b"/")
5496                && !dir.is_empty()
5497            {
5498                matcher.recursive_dirs.push(dir.to_vec());
5499                continue;
5500            }
5501            // `/dir/*` -> direct files of `dir` only (parent guard).
5502            if let Some(rest) = line.strip_prefix(b"/")
5503                && let Some(dir) = rest.strip_suffix(b"/*")
5504                && !dir.is_empty()
5505            {
5506                matcher.parent_dirs.push(dir.to_vec());
5507                continue;
5508            }
5509        }
5510        matcher
5511    }
5512
5513    fn includes_file(&self, path: &[u8]) -> bool {
5514        let parent = match path.iter().rposition(|byte| *byte == b'/') {
5515            Some(index) => &path[..index],
5516            None => {
5517                // A path with no slash is a top-level file.
5518                return self.root_files;
5519            }
5520        };
5521        if self
5522            .recursive_dirs
5523            .iter()
5524            .any(|dir| path_is_under_dir(path, dir))
5525        {
5526            return true;
5527        }
5528        self.parent_dirs.iter().any(|dir| dir.as_slice() == parent)
5529    }
5530}
5531
5532/// Strips a CR, leading/trailing whitespace, and an optional trailing slash is
5533/// preserved (cone patterns are slash sensitive) from a raw sparse line.
5534fn sparse_clean_line(raw: &[u8]) -> &[u8] {
5535    let line = raw.strip_suffix(b"\r").unwrap_or(raw);
5536    trim_ascii_whitespace(line)
5537}
5538
/// Returns `true` when `path` lies strictly beneath the directory `dir`, that
/// is, when it starts with `dir` followed by a `/`.
///
/// An empty `dir` stands for the root and contains every path. A `path` equal
/// to `dir` is *not* considered to be under it, and neither is a sibling that
/// merely shares the prefix (`dirx/file` is not under `dir`).
fn path_is_under_dir(path: &[u8], dir: &[u8]) -> bool {
    dir.is_empty() || matches!(path.strip_prefix(dir), Some([b'/', ..]))
}
5548
5549/// Heuristic used by [`SparseCheckoutMode::Auto`]: the pattern set is cone
5550/// shaped when every (non-comment, non-blank) line is one of the restricted
5551/// cone forms Git emits.
5552fn patterns_are_cone(patterns: &[Vec<u8>]) -> bool {
5553    let mut saw_pattern = false;
5554    for raw in patterns {
5555        let line = sparse_clean_line(raw);
5556        if line.is_empty() || line.starts_with(b"#") {
5557            continue;
5558        }
5559        saw_pattern = true;
5560        let body = line.strip_prefix(b"!").unwrap_or(line);
5561        let is_cone_shaped = body == b"/*"
5562            || body == b"/*/"
5563            || (body.starts_with(b"/")
5564                && (body.ends_with(b"/") || body.ends_with(b"/*"))
5565                && !sparse_has_glob_meta(body));
5566        if !is_cone_shaped {
5567            return false;
5568        }
5569    }
5570    saw_pattern
5571}
5572
/// Reports whether `body` contains a glob metacharacter (`*`, `?`, `[`, `]` or
/// `\`) that rules out cone interpretation.
///
/// One trailing `/*` is removed before scanning, because that suffix is a
/// legitimate cone form and is judged by the caller.
fn sparse_has_glob_meta(body: &[u8]) -> bool {
    let scanned = match body.strip_suffix(b"/*") {
        Some(head) => head,
        None => body,
    };
    for byte in scanned {
        if matches!(byte, b'*' | b'?' | b'[' | b']' | b'\\') {
            return true;
        }
    }
    false
}
5581
5582fn read_core_excludes_file(root: &Path, patterns: &mut Vec<IgnorePattern>) -> bool {
5583    let Ok(config) = sley_config::read_repo_config(&root.join(".git"), None) else {
5584        return false;
5585    };
5586    let Some(value) = config.get("core", None, "excludesFile") else {
5587        return false;
5588    };
5589    let path = expand_core_excludes_file(root, value);
5590    read_ignore_patterns(path, patterns, &[], value.as_bytes());
5591    true
5592}
5593
/// Turns a `core.excludesFile` value into a filesystem path.
///
/// * An absolute path is used as is.
/// * A value starting with `~/` is resolved against `$HOME` when that variable
///   is set.
/// * Anything else, including `~/…` when `$HOME` is unset, is joined onto
///   `root`.
fn expand_core_excludes_file(root: &Path, value: &str) -> PathBuf {
    let configured = Path::new(value);
    if configured.is_absolute() {
        return configured.to_path_buf();
    }
    let home_relative = value
        .strip_prefix("~/")
        .and_then(|rest| std::env::var_os("HOME").map(|home| PathBuf::from(home).join(rest)));
    home_relative.unwrap_or_else(|| root.join(configured))
}
5606
5607fn read_default_global_excludes_file(patterns: &mut Vec<IgnorePattern>) {
5608    if let Some(config_home) = std::env::var_os("XDG_CONFIG_HOME")
5609        && !config_home.is_empty()
5610    {
5611        let path = PathBuf::from(config_home).join("git").join("ignore");
5612        let source = path.to_string_lossy().into_owned();
5613        read_ignore_patterns(path, patterns, &[], source.as_bytes());
5614        return;
5615    }
5616    if let Some(home) = std::env::var_os("HOME") {
5617        let path = PathBuf::from(home)
5618            .join(".config")
5619            .join("git")
5620            .join("ignore");
5621        let source = path.to_string_lossy().into_owned();
5622        read_ignore_patterns(path, patterns, &[], source.as_bytes());
5623    }
5624}
5625
5626fn collect_per_directory_patterns(
5627    root: &Path,
5628    dir: &Path,
5629    names: &[String],
5630    patterns: &mut Vec<IgnorePattern>,
5631) -> Result<()> {
5632    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
5633    entries.sort_by_key(|entry| entry.file_name());
5634    for entry in entries {
5635        let path = entry.path();
5636        if path.file_name().and_then(|name| name.to_str()) == Some(".git") {
5637            continue;
5638        }
5639        let metadata = entry.metadata()?;
5640        if metadata.is_dir() {
5641            collect_per_directory_patterns(root, &path, names, patterns)?;
5642            continue;
5643        }
5644        if !metadata.is_file() {
5645            continue;
5646        }
5647        let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
5648            continue;
5649        };
5650        if !names.iter().any(|name| name == file_name) {
5651            continue;
5652        }
5653        let parent = path.parent().unwrap_or(root);
5654        let relative = parent.strip_prefix(root).map_err(|_| {
5655            GitError::InvalidPath(format!("path {} is outside worktree", parent.display()))
5656        })?;
5657        let base = git_path_bytes(relative)?;
5658        let mut source = base.clone();
5659        if !source.is_empty() {
5660            source.push(b'/');
5661        }
5662        source.extend_from_slice(file_name.as_bytes());
5663        read_ignore_patterns(&path, patterns, &base, &source);
5664    }
5665    Ok(())
5666}
5667
5668fn read_ignore_patterns(
5669    path: impl AsRef<Path>,
5670    patterns: &mut Vec<IgnorePattern>,
5671    base: &[u8],
5672    source: &[u8],
5673) {
5674    let Ok(contents) = fs::read(path) else {
5675        return;
5676    };
5677    for (line, raw) in contents.split(|byte| *byte == b'\n').enumerate() {
5678        push_ignore_pattern(patterns, raw, base, source, line + 1);
5679    }
5680}
5681
5682fn push_ignore_pattern(
5683    patterns: &mut Vec<IgnorePattern>,
5684    raw: &[u8],
5685    base: &[u8],
5686    source: &[u8],
5687    line_number: usize,
5688) {
5689    let mut line = raw.strip_suffix(b"\r").unwrap_or(raw).to_vec();
5690    normalize_ignore_trailing_spaces(&mut line);
5691    let original = line.clone();
5692    let mut line = line.as_slice();
5693    if line.is_empty() || line.starts_with(b"#") {
5694        return;
5695    }
5696    let negated = if line.starts_with(b"\\#") || line.starts_with(b"\\!") {
5697        line = &line[1..];
5698        false
5699    } else if let Some(pattern) = line.strip_prefix(b"!") {
5700        line = pattern;
5701        true
5702    } else {
5703        false
5704    };
5705    let directory_only = line.ends_with(b"/");
5706    let pattern = if directory_only {
5707        line.strip_suffix(b"/").unwrap_or(line)
5708    } else {
5709        line
5710    };
5711    let (anchored, pattern) = if let Some(pattern) = pattern.strip_prefix(b"/") {
5712        (true, pattern)
5713    } else {
5714        (false, pattern)
5715    };
5716    // A leading `**/` followed by a slash-free segment is, per gitignore,
5717    // identical to the bare segment ("match in all directories"): `**/Pods` ≡
5718    // `Pods`, `**/*.jks` ≡ `*.jks`. Collapse it so the pattern matches the
5719    // basename directly (a literal/suffix compare) instead of paying for the
5720    // `**` wildcard engine on the full path — verified against `git check-ignore`.
5721    let pattern = match pattern.strip_prefix(b"**/") {
5722        Some(rest) if !rest.is_empty() && !rest.contains(&b'/') => rest,
5723        _ => pattern,
5724    };
5725    if pattern.is_empty() {
5726        return;
5727    }
5728    patterns.push(IgnorePattern {
5729        base: base.to_vec(),
5730        pattern: pattern.to_vec(),
5731        original,
5732        source: source.to_vec(),
5733        line_number,
5734        negated,
5735        directory_only,
5736        anchored,
5737        has_slash: pattern.contains(&b'/'),
5738        match_kind: classify_ignore_pattern(pattern),
5739    });
5740}
5741
/// Removes unescaped trailing spaces from an ignore line, in place.
///
/// Spaces are dropped from the end until the line no longer ends in a space or
/// the last space is escaped, meaning it is preceded by an odd number of
/// backslashes. In the escaped case the backslash directly before the space is
/// removed, the space is kept, and processing stops.
fn normalize_ignore_trailing_spaces(line: &mut Vec<u8>) {
    loop {
        let Some((&b' ', head)) = line.split_last() else {
            return;
        };
        let preceding_backslashes = head
            .iter()
            .rev()
            .take_while(|byte| **byte == b'\\')
            .count();
        if preceding_backslashes % 2 == 1 {
            // `head` ends in the escaping backslash; remove just that byte.
            let backslash_at = head.len() - 1;
            line.remove(backslash_at);
            return;
        }
        line.pop();
    }
}
5757
5758impl IgnorePattern {
5759    fn to_match(&self) -> IgnoreMatch {
5760        IgnoreMatch {
5761            source: self.source.clone(),
5762            line_number: self.line_number,
5763            pattern: self.original.clone(),
5764            ignored: !self.negated,
5765        }
5766    }
5767
5768    fn matches(&self, path: &[u8], is_dir: bool) -> bool {
5769        let basename = path.rsplit(|byte| *byte == b'/').next().unwrap_or(path);
5770        self.matches_with_basename(path, basename, is_dir)
5771    }
5772
5773    fn matches_with_basename(&self, path: &[u8], basename: &[u8], is_dir: bool) -> bool {
5774        let path = if self.base.is_empty() {
5775            path
5776        } else {
5777            let Some(rest) = path
5778                .strip_prefix(self.base.as_slice())
5779                .and_then(|rest| rest.strip_prefix(b"/"))
5780            else {
5781                return false;
5782            };
5783            rest
5784        };
5785        if self.directory_only {
5786            return self.matches_directory(path, is_dir);
5787        }
5788        if self.anchored || self.has_slash {
5789            return self.match_segment(path);
5790        }
5791        self.match_segment(basename)
5792    }
5793
5794    fn matches_directory(&self, path: &[u8], is_dir: bool) -> bool {
5795        if self.anchored || self.has_slash {
5796            if is_dir && self.match_path(path) {
5797                return true;
5798            }
5799            // For a *file* path, a directory-only pattern can only apply
5800            // through an *ancestor* directory of the file: the leaf is matched
5801            // only because it lives inside a directory the pattern excludes
5802            // (e.g. `/tmp-*/` excludes `tmp-info-only`, so `tmp-info-only/x`
5803            // is excluded too). Upstream git models this through directory
5804            // traversal — `last_matching_pattern` skips a MUSTBEDIR pattern for
5805            // a non-directory leaf (`dtype != DT_DIR`), and a file is excluded
5806            // only when one of its parent directories is excluded.
5807            //
5808            // A *negated* directory-only pattern (`!data/**/`) re-includes a
5809            // directory but, per git, does NOT re-include the files inside it
5810            // (git's docs: "it is not possible to re-include a file if a parent
5811            // directory of that file is excluded" — re-including the dir with
5812            // `!dir/` still requires an explicit `!dir/*` to reach its files).
5813            // So a negated directory-only pattern must never match a file via
5814            // its ancestor, otherwise it wrongly wins the leaf scan and
5815            // un-ignores a file that an earlier positive pattern ignored
5816            // (t0008-ignores "directories and ** matches": `data/**` +
5817            // `!data/**/` must leave `data/data1/file1` ignored).
5818            if self.negated {
5819                return false;
5820            }
5821            return path
5822                .iter()
5823                .enumerate()
5824                .any(|(idx, byte)| *byte == b'/' && self.match_path(&path[..idx]));
5825        }
5826        let mut components = path.split(|byte| *byte == b'/').peekable();
5827        while let Some(component) = components.next() {
5828            if self.match_segment(component) && (is_dir || components.peek().is_some()) {
5829                return true;
5830            }
5831        }
5832        false
5833    }
5834
5835    fn match_path(&self, value: &[u8]) -> bool {
5836        match self.match_kind {
5837            MatchKind::Literal => self.pattern == value,
5838            MatchKind::Suffix => !value.contains(&b'/') && value.ends_with(&self.pattern[1..]),
5839            MatchKind::Prefix => {
5840                !value.contains(&b'/') && value.starts_with(&self.pattern[..self.pattern.len() - 1])
5841            }
5842            MatchKind::Glob => wildcard_path_matches(&self.pattern, value),
5843        }
5844    }
5845
5846    /// Match a slash-free `value` (a basename or path component) against this
5847    /// pattern. Literal and simple `*X`/`X*` patterns resolve with a direct
5848    /// comparison; only complex globs pay for the allocating wildcard engine.
5849    fn match_segment(&self, value: &[u8]) -> bool {
5850        self.match_path(value)
5851    }
5852}
5853
thread_local! {
    /// Per-thread scratch table reused by [`wildcard_path_matches`].
    ///
    /// It holds a flat `(pattern.len() + 1) * (value.len() + 1)` grid of
    /// memoised sub-results (`None` = not computed yet). Every call clears and
    /// re-sizes it, so the buffer's allocation can be reused across calls
    /// instead of building a new grid each time in the hot ignore/attribute
    /// matching loop.
    static WILDCARD_MEMO: RefCell<Vec<Option<bool>>> = const { RefCell::new(Vec::new()) };
}
5860
5861fn wildcard_path_matches(pattern: &[u8], value: &[u8]) -> bool {
5862    let stride = value.len() + 1;
5863    let cells = (pattern.len() + 1) * stride;
5864    WILDCARD_MEMO.with_borrow_mut(|memo| {
5865        // One reused allocation; clearing then resizing fills the grid with `None`.
5866        memo.clear();
5867        memo.resize(cells, None);
5868        wildcard_path_matches_from(pattern, value, 0, 0, memo, stride)
5869    })
5870}
5871
5872fn wildcard_path_matches_from(
5873    pattern: &[u8],
5874    value: &[u8],
5875    pattern_index: usize,
5876    value_index: usize,
5877    memo: &mut [Option<bool>],
5878    stride: usize,
5879) -> bool {
5880    let cell = pattern_index * stride + value_index;
5881    if let Some(cached) = memo[cell] {
5882        return cached;
5883    }
5884    let matched = if pattern_index == pattern.len() {
5885        value_index == value.len()
5886    } else {
5887        match pattern[pattern_index] {
5888            b'*' if pattern.get(pattern_index + 1) == Some(&b'*') => wildcard_double_star_matches(
5889                pattern,
5890                value,
5891                pattern_index,
5892                value_index,
5893                memo,
5894                stride,
5895            ),
5896            b'*' => {
5897                if wildcard_path_matches_from(
5898                    pattern,
5899                    value,
5900                    pattern_index + 1,
5901                    value_index,
5902                    memo,
5903                    stride,
5904                ) {
5905                    true
5906                } else {
5907                    let mut next = value_index;
5908                    while next < value.len() && value[next] != b'/' {
5909                        next += 1;
5910                        if wildcard_path_matches_from(
5911                            pattern,
5912                            value,
5913                            pattern_index + 1,
5914                            next,
5915                            memo,
5916                            stride,
5917                        ) {
5918                            return true;
5919                        }
5920                    }
5921                    false
5922                }
5923            }
5924            b'?' => {
5925                value_index < value.len()
5926                    && value[value_index] != b'/'
5927                    && wildcard_path_matches_from(
5928                        pattern,
5929                        value,
5930                        pattern_index + 1,
5931                        value_index + 1,
5932                        memo,
5933                        stride,
5934                    )
5935            }
5936            b'[' => {
5937                if value_index < value.len() && value[value_index] != b'/' {
5938                    if let Some((class_matches, next_pattern_index)) =
5939                        wildcard_class_matches(pattern, pattern_index, value[value_index])
5940                    {
5941                        class_matches
5942                            && wildcard_path_matches_from(
5943                                pattern,
5944                                value,
5945                                next_pattern_index,
5946                                value_index + 1,
5947                                memo,
5948                                stride,
5949                            )
5950                    } else {
5951                        value[value_index] == b'['
5952                            && wildcard_path_matches_from(
5953                                pattern,
5954                                value,
5955                                pattern_index + 1,
5956                                value_index + 1,
5957                                memo,
5958                                stride,
5959                            )
5960                    }
5961                } else {
5962                    false
5963                }
5964            }
5965            b'\\' if pattern_index + 1 < pattern.len() => {
5966                value_index < value.len()
5967                    && pattern[pattern_index + 1] == value[value_index]
5968                    && wildcard_path_matches_from(
5969                        pattern,
5970                        value,
5971                        pattern_index + 2,
5972                        value_index + 1,
5973                        memo,
5974                        stride,
5975                    )
5976            }
5977            literal => {
5978                value_index < value.len()
5979                    && literal == value[value_index]
5980                    && wildcard_path_matches_from(
5981                        pattern,
5982                        value,
5983                        pattern_index + 1,
5984                        value_index + 1,
5985                        memo,
5986                        stride,
5987                    )
5988            }
5989        }
5990    };
5991    memo[cell] = Some(matched);
5992    matched
5993}
5994
5995fn wildcard_double_star_matches(
5996    pattern: &[u8],
5997    value: &[u8],
5998    pattern_index: usize,
5999    value_index: usize,
6000    memo: &mut [Option<bool>],
6001    stride: usize,
6002) -> bool {
6003    let after_stars = pattern_index + 2;
6004    if pattern.get(after_stars) == Some(&b'/') {
6005        if wildcard_path_matches_from(pattern, value, after_stars + 1, value_index, memo, stride) {
6006            return true;
6007        }
6008        for next in value_index..value.len() {
6009            if value[next] == b'/'
6010                && wildcard_path_matches_from(
6011                    pattern,
6012                    value,
6013                    after_stars + 1,
6014                    next + 1,
6015                    memo,
6016                    stride,
6017                )
6018            {
6019                return true;
6020            }
6021        }
6022        return false;
6023    }
6024    for next in value_index..=value.len() {
6025        if wildcard_path_matches_from(pattern, value, after_stars, next, memo, stride) {
6026            return true;
6027        }
6028    }
6029    false
6030}
6031
/// Evaluate the bracket expression that opens at `pattern[start]` (which must
/// be `[`) against the single byte `value`.
///
/// Returns `Some((matched, next))`, where `matched` already accounts for a
/// leading `!` / `^` negation and `next` is the pattern index just past the
/// closing `]`. Returns `None` when the bracket is never closed, so the caller
/// can fall back to treating `[` as a literal.
///
/// Members are single bytes or `a-b` ranges; a reversed range (`z-a`) is
/// normalised. As in POSIX bracket expressions and git's wildmatch, a `]`
/// that comes first in the class (directly after `[`, `[!` or `[^`) is an
/// ordinary member rather than the terminator, so `[]a]` matches `]` or `a`.
fn wildcard_class_matches(pattern: &[u8], start: usize, value: u8) -> Option<(bool, usize)> {
    let mut index = start + 1;
    let negated = matches!(pattern.get(index), Some(b'!' | b'^'));
    if negated {
        index += 1;
    }
    let class_start = index;
    // A leading `]` is a member, so the search for the closing bracket begins
    // one byte later in that case.
    let search_from = if pattern.get(class_start) == Some(&b']') {
        class_start + 1
    } else {
        class_start
    };
    let end = pattern
        .get(search_from..)?
        .iter()
        .position(|byte| *byte == b']')
        .map(|position| search_from + position)?;
    let mut matched = false;
    while index < end {
        if index + 2 < end && pattern[index + 1] == b'-' {
            let lower = pattern[index].min(pattern[index + 2]);
            let upper = pattern[index].max(pattern[index + 2]);
            matched |= (lower..=upper).contains(&value);
            index += 3;
        } else {
            matched |= pattern[index] == value;
            index += 1;
        }
    }
    Some((matched != negated, end + 1))
}
6060
/// Accumulated gitattributes rules from every source that has been read.
#[derive(Debug, Default)]
struct AttributeMatcher {
    /// Patterns in the order they were read. When several patterns assign the
    /// same attribute to a path, the one read last wins.
    patterns: Vec<AttributePattern>,
    /// First-seen index of every attribute name encountered while parsing;
    /// used to order the "all attributes" listing.
    attribute_order: BTreeMap<Vec<u8>, usize>,
    /// `[attr]name` macro definitions: macro name -> the assignments it
    /// expands to (the macro's own name, set, comes first).
    macros: BTreeMap<Vec<u8>, Vec<AttributeAssignment>>,
}
6067
/// One parsed attributes line: a path pattern plus the assignments it makes.
#[derive(Debug)]
struct AttributePattern {
    /// Directory (relative to the worktree root, no trailing `/`) the pattern
    /// is scoped to; empty for sources that apply to the whole tree.
    base: Vec<u8>,
    /// The pattern text with any leading `/` removed.
    pattern: Vec<u8>,
    /// Whether the pattern was written with a leading `/`.
    anchored: bool,
    /// Whether `pattern` contains a `/`; such patterns (and anchored ones) are
    /// matched against the whole base-relative path, not the basename.
    has_slash: bool,
    /// Attribute assignments applied to paths that match.
    assignments: Vec<AttributeAssignment>,
}
6076
/// A single `attr`, `-attr`, `!attr` or `attr=value` item from an attributes
/// line.
#[derive(Debug, Clone, PartialEq, Eq)]
struct AttributeAssignment {
    /// Attribute name, without any `-` / `!` prefix or `=value` suffix.
    attribute: Vec<u8>,
    /// The assigned state; `None` records an explicit `!attr` (unspecified).
    state: Option<AttributeState>,
}
6082
6083impl AttributeMatcher {
6084    fn from_worktree_root(root: &Path) -> Result<Self> {
6085        let mut matcher = Self::default();
6086        if !matcher.read_configured_attributes(root) {
6087            matcher.read_default_global_attributes();
6088        }
6089        collect_attribute_patterns(root, root, &mut matcher)?;
6090        read_attribute_patterns(
6091            root.join(".git").join("info").join("attributes"),
6092            &mut matcher,
6093            &[],
6094            b".git/info/attributes",
6095        );
6096        Ok(matcher)
6097    }
6098
6099    /// Builds only the repository-wide attribute sources — `core.attributesFile`
6100    /// (or the default global) and `$GIT_DIR/info/attributes` — *without* walking
6101    /// the worktree for `.gitattributes`. The caller is expected to fold each
6102    /// directory's `.gitattributes` into the matcher as it descends (see
6103    /// [`read_dir_attribute_patterns`]), so status/diff read the tree exactly once
6104    /// instead of doing a separate full-tree attribute pass. Lower-priority sources
6105    /// are added first, so in-tree patterns added during the walk take precedence —
6106    /// matching git's lookup order.
6107    fn from_worktree_base(root: &Path) -> Self {
6108        let mut matcher = Self::default();
6109        if !matcher.read_configured_attributes(root) {
6110            matcher.read_default_global_attributes();
6111        }
6112        read_attribute_patterns(
6113            root.join(".git").join("info").join("attributes"),
6114            &mut matcher,
6115            &[],
6116            b".git/info/attributes",
6117        );
6118        matcher
6119    }
6120
6121    fn attributes_for_path(
6122        &self,
6123        path: &[u8],
6124        requested: &[Vec<u8>],
6125        all: bool,
6126    ) -> Vec<AttributeCheck> {
6127        let mut states = BTreeMap::<Vec<u8>, Option<AttributeState>>::new();
6128        for pattern in &self.patterns {
6129            if !pattern.matches(path) {
6130                continue;
6131            }
6132            for assignment in &pattern.assignments {
6133                states.insert(assignment.attribute.clone(), assignment.state.clone());
6134            }
6135        }
6136        if all {
6137            let mut checks = states
6138                .into_iter()
6139                .filter_map(|(attribute, state)| {
6140                    state.map(|state| AttributeCheck {
6141                        attribute,
6142                        state: Some(state),
6143                    })
6144                })
6145                .collect::<Vec<_>>();
6146            checks.sort_by(|left, right| {
6147                attribute_all_rank(&left.attribute, &self.attribute_order)
6148                    .cmp(&attribute_all_rank(&right.attribute, &self.attribute_order))
6149                    .then_with(|| left.attribute.cmp(&right.attribute))
6150            });
6151            return checks;
6152        }
6153        requested
6154            .iter()
6155            .map(|attribute| AttributeCheck {
6156                attribute: attribute.clone(),
6157                state: states.get(attribute).cloned().flatten(),
6158            })
6159            .collect()
6160    }
6161
6162    fn push_attribute_order(&mut self, attribute: &[u8]) {
6163        let next = self.attribute_order.len();
6164        self.attribute_order
6165            .entry(attribute.to_vec())
6166            .or_insert(next);
6167    }
6168
6169    fn read_configured_attributes(&mut self, root: &Path) -> bool {
6170        let Ok(config) = sley_config::read_repo_config(&root.join(".git"), None) else {
6171            return false;
6172        };
6173        let Some(value) = config.get("core", None, "attributesFile") else {
6174            return false;
6175        };
6176        let path = expand_core_excludes_file(root, value);
6177        read_attribute_patterns(path, self, &[], value.as_bytes());
6178        true
6179    }
6180
6181    fn read_default_global_attributes(&mut self) {
6182        if let Some(config_home) = std::env::var_os("XDG_CONFIG_HOME")
6183            && !config_home.is_empty()
6184        {
6185            let path = PathBuf::from(config_home).join("git").join("attributes");
6186            let source = path.to_string_lossy().into_owned();
6187            read_attribute_patterns(path, self, &[], source.as_bytes());
6188            return;
6189        }
6190        if let Some(home) = std::env::var_os("HOME") {
6191            let path = PathBuf::from(home)
6192                .join(".config")
6193                .join("git")
6194                .join("attributes");
6195            let source = path.to_string_lossy().into_owned();
6196            read_attribute_patterns(path, self, &[], source.as_bytes());
6197        }
6198    }
6199}
6200
6201fn read_dir_ignore_patterns_for_base(
6202    dir: &Path,
6203    base: &[u8],
6204    matcher: &mut IgnoreMatcher,
6205) -> Result<()> {
6206    let mut source = base.to_vec();
6207    if !source.is_empty() {
6208        source.push(b'/');
6209    }
6210    source.extend_from_slice(b".gitignore");
6211    read_ignore_patterns(dir.join(".gitignore"), &mut matcher.patterns, base, &source);
6212    Ok(())
6213}
6214
6215/// Fold `dir`'s `.gitattributes` (if any) into `matcher`, scoped to `dir`'s path
6216/// within `root`. Used both by the eager full-tree pass and by the status/diff
6217/// worktree walk as it descends, so the tree is read for attributes exactly once.
6218fn read_dir_attribute_patterns(
6219    root: &Path,
6220    dir: &Path,
6221    matcher: &mut AttributeMatcher,
6222) -> Result<()> {
6223    let relative = dir.strip_prefix(root).map_err(|_| {
6224        GitError::InvalidPath(format!("path {} is outside worktree", dir.display()))
6225    })?;
6226    let base = git_path_bytes(relative)?;
6227    read_dir_attribute_patterns_for_base(dir, &base, matcher)
6228}
6229
6230fn read_dir_attribute_patterns_for_base(
6231    dir: &Path,
6232    base: &[u8],
6233    matcher: &mut AttributeMatcher,
6234) -> Result<()> {
6235    let mut source = base.to_vec();
6236    if !source.is_empty() {
6237        source.push(b'/');
6238    }
6239    source.extend_from_slice(b".gitattributes");
6240    read_attribute_patterns(dir.join(".gitattributes"), matcher, base, &source);
6241    Ok(())
6242}
6243
6244fn collect_attribute_patterns(
6245    root: &Path,
6246    dir: &Path,
6247    matcher: &mut AttributeMatcher,
6248) -> Result<()> {
6249    read_dir_attribute_patterns(root, dir, matcher)?;
6250
6251    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
6252    entries.sort_by_key(|entry| entry.file_name());
6253    for entry in entries {
6254        let path = entry.path();
6255        if path.file_name().and_then(|name| name.to_str()) == Some(".git") {
6256            continue;
6257        }
6258        if entry.metadata()?.is_dir() {
6259            collect_attribute_patterns(root, &path, matcher)?;
6260        }
6261    }
6262    Ok(())
6263}
6264
6265fn read_attribute_patterns(
6266    path: impl AsRef<Path>,
6267    matcher: &mut AttributeMatcher,
6268    base: &[u8],
6269    _source: &[u8],
6270) {
6271    let Ok(contents) = fs::read(path) else {
6272        return;
6273    };
6274    read_attribute_patterns_from_bytes(&contents, matcher, base);
6275}
6276
6277fn read_attribute_patterns_from_bytes(
6278    contents: &[u8],
6279    matcher: &mut AttributeMatcher,
6280    base: &[u8],
6281) {
6282    for raw in contents.split(|byte| *byte == b'\n') {
6283        push_attribute_pattern(matcher, raw, base);
6284    }
6285}
6286
6287fn collect_attribute_patterns_from_tree(
6288    db: &FileObjectDatabase,
6289    format: ObjectFormat,
6290    tree_oid: &ObjectId,
6291    base: Vec<u8>,
6292    matcher: &mut AttributeMatcher,
6293) -> Result<()> {
6294    let object = read_expected_object(db, tree_oid, ObjectType::Tree)?;
6295    let mut entries = Tree::parse(format, &object.body)?.entries;
6296    entries.sort_by(|left, right| left.name.cmp(&right.name));
6297    for entry in &entries {
6298        if entry.name == b".gitattributes" && tree_entry_object_type(entry.mode) == ObjectType::Blob
6299        {
6300            let object = db.read_object(&entry.oid).map_err(|err| {
6301                expect_missing_object_kind(err, entry.oid, MissingObjectKind::Blob)
6302            })?;
6303            if object.object_type == ObjectType::Blob {
6304                read_attribute_patterns_from_bytes(&object.body, matcher, &base);
6305            }
6306        }
6307    }
6308    for entry in entries {
6309        if tree_entry_object_type(entry.mode) != ObjectType::Tree {
6310            continue;
6311        }
6312        let mut child_base = base.clone();
6313        if !child_base.is_empty() {
6314            child_base.push(b'/');
6315        }
6316        child_base.extend_from_slice(entry.name.as_bytes());
6317        collect_attribute_patterns_from_tree(db, format, &entry.oid, child_base, matcher)?;
6318    }
6319    Ok(())
6320}
6321
6322fn collect_attribute_patterns_from_index(
6323    git_dir: &Path,
6324    format: ObjectFormat,
6325    db: &FileObjectDatabase,
6326    matcher: &mut AttributeMatcher,
6327) -> Result<()> {
6328    let index_path = repository_index_path(git_dir);
6329    if !index_path.exists() {
6330        return Ok(());
6331    }
6332    let mut entries = Index::parse(&fs::read(index_path)?, format)?.entries;
6333    entries.sort_by(|left, right| left.path.cmp(&right.path));
6334    for entry in entries {
6335        let is_attributes_file =
6336            entry.path == b".gitattributes" || entry.path.as_bytes().ends_with(b"/.gitattributes");
6337        if index_entry_stage(&entry) != 0
6338            || tree_entry_object_type(entry.mode) != ObjectType::Blob
6339            || !is_attributes_file
6340        {
6341            continue;
6342        }
6343        let base = match entry.path.as_bytes().strip_suffix(b".gitattributes") {
6344            Some(b"") => Vec::new(),
6345            Some(parent) => parent.strip_suffix(b"/").unwrap_or(parent).to_vec(),
6346            None => continue,
6347        };
6348        let object = db
6349            .read_object(&entry.oid)
6350            .map_err(|err| expect_missing_object_kind(err, entry.oid, MissingObjectKind::Blob))?;
6351        if object.object_type == ObjectType::Blob {
6352            read_attribute_patterns_from_bytes(&object.body, matcher, &base);
6353        }
6354    }
6355    Ok(())
6356}
6357
6358fn push_attribute_pattern(matcher: &mut AttributeMatcher, raw: &[u8], base: &[u8]) {
6359    let line = raw.strip_suffix(b"\r").unwrap_or(raw);
6360    let line = trim_ascii_whitespace(line);
6361    if line.is_empty() || line.starts_with(b"#") {
6362        return;
6363    }
6364    let mut fields = line
6365        .split(|byte| byte.is_ascii_whitespace())
6366        .filter(|field| !field.is_empty());
6367    let Some(raw_pattern) = fields.next() else {
6368        return;
6369    };
6370    if let Some(macro_name) = raw_pattern.strip_prefix(b"[attr]") {
6371        if macro_name.is_empty() {
6372            return;
6373        }
6374        let mut assignments = vec![AttributeAssignment {
6375            attribute: macro_name.to_vec(),
6376            state: Some(AttributeState::Set),
6377        }];
6378        for field in fields {
6379            push_attribute_assignments(&mut assignments, field, &matcher.macros);
6380        }
6381        for assignment in &assignments {
6382            matcher.push_attribute_order(&assignment.attribute);
6383        }
6384        matcher.macros.insert(macro_name.to_vec(), assignments);
6385        return;
6386    }
6387    let mut assignments = Vec::new();
6388    for field in fields {
6389        push_attribute_assignments(&mut assignments, field, &matcher.macros);
6390    }
6391    if assignments.is_empty() {
6392        return;
6393    }
6394    for assignment in &assignments {
6395        matcher.push_attribute_order(&assignment.attribute);
6396    }
6397    let (anchored, pattern) = if let Some(pattern) = raw_pattern.strip_prefix(b"/") {
6398        (true, pattern)
6399    } else {
6400        (false, raw_pattern)
6401    };
6402    if pattern.is_empty() {
6403        return;
6404    }
6405    matcher.patterns.push(AttributePattern {
6406        base: base.to_vec(),
6407        pattern: pattern.to_vec(),
6408        anchored,
6409        has_slash: pattern.contains(&b'/'),
6410        assignments,
6411    });
6412}
6413
6414fn push_attribute_assignments(
6415    assignments: &mut Vec<AttributeAssignment>,
6416    field: &[u8],
6417    macros: &BTreeMap<Vec<u8>, Vec<AttributeAssignment>>,
6418) {
6419    if let Some(macro_assignments) = macros.get(field) {
6420        assignments.extend(macro_assignments.iter().cloned());
6421        return;
6422    }
6423    if field == b"binary" {
6424        assignments.push(AttributeAssignment {
6425            attribute: b"binary".to_vec(),
6426            state: Some(AttributeState::Set),
6427        });
6428        assignments.push(AttributeAssignment {
6429            attribute: b"diff".to_vec(),
6430            state: Some(AttributeState::Unset),
6431        });
6432        assignments.push(AttributeAssignment {
6433            attribute: b"merge".to_vec(),
6434            state: Some(AttributeState::Unset),
6435        });
6436        assignments.push(AttributeAssignment {
6437            attribute: b"text".to_vec(),
6438            state: Some(AttributeState::Unset),
6439        });
6440        return;
6441    }
6442    if let Some(attribute) = field.strip_prefix(b"-") {
6443        if !attribute.is_empty() {
6444            assignments.push(AttributeAssignment {
6445                attribute: attribute.to_vec(),
6446                state: Some(AttributeState::Unset),
6447            });
6448        }
6449        return;
6450    }
6451    if let Some(attribute) = field.strip_prefix(b"!") {
6452        if !attribute.is_empty() {
6453            assignments.push(AttributeAssignment {
6454                attribute: attribute.to_vec(),
6455                state: None,
6456            });
6457        }
6458        return;
6459    }
6460    if let Some(equal) = field.iter().position(|byte| *byte == b'=') {
6461        let attribute = &field[..equal];
6462        let value = &field[equal + 1..];
6463        if !attribute.is_empty() {
6464            assignments.push(AttributeAssignment {
6465                attribute: attribute.to_vec(),
6466                state: Some(AttributeState::Value(value.to_vec())),
6467            });
6468        }
6469        return;
6470    }
6471    assignments.push(AttributeAssignment {
6472        attribute: field.to_vec(),
6473        state: Some(AttributeState::Set),
6474    });
6475}
6476
/// Sort key used when listing every attribute of a path.
///
/// The key is `(group, first_seen, name)`: `binary`, `diff`, `merge` and
/// `text` occupy groups 0 to 3, `eol` is group 5, and every other attribute
/// is group 4. `first_seen` comes from `order`, with `usize::MAX` for names
/// that are not recorded there.
fn attribute_all_rank(
    attribute: &[u8],
    order: &BTreeMap<Vec<u8>, usize>,
) -> (usize, usize, Vec<u8>) {
    const DEFAULT_GROUP: usize = 4;
    let fixed_groups: [(&[u8], usize); 5] = [
        (&b"binary"[..], 0),
        (&b"diff"[..], 1),
        (&b"merge"[..], 2),
        (&b"text"[..], 3),
        (&b"eol"[..], 5),
    ];
    let group = fixed_groups
        .iter()
        .find(|(name, _)| *name == attribute)
        .map_or(DEFAULT_GROUP, |(_, group)| *group);
    let first_seen = match order.get(attribute) {
        Some(position) => *position,
        None => usize::MAX,
    };
    (group, first_seen, attribute.to_vec())
}
6492
/// Return `value` without leading and trailing ASCII whitespace (as defined by
/// [`u8::is_ascii_whitespace`]). An all-whitespace input yields an empty slice.
fn trim_ascii_whitespace(value: &[u8]) -> &[u8] {
    let is_content = |byte: &u8| !byte.is_ascii_whitespace();
    let Some(first) = value.iter().position(is_content) else {
        return &value[value.len()..];
    };
    let last = value
        .iter()
        .rposition(is_content)
        .map_or(first, |index| index);
    &value[first..=last]
}
6502
6503impl AttributePattern {
6504    fn matches(&self, path: &[u8]) -> bool {
6505        let path = if self.base.is_empty() {
6506            path
6507        } else {
6508            let Some(rest) = path
6509                .strip_prefix(self.base.as_slice())
6510                .and_then(|rest| rest.strip_prefix(b"/"))
6511            else {
6512                return false;
6513            };
6514            rest
6515        };
6516        if self.anchored || self.has_slash {
6517            return wildcard_path_matches(&self.pattern, path);
6518        }
6519        path.rsplit(|byte| *byte == b'/')
6520            .next()
6521            .is_some_and(|basename| wildcard_path_matches(&self.pattern, basename))
6522    }
6523}
6524
6525// ---------------------------------------------------------------------------
6526// Content filtering on the blob <-> worktree boundary
6527//
6528// Git runs two kinds of conversion when content crosses between the worktree
6529// and the object database:
6530//
6531//   * the line-ending / `core.autocrlf` conversion (driven by the `text`,
6532//     `eol` attributes and the `core.autocrlf` / `core.eol` config), and
6533//   * the long-running `filter.<name>.clean` / `.smudge` driver filters
6534//     (selected by the `filter=<name>` attribute and configured commands).
6535//
6536// "clean" runs on the way *into* the object store (worktree -> blob), e.g. on
6537// `git add` / `git hash-object -w`. "smudge" runs on the way *out* (blob ->
6538// worktree), e.g. on checkout / restore. The driver filter, when present,
6539// wraps the EOL conversion: on clean git first runs the configured `clean`
6540// command and then applies CRLF->LF normalization; on smudge git first applies
6541// LF->CRLF and then runs the `smudge` command.
6542// ---------------------------------------------------------------------------
6543
/// The line-ending conversion that applies to a path, derived from its
/// attributes and the repository config.
///
/// "Clean" is the worktree -> blob direction and "smudge" is blob -> worktree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EolConversion {
    /// No conversion: binary content, or text with `core.autocrlf=false` and no
    /// `eol`/`text=auto` request to add carriage returns.
    None,
    /// Normalize to LF on clean; no carriage returns on smudge (`eol=lf`, or
    /// `core.autocrlf=input`).
    Lf,
    /// Normalize to LF on clean; emit CRLF on smudge (`eol=crlf`, or
    /// `core.autocrlf=true`).
    Crlf,
}
6558
/// How git should decide whether a path is text for the purpose of EOL
/// conversion.
///
/// Resolved from the `text` attribute first and, when that gives no opinion,
/// from the legacy `crlf` alias.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TextDecision {
    /// `-text` / `binary`: never convert.
    Binary,
    /// `text` is set explicitly: always treat as text.
    Text,
    /// `text=auto` (or implied by `core.autocrlf`): treat as text unless the
    /// content looks binary.
    Auto,
    /// No opinion from attributes or config: leave content untouched.
    Unspecified,
}
6573
/// The fully resolved set of conversions that apply to a single path.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ContentFilterPlan {
    /// Whether, and how, the path is considered text for EOL conversion.
    text: TextDecision,
    /// The conversion to apply when `text` resolves to "this is text".
    eol: EolConversion,
    /// `filter.<name>` driver, if assigned via attributes and configured.
    driver: Option<FilterDriver>,
}
6583
/// A `filter.<name>` content-filter driver attached to a path's
/// [`ContentFilterPlan`].
#[derive(Debug, Clone, PartialEq, Eq)]
struct FilterDriver {
    /// Driver name — presumably the `<name>` from the `filter=<name>`
    /// attribute; confirm against where the plan is resolved.
    name: Vec<u8>,
    /// Command for the clean (worktree -> blob) direction, if one is set;
    /// presumably `filter.<name>.clean` — confirm.
    clean: Option<String>,
    /// Command for the smudge (blob -> worktree) direction, if one is set;
    /// presumably `filter.<name>.smudge` — confirm.
    smudge: Option<String>,
    /// NOTE(review): looks like it mirrors `filter.<name>.required` (a failing
    /// or missing filter is an error, not a pass-through) — confirm.
    required: bool,
}
6591
6592/// Decode one crlf-family attribute (`text` or its legacy alias `crlf`) into a
6593/// text decision, plus whether the value form forced an EOL direction.
6594///
6595/// Mirrors git's `git_path_check_crlf` (convert.c): a *set* attribute is text,
6596/// an *unset* one is binary, `=auto` is auto, `=input` forces LF while still
6597/// counting as text, and any other value is "undefined" — i.e. no opinion, so
6598/// the caller falls through to the next source (the `crlf` alias, then config).
6599fn decode_crlf_family_attribute(state: Option<&AttributeState>) -> (TextDecision, EolConversion) {
6600    match state {
6601        Some(AttributeState::Set) => (TextDecision::Text, EolConversion::None),
6602        Some(AttributeState::Unset) => (TextDecision::Binary, EolConversion::None),
6603        Some(AttributeState::Value(value)) if value == b"auto" => {
6604            (TextDecision::Auto, EolConversion::None)
6605        }
6606        // `crlf=input` / `text=input`: text content normalized to LF (no CR on
6607        // smudge), exactly like `core.autocrlf=input`.
6608        Some(AttributeState::Value(value)) if value == b"input" => {
6609            (TextDecision::Text, EolConversion::Lf)
6610        }
6611        // `=<other>` is CRLF_UNDEFINED in git for the `crlf` alias: no opinion.
6612        _ => (TextDecision::Unspecified, EolConversion::None),
6613    }
6614}
6615
6616impl ContentFilterPlan {
6617    /// Build the plan for `path` from the parsed attributes and repo config.
6618    fn resolve(config: &GitConfig, checks: &[AttributeCheck]) -> Self {
6619        let text_attr = checks.iter().find(|check| check.attribute == b"text");
6620        let crlf_attr = checks.iter().find(|check| check.attribute == b"crlf");
6621        let eol_attr = checks.iter().find(|check| check.attribute == b"eol");
6622        let filter_attr = checks.iter().find(|check| check.attribute == b"filter");
6623
6624        // Resolve the eol attribute first; `eol=crlf|lf` also forces text.
6625        let eol_value = eol_attr.and_then(|check| match &check.state {
6626            Some(AttributeState::Value(value)) => Some(value.clone()),
6627            _ => None,
6628        });
6629
6630        // The `text` attribute decides first; only when it is unspecified does
6631        // git consult the legacy `crlf` alias (convert.c `convert_attrs`).
6632        let mut forced_eol = EolConversion::None;
6633        let mut text = match text_attr.map(|check| &check.state) {
6634            Some(Some(AttributeState::Set)) => TextDecision::Text,
6635            Some(Some(AttributeState::Unset)) => TextDecision::Binary,
6636            Some(Some(AttributeState::Value(value))) if value == b"auto" => TextDecision::Auto,
6637            Some(Some(AttributeState::Value(value))) if value == b"input" => {
6638                forced_eol = EolConversion::Lf;
6639                TextDecision::Text
6640            }
6641            // `text=<other>` is treated by git as a set text attribute.
6642            Some(Some(AttributeState::Value(_))) => TextDecision::Text,
6643            // `!text` (unspecified) or no text attribute: fall through to `crlf`.
6644            _ => {
6645                let (decision, eol) =
6646                    decode_crlf_family_attribute(crlf_attr.and_then(|check| check.state.as_ref()));
6647                forced_eol = eol;
6648                decision
6649            }
6650        };
6651
6652        // A concrete `eol` attribute implies the path is text even when `text`
6653        // was left unspecified (git: `eol` without `text` is treated as
6654        // `text=auto`-ish; upstream forces conversion). We honour eol only when
6655        // text is not explicitly binary.
6656        let eol = match (&text, eol_value.as_deref()) {
6657            (TextDecision::Binary, _) => EolConversion::None,
6658            (_, Some(b"crlf")) => {
6659                if text == TextDecision::Unspecified {
6660                    text = TextDecision::Text;
6661                }
6662                EolConversion::Crlf
6663            }
6664            (_, Some(b"lf")) => {
6665                if text == TextDecision::Unspecified {
6666                    text = TextDecision::Text;
6667                }
6668                EolConversion::Lf
6669            }
6670            // No explicit `eol` attribute, but `text=input`/`crlf=input` already
6671            // forced the LF direction (git's CRLF_TEXT_INPUT). Honour it over the
6672            // config-derived default.
6673            _ if forced_eol == EolConversion::Lf => EolConversion::Lf,
6674            // No eol attribute: derive direction from config.
6675            _ => eol_from_config(config),
6676        };
6677
6678        // When the path is text but neither `eol` nor `core.autocrlf`/`core.eol`
6679        // asked for carriage returns, we still normalize to LF on clean. That is
6680        // modelled by `EolConversion::Lf` (clean strips CR, smudge adds none).
6681        let eol = match (&text, eol) {
6682            (TextDecision::Text | TextDecision::Auto, EolConversion::None) => EolConversion::Lf,
6683            (_, eol) => eol,
6684        };
6685
6686        // If config does not enable autocrlf and there is no eol/text opinion,
6687        // there is genuinely nothing to do.
6688        let text = match (text, eol_attr.is_some()) {
6689            (TextDecision::Unspecified, _) => {
6690                // Without any text/eol attribute, only `core.autocrlf` can make a
6691                // path eligible, and then it behaves like `text=auto`.
6692                if autocrlf_enabled(config) {
6693                    TextDecision::Auto
6694                } else {
6695                    TextDecision::Unspecified
6696                }
6697            }
6698            (text, _) => text,
6699        };
6700
6701        let driver = resolve_filter_driver(config, filter_attr);
6702
6703        ContentFilterPlan { text, eol, driver }
6704    }
6705
6706    /// Whether EOL conversion should run for the given content.
6707    fn convert_eol(&self, content: &[u8]) -> bool {
6708        match self.text {
6709            TextDecision::Binary | TextDecision::Unspecified => false,
6710            TextDecision::Text => self.eol != EolConversion::None,
6711            // `text=auto`: only when the blob does not look binary.
6712            TextDecision::Auto => self.eol != EolConversion::None && !looks_binary(content),
6713        }
6714    }
6715
6716    /// The smudge-side LF->CRLF safety check, mirroring convert.c
6717    /// `will_convert_lf_to_crlf`. Returns false (no conversion) when:
6718    ///   * there is no naked LF to convert, or
6719    ///   * the action is `text=auto`-derived (the "new safer autocrlf") AND the
6720    ///     content already contains a lone CR or a CRLF pair, or looks binary.
6721    ///
6722    /// An explicit `text`/`eol=crlf` (non-auto) path always converts naked LFs.
6723    fn will_convert_lf_to_crlf(&self, content: &[u8]) -> bool {
6724        self.will_convert_lf_to_crlf_stats(&gather_convert_stats(content))
6725    }
6726
6727    /// Stats-based variant of [`will_convert_lf_to_crlf`], mirroring convert.c
6728    /// `will_convert_lf_to_crlf(struct text_stat *, ...)`. Used by the safecrlf
6729    /// round-trip simulation, which mutates a copy of the stats rather than
6730    /// re-scanning the buffer.
6731    fn will_convert_lf_to_crlf_stats(&self, stats: &ConvertStats) -> bool {
6732        // `output_eol(crlf_action) != EOL_CRLF` short-circuits in git.
6733        if self.eol != EolConversion::Crlf {
6734            return false;
6735        }
6736        // No naked LF? Nothing to convert.
6737        if stats.lonelf == 0 {
6738            return false;
6739        }
6740        if self.text == TextDecision::Auto {
6741            // Any CR or CRLF already present: leave it untouched (irreversible).
6742            if stats.lonecr > 0 || stats.crlf > 0 {
6743                return false;
6744            }
6745            if convert_is_binary(stats) {
6746                return false;
6747            }
6748        }
6749        true
6750    }
6751
6752    /// Whether this path is a candidate for the `core.safecrlf` round-trip check
6753    /// at all: git only warns for non-`CRLF_BINARY` actions. `Binary` and
6754    /// `Unspecified` (with autocrlf off) correspond to git's `CRLF_BINARY`.
6755    fn safecrlf_applies(&self) -> bool {
6756        matches!(self.text, TextDecision::Text | TextDecision::Auto)
6757    }
6758
6759    /// Emit git's `core.safecrlf` round-trip warning for `path`, mirroring the
6760    /// stderr side-effect of convert.c `crlf_to_git` (the `CONV_EOL_RNDTRP_*`
6761    /// branch). `old_stats` are the stats of the *pre-conversion* worktree
6762    /// content (already gathered by the caller so the buffer is scanned once);
6763    /// `index_has_crlf` is whether the path's current index blob already has a
6764    /// CRLF (git's `has_crlf_in_index`, used only for the auto-crlf decision).
6765    ///
6766    /// This never inspects or alters the bytes written to the object store; it is
6767    /// purely the additive warning git prints alongside `git add`/`commit`.
6768    /// Returns `Err` only under `core.safecrlf=true` when the round-trip is
6769    /// irreversible (git `die`s).
6770    fn check_safe_crlf_stats(
6771        &self,
6772        old_stats: &ConvertStats,
6773        index_has_crlf: bool,
6774        flags: ConvFlags,
6775        path: &[u8],
6776    ) -> Result<()> {
6777        if flags == ConvFlags::Off || !self.safecrlf_applies() {
6778            return Ok(());
6779        }
6780
6781        // Replicate `crlf_to_git`'s `convert_crlf_into_lf` decision (the clean
6782        // direction). It starts as "there is a CRLF to collapse"; auto paths
6783        // suppress conversion for binary content or content whose index blob
6784        // already carries a CRLF (the "new safer autocrlf").
6785        let mut convert_crlf_into_lf = old_stats.crlf > 0;
6786        if self.text == TextDecision::Auto {
6787            if convert_is_binary(old_stats) {
6788                // git returns 0 here: no conversion *and* no warning.
6789                return Ok(());
6790            }
6791            if index_has_crlf {
6792                convert_crlf_into_lf = false;
6793            }
6794        }
6795
6796        // Simulate the round-trip on a copy of the stats.
6797        let mut new_stats = old_stats.clone();
6798        // Simulate "git add" (clean: CRLF -> LF).
6799        if convert_crlf_into_lf {
6800            new_stats.lonelf += new_stats.crlf;
6801            new_stats.crlf = 0;
6802        }
6803        // Simulate "git checkout" (smudge: LF -> CRLF).
6804        if self.will_convert_lf_to_crlf_stats(&new_stats) {
6805            new_stats.crlf += new_stats.lonelf;
6806            new_stats.lonelf = 0;
6807        }
6808        check_safe_crlf(old_stats, &new_stats, flags, path)
6809    }
6810}
6811
6812/// Derive the smudge-direction line ending from `core.autocrlf` / `core.eol`.
6813fn eol_from_config(config: &GitConfig) -> EolConversion {
6814    if let Some(value) = config.get("core", None, "autocrlf") {
6815        match value.to_ascii_lowercase().as_str() {
6816            "input" => return EolConversion::Lf,
6817            "true" | "yes" | "on" | "1" => return EolConversion::Crlf,
6818            _ => {}
6819        }
6820    }
6821    if config.get_bool("core", None, "autocrlf") == Some(true) {
6822        return EolConversion::Crlf;
6823    }
6824    match config
6825        .get("core", None, "eol")
6826        .map(|v| v.to_ascii_lowercase())
6827    {
6828        Some(ref v) if v == "crlf" => EolConversion::Crlf,
6829        Some(ref v) if v == "lf" => EolConversion::Lf,
6830        _ => EolConversion::None,
6831    }
6832}
6833
6834/// Whether `core.autocrlf` is set to anything that enables conversion
6835/// (`true` or `input`).
6836fn autocrlf_enabled(config: &GitConfig) -> bool {
6837    if let Some(value) = config.get("core", None, "autocrlf")
6838        && value.eq_ignore_ascii_case("input")
6839    {
6840        return true;
6841    }
6842    config.get_bool("core", None, "autocrlf") == Some(true)
6843}
6844
6845/// Resolve the `filter=<name>` attribute against `filter.<name>.*` config.
6846fn resolve_filter_driver(
6847    config: &GitConfig,
6848    filter_attr: Option<&AttributeCheck>,
6849) -> Option<FilterDriver> {
6850    let name = match filter_attr.map(|check| &check.state) {
6851        Some(Some(AttributeState::Value(value))) => value.clone(),
6852        // `filter` set/unset without a value selects no driver.
6853        _ => return None,
6854    };
6855    let subsection = String::from_utf8_lossy(&name).into_owned();
6856    let clean = config
6857        .get("filter", Some(&subsection), "clean")
6858        .filter(|cmd| !cmd.is_empty())
6859        .map(str::to_owned);
6860    let smudge = config
6861        .get("filter", Some(&subsection), "smudge")
6862        .filter(|cmd| !cmd.is_empty())
6863        .map(str::to_owned);
6864    let required = config
6865        .get_bool("filter", Some(&subsection), "required")
6866        .unwrap_or(false);
6867    // A filter with neither command and not required is a no-op.
6868    if clean.is_none() && smudge.is_none() && !required {
6869        return None;
6870    }
6871    Some(FilterDriver {
6872        name,
6873        clean,
6874        smudge,
6875        required,
6876    })
6877}
6878
/// Binary-content heuristic in the spirit of git's `buffer_is_binary`: the
/// content counts as binary when any of its first 8000 bytes is a NUL.
fn looks_binary(content: &[u8]) -> bool {
    const FIRST_FEW_BYTES: usize = 8000;
    content
        .iter()
        .take(FIRST_FEW_BYTES)
        .any(|&byte| byte == 0)
}
6886
/// Collapse every CRLF pair to a single LF (CRLF -> LF).
///
/// Only a CR immediately followed by an LF is removed; a lone CR (old-Mac line
/// ending) is kept, matching git. When the content holds no CRLF pair at all
/// the input is handed back untouched, so a borrowed input stays borrowed.
fn convert_crlf_to_lf_cow(content: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
    let has_crlf = content.windows(2).any(|pair| pair == b"\r\n");
    if !has_crlf {
        return content;
    }
    let mut normalized = Vec::with_capacity(content.len());
    let mut bytes = content.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        // Skip the CR of a CRLF pair; its LF is pushed on the next turn.
        let is_cr_of_pair = byte == b'\r' && bytes.peek() == Some(&b'\n');
        if !is_cr_of_pair {
            normalized.push(byte);
        }
    }
    Cow::Owned(normalized)
}
6908
/// Expand every lone LF into a CRLF pair (LF -> CRLF).
///
/// An LF that already follows a CR is copied as-is so existing CRLF pairs are
/// not double-converted, matching git.
fn convert_lf_to_crlf(content: &[u8]) -> Vec<u8> {
    // Reserve a little headroom for the carriage returns being inserted.
    let mut converted = Vec::with_capacity(content.len() + content.len() / 16);
    for (position, &byte) in content.iter().enumerate() {
        let follows_cr = position > 0 && content[position - 1] == b'\r';
        if byte == b'\n' && !follows_cr {
            converted.push(b'\r');
        }
        converted.push(byte);
    }
    converted
}
6923
6924/// Run a configured `clean`/`smudge` command as a subprocess, feeding `content`
6925/// on stdin and returning its stdout. Errors carry enough context for the
6926/// caller to decide whether the failure is fatal (required filter) or should be
6927/// silently ignored (optional filter passthrough).
6928fn run_filter_command(command: &str, path: &[u8], content: &[u8]) -> Result<Vec<u8>> {
6929    // Git expands `%f` in the filter command to the path of the file being
6930    // filtered (quoted). We perform the same substitution.
6931    let display_path = String::from_utf8_lossy(path);
6932    let expanded = command.replace("%f", &shell_quote(&display_path));
6933    // Run through the platform shell so pipelines / arguments in the configured
6934    // command behave the same way git's `run_command`-with-shell does.
6935    let (shell, flag) = if cfg!(windows) {
6936        ("cmd", "/C")
6937    } else {
6938        ("/bin/sh", "-c")
6939    };
6940    let mut child = Command::new(shell)
6941        .arg(flag)
6942        .arg(&expanded)
6943        .stdin(Stdio::piped())
6944        .stdout(Stdio::piped())
6945        .stderr(Stdio::piped())
6946        .spawn()
6947        .map_err(|err| GitError::Command(format!("failed to spawn filter `{command}`: {err}")))?;
6948    // Write the content to the child's stdin on a separate thread so we never
6949    // deadlock against a filter that streams output before consuming all input.
6950    let mut stdin = child
6951        .stdin
6952        .take()
6953        .ok_or_else(|| GitError::Command(format!("filter `{command}` stdin unavailable")))?;
6954    let payload = content.to_vec();
6955    let writer = std::thread::spawn(move || {
6956        let _ = stdin.write_all(&payload);
6957        // Dropping `stdin` here closes the pipe so the child sees EOF.
6958    });
6959    let output = child
6960        .wait_with_output()
6961        .map_err(|err| GitError::Command(format!("filter `{command}` failed: {err}")))?;
6962    // Join the writer; its own errors (e.g. broken pipe) are non-fatal because
6963    // the child's exit status is the authoritative signal.
6964    let _ = writer.join();
6965    if !output.status.success() {
6966        let stderr = String::from_utf8_lossy(&output.stderr);
6967        return Err(GitError::Command(format!(
6968            "filter `{command}` exited with {}: {}",
6969            output.status,
6970            stderr.trim()
6971        )));
6972    }
6973    Ok(output.stdout)
6974}
6975
/// Wrap `value` in POSIX single quotes so it can be spliced into a shell
/// command as one word (used for the `%f` path handed to driver filters).
///
/// Each embedded single quote is rewritten as `'\''`: close the quoted run,
/// emit an escaped quote, reopen the quoted run.
fn shell_quote(value: &str) -> String {
    let escaped = value.replace('\'', "'\\''");
    let mut quoted = String::with_capacity(escaped.len() + 2);
    quoted.push('\'');
    quoted.push_str(&escaped);
    quoted.push('\'');
    quoted
}
6991
6992/// Apply the *clean* conversion to `content` for `path` (worktree -> blob):
6993/// first the configured `filter.<name>.clean` driver (if any), then CRLF->LF
6994/// normalization when EOL conversion applies.
6995///
6996/// `config` is the repository config (`GitConfig`) and `path` is the
6997/// repository-relative path of the file (forward-slash separated, e.g.
6998/// `src/main.rs`). When no filter or EOL conversion applies the input is
6999/// returned unchanged.
7000///
7001/// A *required* driver (`filter.<name>.required=true`) whose `clean` command is
7002/// missing or fails produces a [`GitError::Command`]; a non-required driver
7003/// failure (or absence of a `clean` command) passes the content through
7004/// unfiltered, matching git.
7005pub fn apply_clean_filter(
7006    worktree_root: impl AsRef<Path>,
7007    git_dir: impl AsRef<Path>,
7008    config: &GitConfig,
7009    path: &[u8],
7010    content: &[u8],
7011) -> Result<Vec<u8>> {
7012    // On clean the worktree file exists, so the live `.gitattributes` chain is
7013    // authoritative. `git_dir` is accepted for symmetry with the smudge entry
7014    // point (which falls back to the index) and for future use.
7015    let _ = git_dir.as_ref();
7016    let checks = filter_attribute_checks(worktree_root.as_ref(), path)?;
7017    apply_clean_filter_with_attributes(config, &checks, path, content)
7018}
7019
7020/// A reusable handle that captures the worktree's `.gitattributes` chain once so
7021/// repeated clean-filter calls (e.g. `hash-object --stdin-paths` hashing many
7022/// paths in one process) don't re-walk the worktree and re-read every
7023/// `.gitattributes`/global config per path.
7024///
7025/// Build it once with [`WorktreeAttributes::from_worktree_root`], then call
7026/// [`WorktreeAttributes::apply_clean_filter`] per path. This mirrors
7027/// [`apply_clean_filter`] exactly except the expensive attribute-source scan is
7028/// amortized across calls.
7029pub struct WorktreeAttributes {
7030    matcher: AttributeMatcher,
7031}
7032
7033impl WorktreeAttributes {
7034    /// Read the worktree's attribute sources once (global/`core.attributesFile`,
7035    /// every in-tree `.gitattributes`, and `$GIT_DIR/info/attributes`).
7036    pub fn from_worktree_root(worktree_root: impl AsRef<Path>) -> Result<Self> {
7037        Ok(Self {
7038            matcher: AttributeMatcher::from_worktree_root(worktree_root.as_ref())?,
7039        })
7040    }
7041
7042    /// Apply the clean conversion to `content` for `path`, reusing the cached
7043    /// attribute chain. Behaviourally identical to [`apply_clean_filter`].
7044    pub fn apply_clean_filter(
7045        &self,
7046        config: &GitConfig,
7047        path: &[u8],
7048        content: &[u8],
7049    ) -> Result<Vec<u8>> {
7050        let checks = self
7051            .matcher
7052            .attributes_for_path(path, &filter_attribute_names(), false);
7053        apply_clean_filter_with_attributes(config, &checks, path, content)
7054    }
7055}
7056
7057/// A reusable handle that captures a *tree's* `.gitattributes` chain once so
7058/// repeated smudge-filter calls (e.g. `git archive` streaming every blob in a
7059/// tree) resolve attributes from the tree being processed rather than the live
7060/// worktree.
7061///
7062/// This is the attribute direction `git archive` uses: upstream unpacks the
7063/// archived tree into a scratch index and sets `GIT_ATTR_INDEX`, so the
7064/// `.gitattributes` that govern conversion come from the *archived tree* (plus
7065/// the global/`core.attributesFile` chain and `$GIT_DIR/info/attributes`), not
7066/// from whatever happens to be checked out. `--worktree-attributes` callers
7067/// should use [`WorktreeAttributes`] instead.
7068///
7069/// Build it once with [`TreeAttributes::from_tree`], then call
7070/// [`TreeAttributes::apply_smudge_filter`] per blob. Behaviourally this mirrors
7071/// [`apply_smudge_filter`] except the attribute source is the supplied tree and
7072/// the expensive source scan is amortized across calls.
7073pub struct TreeAttributes {
7074    matcher: AttributeMatcher,
7075}
7076
7077impl TreeAttributes {
7078    /// Read the attribute sources for `tree_oid` once: the global /
7079    /// `core.attributesFile` chain, every `.gitattributes` blob found while
7080    /// walking `tree_oid`, and `$GIT_DIR/info/attributes`.
7081    ///
7082    /// `attr_root` locates the global config (`read_configured_attributes`);
7083    /// pass the worktree root for a non-bare repo, or the git dir for a bare
7084    /// one. `git_dir` locates `info/attributes` directly (so this works for bare
7085    /// repos, where there is no nested `.git`). No worktree `.gitattributes`
7086    /// files are read — use [`WorktreeAttributes`] for the
7087    /// `--worktree-attributes` direction.
7088    pub fn from_tree(
7089        attr_root: impl AsRef<Path>,
7090        git_dir: impl AsRef<Path>,
7091        db: &FileObjectDatabase,
7092        format: ObjectFormat,
7093        tree_oid: &ObjectId,
7094    ) -> Result<Self> {
7095        let attr_root = attr_root.as_ref();
7096        let mut matcher = AttributeMatcher::default();
7097        if !matcher.read_configured_attributes(attr_root) {
7098            matcher.read_default_global_attributes();
7099        }
7100        collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
7101        read_attribute_patterns(
7102            git_dir.as_ref().join("info").join("attributes"),
7103            &mut matcher,
7104            &[],
7105            b"info/attributes",
7106        );
7107        Ok(Self { matcher })
7108    }
7109
7110    /// Apply the smudge conversion (blob -> worktree: EOL `LF`->`CRLF` plus any
7111    /// configured `filter.<name>.smudge` driver) to `content` for `path`,
7112    /// reusing the cached attribute chain. Behaviourally identical to
7113    /// [`apply_smudge_filter`] except attributes come from the tree this handle
7114    /// was built from.
7115    pub fn apply_smudge_filter(
7116        &self,
7117        config: &GitConfig,
7118        path: &[u8],
7119        content: &[u8],
7120    ) -> Result<Vec<u8>> {
7121        let checks = self
7122            .matcher
7123            .attributes_for_path(path, &filter_attribute_names(), false);
7124        apply_smudge_filter_with_attributes(config, &checks, path, content)
7125    }
7126
7127    /// True when `path` has the `export-subst` attribute set (git's
7128    /// `check_attr_export_subst`), meaning `git archive` should run
7129    /// `$Format:…$` keyword substitution on its content.
7130    pub fn export_subst_for_path(&self, path: &[u8]) -> bool {
7131        self.attribute_is_set(path, b"export-subst")
7132    }
7133
7134    /// True when `path` has the `export-ignore` attribute set (git's
7135    /// `check_attr_export_ignore`), meaning `git archive` should omit the path
7136    /// (and, for a directory, its whole subtree) from the archive.
7137    pub fn export_ignore_for_path(&self, path: &[u8]) -> bool {
7138        self.attribute_is_set(path, b"export-ignore")
7139    }
7140
7141    fn attribute_is_set(&self, path: &[u8], attribute: &[u8]) -> bool {
7142        let requested = [attribute.to_vec()];
7143        let checks = self.matcher.attributes_for_path(path, &requested, false);
7144        matches!(
7145            checks.first().and_then(|check| check.state.as_ref()),
7146            Some(AttributeState::Set)
7147        )
7148    }
7149
7150    /// The `diff` attribute state for `path` (`Set` for `diff`, `Unset` for
7151    /// `-diff`, `Value(name)` for `diff=<name>`, `None` when unspecified). Used
7152    /// by `git archive`'s zip backend to classify text vs. binary via the
7153    /// path's userdiff driver.
7154    pub fn diff_attribute_for_path(&self, path: &[u8]) -> Option<AttributeState> {
7155        let requested = [b"diff".to_vec()];
7156        let checks = self.matcher.attributes_for_path(path, &requested, false);
7157        checks.into_iter().next().and_then(|check| check.state)
7158    }
7159}
7160
7161/// Like [`apply_clean_filter`] but takes already-resolved attribute checks,
7162/// letting callers that have computed attributes once reuse them.
7163pub fn apply_clean_filter_with_attributes(
7164    config: &GitConfig,
7165    attributes: &[AttributeCheck],
7166    path: &[u8],
7167    content: &[u8],
7168) -> Result<Vec<u8>> {
7169    Ok(apply_clean_filter_with_attributes_cow(config, attributes, path, content)?.into_owned())
7170}
7171
7172/// Borrow-first variant of [`apply_clean_filter_with_attributes`].
7173///
7174/// When no filter or EOL conversion changes the content, the returned value
7175/// borrows `content`; callers that can consume a [`Cow`] avoid allocating for
7176/// the common pass-through case.
7177pub fn apply_clean_filter_with_attributes_cow<'a>(
7178    config: &GitConfig,
7179    attributes: &[AttributeCheck],
7180    path: &[u8],
7181    content: &'a [u8],
7182) -> Result<Cow<'a, [u8]>> {
7183    apply_clean_filter_with_attributes_cow_safecrlf(
7184        config,
7185        attributes,
7186        path,
7187        content,
7188        ConvFlags::Off,
7189        SafeCrlfIndexBlob::None,
7190    )
7191}
7192
7193/// How the safecrlf check should learn whether this path's *current index blob*
7194/// already contains a CRLF (git's `has_crlf_in_index`). Only consulted on the
7195/// `text=auto` / `core.autocrlf` path.
7196pub enum SafeCrlfIndexBlob<'a> {
7197    /// No index blob is available (the staging caller has none, or safecrlf is
7198    /// off) — treated as "no CRLF in index".
7199    None,
7200    /// The path's current index blob, read on demand from this object database
7201    /// only when the auto-crlf decision actually needs it.
7202    Lookup {
7203        odb: &'a FileObjectDatabase,
7204        oid: ObjectId,
7205    },
7206}
7207
7208impl SafeCrlfIndexBlob<'_> {
7209    fn has_crlf(&self) -> bool {
7210        match self {
7211            SafeCrlfIndexBlob::None => false,
7212            SafeCrlfIndexBlob::Lookup { odb, oid } => has_crlf_in_index(odb, oid),
7213        }
7214    }
7215}
7216
7217/// [`apply_clean_filter_with_attributes_cow`] plus git's additive `core.safecrlf`
7218/// round-trip warning (convert.c `crlf_to_git`).
7219///
7220/// The conversion result is byte-for-byte identical to the plain variant;
7221/// `flags`/`index_blob` only drive the stderr warning git prints when a
7222/// CRLF<->LF round-trip would not be reversible. The warning is computed on the
7223/// *post-driver, pre-EOL-conversion* content, matching git's ordering in
7224/// `convert_to_git` (apply_filter -> crlf_to_git).
7225pub fn apply_clean_filter_with_attributes_cow_safecrlf<'a>(
7226    config: &GitConfig,
7227    attributes: &[AttributeCheck],
7228    path: &[u8],
7229    content: &'a [u8],
7230    flags: ConvFlags,
7231    index_blob: SafeCrlfIndexBlob<'_>,
7232) -> Result<Cow<'a, [u8]>> {
7233    let plan = ContentFilterPlan::resolve(config, attributes);
7234    let mut data = Cow::Borrowed(content);
7235    if let Some(driver) = &plan.driver {
7236        data = run_driver(driver, driver.clean.as_deref(), path, data)?;
7237    }
7238    // The safecrlf check scans the (post-driver) buffer once for line-ending
7239    // stats. Gate it tightly so the extra scan never runs on the dominant
7240    // pass-through paths: only when safecrlf is enabled, the path is a real
7241    // conversion candidate (not `CRLF_BINARY`), and the buffer is non-empty.
7242    if flags != ConvFlags::Off && !data.is_empty() && plan.safecrlf_applies() {
7243        let old_stats = gather_convert_stats(&data);
7244        plan.check_safe_crlf_stats(&old_stats, index_blob.has_crlf(), flags, path)?;
7245    }
7246    if plan.convert_eol(&data) {
7247        data = convert_crlf_to_lf_cow(data);
7248    }
7249    Ok(data)
7250}
7251
7252/// Apply the *smudge* conversion to `content` for `path` (blob -> worktree):
7253/// first LF->CRLF when EOL conversion applies, then the configured
7254/// `filter.<name>.smudge` driver (if any).
7255///
7256/// Semantics mirror [`apply_clean_filter`]: a required driver with a missing or
7257/// failing `smudge` command errors, while a non-required one passes the content
7258/// through.
7259pub fn apply_smudge_filter(
7260    worktree_root: impl AsRef<Path>,
7261    git_dir: impl AsRef<Path>,
7262    format: ObjectFormat,
7263    config: &GitConfig,
7264    path: &[u8],
7265    content: &[u8],
7266) -> Result<Vec<u8>> {
7267    // On smudge (checkout) the worktree file may not exist yet, so resolve the
7268    // attributes from the `.gitattributes` recorded in the index.
7269    let checks =
7270        smudge_attribute_checks_from_index(worktree_root.as_ref(), git_dir.as_ref(), format, path)?;
7271    apply_smudge_filter_with_attributes(config, &checks, path, content)
7272}
7273
7274/// Like [`apply_smudge_filter`] but takes already-resolved attribute checks.
7275pub fn apply_smudge_filter_with_attributes(
7276    config: &GitConfig,
7277    attributes: &[AttributeCheck],
7278    path: &[u8],
7279    content: &[u8],
7280) -> Result<Vec<u8>> {
7281    Ok(apply_smudge_filter_with_attributes_cow(config, attributes, path, content)?.into_owned())
7282}
7283
7284/// Borrow-first variant of [`apply_smudge_filter_with_attributes`].
7285///
7286/// When no filter or EOL conversion changes the content, the returned value
7287/// borrows `content`; callers that can consume a [`Cow`] avoid allocating for
7288/// the common pass-through case.
7289pub fn apply_smudge_filter_with_attributes_cow<'a>(
7290    config: &GitConfig,
7291    attributes: &[AttributeCheck],
7292    path: &[u8],
7293    content: &'a [u8],
7294) -> Result<Cow<'a, [u8]>> {
7295    let plan = ContentFilterPlan::resolve(config, attributes);
7296    let mut data = Cow::Borrowed(content);
7297    if plan.eol == EolConversion::Crlf
7298        && plan.convert_eol(&data)
7299        && plan.will_convert_lf_to_crlf(&data)
7300    {
7301        data = Cow::Owned(convert_lf_to_crlf(&data));
7302    }
7303    if let Some(driver) = &plan.driver {
7304        data = run_driver(driver, driver.smudge.as_deref(), path, data)?;
7305    }
7306    Ok(data)
7307}
7308
7309/// Execute one direction of a driver filter, honouring the `required` flag.
7310fn run_driver<'a>(
7311    driver: &FilterDriver,
7312    command: Option<&str>,
7313    path: &[u8],
7314    content: Cow<'a, [u8]>,
7315) -> Result<Cow<'a, [u8]>> {
7316    let Some(command) = command else {
7317        // No command in this direction. Required filters must error; optional
7318        // ones pass content through unchanged.
7319        if driver.required {
7320            return Err(GitError::Command(format!(
7321                "required filter `{}` has no configured command for this direction",
7322                String::from_utf8_lossy(&driver.name)
7323            )));
7324        }
7325        return Ok(content);
7326    };
7327    match run_filter_command(command, path, &content) {
7328        Ok(output) => Ok(Cow::Owned(output)),
7329        Err(err) => {
7330            if driver.required {
7331                Err(err)
7332            } else {
7333                // Non-required filter failure: fall back to the unfiltered
7334                // content, matching git's behaviour.
7335                Ok(content)
7336            }
7337        }
7338    }
7339}
7340
7341/// Compute the attributes relevant to content filtering (`text`, `eol`,
7342/// `filter`) for `path` from the worktree `.gitattributes` chain.
7343fn filter_attribute_checks(worktree_root: &Path, path: &[u8]) -> Result<Vec<AttributeCheck>> {
7344    let requested = filter_attribute_names();
7345    let mut matcher = AttributeMatcher::default();
7346    if !matcher.read_configured_attributes(worktree_root) {
7347        matcher.read_default_global_attributes();
7348    }
7349    read_dir_attribute_patterns_for_base(worktree_root, &[], &mut matcher)?;
7350    let mut prefix = Vec::new();
7351    let mut parts = path.split(|byte| *byte == b'/').peekable();
7352    while let Some(part) = parts.next() {
7353        if parts.peek().is_none() {
7354            break;
7355        }
7356        if !prefix.is_empty() {
7357            prefix.push(b'/');
7358        }
7359        prefix.extend_from_slice(part);
7360        let dir = worktree_root.join(repo_path_to_os_path(&prefix)?);
7361        read_dir_attribute_patterns_for_base(&dir, &prefix, &mut matcher)?;
7362    }
7363    read_attribute_patterns(
7364        worktree_root.join(".git").join("info").join("attributes"),
7365        &mut matcher,
7366        &[],
7367        b".git/info/attributes",
7368    );
7369    Ok(matcher.attributes_for_path(path, &requested, false))
7370}
7371
7372/// Compute filtering attributes for a checkout (blob -> worktree).
7373///
7374/// `git checkout -- <pathspec>` / `git restore` materialize through git's
7375/// **default** attr direction, which is `GIT_ATTR_CHECKIN` (attr.c: the static
7376/// `direction` is zero-initialized and `builtin/checkout.c` never overrides it
7377/// for the pathspec path). Under that direction `read_attr` reads each
7378/// `.gitattributes` frame from the **worktree file first**, falling back to the
7379/// staged blob only when no worktree file exists at that directory level
7380/// (sparse-checkout). This is the precedence the smudge filter must use:
7381/// t0027 commits an *empty* root `.gitattributes`, then overwrites the worktree
7382/// copy with `*.txt text eol=crlf` *without re-staging* — and git's checkout
7383/// still honours the worktree copy. Reading the index alone (or index-first)
7384/// made checkout under-convert line endings, because the staged blob was empty.
7385fn smudge_attribute_checks_from_index(
7386    worktree_root: &Path,
7387    git_dir: &Path,
7388    format: ObjectFormat,
7389    path: &[u8],
7390) -> Result<Vec<AttributeCheck>> {
7391    let requested = filter_attribute_names();
7392    let mut matcher = AttributeMatcher::default();
7393    if !matcher.read_configured_attributes(worktree_root) {
7394        matcher.read_default_global_attributes();
7395    }
7396
7397    // Build the set of `.gitattributes` blobs the index carries, keyed by the
7398    // directory they govern, so each ancestry frame can prefer the staged copy.
7399    let index_attributes = index_gitattributes_by_base(git_dir, format)?;
7400
7401    // Walk root -> ... -> the file's parent directory, folding each frame's
7402    // `.gitattributes` in shallow-to-deep order so deeper directories win.
7403    fold_checkout_attribute_frame(
7404        worktree_root,
7405        &[],
7406        &index_attributes,
7407        &mut matcher,
7408    )?;
7409    let mut prefix = Vec::new();
7410    let mut parts = path.split(|byte| *byte == b'/').peekable();
7411    while let Some(part) = parts.next() {
7412        if parts.peek().is_none() {
7413            break;
7414        }
7415        if !prefix.is_empty() {
7416            prefix.push(b'/');
7417        }
7418        prefix.extend_from_slice(part);
7419        let dir = worktree_root.join(repo_path_to_os_path(&prefix)?);
7420        fold_checkout_attribute_frame(&dir, &prefix, &index_attributes, &mut matcher)?;
7421    }
7422
7423    read_attribute_patterns(
7424        worktree_root.join(".git").join("info").join("attributes"),
7425        &mut matcher,
7426        &[],
7427        b".git/info/attributes",
7428    );
7429    Ok(matcher.attributes_for_path(path, &requested, false))
7430}
7431
7432/// Fold the `.gitattributes` governing directory `base` (whose on-disk location
7433/// is `dir`) into `matcher`, preferring the worktree file and falling back to
7434/// the staged blob. Mirrors one attr-stack frame under `GIT_ATTR_CHECKIN`
7435/// (git's default direction, used by `checkout -- <pathspec>` / `restore`).
7436fn fold_checkout_attribute_frame(
7437    dir: &Path,
7438    base: &[u8],
7439    index_attributes: &BTreeMap<Vec<u8>, Vec<u8>>,
7440    matcher: &mut AttributeMatcher,
7441) -> Result<()> {
7442    let worktree_file = dir.join(".gitattributes");
7443    if let Ok(contents) = fs::read(&worktree_file) {
7444        // A worktree `.gitattributes` exists at this level: it wins outright
7445        // (git only consults the index when the worktree file is absent).
7446        read_attribute_patterns_from_bytes(&contents, matcher, base);
7447    } else if let Some(contents) = index_attributes.get(base) {
7448        read_attribute_patterns_from_bytes(contents, matcher, base);
7449    }
7450    Ok(())
7451}
7452
7453/// Read every staged `.gitattributes` blob, keyed by the repo-relative directory
7454/// it governs (`""` for the worktree root). Stage-0 blob entries only.
7455fn index_gitattributes_by_base(
7456    git_dir: &Path,
7457    format: ObjectFormat,
7458) -> Result<BTreeMap<Vec<u8>, Vec<u8>>> {
7459    let mut map = BTreeMap::new();
7460    let index_path = repository_index_path(git_dir);
7461    if !index_path.exists() {
7462        return Ok(map);
7463    }
7464    let db = FileObjectDatabase::from_git_dir(git_dir, format);
7465    let entries = Index::parse(&fs::read(index_path)?, format)?.entries;
7466    for entry in entries {
7467        let is_attributes_file =
7468            entry.path == b".gitattributes" || entry.path.as_bytes().ends_with(b"/.gitattributes");
7469        if index_entry_stage(&entry) != 0
7470            || tree_entry_object_type(entry.mode) != ObjectType::Blob
7471            || !is_attributes_file
7472        {
7473            continue;
7474        }
7475        let base = match entry.path.as_bytes().strip_suffix(b".gitattributes") {
7476            Some(b"") => Vec::new(),
7477            Some(parent) => parent.strip_suffix(b"/").unwrap_or(parent).to_vec(),
7478            None => continue,
7479        };
7480        let object = db
7481            .read_object(&entry.oid)
7482            .map_err(|err| expect_missing_object_kind(err, entry.oid, MissingObjectKind::Blob))?;
7483        if object.object_type == ObjectType::Blob {
7484            map.insert(base, object.body.clone());
7485        }
7486    }
7487    Ok(map)
7488}
7489
/// The attribute names consulted for content filtering, in lookup order.
fn filter_attribute_names() -> Vec<Vec<u8>> {
    // `crlf` is git's legacy alias for `text` (convert.c registers both). It is
    // the fallback when `text` is unspecified, so it has to be resolved too.
    const NAMES: [&[u8]; 4] = [b"text", b"crlf", b"eol", b"filter"];
    NAMES.iter().map(|name| name.to_vec()).collect()
}
7500
7501// ---------------------------------------------------------------------------
7502// `ls-files --eol` line-ending information
7503//
7504// Git's `git ls-files --eol` prints, for each path, three fields:
7505//   i/<stat>  — line-ending statistics of the *index* blob content
7506//   w/<stat>  — line-ending statistics of the *worktree* file content
7507//   attr/<a>  — the resolved crlf/eol attribute action (attributes only, no
7508//               config) — `get_convert_attr_ascii` in convert.c
7509// The two stat fields mirror `gather_convert_stats_ascii`; the attr field
7510// mirrors `convert_attrs` up to `ca->attr_action` (i.e. *before* the config
7511// derived `text` -> input/crlf substitution and the `core.autocrlf` fallback).
7512// ---------------------------------------------------------------------------
7513
/// Line-ending statistics of a byte buffer, mirroring convert.c `gather_stats`.
#[derive(Clone)]
struct ConvertStats {
    /// Number of NUL bytes.
    nul: u32,
    /// Number of CR bytes not immediately followed by LF.
    lonecr: u32,
    /// Number of LF bytes that are not the second half of a CRLF pair.
    lonelf: u32,
    /// Number of CRLF pairs.
    crlf: u32,
    /// Number of bytes classified as printable.
    printable: u32,
    /// Number of bytes classified as non-printable (NUL included).
    nonprintable: u32,
}

/// Scan `buf` once and tally its line endings and printable/non-printable
/// bytes.
fn gather_convert_stats(buf: &[u8]) -> ConvertStats {
    let mut stats = ConvertStats {
        nul: 0,
        lonecr: 0,
        lonelf: 0,
        crlf: 0,
        printable: 0,
        nonprintable: 0,
    };
    let mut bytes = buf.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        match byte {
            b'\r' => {
                // A CR directly followed by LF is one CRLF pair; the LF is
                // consumed here so it is not also counted as a lone LF.
                if bytes.next_if_eq(&b'\n').is_some() {
                    stats.crlf += 1;
                } else {
                    stats.lonecr += 1;
                }
            }
            b'\n' => stats.lonelf += 1,
            0 => {
                stats.nul += 1;
                stats.nonprintable += 1;
            }
            // BS, HT, FF and ESC are printable.
            0x08 | 0x09 | 0x0c | 0x1b => stats.printable += 1,
            // Remaining control characters and DEL.
            1..=31 | 127 => stats.nonprintable += 1,
            _ => stats.printable += 1,
        }
    }
    // A trailing EOF (^Z, 0x1a) is not counted as non-printable.
    if let Some(0x1a) = buf.last() {
        stats.nonprintable = stats.nonprintable.saturating_sub(1);
    }
    stats
}
7576
7577/// Mirror of convert.c `has_crlf_in_index`: whether the blob currently recorded
7578/// in the index for this path is non-binary text containing a CRLF. Used only by
7579/// the auto-crlf safecrlf decision to keep an already-CRLF index blob from being
7580/// silently collapsed. A missing/unreadable blob (or a non-blob entry) counts as
7581/// "no CRLF", matching git's `read_blob_data_from_index` returning NULL.
7582fn has_crlf_in_index(odb: &FileObjectDatabase, oid: &ObjectId) -> bool {
7583    let Ok(object) = odb.read_object(oid) else {
7584        return false;
7585    };
7586    if object.object_type != ObjectType::Blob {
7587        return false;
7588    }
7589    let data = &object.body;
7590    // git short-circuits on the first '\r' via memchr before gathering stats.
7591    if !data.contains(&b'\r') {
7592        return false;
7593    }
7594    let stats = gather_convert_stats(data);
7595    !convert_is_binary(&stats) && stats.crlf > 0
7596}
7597
7598/// Mirror of convert.c `convert_is_binary`: a lone CR or NUL, or a high
7599/// non-printable ratio, marks the content as binary.
7600fn convert_is_binary(stats: &ConvertStats) -> bool {
7601    if stats.lonecr > 0 {
7602        return true;
7603    }
7604    if stats.nul > 0 {
7605        return true;
7606    }
7607    (stats.printable >> 7) < stats.nonprintable
7608}
7609
7610/// The `core.safecrlf` round-trip-warning mode, mirroring git's
7611/// `global_conv_flags_eol` (environment.c). git's *default* — when
7612/// `core.safecrlf` is unset — is [`ConvFlags::Warn`], so the warning fires even
7613/// without any explicit config.
7614#[derive(Debug, Clone, Copy, PartialEq, Eq)]
7615pub enum ConvFlags {
7616    /// `core.safecrlf=false`: never warn.
7617    Off,
7618    /// `core.safecrlf=warn` (and the unset default): emit a warning when a
7619    /// CRLF<->LF round-trip would not be reversible.
7620    Warn,
7621    /// `core.safecrlf=true`: die instead of warn.
7622    Die,
7623}
7624
7625impl ConvFlags {
7626    /// Resolve `core.safecrlf` from config, mirroring environment.c
7627    /// `git_default_core_config`: `warn` -> [`ConvFlags::Warn`], a boolean-true
7628    /// value -> [`ConvFlags::Die`], a boolean-false value -> [`ConvFlags::Off`].
7629    /// When the key is absent git leaves `global_conv_flags_eol` at its initial
7630    /// [`ConvFlags::Warn`], so unset also resolves to [`ConvFlags::Warn`].
7631    pub fn from_config(config: &GitConfig) -> Self {
7632        match config.get("core", None, "safecrlf") {
7633            Some(value) if value.eq_ignore_ascii_case("warn") => ConvFlags::Warn,
7634            Some(_) => {
7635                if config.get_bool("core", None, "safecrlf") == Some(true) {
7636                    ConvFlags::Die
7637                } else {
7638                    ConvFlags::Off
7639                }
7640            }
7641            None => ConvFlags::Warn,
7642        }
7643    }
7644}
7645
7646/// Mirror of convert.c `check_global_conv_flags_eol`: compare the pre-conversion
7647/// `old_stats` against the simulated round-trip `new_stats` and, when the
7648/// CRLF/LF content would not survive a clean+smudge cycle, warn (or die under
7649/// `core.safecrlf=true`).
7650///
7651/// Returns `Err(GitError::Exit(128))` when `flags` is [`ConvFlags::Die`] and the
7652/// round-trip is irreversible (git `die`s with exit 128 here); otherwise prints
7653/// the warning to stderr and returns `Ok(())`. This is a pure stderr-side
7654/// effect: it never changes the bytes written to the object store.
7655fn check_safe_crlf(
7656    old_stats: &ConvertStats,
7657    new_stats: &ConvertStats,
7658    flags: ConvFlags,
7659    path: &[u8],
7660) -> Result<()> {
7661    if flags == ConvFlags::Off {
7662        return Ok(());
7663    }
7664    let display = String::from_utf8_lossy(path);
7665    if old_stats.crlf > 0 && new_stats.crlf == 0 {
7666        // CRLFs would not be restored by checkout.
7667        match flags {
7668            ConvFlags::Die => {
7669                eprintln!("fatal: CRLF would be replaced by LF in {display}");
7670                return Err(GitError::Exit(128));
7671            }
7672            ConvFlags::Warn => {
7673                eprintln!(
7674                    "warning: in the working copy of '{display}', CRLF will be replaced by LF the next time Git touches it"
7675                );
7676            }
7677            ConvFlags::Off => unreachable!("handled above"),
7678        }
7679    } else if old_stats.lonelf > 0 && new_stats.lonelf == 0 {
7680        // CRLFs would be added by checkout.
7681        match flags {
7682            ConvFlags::Die => {
7683                eprintln!("fatal: LF would be replaced by CRLF in {display}");
7684                return Err(GitError::Exit(128));
7685            }
7686            ConvFlags::Warn => {
7687                eprintln!(
7688                    "warning: in the working copy of '{display}', LF will be replaced by CRLF the next time Git touches it"
7689                );
7690            }
7691            ConvFlags::Off => unreachable!("handled above"),
7692        }
7693    }
7694    Ok(())
7695}
7696
7697/// Compute the `i/` or `w/` stat string for `content`, mirroring
7698/// convert.c `gather_convert_stats_ascii`.
7699fn convert_stats_ascii(content: &[u8]) -> &'static str {
7700    if content.is_empty() {
7701        return "none";
7702    }
7703    let stats = gather_convert_stats(content);
7704    if convert_is_binary(&stats) {
7705        return "-text";
7706    }
7707    match (stats.lonelf > 0, stats.crlf > 0) {
7708        (true, false) => "lf",
7709        (false, true) => "crlf",
7710        (true, true) => "mixed",
7711        (false, false) => "none",
7712    }
7713}
7714
7715/// The resolved crlf/eol attribute action for a path, mirroring convert.c
7716/// `convert_attrs` up to `ca->attr_action` (attributes only, no config), and
7717/// `get_convert_attr_ascii` for the ascii spelling.
7718fn convert_attr_ascii(checks: &[AttributeCheck]) -> &'static str {
7719    fn state_of<'a>(checks: &'a [AttributeCheck], name: &[u8]) -> Option<&'a AttributeState> {
7720        checks
7721            .iter()
7722            .find(|check| check.attribute == name)
7723            .and_then(|check| check.state.as_ref())
7724    }
7725
7726    // git_path_check_crlf: ATTR_TRUE -> TEXT, ATTR_FALSE -> BINARY,
7727    // ATTR_UNSET -> (fall through), "input" -> TEXT_INPUT, "auto" -> AUTO,
7728    // anything else -> UNDEFINED.
7729    #[derive(Clone, Copy, PartialEq)]
7730    enum Action {
7731        Undefined,
7732        Binary,
7733        Text,
7734        TextInput,
7735        TextCrlf,
7736        Auto,
7737        AutoCrlf,
7738        AutoInput,
7739    }
7740    fn check_crlf(state: Option<&AttributeState>) -> Action {
7741        match state {
7742            Some(AttributeState::Set) => Action::Text,
7743            Some(AttributeState::Unset) => Action::Binary,
7744            Some(AttributeState::Value(value)) if value == b"input" => Action::TextInput,
7745            Some(AttributeState::Value(value)) if value == b"auto" => Action::Auto,
7746            // ATTR_UNSET / any other value -> CRLF_UNDEFINED.
7747            _ => Action::Undefined,
7748        }
7749    }
7750
7751    // Resolve from the `text` attribute, then fall back to the legacy `crlf`
7752    // alias only when `text` left the action undefined.
7753    let mut action = check_crlf(state_of(checks, b"text"));
7754    if action == Action::Undefined {
7755        action = check_crlf(state_of(checks, b"crlf"));
7756    }
7757
7758    if action != Action::Binary {
7759        // git_path_check_eol: only "lf"/"crlf" values matter.
7760        let eol = match state_of(checks, b"eol") {
7761            Some(AttributeState::Value(value)) if value == b"lf" => Some(false),
7762            Some(AttributeState::Value(value)) if value == b"crlf" => Some(true),
7763            _ => None,
7764        };
7765        action = match (action, eol) {
7766            (Action::Auto, Some(false)) => Action::AutoInput,
7767            (Action::Auto, Some(true)) => Action::AutoCrlf,
7768            (_, Some(false)) if action != Action::Auto => Action::TextInput,
7769            (_, Some(true)) if action != Action::Auto => Action::TextCrlf,
7770            _ => action,
7771        };
7772    }
7773
7774    match action {
7775        Action::Undefined => "",
7776        Action::Binary => "-text",
7777        Action::Text => "text",
7778        Action::TextInput => "text eol=lf",
7779        Action::TextCrlf => "text eol=crlf",
7780        Action::Auto => "text=auto",
7781        Action::AutoCrlf => "text=auto eol=crlf",
7782        Action::AutoInput => "text=auto eol=lf",
7783    }
7784}
7785
/// The three fields `ls-files --eol` reports for one path.
pub struct EolInfo {
    /// Stat word of the index blob (`i/...`); empty when there is no index blob.
    pub index: &'static str,
    /// Stat word of the worktree file (`w/...`); empty when the file is absent.
    pub worktree: &'static str,
    /// Resolved crlf/eol attribute action (`attr/...`).
    pub attr: &'static str,
}

impl EolInfo {
    /// Render git's `ls-files --eol` prefix, `i/%-5s w/%-5s attr/%-17s\t`:
    /// each field is left-aligned and padded (never truncated) to its width,
    /// and the prefix ends with a tab.
    pub fn format_prefix(&self) -> String {
        let EolInfo {
            index,
            worktree,
            attr,
        } = self;
        format!("i/{:<5} w/{:<5} attr/{:<17}\t", index, worktree, attr)
    }
}
7805
7806/// Compute the `ls-files --eol` info for `path`.
7807///
7808/// `index_content` is the raw index blob bytes (None when the path has no
7809/// index entry or is not a regular file). The worktree file is read from
7810/// `worktree_root/path`; if it is absent or not a regular file the `w/` field
7811/// is empty. Attributes are resolved from the worktree `.gitattributes` chain
7812/// via `attr_checks`.
7813pub fn eol_info_for_path(
7814    worktree_root: impl AsRef<Path>,
7815    path: &[u8],
7816    index_content: Option<&[u8]>,
7817    attr_checks: &[AttributeCheck],
7818) -> EolInfo {
7819    let index = index_content.map(convert_stats_ascii).unwrap_or("");
7820
7821    let worktree_root = worktree_root.as_ref();
7822    let worktree = match repo_path_to_os_path(path) {
7823        Ok(rel) => {
7824            let absolute = worktree_root.join(rel);
7825            match fs::symlink_metadata(&absolute) {
7826                // git: only regular files get a `w/` stat (lstat + S_ISREG).
7827                Ok(meta) if meta.file_type().is_file() => match fs::read(&absolute) {
7828                    Ok(content) => convert_stats_ascii_owned(&content),
7829                    Err(_) => "",
7830                },
7831                _ => "",
7832            }
7833        }
7834        Err(_) => "",
7835    };
7836
7837    let attr = convert_attr_ascii(attr_checks);
7838
7839    EolInfo {
7840        index,
7841        worktree,
7842        attr,
7843    }
7844}
7845
7846/// `convert_stats_ascii` over an owned buffer; the result is a `'static` str so
7847/// the buffer can be dropped.
7848fn convert_stats_ascii_owned(content: &[u8]) -> &'static str {
7849    convert_stats_ascii(content)
7850}
7851
7852/// Resolve the crlf/eol/text/filter attributes for `path` from the worktree
7853/// `.gitattributes` chain (the set `ls-files --eol` needs for its `attr/`
7854/// field).
7855pub fn eol_attribute_checks(
7856    worktree_root: impl AsRef<Path>,
7857    path: &[u8],
7858) -> Result<Vec<AttributeCheck>> {
7859    filter_attribute_checks(worktree_root.as_ref(), path)
7860}
7861
7862pub fn deleted_index_entries(
7863    worktree_root: impl AsRef<Path>,
7864    git_dir: impl AsRef<Path>,
7865    format: ObjectFormat,
7866) -> Result<Vec<IndexEntry>> {
7867    let worktree_root = worktree_root.as_ref();
7868    let git_dir = git_dir.as_ref();
7869    let index_path = repository_index_path(git_dir);
7870    if !index_path.exists() {
7871        return Ok(Vec::new());
7872    }
7873    let index = Index::parse(&fs::read(index_path)?, format)?;
7874    let mut deleted = Vec::new();
7875    for entry in index.entries {
7876        if !worktree_path(worktree_root, entry.path.as_bytes())?.exists() {
7877            deleted.push(entry);
7878        }
7879    }
7880    Ok(deleted)
7881}
7882
7883pub fn modified_index_entries(
7884    worktree_root: impl AsRef<Path>,
7885    git_dir: impl AsRef<Path>,
7886    format: ObjectFormat,
7887) -> Result<Vec<IndexEntry>> {
7888    let worktree_root = worktree_root.as_ref();
7889    let git_dir = git_dir.as_ref();
7890    let index_path = repository_index_path(git_dir);
7891    if !index_path.exists() {
7892        return Ok(Vec::new());
7893    }
7894    let index = Index::parse(&fs::read(&index_path)?, format)?;
7895    // Reuse the same racy-git stat shortcut here: build the cache from the index
7896    // we just parsed (no second parse) so the worktree walk can skip re-hashing
7897    // unchanged files. A cached oid is only trusted on a non-racy stat match, so
7898    // genuinely modified files still fall through to a hash and are reported.
7899    let stat_cache = IndexStatCache::from_index(&index, &index_path);
7900    let worktree = worktree_entries_with_stat_cache(
7901        worktree_root,
7902        git_dir,
7903        format,
7904        Some(&stat_cache),
7905        None,
7906        None,
7907    )?;
7908    let mut modified = Vec::new();
7909    for entry in index.entries {
7910        let Some(worktree_entry) = worktree.get(entry.path.as_bytes()) else {
7911            modified.push(entry);
7912            continue;
7913        };
7914        if worktree_entry.mode != entry.mode || worktree_entry.oid != entry.oid {
7915            modified.push(entry);
7916        }
7917    }
7918    Ok(modified)
7919}
7920
7921pub fn checkout_branch(
7922    worktree_root: impl AsRef<Path>,
7923    git_dir: impl AsRef<Path>,
7924    format: ObjectFormat,
7925    branch: &str,
7926    committer: Vec<u8>,
7927) -> Result<CheckoutResult> {
7928    let worktree_root = worktree_root.as_ref();
7929    let git_dir = git_dir.as_ref();
7930    let branch_ref = branch_ref_name(branch)?;
7931    let refs = FileRefStore::new(git_dir, format);
7932    let target = match sley_refs::resolve_ref_peeled(&refs, &branch_ref)? {
7933        Some(oid) => oid,
7934        None => {
7935            checkout_switch_head_symbolic(&refs, branch_ref, committer, branch, None, None)?;
7936            return Ok(CheckoutResult {
7937                branch: branch.into(),
7938                oid: ObjectId::null(format),
7939                files: 0,
7940            });
7941        }
7942    };
7943    let current_head = resolve_head_commit_oid(git_dir, format)?;
7944    let files = if current_head == Some(target) {
7945        0
7946    } else {
7947        checkout_commit_to_index_and_worktree(worktree_root, git_dir, format, &target)?
7948    };
7949    checkout_switch_head_symbolic(
7950        &refs,
7951        branch_ref,
7952        committer,
7953        branch,
7954        Some(target),
7955        Some(target),
7956    )?;
7957    Ok(CheckoutResult {
7958        branch: branch.into(),
7959        oid: target,
7960        files,
7961    })
7962}
7963
7964pub fn checkout_detached(
7965    worktree_root: impl AsRef<Path>,
7966    git_dir: impl AsRef<Path>,
7967    format: ObjectFormat,
7968    target: &ObjectId,
7969    committer: Vec<u8>,
7970    message: Vec<u8>,
7971) -> Result<CheckoutResult> {
7972    let worktree_root = worktree_root.as_ref();
7973    let git_dir = git_dir.as_ref();
7974    let files = checkout_commit_to_index_and_worktree(worktree_root, git_dir, format, target)?;
7975    let refs = FileRefStore::new(git_dir, format);
7976    let zero = ObjectId::null(format);
7977    let mut tx = refs.transaction();
7978    tx.update(RefUpdate {
7979        name: "HEAD".into(),
7980        expected: None,
7981        new: RefTarget::Direct(*target),
7982        reflog: Some(ReflogEntry {
7983            old_oid: zero,
7984            new_oid: *target,
7985            committer,
7986            message,
7987        }),
7988    });
7989    tx.commit()?;
7990    Ok(CheckoutResult {
7991        branch: target.to_string(),
7992        oid: *target,
7993        files,
7994    })
7995}
7996
7997/// Like [`checkout_branch`], but runs the smudge-side content filters
7998/// (`core.autocrlf`/`text`/`eol` EOL conversion and `filter.<name>.smudge`
7999/// drivers) on each blob as it is written to the worktree. `config` is the
8000/// repository config used to resolve the filters.
8001pub fn checkout_branch_filtered(
8002    worktree_root: impl AsRef<Path>,
8003    git_dir: impl AsRef<Path>,
8004    format: ObjectFormat,
8005    branch: &str,
8006    committer: Vec<u8>,
8007    config: &GitConfig,
8008) -> Result<CheckoutResult> {
8009    let worktree_root = worktree_root.as_ref();
8010    let git_dir = git_dir.as_ref();
8011    let branch_ref = branch_ref_name(branch)?;
8012    let refs = FileRefStore::new(git_dir, format);
8013    let target = match sley_refs::resolve_ref_peeled(&refs, &branch_ref)? {
8014        Some(oid) => oid,
8015        None => {
8016            checkout_switch_head_symbolic(&refs, branch_ref, committer, branch, None, None)?;
8017            return Ok(CheckoutResult {
8018                branch: branch.into(),
8019                oid: ObjectId::null(format),
8020                files: 0,
8021            });
8022        }
8023    };
8024    let current_head = resolve_head_commit_oid(git_dir, format)?;
8025    let files = if current_head == Some(target) {
8026        0
8027    } else {
8028        checkout_commit_to_index_and_worktree_filtered(
8029            worktree_root,
8030            git_dir,
8031            format,
8032            &target,
8033            Some(config),
8034        )?
8035    };
8036    checkout_switch_head_symbolic(
8037        &refs,
8038        branch_ref,
8039        committer,
8040        branch,
8041        Some(target),
8042        Some(target),
8043    )?;
8044    Ok(CheckoutResult {
8045        branch: branch.into(),
8046        oid: target,
8047        files,
8048    })
8049}
8050
8051/// Like [`checkout_detached`], but runs the smudge-side content filters (see
8052/// [`checkout_branch_filtered`]).
8053pub fn checkout_detached_filtered(
8054    worktree_root: impl AsRef<Path>,
8055    git_dir: impl AsRef<Path>,
8056    format: ObjectFormat,
8057    target: &ObjectId,
8058    committer: Vec<u8>,
8059    message: Vec<u8>,
8060    config: &GitConfig,
8061) -> Result<CheckoutResult> {
8062    let worktree_root = worktree_root.as_ref();
8063    let git_dir = git_dir.as_ref();
8064    let files = checkout_commit_to_index_and_worktree_filtered(
8065        worktree_root,
8066        git_dir,
8067        format,
8068        target,
8069        Some(config),
8070    )?;
8071    let refs = FileRefStore::new(git_dir, format);
8072    let zero = ObjectId::null(format);
8073    let mut tx = refs.transaction();
8074    tx.update(RefUpdate {
8075        name: "HEAD".into(),
8076        expected: None,
8077        new: RefTarget::Direct(*target),
8078        reflog: Some(ReflogEntry {
8079            old_oid: zero,
8080            new_oid: *target,
8081            committer,
8082            message,
8083        }),
8084    });
8085    tx.commit()?;
8086    Ok(CheckoutResult {
8087        branch: target.to_string(),
8088        oid: *target,
8089        files,
8090    })
8091}
8092
8093fn checkout_commit_to_index_and_worktree(
8094    worktree_root: &Path,
8095    git_dir: &Path,
8096    format: ObjectFormat,
8097    target: &ObjectId,
8098) -> Result<usize> {
8099    checkout_commit_to_index_and_worktree_filtered(worktree_root, git_dir, format, target, None)
8100}
8101
8102/// Like [`checkout_commit_to_index_and_worktree`] but optionally runs the
8103/// smudge-side content filters (see [`apply_smudge_filter`]) on each blob before
8104/// it is written to the worktree. Attribute lookups use the `.gitattributes`
8105/// recorded in the *target tree* so the rules of the checked-out commit apply.
8106fn checkout_commit_to_index_and_worktree_filtered(
8107    worktree_root: &Path,
8108    git_dir: &Path,
8109    format: ObjectFormat,
8110    target: &ObjectId,
8111    smudge_config: Option<&GitConfig>,
8112) -> Result<usize> {
8113    let status = short_status(worktree_root, git_dir, format)?;
8114    if status
8115        .iter()
8116        .any(|entry| !status_entry_is_untracked_or_ignored(entry))
8117    {
8118        return Err(GitError::Transaction(
8119            "checkout requires a clean working tree".into(),
8120        ));
8121    }
8122    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8123    let commit = read_commit(&db, format, target)?;
8124    let mut target_entries = BTreeMap::new();
8125    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
8126
8127    let attributes = smudge_config
8128        .map(|_| build_tree_attribute_matcher(worktree_root, &db, format, &commit.tree))
8129        .transpose()?;
8130
8131    for path in read_index_entries(git_dir, format)?.keys() {
8132        if !target_entries.contains_key(path) {
8133            remove_worktree_file(worktree_root, path)?;
8134        }
8135    }
8136
8137    let mut index_entries = Vec::new();
8138    for (path, entry) in &target_entries {
8139        // Gitlinks go through the shared materialization step (mkdir + zeroed
8140        // stat); smudge filters never apply to a submodule directory.
8141        if entry.mode == 0o160000 {
8142            index_entries.push(materialize_tree_entry(&db, worktree_root, path, entry)?);
8143            continue;
8144        }
8145        let object = read_expected_object(&db, &entry.oid, ObjectType::Blob)?;
8146        let body: Cow<'_, [u8]> = match (smudge_config, &attributes) {
8147            (Some(config), Some(matcher)) => {
8148                let checks = matcher.attributes_for_path(path, &filter_attribute_names(), false);
8149                apply_smudge_filter_with_attributes_cow(config, &checks, path, &object.body)?
8150            }
8151            _ => Cow::Borrowed(&object.body),
8152        };
8153        let file_path = worktree_path(worktree_root, path)?;
8154        if let Some(parent) = file_path.parent() {
8155            fs::create_dir_all(parent)?;
8156        }
8157        fs::write(&file_path, &body)?;
8158        set_worktree_file_mode(&file_path, entry.mode)?;
8159        let metadata = fs::metadata(&file_path)?;
8160        let mut index_entry = index_entry_from_metadata(path.clone(), entry.oid, &metadata);
8161        index_entry.mode = entry.mode;
8162        index_entries.push(index_entry);
8163    }
8164    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
8165    fs::write(
8166        repository_index_path(git_dir),
8167        Index {
8168            version: 2,
8169            entries: index_entries,
8170            extensions: Vec::new(),
8171            checksum: None,
8172        }
8173        .write(format)?,
8174    )?;
8175    Ok(target_entries.len())
8176}
8177
8178/// Build an [`AttributeMatcher`] from the `.gitattributes` files contained in a
8179/// tree, plus the repo-level (`core.attributesFile`, `.git/info/attributes`)
8180/// sources, mirroring [`standard_attributes_for_path_from_tree`].
8181fn build_tree_attribute_matcher(
8182    worktree_root: &Path,
8183    db: &FileObjectDatabase,
8184    format: ObjectFormat,
8185    tree_oid: &ObjectId,
8186) -> Result<AttributeMatcher> {
8187    let mut matcher = AttributeMatcher::default();
8188    if !matcher.read_configured_attributes(worktree_root) {
8189        matcher.read_default_global_attributes();
8190    }
8191    collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
8192    read_attribute_patterns(
8193        worktree_root.join(".git").join("info").join("attributes"),
8194        &mut matcher,
8195        &[],
8196        b".git/info/attributes",
8197    );
8198    Ok(matcher)
8199}
8200
8201/// Sparse- and skip-worktree-aware variant of
8202/// [`checkout_commit_to_index_and_worktree`].
8203///
8204/// When `sparse` is `None` this behaves like the plain checkout except that it
8205/// preserves any pre-existing skip-worktree bits (so an already-sparse worktree
8206/// is not silently re-expanded). When `sparse` is `Some`, every target path is
8207/// additionally classified against the patterns: in-cone paths are written and
8208/// have their skip-worktree bit cleared, while out-of-cone paths are left out
8209/// of the worktree, get their skip-worktree bit set, and have any stale file
8210/// removed.
8211fn checkout_commit_to_index_and_worktree_sparse(
8212    worktree_root: &Path,
8213    git_dir: &Path,
8214    format: ObjectFormat,
8215    target: &ObjectId,
8216    sparse: Option<(&SparseCheckout, SparseCheckoutMode)>,
8217) -> Result<usize> {
8218    let previously_skipped = skip_worktree_paths(git_dir, format)?;
8219    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8220    let commit = read_commit(&db, format, target)?;
8221    let mut target_entries = BTreeMap::new();
8222    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
8223
8224    // Honor skip-worktree: a path whose worktree file is intentionally absent
8225    // must not be treated as a dirty (deleted) change blocking the checkout.
8226    let status = short_status(worktree_root, git_dir, format)?;
8227    if status.iter().any(|entry| {
8228        if previously_skipped.contains(entry.path.as_slice()) {
8229            return false;
8230        }
8231        // Submodule state never blocks a checkout: upstream unpack-trees
8232        // treats gitlinks as always up-to-date (ie_match_stat refuses to pay
8233        // for a submodule dirtiness probe), so new commits / dirty content in
8234        // a submodule must not fail the branch switch.
8235        if entry.index_mode == Some(0o160000) || entry.worktree_mode == Some(0o160000) {
8236            return false;
8237        }
8238        // An untracked embedded repository where the target tree records a
8239        // gitlink is reused as-is (upstream entry.c write_entry: mkdir with
8240        // EEXIST is success), so it does not block the checkout either.
8241        if entry.index == b'?' && entry.worktree == b'?' {
8242            let path = entry
8243                .path
8244                .strip_suffix(b"/")
8245                .unwrap_or(entry.path.as_slice());
8246            if target_entries
8247                .get(path)
8248                .is_some_and(|target| target.mode == 0o160000)
8249            {
8250                return false;
8251            }
8252        }
8253        true
8254    }) {
8255        return Err(GitError::Transaction(
8256            "checkout requires a clean working tree".into(),
8257        ));
8258    }
8259
8260    let matcher = sparse.map(|(spec, mode)| SparseMatcher::new(spec, mode));
8261
8262    for path in read_index_entries(git_dir, format)?.keys() {
8263        if target_entries.contains_key(path) {
8264            continue;
8265        }
8266        // Do not disturb the worktree state of an intentionally skipped path.
8267        if previously_skipped.contains(path) {
8268            continue;
8269        }
8270        remove_worktree_file(worktree_root, path)?;
8271    }
8272
8273    let mut index_entries = Vec::new();
8274    for (path, entry) in &target_entries {
8275        let in_cone = matcher.as_ref().is_none_or(|matcher| {
8276            // A path already marked skip-worktree stays out unless it now
8277            // matches the sparse cone, mirroring upstream "honor skip-worktree".
8278            matcher.includes_file(path)
8279        });
8280        let index_entry = if in_cone {
8281            // `materialize_tree_entry` leaves flags_extended at 0, so the
8282            // skip-worktree bit is already clear for in-cone paths.
8283            materialize_tree_entry(&db, worktree_root, path, entry)?
8284        } else {
8285            // Out of cone: ensure no stale worktree file remains and synthesize
8286            // an index entry straight from the tree (no worktree metadata),
8287            // then mark it skip-worktree.
8288            remove_worktree_file(worktree_root, path)?;
8289            let mut index_entry = restored_head_index_entry(worktree_root, &db, path, entry)?;
8290            set_skip_worktree(&mut index_entry);
8291            index_entry
8292        };
8293        index_entries.push(index_entry);
8294    }
8295    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
8296    let mut index = Index {
8297        version: 2,
8298        entries: index_entries,
8299        extensions: Vec::new(),
8300        checksum: None,
8301    };
8302    normalize_index_version_for_extended_flags(&mut index);
8303    fs::write(repository_index_path(git_dir), index.write(format)?)?;
8304    Ok(target_entries.len())
8305}
8306
8307fn skip_worktree_paths(git_dir: &Path, format: ObjectFormat) -> Result<BTreeSet<Vec<u8>>> {
8308    let index_path = repository_index_path(git_dir);
8309    if !index_path.exists() {
8310        return Ok(BTreeSet::new());
8311    }
8312    let index = Index::parse(&fs::read(index_path)?, format)?;
8313    Ok(index
8314        .entries
8315        .into_iter()
8316        .filter(index_entry_skip_worktree)
8317        .map(|entry| entry.path.into_bytes())
8318        .collect())
8319}
8320
8321pub fn restore_worktree_paths(
8322    worktree_root: impl AsRef<Path>,
8323    git_dir: impl AsRef<Path>,
8324    format: ObjectFormat,
8325    paths: &[PathBuf],
8326) -> Result<RestoreResult> {
8327    restore_worktree_paths_inner(
8328        worktree_root.as_ref(),
8329        git_dir.as_ref(),
8330        format,
8331        paths,
8332        None,
8333    )
8334}
8335
8336/// Like [`restore_worktree_paths`], applying the smudge-side content filters
8337/// (CRLF / ident / filter drivers) the way a checkout writes blobs.
8338pub fn restore_worktree_paths_filtered(
8339    worktree_root: impl AsRef<Path>,
8340    git_dir: impl AsRef<Path>,
8341    format: ObjectFormat,
8342    paths: &[PathBuf],
8343    config: &GitConfig,
8344) -> Result<RestoreResult> {
8345    restore_worktree_paths_inner(
8346        worktree_root.as_ref(),
8347        git_dir.as_ref(),
8348        format,
8349        paths,
8350        Some(config),
8351    )
8352}
8353
8354fn restore_worktree_paths_inner(
8355    worktree_root: &Path,
8356    git_dir: &Path,
8357    format: ObjectFormat,
8358    paths: &[PathBuf],
8359    smudge_config: Option<&GitConfig>,
8360) -> Result<RestoreResult> {
8361    let index_path = repository_index_path(git_dir);
8362    if !index_path.exists() {
8363        return Err(GitError::Exit(1));
8364    }
8365    let index = Index::parse(&fs::read(index_path)?, format)?;
8366    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8367    let mut restored = BTreeSet::new();
8368    for path in paths {
8369        let absolute = if path.is_absolute() {
8370            path.clone()
8371        } else {
8372            worktree_root.join(path)
8373        };
8374        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
8375            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
8376        })?;
8377        let git_path = git_path_bytes(relative)?;
8378        let recursive = path == Path::new(".")
8379            || path.to_string_lossy().ends_with('/')
8380            || absolute.is_dir()
8381            || index_has_entry_under(&index.entries, &git_path);
8382        let mut matched = false;
8383        for entry in &index.entries {
8384            if entry.path.as_bytes() == git_path.as_slice()
8385                || (recursive && index_entry_is_under_path(entry.path.as_bytes(), &git_path))
8386            {
8387                restore_index_entry(worktree_root, git_dir, format, &db, entry, smudge_config)?;
8388                restored.insert(entry.path.clone());
8389                matched = true;
8390            }
8391        }
8392        if !matched {
8393            eprintln!(
8394                "error: pathspec '{}' did not match any file(s) known to git",
8395                path.display()
8396            );
8397            return Err(GitError::Exit(1));
8398        }
8399    }
8400    Ok(RestoreResult {
8401        restored: restored.len(),
8402    })
8403}
8404
8405pub fn restore_index_paths_from_head(
8406    worktree_root: impl AsRef<Path>,
8407    git_dir: impl AsRef<Path>,
8408    format: ObjectFormat,
8409    paths: &[PathBuf],
8410) -> Result<RestoreResult> {
8411    let worktree_root = worktree_root.as_ref();
8412    let git_dir = git_dir.as_ref();
8413    let index_path = repository_index_path(git_dir);
8414    let index = if index_path.exists() {
8415        Index::parse(&fs::read(&index_path)?, format)?
8416    } else {
8417        Index {
8418            version: 2,
8419            entries: Vec::new(),
8420            extensions: Vec::new(),
8421            checksum: None,
8422        }
8423    };
8424    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8425    let head_entries = head_tree_entries(git_dir, format, &db)?;
8426    restore_index_paths_from_entries(
8427        worktree_root,
8428        git_dir,
8429        format,
8430        &db,
8431        index,
8432        &head_entries,
8433        paths,
8434    )
8435}
8436
8437pub fn restore_index_paths_from_tree(
8438    worktree_root: impl AsRef<Path>,
8439    git_dir: impl AsRef<Path>,
8440    format: ObjectFormat,
8441    tree_oid: &ObjectId,
8442    paths: &[PathBuf],
8443) -> Result<RestoreResult> {
8444    let worktree_root = worktree_root.as_ref();
8445    let git_dir = git_dir.as_ref();
8446    let index_path = repository_index_path(git_dir);
8447    let index = if index_path.exists() {
8448        Index::parse(&fs::read(&index_path)?, format)?
8449    } else {
8450        Index {
8451            version: 2,
8452            entries: Vec::new(),
8453            extensions: Vec::new(),
8454            checksum: None,
8455        }
8456    };
8457    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8458    let source_entries = tree_entries(&db, format, tree_oid)?;
8459    restore_index_paths_from_entries(
8460        worktree_root,
8461        git_dir,
8462        format,
8463        &db,
8464        index,
8465        &source_entries,
8466        paths,
8467    )
8468}
8469
8470fn restore_index_paths_from_entries(
8471    worktree_root: &Path,
8472    git_dir: &Path,
8473    format: ObjectFormat,
8474    db: &FileObjectDatabase,
8475    index: Index,
8476    source_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
8477    paths: &[PathBuf],
8478) -> Result<RestoreResult> {
8479    let mut index_entries = index
8480        .entries
8481        .into_iter()
8482        .map(|entry| (entry.path.as_bytes().to_vec(), entry))
8483        .collect::<BTreeMap<_, _>>();
8484    let mut restored = BTreeSet::new();
8485    for path in paths {
8486        let absolute = if path.is_absolute() {
8487            path.clone()
8488        } else {
8489            worktree_root.join(path)
8490        };
8491        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
8492            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
8493        })?;
8494        let git_path = git_path_bytes(relative)?;
8495        let recursive = path == Path::new(".")
8496            || path.to_string_lossy().ends_with('/')
8497            || absolute.is_dir()
8498            || index_entries
8499                .keys()
8500                .any(|entry| index_entry_is_under_path(entry, &git_path))
8501            || source_entries
8502                .keys()
8503                .any(|entry| index_entry_is_under_path(entry, &git_path));
8504        let mut matched_paths = BTreeSet::new();
8505        for path in index_entries.keys().chain(source_entries.keys()) {
8506            if path.as_slice() == git_path.as_slice()
8507                || (recursive && index_entry_is_under_path(path, &git_path))
8508            {
8509                matched_paths.insert(path.clone());
8510            }
8511        }
8512        if matched_paths.is_empty() {
8513            eprintln!(
8514                "error: pathspec '{}' did not match any file(s) known to git",
8515                path.display()
8516            );
8517            return Err(GitError::Exit(1));
8518        }
8519        for path in matched_paths {
8520            if let Some(entry) = source_entries.get(&path) {
8521                // git's pathspec reset (`reset_index` → diff against the source
8522                // tree) only rewrites entries that actually CHANGE: an entry whose
8523                // oid and mode already equal the source is left untouched, so its
8524                // cached stat is preserved and `git diff-files` stays clean (t7102
8525                // "resetting an unmodified path is a no-op"). Only when the entry
8526                // genuinely changes does git write a fresh, stat-zeroed entry.
8527                let unchanged = index_entries
8528                    .get(&path)
8529                    .is_some_and(|existing| existing.oid == entry.oid && existing.mode == entry.mode);
8530                if !unchanged {
8531                    index_entries.insert(
8532                        path.clone(),
8533                        restored_head_index_entry(worktree_root, db, &path, entry)?,
8534                    );
8535                }
8536            } else {
8537                index_entries.remove(&path);
8538            }
8539            restored.insert(path);
8540        }
8541    }
8542    let mut entries = index_entries.into_values().collect::<Vec<_>>();
8543    entries.sort_by(|left, right| left.path.cmp(&right.path));
8544    fs::write(
8545        repository_index_path(git_dir),
8546        Index {
8547            version: 2,
8548            entries,
8549            extensions: Vec::new(),
8550            checksum: None,
8551        }
8552        .write(format)?,
8553    )?;
8554    Ok(RestoreResult {
8555        restored: restored.len(),
8556    })
8557}
8558
8559pub fn restore_index_and_worktree_paths_from_head(
8560    worktree_root: impl AsRef<Path>,
8561    git_dir: impl AsRef<Path>,
8562    format: ObjectFormat,
8563    paths: &[PathBuf],
8564) -> Result<RestoreResult> {
8565    let worktree_root = worktree_root.as_ref();
8566    let git_dir = git_dir.as_ref();
8567    let index_path = repository_index_path(git_dir);
8568    let index = if index_path.exists() {
8569        Index::parse(&fs::read(&index_path)?, format)?
8570    } else {
8571        Index {
8572            version: 2,
8573            entries: Vec::new(),
8574            extensions: Vec::new(),
8575            checksum: None,
8576        }
8577    };
8578    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8579    let head_entries = head_tree_entries(git_dir, format, &db)?;
8580    restore_index_and_worktree_paths_from_entries(
8581        worktree_root,
8582        git_dir,
8583        format,
8584        &db,
8585        index,
8586        &head_entries,
8587        paths,
8588    )
8589}
8590
8591pub fn restore_index_and_worktree_paths_from_tree(
8592    worktree_root: impl AsRef<Path>,
8593    git_dir: impl AsRef<Path>,
8594    format: ObjectFormat,
8595    tree_oid: &ObjectId,
8596    paths: &[PathBuf],
8597) -> Result<RestoreResult> {
8598    let worktree_root = worktree_root.as_ref();
8599    let git_dir = git_dir.as_ref();
8600    let index_path = repository_index_path(git_dir);
8601    let index = if index_path.exists() {
8602        Index::parse(&fs::read(&index_path)?, format)?
8603    } else {
8604        Index {
8605            version: 2,
8606            entries: Vec::new(),
8607            extensions: Vec::new(),
8608            checksum: None,
8609        }
8610    };
8611    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8612    let source_entries = tree_entries(&db, format, tree_oid)?;
8613    restore_index_and_worktree_paths_from_entries(
8614        worktree_root,
8615        git_dir,
8616        format,
8617        &db,
8618        index,
8619        &source_entries,
8620        paths,
8621    )
8622}
8623
8624fn restore_index_and_worktree_paths_from_entries(
8625    worktree_root: &Path,
8626    git_dir: &Path,
8627    format: ObjectFormat,
8628    db: &FileObjectDatabase,
8629    index: Index,
8630    source_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
8631    paths: &[PathBuf],
8632) -> Result<RestoreResult> {
8633    let mut index_entries = index
8634        .entries
8635        .into_iter()
8636        .map(|entry| (entry.path.as_bytes().to_vec(), entry))
8637        .collect::<BTreeMap<_, _>>();
8638    let mut restored = BTreeSet::new();
8639    for path in paths {
8640        let absolute = if path.is_absolute() {
8641            path.clone()
8642        } else {
8643            worktree_root.join(path)
8644        };
8645        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
8646            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
8647        })?;
8648        let git_path = git_path_bytes(relative)?;
8649        let recursive = path == Path::new(".")
8650            || path.to_string_lossy().ends_with('/')
8651            || absolute.is_dir()
8652            || index_entries
8653                .keys()
8654                .any(|entry| index_entry_is_under_path(entry, &git_path))
8655            || source_entries
8656                .keys()
8657                .any(|entry| index_entry_is_under_path(entry, &git_path));
8658        let mut matched_paths = BTreeSet::new();
8659        for path in index_entries.keys().chain(source_entries.keys()) {
8660            if path.as_slice() == git_path.as_slice()
8661                || (recursive && index_entry_is_under_path(path, &git_path))
8662            {
8663                matched_paths.insert(path.clone());
8664            }
8665        }
8666        if matched_paths.is_empty() {
8667            eprintln!(
8668                "error: pathspec '{}' did not match any file(s) known to git",
8669                path.display()
8670            );
8671            return Err(GitError::Exit(1));
8672        }
8673        for path in matched_paths {
8674            if let Some(entry) = source_entries.get(&path) {
8675                index_entries.insert(
8676                    path.clone(),
8677                    restore_head_entry_to_worktree_and_index(worktree_root, db, &path, entry)?,
8678                );
8679            } else {
8680                index_entries.remove(&path);
8681                remove_worktree_file(worktree_root, &path)?;
8682            }
8683            restored.insert(path);
8684        }
8685    }
8686    let mut entries = index_entries.into_values().collect::<Vec<_>>();
8687    entries.sort_by(|left, right| left.path.cmp(&right.path));
8688    fs::write(
8689        repository_index_path(git_dir),
8690        Index {
8691            version: 2,
8692            entries,
8693            extensions: Vec::new(),
8694            checksum: None,
8695        }
8696        .write(format)?,
8697    )?;
8698    Ok(RestoreResult {
8699        restored: restored.len(),
8700    })
8701}
8702
8703pub fn reset_index_and_worktree_to_commit(
8704    worktree_root: impl AsRef<Path>,
8705    git_dir: impl AsRef<Path>,
8706    format: ObjectFormat,
8707    commit_oid: &ObjectId,
8708) -> Result<RestoreResult> {
8709    let worktree_root = worktree_root.as_ref();
8710    let git_dir = git_dir.as_ref();
8711    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8712    let commit = read_commit(&db, format, commit_oid)?;
8713    let mut target_entries = BTreeMap::new();
8714    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
8715
8716    for path in read_index_entries(git_dir, format)?.keys() {
8717        if !target_entries.contains_key(path) {
8718            remove_worktree_file(worktree_root, path)?;
8719        }
8720    }
8721
8722    let mut index_entries = Vec::new();
8723    for (path, entry) in &target_entries {
8724        index_entries.push(materialize_tree_entry(&db, worktree_root, path, entry)?);
8725    }
8726    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
8727    fs::write(
8728        repository_index_path(git_dir),
8729        Index {
8730            version: 2,
8731            entries: index_entries,
8732            extensions: Vec::new(),
8733            checksum: None,
8734        }
8735        .write(format)?,
8736    )?;
8737    Ok(RestoreResult {
8738        restored: target_entries.len(),
8739    })
8740}
8741
8742/// Write one target tree entry into the worktree and return its index entry —
8743/// the shared materialization step for every checkout/reset worktree rebuild.
8744///
8745/// Gitlinks (mode 160000) never touch the object database: their oid names a
8746/// commit in the *submodule's* repository, not an object here. Upstream
8747/// (entry.c `write_entry` S_IFGITLINK) just mkdirs the path — an
8748/// already-populated submodule is left untouched (EEXIST is success) — and
8749/// records the oid in the index with a zeroed stat so status re-evaluates the
8750/// gitlink against the embedded repository's HEAD.
8751fn materialize_tree_entry(
8752    db: &FileObjectDatabase,
8753    worktree_root: &Path,
8754    path: &[u8],
8755    entry: &TrackedEntry,
8756) -> Result<IndexEntry> {
8757    if entry.mode == 0o160000 {
8758        let dir_path = worktree_path(worktree_root, path)?;
8759        fs::create_dir_all(&dir_path)?;
8760        return Ok(IndexEntry {
8761            ctime_seconds: 0,
8762            ctime_nanoseconds: 0,
8763            mtime_seconds: 0,
8764            mtime_nanoseconds: 0,
8765            dev: 0,
8766            ino: 0,
8767            mode: entry.mode,
8768            uid: 0,
8769            gid: 0,
8770            size: 0,
8771            oid: entry.oid,
8772            flags: path.len().min(0x0fff) as u16,
8773            flags_extended: 0,
8774            path: BString::from(path),
8775        });
8776    }
8777    let file_path = write_worktree_blob_entry(db, worktree_root, path, entry)?;
8778    let metadata = fs::symlink_metadata(&file_path)?;
8779    let mut index_entry = index_entry_from_metadata(path.to_vec(), entry.oid, &metadata);
8780    index_entry.mode = entry.mode;
8781    Ok(index_entry)
8782}
8783
8784/// Materialize a blob (or symlink) tree entry into the worktree at `path`,
8785/// returning the absolute path written. Shared by every checkout/reset worktree
8786/// rebuild so the type-change handling is identical everywhere.
8787///
8788/// Mirrors git's entry.c `write_entry`: it unlinks whatever currently occupies
8789/// the path before creating the new object, so a type transition (regular file ⇄
8790/// symlink, or a stale symlink/directory in the way) is overwritten rather than
8791/// left in place or failing with EEXIST. A plain `fs::write` follows an existing
8792/// symlink and would write *through* it (leaving the link), so the unlink is
8793/// load-bearing for the symlink-stash / reset-hard type-change cases.
8794fn write_worktree_blob_entry(
8795    db: &FileObjectDatabase,
8796    worktree_root: &Path,
8797    path: &[u8],
8798    entry: &TrackedEntry,
8799) -> Result<PathBuf> {
8800    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
8801    let file_path = worktree_path(worktree_root, path)?;
8802    if let Some(parent) = file_path.parent() {
8803        fs::create_dir_all(parent)?;
8804    }
8805    remove_existing_worktree_path(&file_path)?;
8806    if (entry.mode & 0o170000) == 0o120000 {
8807        // Symlink entry (mode 120000): the blob body is the link target.
8808        #[cfg(unix)]
8809        {
8810            use std::os::unix::ffi::OsStringExt;
8811            let target =
8812                std::path::PathBuf::from(std::ffi::OsString::from_vec(object.body.clone()));
8813            std::os::unix::fs::symlink(&target, &file_path)?;
8814        }
8815        #[cfg(not(unix))]
8816        fs::write(&file_path, &object.body)?;
8817    } else {
8818        fs::write(&file_path, &object.body)?;
8819        set_worktree_file_mode(&file_path, entry.mode)?;
8820    }
8821    Ok(file_path)
8822}
8823
8824/// Remove whatever currently occupies a worktree path before writing a new
8825/// object there — a symlink (even a dangling one, which `Path::exists` misses),
8826/// a regular file, or a directory subtree. Uses `symlink_metadata` (lstat) so a
8827/// symlink is removed as the link, never followed.
8828fn remove_existing_worktree_path(file_path: &Path) -> Result<()> {
8829    let metadata = match fs::symlink_metadata(file_path) {
8830        Ok(metadata) => metadata,
8831        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
8832        Err(err) => return Err(err.into()),
8833    };
8834    if metadata.is_dir() {
8835        // A directory in the way of a file (D/F transition) or a populated
8836        // gitlink: remove the subtree so the file can be created.
8837        match fs::remove_dir_all(file_path) {
8838            Ok(()) => {}
8839            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
8840            Err(err) => return Err(err.into()),
8841        }
8842    } else {
8843        fs::remove_file(file_path)?;
8844    }
8845    Ok(())
8846}
8847
8848/// chmod a freshly-materialized worktree blob to match its tree/index entry mode.
8849///
8850/// `fs::write` truncates an existing file *in place*, preserving its prior
8851/// permission bits. For a mode-only diff (identical oid, 100644 vs 100755) that
8852/// leaves the wrong exec bit on disk — which is exactly the `reset --hard` /
8853/// checkout bug this guards against. git's checkout path unlinks+recreates the
8854/// file precisely to "get the new one with the right permissions" (entry.c
8855/// `write_entry`); we instead chmod the just-written file.
8856///
8857/// Mirrors the observable result of git's `create_file` (entry.c):
8858/// `(mode & 0100) ? 0777 : 0666` masked by the standard umask (0022), i.e. 0755
8859/// for an executable entry and 0644 otherwise. Only regular-file entries (100644
8860/// / 100755) are chmod'd; gitlinks and symlinks have no meaningful exec bit.
8861///
8862/// We set the perms directly (rather than relying on a fresh `open(2)` to apply
8863/// the umask) because `fs::write` truncates an existing file in place, leaving its
8864/// old permission bits — the very thing that breaks a mode-only checkout/reset.
8865/// Matching git's default-umask output keeps the worktree byte-for-byte aligned
8866/// with the oracle, which is what the parity suite asserts.
8867#[cfg(unix)]
8868fn set_worktree_file_mode(file_path: &Path, entry_mode: u32) -> Result<()> {
8869    use std::os::unix::fs::PermissionsExt;
8870    let perms = match entry_mode {
8871        0o100755 => 0o755,
8872        0o100644 => 0o644,
8873        _ => return Ok(()),
8874    };
8875    fs::set_permissions(file_path, fs::Permissions::from_mode(perms))?;
8876    Ok(())
8877}
8878
8879#[cfg(not(unix))]
8880fn set_worktree_file_mode(_file_path: &Path, _entry_mode: u32) -> Result<()> {
8881    Ok(())
8882}
8883
8884/// Materialize a tree object into the index and worktree.
8885pub fn checkout_tree_to_index_and_worktree(
8886    worktree_root: impl AsRef<Path>,
8887    git_dir: impl AsRef<Path>,
8888    format: ObjectFormat,
8889    tree_oid: &ObjectId,
8890) -> Result<RestoreResult> {
8891    let worktree_root = worktree_root.as_ref();
8892    let git_dir = git_dir.as_ref();
8893    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8894    let mut target_entries = BTreeMap::new();
8895    collect_tree_entries(&db, format, tree_oid, &mut target_entries)?;
8896
8897    for path in read_index_entries(git_dir, format)?.keys() {
8898        if !target_entries.contains_key(path) {
8899            remove_worktree_file(worktree_root, path)?;
8900        }
8901    }
8902
8903    let mut index_entries = Vec::new();
8904    for (path, entry) in &target_entries {
8905        index_entries.push(materialize_tree_entry(&db, worktree_root, path, entry)?);
8906    }
8907    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
8908    fs::write(
8909        repository_index_path(git_dir),
8910        Index {
8911            version: 2,
8912            entries: index_entries,
8913            extensions: Vec::new(),
8914            checksum: None,
8915        }
8916        .write(format)?,
8917    )?;
8918    Ok(RestoreResult {
8919        restored: target_entries.len(),
8920    })
8921}
8922
8923pub fn reset_index_to_commit(
8924    worktree_root: impl AsRef<Path>,
8925    git_dir: impl AsRef<Path>,
8926    format: ObjectFormat,
8927    commit_oid: &ObjectId,
8928) -> Result<RestoreResult> {
8929    let worktree_root = worktree_root.as_ref();
8930    let git_dir = git_dir.as_ref();
8931    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8932    let commit = read_commit(&db, format, commit_oid)?;
8933    let mut target_entries = BTreeMap::new();
8934    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
8935    // git's `reset --mixed` preserves the skip-worktree bit on entries that survive
8936    // the reset (t7102 "--mixed preserves skip-worktree"): carry it forward from the
8937    // pre-reset index keyed by path, so reconstructed entries keep CE_SKIP_WORKTREE.
8938    let index_path = repository_index_path(git_dir);
8939    let prior_skip_worktree: BTreeSet<Vec<u8>> = match fs::read(&index_path) {
8940        Ok(bytes) => Index::parse(&bytes, format)?
8941            .entries
8942            .iter()
8943            .filter(|entry| entry.is_skip_worktree())
8944            .map(|entry| entry.path.as_bytes().to_vec())
8945            .collect(),
8946        Err(err) if err.kind() == std::io::ErrorKind::NotFound => BTreeSet::new(),
8947        Err(err) => return Err(err.into()),
8948    };
8949    let mut index_entries = Vec::new();
8950    for (path, entry) in &target_entries {
8951        let mut restored = restored_head_index_entry(worktree_root, &db, path, entry)?;
8952        if prior_skip_worktree.contains(path) {
8953            restored.set_skip_worktree(true);
8954        }
8955        index_entries.push(restored);
8956    }
8957    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
8958    let mut index = Index {
8959        version: 2,
8960        entries: index_entries,
8961        extensions: Vec::new(),
8962        checksum: None,
8963    };
8964    index.upgrade_version_for_flags();
8965    fs::write(&index_path, index.write(format)?)?;
8966    Ok(RestoreResult {
8967        restored: target_entries.len(),
8968    })
8969}
8970
8971/// Build a fresh in-memory index that mirrors the tree `tree_oid`, the way
8972/// `git read-tree <tree>` does: every blob, symlink, and gitlink leaf (found by
8973/// recursing subtrees) becomes a stage-0 entry carrying the tree mode and oid,
8974/// with a fully zeroed stat (so nothing is treated as stat-clean) and size 0.
8975/// Entries are sorted by path; the index is version 2 with no extensions.
8976///
8977/// This does not touch the worktree or write anything to disk — serialize the
8978/// result with [`Index::write`] (and persist it) when you want to replace
8979/// `.git/index`.
8980pub fn index_from_tree(
8981    db: &FileObjectDatabase,
8982    format: ObjectFormat,
8983    tree_oid: &ObjectId,
8984) -> Result<Index> {
8985    let mut entries: Vec<IndexEntry> = Vec::new();
8986    if *tree_oid != ObjectId::empty_tree(format) {
8987        let mut tree_entries = BTreeMap::new();
8988        collect_tree_entries(db, format, tree_oid, &mut tree_entries)?;
8989        entries.reserve(tree_entries.len());
8990        for (path, entry) in tree_entries {
8991            let name_len = (path.len().min(0x0fff)) as u16;
8992            entries.push(IndexEntry {
8993                ctime_seconds: 0,
8994                ctime_nanoseconds: 0,
8995                mtime_seconds: 0,
8996                mtime_nanoseconds: 0,
8997                dev: 0,
8998                ino: 0,
8999                mode: entry.mode,
9000                uid: 0,
9001                gid: 0,
9002                size: 0,
9003                oid: entry.oid,
9004                flags: name_len,
9005                flags_extended: 0,
9006                path: path.into(),
9007            });
9008        }
9009    }
9010    // git orders index entries by path bytes; BTreeMap already yields that, but
9011    // sort explicitly so the contract holds regardless of how entries arrive.
9012    entries.sort_by(|left, right| left.path.cmp(&right.path));
9013    Ok(Index {
9014        version: 2,
9015        entries,
9016        extensions: Vec::new(),
9017        checksum: None,
9018    })
9019}
9020
9021/// Enforces a [`SparseCheckout`] against the current index and worktree.
9022///
9023/// Every stage-0 index entry is classified with the sparse patterns (see
9024/// [`SparseCheckoutMode`] for the matching semantics):
9025///
9026/// * **In cone**: the skip-worktree bit is cleared and, if the worktree file is
9027///   missing, it is re-materialized from the entry's blob in the object
9028///   database. Existing worktree files are left untouched so local content is
9029///   preserved.
9030/// * **Out of cone**: the skip-worktree bit is set and any existing worktree
9031///   file is removed (empty parent directories are pruned).
9032///
9033/// Conflicted entries (stage != 0) are never given the skip-worktree bit and
9034/// are left alone, matching upstream Git. The index is rewritten in place.
9035pub fn apply_sparse_checkout(
9036    worktree_root: impl AsRef<Path>,
9037    git_dir: impl AsRef<Path>,
9038    format: ObjectFormat,
9039    sparse: &SparseCheckout,
9040) -> Result<ApplySparseResult> {
9041    apply_sparse_checkout_with_mode(
9042        worktree_root,
9043        git_dir,
9044        format,
9045        sparse,
9046        SparseCheckoutMode::Auto,
9047    )
9048}
9049
9050/// Like [`apply_sparse_checkout`] but lets the caller force the pattern
9051/// interpretation instead of auto-detecting it.
9052pub fn apply_sparse_checkout_with_mode(
9053    worktree_root: impl AsRef<Path>,
9054    git_dir: impl AsRef<Path>,
9055    format: ObjectFormat,
9056    sparse: &SparseCheckout,
9057    mode: SparseCheckoutMode,
9058) -> Result<ApplySparseResult> {
9059    let worktree_root = worktree_root.as_ref();
9060    let git_dir = git_dir.as_ref();
9061    let index_path = repository_index_path(git_dir);
9062    let mut index = if index_path.exists() {
9063        Index::parse(&fs::read(&index_path)?, format)?
9064    } else {
9065        return Ok(ApplySparseResult {
9066            materialized: Vec::new(),
9067            skipped: Vec::new(),
9068            not_up_to_date: Vec::new(),
9069        });
9070    };
9071    let matcher = SparseMatcher::new(sparse, mode);
9072    let db = FileObjectDatabase::from_git_dir(git_dir, format);
9073    let mut materialized = Vec::new();
9074    let mut skipped = Vec::new();
9075    let mut not_up_to_date = Vec::new();
9076    for entry in &mut index.entries {
9077        // Never touch conflicted entries.
9078        if index_entry_stage(entry) != 0 {
9079            continue;
9080        }
9081        if matcher.includes_file(entry.path.as_bytes()) {
9082            clear_skip_worktree(entry);
9083            let file_path = worktree_path(worktree_root, entry.path.as_bytes())?;
9084            if !file_path.exists() {
9085                materialize_index_entry_file(&db, &file_path, entry)?;
9086            }
9087            materialized.push(entry.path.as_bytes().to_vec());
9088        } else {
9089            // The path is out of cone, so its worktree file should be removed and
9090            // the entry marked skip-worktree. But git refuses to delete a file
9091            // that is *not up to date* with the index (e.g. one that reappeared in
9092            // the worktree after the path was already sparse): it leaves the file,
9093            // leaves the skip-worktree bit clear, and reports the path in its "not
9094            // up to date" warning. Mirror that to avoid silent data loss.
9095            let file_path = worktree_path(worktree_root, entry.path.as_bytes())?;
9096            match fs::symlink_metadata(&file_path) {
9097                Ok(metadata) if !worktree_entry_is_uptodate(entry, &metadata) => {
9098                    clear_skip_worktree(entry);
9099                    not_up_to_date.push(entry.path.as_bytes().to_vec());
9100                }
9101                _ => {
9102                    set_skip_worktree(entry);
9103                    remove_worktree_file(worktree_root, entry.path.as_bytes())?;
9104                    skipped.push(entry.path.as_bytes().to_vec());
9105                }
9106            }
9107        }
9108    }
9109    not_up_to_date.sort();
9110    normalize_index_version_for_extended_flags(&mut index);
9111    fs::write(index_path, index.write(format)?)?;
9112    Ok(ApplySparseResult {
9113        materialized,
9114        skipped,
9115        not_up_to_date,
9116    })
9117}
9118
9119/// Whether the worktree file described by `metadata` is up to date with `entry`'s
9120/// cached index stat, using the size + mtime heuristic at the core of git's
9121/// `ie_match_stat`. A freshly-checked-out (clean) file matches; a file that was
9122/// deleted and later recreated — as happens when an out-of-cone path reappears in
9123/// the worktree — gets a fresh mtime and so reads as modified, which is exactly
9124/// the state git declines to overwrite during a sparse update.
9125fn worktree_entry_is_uptodate(entry: &IndexEntry, metadata: &fs::Metadata) -> bool {
9126    if u64::from(entry.size) != metadata.len() {
9127        return false;
9128    }
9129    let Some((mtime_seconds, mtime_nanoseconds)) = file_mtime_parts(metadata) else {
9130        // Without a usable mtime we cannot prove the file is clean; treat it as
9131        // not up to date so a present file is never silently discarded.
9132        return false;
9133    };
9134    u64::from(entry.mtime_seconds) == mtime_seconds
9135        && u64::from(entry.mtime_nanoseconds) == mtime_nanoseconds
9136}
9137
9138fn worktree_entry_ref_is_uptodate(entry: &IndexEntryRef<'_>, metadata: &fs::Metadata) -> bool {
9139    if u64::from(entry.size) != metadata.len() {
9140        return false;
9141    }
9142    let Some((mtime_seconds, mtime_nanoseconds)) = file_mtime_parts(metadata) else {
9143        return false;
9144    };
9145    u64::from(entry.mtime_seconds) == mtime_seconds
9146        && u64::from(entry.mtime_nanoseconds) == mtime_nanoseconds
9147}
9148
/// Splits the file's modification time into whole seconds since the Unix
/// epoch and the sub-second nanosecond remainder, matching how git stores
/// `mtime` in the index.
///
/// Returns `None` when the platform cannot report a modification time or when
/// that time lies before the Unix epoch.
fn file_mtime_parts(metadata: &fs::Metadata) -> Option<(u64, u64)> {
    metadata
        .modified()
        .ok()
        .and_then(|stamp| stamp.duration_since(UNIX_EPOCH).ok())
        .map(|elapsed| (elapsed.as_secs(), u64::from(elapsed.subsec_nanos())))
}
9156
9157/// Write a git metadata file through a sibling `.lock` file and atomic rename.
9158///
9159/// This helper is intended for small repository/worktree metadata files such as
9160/// `HEAD`, `config.worktree`, or state files under `.git/`. It deliberately does
9161/// not try to replace object or pack writers, which have their own durability
9162/// and naming rules.
9163pub fn write_metadata_file_atomic(
9164    path: impl AsRef<Path>,
9165    bytes: &[u8],
9166    options: AtomicMetadataWriteOptions,
9167) -> Result<AtomicMetadataWriteResult> {
9168    let path = path.as_ref();
9169    let parent = path.parent().ok_or_else(|| {
9170        GitError::InvalidPath(format!("metadata path has no parent: {}", path.display()))
9171    })?;
9172    if !parent.as_os_str().is_empty() {
9173        fs::create_dir_all(parent)?;
9174    }
9175    let lock_path = metadata_lock_path(path)?;
9176    let mut lock = match fs::OpenOptions::new()
9177        .write(true)
9178        .create_new(true)
9179        .open(&lock_path)
9180    {
9181        Ok(lock) => lock,
9182        Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {
9183            return Err(GitError::Transaction(format!(
9184                "metadata lock already exists: {}",
9185                lock_path.display()
9186            )));
9187        }
9188        Err(err) => return Err(err.into()),
9189    };
9190    if let Err(err) = lock.write_all(bytes) {
9191        let _ = fs::remove_file(&lock_path);
9192        return Err(err.into());
9193    }
9194    if options.fsync_file
9195        && let Err(err) = lock.sync_all()
9196    {
9197        let _ = fs::remove_file(&lock_path);
9198        return Err(err.into());
9199    }
9200    drop(lock);
9201    if let Err(err) = fs::rename(&lock_path, path) {
9202        let _ = fs::remove_file(&lock_path);
9203        return Err(err.into());
9204    }
9205    if options.fsync_dir
9206        && let Ok(dir) = fs::File::open(parent)
9207    {
9208        dir.sync_all()?;
9209    }
9210    let metadata = fs::metadata(path)?;
9211    Ok(AtomicMetadataWriteResult {
9212        path: path.to_path_buf(),
9213        len: metadata.len(),
9214        mtime: file_mtime_parts(&metadata),
9215    })
9216}
9217
9218fn metadata_lock_path(path: &Path) -> Result<PathBuf> {
9219    let file_name = path.file_name().ok_or_else(|| {
9220        GitError::InvalidPath(format!("metadata path has no filename: {}", path.display()))
9221    })?;
9222    let mut lock_name = file_name.to_os_string();
9223    lock_name.push(".lock");
9224    Ok(path.with_file_name(lock_name))
9225}
9226
9227/// Checks out `target` like [`checkout_detached`], but materializes the
9228/// worktree through the supplied [`SparseCheckout`]: out-of-cone paths are not
9229/// written, get their skip-worktree bit set, and have any stale worktree file
9230/// removed. Existing public checkout entry points are unchanged; this is an
9231/// additive sparse-aware variant.
9232///
9233/// The pattern interpretation is auto-detected ([`SparseCheckoutMode::Auto`]);
9234/// to reconcile an existing checkout under an explicit mode use
9235/// [`apply_sparse_checkout_with_mode`].
9236pub fn checkout_detached_sparse(
9237    worktree_root: impl AsRef<Path>,
9238    git_dir: impl AsRef<Path>,
9239    format: ObjectFormat,
9240    target: &ObjectId,
9241    committer: Vec<u8>,
9242    message: Vec<u8>,
9243    sparse: &SparseCheckout,
9244) -> Result<CheckoutResult> {
9245    let worktree_root = worktree_root.as_ref();
9246    let git_dir = git_dir.as_ref();
9247    let files = checkout_commit_to_index_and_worktree_sparse(
9248        worktree_root,
9249        git_dir,
9250        format,
9251        target,
9252        Some((sparse, SparseCheckoutMode::Auto)),
9253    )?;
9254    let refs = FileRefStore::new(git_dir, format);
9255    let zero = ObjectId::null(format);
9256    let mut tx = refs.transaction();
9257    tx.update(RefUpdate {
9258        name: "HEAD".into(),
9259        expected: None,
9260        new: RefTarget::Direct(*target),
9261        reflog: Some(ReflogEntry {
9262            old_oid: zero,
9263            new_oid: *target,
9264            committer,
9265            message,
9266        }),
9267    });
9268    tx.commit()?;
9269    Ok(CheckoutResult {
9270        branch: target.to_string(),
9271        oid: *target,
9272        files,
9273    })
9274}
9275
9276fn materialize_index_entry_file(
9277    db: &FileObjectDatabase,
9278    file_path: &Path,
9279    entry: &IndexEntry,
9280) -> Result<()> {
9281    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
9282    if let Some(parent) = file_path.parent() {
9283        fs::create_dir_all(parent)?;
9284    }
9285    fs::write(file_path, &object.body)?;
9286    set_worktree_file_mode(file_path, entry.mode)?;
9287    Ok(())
9288}
9289
9290fn set_skip_worktree(entry: &mut IndexEntry) {
9291    entry.flags |= INDEX_FLAG_EXTENDED;
9292    entry.flags_extended |= INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
9293}
9294
9295fn clear_skip_worktree(entry: &mut IndexEntry) {
9296    entry.flags_extended &= !INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
9297    if entry.flags_extended == 0 {
9298        entry.flags &= !INDEX_FLAG_EXTENDED;
9299    }
9300}
9301
9302pub fn restore_worktree_paths_from_head(
9303    worktree_root: impl AsRef<Path>,
9304    git_dir: impl AsRef<Path>,
9305    format: ObjectFormat,
9306    paths: &[PathBuf],
9307) -> Result<RestoreResult> {
9308    let worktree_root = worktree_root.as_ref();
9309    let git_dir = git_dir.as_ref();
9310    let index_path = repository_index_path(git_dir);
9311    let index = if index_path.exists() {
9312        Index::parse(&fs::read(&index_path)?, format)?
9313    } else {
9314        Index {
9315            version: 2,
9316            entries: Vec::new(),
9317            extensions: Vec::new(),
9318            checksum: None,
9319        }
9320    };
9321    let db = FileObjectDatabase::from_git_dir(git_dir, format);
9322    let head_entries = head_tree_entries(git_dir, format, &db)?;
9323    restore_worktree_paths_from_entries(worktree_root, &db, index, &head_entries, paths)
9324}
9325
9326pub fn restore_worktree_paths_from_tree(
9327    worktree_root: impl AsRef<Path>,
9328    git_dir: impl AsRef<Path>,
9329    format: ObjectFormat,
9330    tree_oid: &ObjectId,
9331    paths: &[PathBuf],
9332) -> Result<RestoreResult> {
9333    let worktree_root = worktree_root.as_ref();
9334    let git_dir = git_dir.as_ref();
9335    let index_path = repository_index_path(git_dir);
9336    let index = if index_path.exists() {
9337        Index::parse(&fs::read(&index_path)?, format)?
9338    } else {
9339        Index {
9340            version: 2,
9341            entries: Vec::new(),
9342            extensions: Vec::new(),
9343            checksum: None,
9344        }
9345    };
9346    let db = FileObjectDatabase::from_git_dir(git_dir, format);
9347    let source_entries = tree_entries(&db, format, tree_oid)?;
9348    restore_worktree_paths_from_entries(worktree_root, &db, index, &source_entries, paths)
9349}
9350
9351fn restore_worktree_paths_from_entries(
9352    worktree_root: &Path,
9353    db: &FileObjectDatabase,
9354    index: Index,
9355    source_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
9356    paths: &[PathBuf],
9357) -> Result<RestoreResult> {
9358    let index_entries = index
9359        .entries
9360        .into_iter()
9361        .map(|entry| entry.path.into_bytes())
9362        .collect::<BTreeSet<_>>();
9363    let mut restored = BTreeSet::new();
9364    for path in paths {
9365        let absolute = if path.is_absolute() {
9366            path.clone()
9367        } else {
9368            worktree_root.join(path)
9369        };
9370        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
9371            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
9372        })?;
9373        let git_path = git_path_bytes(relative)?;
9374        let recursive = path == Path::new(".")
9375            || path.to_string_lossy().ends_with('/')
9376            || absolute.is_dir()
9377            || index_entries
9378                .iter()
9379                .any(|entry| index_entry_is_under_path(entry, &git_path))
9380            || source_entries
9381                .keys()
9382                .any(|entry| index_entry_is_under_path(entry, &git_path));
9383        let mut matched_paths = BTreeSet::new();
9384        for path in index_entries.iter().chain(source_entries.keys()) {
9385            if path.as_slice() == git_path.as_slice()
9386                || (recursive && index_entry_is_under_path(path, &git_path))
9387            {
9388                matched_paths.insert(path.clone());
9389            }
9390        }
9391        if matched_paths.is_empty() {
9392            eprintln!(
9393                "error: pathspec '{}' did not match any file(s) known to git",
9394                path.display()
9395            );
9396            return Err(GitError::Exit(1));
9397        }
9398        for path in matched_paths {
9399            if let Some(entry) = source_entries.get(&path) {
9400                restore_head_entry_to_worktree(worktree_root, db, &path, entry)?;
9401            } else {
9402                remove_worktree_file(worktree_root, &path)?;
9403            }
9404            restored.insert(path);
9405        }
9406    }
9407    Ok(RestoreResult {
9408        restored: restored.len(),
9409    })
9410}
9411
9412pub fn remove_index_and_worktree_paths(
9413    worktree_root: impl AsRef<Path>,
9414    git_dir: impl AsRef<Path>,
9415    format: ObjectFormat,
9416    paths: &[PathBuf],
9417    options: RemoveOptions,
9418    config_parameters_env: Option<&str>,
9419) -> Result<RemoveResult> {
9420    let worktree_root = worktree_root.as_ref();
9421    let git_dir = git_dir.as_ref();
9422    let index_path = repository_index_path(git_dir);
9423    let index = if index_path.exists() {
9424        Index::parse(&fs::read(&index_path)?, format)?
9425    } else {
9426        Index {
9427            version: 2,
9428            entries: Vec::new(),
9429            extensions: Vec::new(),
9430            checksum: None,
9431        }
9432    };
9433    let db = FileObjectDatabase::from_git_dir(git_dir, format);
9434    let head_entries = head_tree_entries(git_dir, format, &db)?;
9435    // Stat cache for the local-modification check (git's `ie_match_stat`):
9436    // proves a path unchanged from the cached stat without reading its blob, so
9437    // a `git rm --cached` of an untouched path whose blob was removed still
9438    // succeeds (cf. t1450-fsck cell 90). (`sley_index::IndexStatCache` is a
9439    // distinct type from this crate's same-named probe helper above.)
9440    let rm_stat_cache = sley_index::IndexStatCache::from_index(&index, &index_path);
9441    let Index {
9442        version: index_version,
9443        entries: index_entry_list,
9444        extensions: index_extensions,
9445        ..
9446    } = index;
9447    // The set of distinct index paths (any stage) — used for membership tests.
9448    let index_paths: BTreeSet<Vec<u8>> = index_entry_list
9449        .iter()
9450        .map(|entry| entry.path.as_bytes().to_vec())
9451        .collect();
9452    // Paths selected for removal. A single selected path removes ALL of its
9453    // stage entries (so resolving an unmerged path by removal drops stages
9454    // 1/2/3 together), matching git's name-keyed removal.
9455    let mut selected = BTreeSet::new();
9456    for path in paths {
9457        let absolute = if path.is_absolute() {
9458            path.clone()
9459        } else {
9460            worktree_root.join(path)
9461        };
9462        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
9463            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
9464        })?;
9465        // A pathspec with a trailing slash (e.g. `git rm dir/`) only matches a
9466        // directory: it must never match a same-named tracked file. `Path`'s
9467        // component iterator drops the slash, so capture it before it is lost.
9468        let has_trailing_slash = path_has_trailing_separator(&absolute);
9469        let git_path = git_path_bytes(relative)?;
9470        if !has_trailing_slash && index_paths.contains(&git_path) {
9471            selected.insert(git_path);
9472            continue;
9473        }
9474        // A wildcard pathspec (e.g. `git rm "*"` or `git rm "dir/*.c"`) matches
9475        // index entries by git's pathspec matcher rather than by literal path or
9476        // directory prefix. Try the glob match first when the spec contains
9477        // wildcard metacharacters; a glob match removes the entries directly
9478        // (no `-r` needed — the pathspec already names the files).
9479        if pathspec_is_glob(&git_path) {
9480            let glob_matched = index_paths
9481                .iter()
9482                .filter(|entry| {
9483                    pathspec_item_matches(&git_path, entry, PathspecMatchMagic::default())
9484                })
9485                .cloned()
9486                .collect::<Vec<_>>();
9487            if !glob_matched.is_empty() {
9488                selected.extend(glob_matched);
9489                continue;
9490            }
9491            if options.ignore_unmatch {
9492                continue;
9493            }
9494            eprintln!(
9495                "fatal: pathspec '{}' did not match any files",
9496                String::from_utf8_lossy(&git_path)
9497            );
9498            return Err(GitError::Exit(128));
9499        }
9500        let matched = index_paths
9501            .iter()
9502            .filter(|entry| index_entry_is_under_path(entry, &git_path))
9503            .cloned()
9504            .collect::<Vec<_>>();
9505        if matched.is_empty() {
9506            if options.ignore_unmatch {
9507                continue;
9508            }
9509            eprintln!(
9510                "fatal: pathspec '{}' did not match any files",
9511                String::from_utf8_lossy(&git_path)
9512            );
9513            return Err(GitError::Exit(128));
9514        }
9515        if !options.recursive {
9516            eprintln!(
9517                "fatal: not removing '{}' recursively without -r",
9518                String::from_utf8_lossy(&git_path)
9519            );
9520            return Err(GitError::Exit(128));
9521        }
9522        selected.extend(matched);
9523    }
9524
9525    // `git rm` runs the local-modification safety check unless `-f` is given —
9526    // even for `--cached`. The check (a faithful port of builtin/rm.c's
9527    // `check_local_mod`) buckets each selected path into one of three error
9528    // classes and prints all of them at once (collected, not fail-fast), so a
9529    // single `git rm a b c` reports every offending path. See the message
9530    // assertions in t3600-rm.sh.
9531    if !options.force {
9532        let config =
9533            sley_config::read_repo_config(git_dir, config_parameters_env).unwrap_or_default();
9534        // advice.rmhints (default true) gates the parenthetical "(use ...)" hints.
9535        let show_hints = config
9536            .get_bool("advice", None, "rmhints")
9537            .unwrap_or(true);
9538        // Map each selected path to its stage-0 index entry for the check; an
9539        // unmerged path (no stage 0) is skipped, exactly like git's loop
9540        // (index_name_pos fails, and a non-gitlink ours entry `continue`s).
9541        let stage0: BTreeMap<&[u8], &IndexEntry> = index_entry_list
9542            .iter()
9543            .filter(|entry| entry.stage() == Stage::Normal)
9544            .map(|entry| (entry.path.as_bytes(), entry))
9545            .collect();
9546        let mut files_staged: Vec<&[u8]> = Vec::new();
9547        let mut files_cached: Vec<&[u8]> = Vec::new();
9548        let mut files_local: Vec<&[u8]> = Vec::new();
9549        for path in &selected {
9550            let Some(index_entry) = stage0.get(path.as_slice()) else {
9551                // Unmerged path with no stage-0 entry: resolving by removal is
9552                // safe and not warning-worthy.
9553                continue;
9554            };
9555            let worktree_file = worktree_path(worktree_root, path)?;
9556            // Is the worktree path different from the index?
9557            //
9558            // Mirror builtin/rm.c's `check_local_mod`: when `lstat` fails with a
9559            // "missing file" error (ENOENT *or* ENOTDIR — the path vanished, or a
9560            // leading component became a file) the file has already gone from the
9561            // working tree, so git `continue`s and never buckets the path. Same
9562            // for a tracked plain path that is now a directory on disk: git
9563            // treats that as ENOENT and skips it (the later worktree-removal step
9564            // is what fails on a non-empty directory).
9565            let local_changes = match fs::symlink_metadata(&worktree_file) {
9566                Err(err)
9567                    if matches!(
9568                        err.kind(),
9569                        std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
9570                    ) || err.raw_os_error() == Some(20) =>
9571                {
9572                    // ENOENT/ENOTDIR: already gone — not warning-worthy.
9573                    continue;
9574                }
9575                Err(err) => return Err(err.into()),
9576                Ok(meta) if meta.is_dir() => continue,
9577                Ok(meta) => {
9578                    // git refreshes the index before `check_local_mod`, so a path
9579                    // whose stat changed but whose content is unchanged is up to
9580                    // date. We mirror that: a clean cached stat short-circuits to
9581                    // "unchanged"; otherwise re-hash the (clean-filtered) worktree
9582                    // content and compare to the index entry's *cached oid* (git's
9583                    // refresh `hash_object`), NOT the stored blob. Comparing to the
9584                    // oid — not the blob bytes — means a removed object does not
9585                    // abort the check (the worktree may still hash to the cached
9586                    // oid), so `git rm --cached` of a path whose blob was deleted
9587                    // still succeeds.
9588                    match rm_stat_cache.index_entry_worktree_stat_verdict(index_entry, &meta) {
9589                        sley_index::StatVerdict::Clean => false,
9590                        sley_index::StatVerdict::Dirty
9591                        | sley_index::StatVerdict::RacyNeedsContentCheck => {
9592                            let worktree_bytes = apply_clean_filter(
9593                                worktree_root,
9594                                git_dir,
9595                                &config,
9596                                path,
9597                                &fs::read(&worktree_file)?,
9598                            )?;
9599                            let worktree_oid =
9600                                EncodedObject::new(ObjectType::Blob, worktree_bytes)
9601                                    .object_id(format)?;
9602                            worktree_oid != index_entry.oid
9603                        }
9604                    }
9605                }
9606            };
9607            // Is the index different from the HEAD commit? (Before the first
9608            // commit, anything staged is treated as changed from HEAD.)
9609            let staged_changes = match head_entries.get(path) {
9610                Some(head_entry) => {
9611                    head_entry.oid != index_entry.oid || head_entry.mode != index_entry.mode
9612                }
9613                None => true,
9614            };
9615            if local_changes && staged_changes {
9616                // `git rm --cached` of an intent-to-add entry is safe.
9617                if !options.cached || !index_entry.is_intent_to_add() {
9618                    files_staged.push(path);
9619                }
9620            } else if !options.cached {
9621                if staged_changes {
9622                    files_cached.push(path);
9623                }
9624                if local_changes {
9625                    files_local.push(path);
9626                }
9627            }
9628        }
9629        let mut errs = false;
9630        print_rm_error_files(
9631            &files_staged,
9632            "the following file has staged content different from both the\nfile and the HEAD:",
9633            "the following files have staged content different from both the\nfile and the HEAD:",
9634            "\n(use -f to force removal)",
9635            show_hints,
9636            &mut errs,
9637        );
9638        print_rm_error_files(
9639            &files_cached,
9640            "the following file has changes staged in the index:",
9641            "the following files have changes staged in the index:",
9642            "\n(use --cached to keep the file, or -f to force removal)",
9643            show_hints,
9644            &mut errs,
9645        );
9646        print_rm_error_files(
9647            &files_local,
9648            "the following file has local modifications:",
9649            "the following files have local modifications:",
9650            "\n(use --cached to keep the file, or -f to force removal)",
9651            show_hints,
9652            &mut errs,
9653        );
9654        if errs {
9655            return Err(GitError::Exit(1));
9656        }
9657    }
9658
9659    if options.dry_run {
9660        return Ok(RemoveResult {
9661            removed: selected.into_iter().collect(),
9662        });
9663    }
9664    // Mirror builtin/rm.c's ordering: remove the worktree files BEFORE writing
9665    // the new index. If the very first removal fails (and nothing has been
9666    // removed yet), abort without committing the index, so a `git rm d` where
9667    // `d` is now a non-empty directory fails AND leaves the index untouched.
9668    // Once any file has been removed we commit to finishing (git does the same).
9669    if !options.cached {
9670        let mut removed_any = false;
9671        for path in &selected {
9672            match remove_tracked_worktree_path(worktree_root, path)? {
9673                true => removed_any = true,
9674                false if !removed_any => {
9675                    eprintln!(
9676                        "fatal: git rm: '{}': Is a directory",
9677                        String::from_utf8_lossy(path)
9678                    );
9679                    return Err(GitError::Exit(128));
9680                }
9681                false => {}
9682            }
9683        }
9684    }
9685    // Keep every entry whose path was not selected, preserving original order
9686    // and all stages of unmerged paths that were not removed.
9687    let entries = index_entry_list
9688        .into_iter()
9689        .filter(|entry| !selected.contains(entry.path.as_bytes()))
9690        .collect::<Vec<_>>();
9691    // Removing entries invalidates the cache-tree (`TREE` extension): a stale
9692    // cached subtree id makes `git diff --cached`/`git status` short-circuit the
9693    // comparison of an affected directory against HEAD and miss the deletion
9694    // (observed: `git rm dir/nested.txt` left a valid `dir/` cache-tree, so the
9695    // deletion never showed in the cached diff). Git invalidates the cache-tree
9696    // on any index mutation; drop it so it is rebuilt on the next write, exactly
9697    // like the `add` path does above.
9698    let extensions = index_extensions_without_cache_tree(&index_extensions);
9699    fs::write(
9700        index_path,
9701        Index {
9702            version: index_version,
9703            entries,
9704            extensions,
9705            checksum: None,
9706        }
9707        .write(format)?,
9708    )?;
9709    Ok(RemoveResult {
9710        removed: selected.into_iter().collect(),
9711    })
9712}
9713
9714/// Remove a tracked path from the working tree, mirroring builtin/rm.c's
9715/// `remove_path`: unlink the file and prune now-empty parent directories.
9716/// Returns `Ok(true)` when a file was removed, `Ok(false)` when the path could
9717/// not be unlinked because it is a directory (the caller decides whether that
9718/// aborts the run). A path that has already vanished is a no-op success.
9719fn remove_tracked_worktree_path(root: &Path, path: &[u8]) -> Result<bool> {
9720    let file = worktree_path(root, path)?;
9721    match fs::symlink_metadata(&file) {
9722        Err(err)
9723            if matches!(
9724                err.kind(),
9725                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
9726            ) =>
9727        {
9728            return Ok(true);
9729        }
9730        Err(err) if err.raw_os_error() == Some(20) => return Ok(true), // ENOTDIR
9731        Err(err) => return Err(err.into()),
9732        // A directory in the worktree where a plain file is tracked cannot be
9733        // unlinked (git's remove_path fails on EISDIR). Report it so the caller
9734        // can abort the removal without committing the index.
9735        Ok(meta) if meta.is_dir() => return Ok(false),
9736        Ok(_) => {}
9737    }
9738    fs::remove_file(&file)?;
9739    prune_empty_parents(root, file.parent())?;
9740    Ok(true)
9741}
9742
/// Emit one batched `git rm` safety error block on stderr (the counterpart of
/// builtin/rm.c's `print_error_files`).
///
/// The block consists of the `singular` or `plural` headline (chosen by the
/// number of `files`), one indented line per offending path, and `hint`
/// appended when `show_hints` is set. Does nothing when `files` is empty;
/// otherwise sets `*errs` to `true` so the caller can fail once every class of
/// problem has been reported.
fn print_rm_error_files(
    files: &[&[u8]],
    singular: &str,
    plural: &str,
    hint: &str,
    show_hints: bool,
    errs: &mut bool,
) {
    if files.is_empty() {
        return;
    }
    let headline = match files {
        [_] => singular,
        _ => plural,
    };
    let listing: String = files
        .iter()
        .map(|path| format!("\n    {}", String::from_utf8_lossy(path)))
        .collect();
    let trailer = if show_hints { hint } else { "" };
    eprintln!("error: {headline}{listing}{trailer}");
    *errs = true;
}
9769
9770pub fn move_index_and_worktree_path(
9771    worktree_root: impl AsRef<Path>,
9772    git_dir: impl AsRef<Path>,
9773    format: ObjectFormat,
9774    source: &Path,
9775    destination: &Path,
9776    options: MoveOptions,
9777) -> Result<MoveResult> {
9778    let worktree_root = worktree_root.as_ref();
9779    let git_dir = git_dir.as_ref();
9780    let index_path = repository_index_path(git_dir);
9781    let mut index = if index_path.exists() {
9782        Index::parse(&fs::read(&index_path)?, format)?
9783    } else {
9784        Index {
9785            version: 2,
9786            entries: Vec::new(),
9787            extensions: Vec::new(),
9788            checksum: None,
9789        }
9790    };
9791    let source_absolute = if source.is_absolute() {
9792        source.to_path_buf()
9793    } else {
9794        worktree_root.join(source)
9795    };
9796    let destination_absolute = if destination.is_absolute() {
9797        destination.to_path_buf()
9798    } else {
9799        worktree_root.join(destination)
9800    };
9801    let destination_absolute = if destination_absolute.is_dir() {
9802        let Some(file_name) = source_absolute.file_name() else {
9803            return Err(GitError::InvalidPath(format!(
9804                "invalid source path {}",
9805                source.display()
9806            )));
9807        };
9808        destination_absolute.join(file_name)
9809    } else {
9810        destination_absolute
9811    };
9812    let source_relative = source_absolute.strip_prefix(worktree_root).map_err(|_| {
9813        GitError::InvalidPath(format!("path {} is outside worktree", source.display()))
9814    })?;
9815    let destination_relative = destination_absolute
9816        .strip_prefix(worktree_root)
9817        .map_err(|_| {
9818            GitError::InvalidPath(format!(
9819                "path {} is outside worktree",
9820                destination.display()
9821            ))
9822        })?;
9823    let source_path = git_path_bytes(source_relative)?;
9824    let destination_path = git_path_bytes(destination_relative)?;
9825    let destination_has_trailing_separator = path_has_trailing_separator(&destination_absolute);
9826    if destination_has_trailing_separator && !destination_absolute.is_dir() {
9827        if options.skip_errors {
9828            return Ok(MoveResult {
9829                source: source_path,
9830                destination: destination_path,
9831                skipped: true,
9832                fatal: None,
9833                details: Vec::new(),
9834            });
9835        }
9836        let mut destination = String::from_utf8_lossy(&destination_path).into_owned();
9837        destination.push('/');
9838        if options.dry_run {
9839            let fatal = format!(
9840                "fatal: destination directory does not exist, source={}, destination={destination}",
9841                String::from_utf8_lossy(&source_path),
9842            );
9843            return Ok(MoveResult {
9844                source: source_path,
9845                destination: destination.clone().into_bytes(),
9846                skipped: false,
9847                fatal: Some(fatal),
9848                details: Vec::new(),
9849            });
9850        }
9851        eprintln!(
9852            "fatal: destination directory does not exist, source={}, destination={destination}",
9853            String::from_utf8_lossy(&source_path),
9854        );
9855        return Err(GitError::Exit(128));
9856    }
9857    if destination_absolute.exists() {
9858        if !options.force {
9859            if options.skip_errors {
9860                return Ok(MoveResult {
9861                    source: source_path,
9862                    destination: destination_path,
9863                    skipped: true,
9864                    fatal: None,
9865                    details: Vec::new(),
9866                });
9867            }
9868            if options.dry_run {
9869                let fatal = format!(
9870                    "fatal: destination exists, source={}, destination={}",
9871                    String::from_utf8_lossy(&source_path),
9872                    String::from_utf8_lossy(&destination_path)
9873                );
9874                return Ok(MoveResult {
9875                    source: source_path,
9876                    destination: destination_path,
9877                    skipped: false,
9878                    fatal: Some(fatal),
9879                    details: Vec::new(),
9880                });
9881            }
9882            eprintln!(
9883                "fatal: destination exists, source={}, destination={}",
9884                String::from_utf8_lossy(&source_path),
9885                String::from_utf8_lossy(&destination_path)
9886            );
9887            return Err(GitError::Exit(128));
9888        }
9889        if !options.dry_run && destination_absolute.is_dir() {
9890            fs::remove_dir_all(&destination_absolute)?;
9891        } else if !options.dry_run {
9892            fs::remove_file(&destination_absolute)?;
9893        }
9894    }
9895    let directory_prefix = {
9896        let mut prefix = source_path.clone();
9897        prefix.push(b'/');
9898        prefix
9899    };
9900    let directory_entries: Vec<_> = index
9901        .entries
9902        .iter()
9903        .filter(|entry| entry.path.as_bytes().starts_with(&directory_prefix))
9904        .cloned()
9905        .collect();
9906    if !directory_entries.is_empty() {
9907        let details: Vec<_> = directory_entries
9908            .iter()
9909            .map(|entry| {
9910                let suffix = &entry.path.as_bytes()[source_path.len()..];
9911                let mut destination = destination_path.clone();
9912                destination.extend_from_slice(suffix);
9913                MoveDetail {
9914                    source: entry.path.as_bytes().to_vec(),
9915                    destination,
9916                    skipped: false,
9917                }
9918            })
9919            .collect();
9920        if options.dry_run {
9921            return Ok(MoveResult {
9922                source: source_path,
9923                destination: destination_path,
9924                skipped: false,
9925                fatal: None,
9926                details,
9927            });
9928        }
9929        fs::rename(&source_absolute, &destination_absolute)?;
9930        let moved_paths: Vec<_> = details
9931            .iter()
9932            .map(|detail| detail.destination.clone())
9933            .collect();
9934        index.entries.retain(|entry| {
9935            !entry.path.as_bytes().starts_with(&directory_prefix)
9936                && !moved_paths
9937                    .iter()
9938                    .any(|m| m.as_slice() == entry.path.as_bytes())
9939        });
9940        for (source_entry, detail) in directory_entries.into_iter().zip(details.iter()) {
9941            let relative_path = git_path_to_relative_path(&detail.destination)?;
9942            let metadata = fs::metadata(worktree_root.join(relative_path))?;
9943            let mut destination_entry =
9944                index_entry_from_metadata(detail.destination.clone(), source_entry.oid, &metadata);
9945            destination_entry.mode = source_entry.mode;
9946            index.entries.push(destination_entry);
9947        }
9948        index
9949            .entries
9950            .sort_by(|left, right| left.path.cmp(&right.path));
9951        index.extensions.clear();
9952        fs::write(index_path, index.write(format)?)?;
9953        return Ok(MoveResult {
9954            source: source_path,
9955            destination: destination_path,
9956            skipped: false,
9957            fatal: None,
9958            details,
9959        });
9960    }
9961
9962    let Some(position) = index
9963        .entries
9964        .iter()
9965        .position(|entry| entry.path == source_path)
9966    else {
9967        if options.skip_errors {
9968            return Ok(MoveResult {
9969                source: source_path,
9970                destination: destination_path,
9971                skipped: true,
9972                fatal: None,
9973                details: Vec::new(),
9974            });
9975        }
9976        let source_kind = if source_absolute.exists() {
9977            "not under version control"
9978        } else {
9979            "bad source"
9980        };
9981        if options.dry_run {
9982            let fatal = format!(
9983                "fatal: {source_kind}, source={}, destination={}",
9984                String::from_utf8_lossy(&source_path),
9985                String::from_utf8_lossy(&destination_path)
9986            );
9987            return Ok(MoveResult {
9988                source: source_path,
9989                destination: destination_path,
9990                skipped: false,
9991                fatal: Some(fatal),
9992                details: Vec::new(),
9993            });
9994        }
9995        eprintln!(
9996            "fatal: {source_kind}, source={}, destination={}",
9997            String::from_utf8_lossy(&source_path),
9998            String::from_utf8_lossy(&destination_path)
9999        );
10000        return Err(GitError::Exit(128));
10001    };
10002    if options.dry_run {
10003        return Ok(MoveResult {
10004            source: source_path,
10005            destination: destination_path,
10006            skipped: false,
10007            fatal: None,
10008            details: Vec::new(),
10009        });
10010    }
10011    if let Some(parent) = destination_absolute.parent()
10012        && !parent.exists()
10013    {
10014        if options.skip_errors {
10015            return Ok(MoveResult {
10016                source: source_path,
10017                destination: destination_path,
10018                skipped: true,
10019                fatal: None,
10020                details: Vec::new(),
10021            });
10022        }
10023        eprintln!(
10024            "fatal: renaming '{}' failed: No such file or directory",
10025            String::from_utf8_lossy(&source_path)
10026        );
10027        return Err(GitError::Exit(128));
10028    }
10029    fs::rename(&source_absolute, &destination_absolute)?;
10030    let metadata = fs::metadata(&destination_absolute)?;
10031    let source_entry = index.entries.remove(position);
10032    let mut destination_entry =
10033        index_entry_from_metadata(destination_path.clone(), source_entry.oid, &metadata);
10034    destination_entry.mode = source_entry.mode;
10035    index.entries.retain(|entry| entry.path != destination_path);
10036    index.entries.push(destination_entry);
10037    index
10038        .entries
10039        .sort_by(|left, right| left.path.cmp(&right.path));
10040    index.extensions.clear();
10041    fs::write(index_path, index.write(format)?)?;
10042    Ok(MoveResult {
10043        source: source_path,
10044        destination: destination_path,
10045        skipped: false,
10046        fatal: None,
10047        details: Vec::new(),
10048    })
10049}
10050
10051fn restore_index_entry(
10052    worktree_root: &Path,
10053    git_dir: &Path,
10054    format: ObjectFormat,
10055    db: &FileObjectDatabase,
10056    entry: &IndexEntry,
10057    smudge_config: Option<&GitConfig>,
10058) -> Result<()> {
10059    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
10060    let body: Cow<'_, [u8]> = match smudge_config {
10061        Some(config) => {
10062            let checks = smudge_attribute_checks_from_index(
10063                worktree_root,
10064                git_dir,
10065                format,
10066                entry.path.as_bytes(),
10067            )?;
10068            apply_smudge_filter_with_attributes_cow(
10069                config,
10070                &checks,
10071                entry.path.as_bytes(),
10072                &object.body,
10073            )?
10074        }
10075        None => Cow::Borrowed(&object.body),
10076    };
10077    let file_path = worktree_path(worktree_root, entry.path.as_bytes())?;
10078    if let Some(parent) = file_path.parent() {
10079        fs::create_dir_all(parent)?;
10080    }
10081    fs::write(&file_path, &body)?;
10082    set_worktree_file_mode(&file_path, entry.mode)?;
10083    Ok(())
10084}
10085
10086fn restored_head_index_entry(
10087    worktree_root: &Path,
10088    db: &FileObjectDatabase,
10089    path: &[u8],
10090    entry: &TrackedEntry,
10091) -> Result<IndexEntry> {
10092    let file_path = worktree_path(worktree_root, path)?;
10093    // This restores the index from a tree (reset --mixed / stash / sparse) WITHOUT
10094    // rewriting the worktree file, so the file on disk may hold different content
10095    // than `entry.oid`. Crucially we must NOT copy the worktree file's stat onto
10096    // this entry: that would make the cached stat match a file whose real content
10097    // hashes to a DIFFERENT oid, breaking git's "stat-match implies oid-match"
10098    // invariant that the status stat-cache relies on. Leave the stat zeroed so
10099    // status always re-hashes this path and detects any modification -- exactly
10100    // git's behavior for tree-sourced entries until a later refresh validates them.
10101    let size = if entry.mode == 0o160000 {
10102        // A gitlink's oid names a commit in the submodule's repository — it is
10103        // not readable here, and a tree-sourced gitlink entry carries size 0.
10104        0
10105    } else {
10106        match fs::metadata(&file_path) {
10107            Ok(metadata) => metadata.len().min(u32::MAX as u64) as u32,
10108            Err(_) => {
10109                let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
10110                object.body.len().min(u32::MAX as usize) as u32
10111            }
10112        }
10113    };
10114    Ok(IndexEntry {
10115        ctime_seconds: 0,
10116        ctime_nanoseconds: 0,
10117        mtime_seconds: 0,
10118        mtime_nanoseconds: 0,
10119        dev: 0,
10120        ino: 0,
10121        mode: entry.mode,
10122        uid: 0,
10123        gid: 0,
10124        size,
10125        oid: entry.oid,
10126        flags: path.len().min(0x0fff) as u16,
10127        flags_extended: 0,
10128        path: BString::from(path),
10129    })
10130}
10131
10132fn restore_head_entry_to_worktree(
10133    worktree_root: &Path,
10134    db: &FileObjectDatabase,
10135    path: &[u8],
10136    entry: &TrackedEntry,
10137) -> Result<()> {
10138    write_worktree_blob_entry(db, worktree_root, path, entry)?;
10139    Ok(())
10140}
10141
10142fn restore_head_entry_to_worktree_and_index(
10143    worktree_root: &Path,
10144    db: &FileObjectDatabase,
10145    path: &[u8],
10146    entry: &TrackedEntry,
10147) -> Result<IndexEntry> {
10148    let file_path = write_worktree_blob_entry(db, worktree_root, path, entry)?;
10149    let metadata = fs::symlink_metadata(&file_path)?;
10150    let mut index_entry = index_entry_from_metadata(path.to_vec(), entry.oid, &metadata);
10151    index_entry.mode = entry.mode;
10152    Ok(index_entry)
10153}
10154
10155fn index_has_entry_under(entries: &[IndexEntry], directory: &[u8]) -> bool {
10156    entries
10157        .iter()
10158        .any(|entry| index_entry_is_under_path(entry.path.as_bytes(), directory))
10159}
10160
/// Whether `entry_path` lies strictly inside `directory`, i.e. it starts with
/// `directory` immediately followed by a `/`.
///
/// An empty `directory` denotes the repository root and contains every path.
/// A path equal to `directory` itself is NOT considered to be under it.
fn index_entry_is_under_path(entry_path: &[u8], directory: &[u8]) -> bool {
    if directory.is_empty() {
        return true;
    }
    match entry_path.strip_prefix(directory) {
        Some(remainder) => remainder.first() == Some(&b'/'),
        None => false,
    }
}
10170
10171fn index_entry_from_metadata(
10172    path: impl Into<BString>,
10173    oid: ObjectId,
10174    metadata: &fs::Metadata,
10175) -> IndexEntry {
10176    let modified = metadata.modified().ok();
10177    let duration = modified
10178        .and_then(|time| time.duration_since(UNIX_EPOCH).ok())
10179        .unwrap_or_default();
10180    let mode = file_mode(metadata);
10181    let path = path.into();
10182    let flags = path.len().min(0x0fff) as u16;
10183    IndexEntry {
10184        ctime_seconds: duration.as_secs().min(u32::MAX as u64) as u32,
10185        ctime_nanoseconds: duration.subsec_nanos(),
10186        mtime_seconds: duration.as_secs().min(u32::MAX as u64) as u32,
10187        mtime_nanoseconds: duration.subsec_nanos(),
10188        dev: 0,
10189        ino: 0,
10190        mode,
10191        uid: 0,
10192        gid: 0,
10193        size: metadata.len().min(u32::MAX as u64) as u32,
10194        oid,
10195        flags,
10196        flags_extended: 0,
10197        path,
10198    }
10199}
10200
10201fn read_expected_object(
10202    db: &FileObjectDatabase,
10203    oid: &ObjectId,
10204    expected: ObjectType,
10205) -> Result<std::sync::Arc<EncodedObject>> {
10206    let object = db
10207        .read_object(oid)
10208        .map_err(|err| expect_missing_object_kind(err, *oid, missing_kind_for_type(expected)))?;
10209    if object.object_type != expected {
10210        return Err(GitError::InvalidObject(format!(
10211            "expected {} {}, found {}",
10212            expected.as_str(),
10213            oid,
10214            object.object_type.as_str()
10215        )));
10216    }
10217    Ok(object)
10218}
10219
10220fn expect_missing_object_kind(
10221    err: GitError,
10222    oid: ObjectId,
10223    expected: MissingObjectKind,
10224) -> GitError {
10225    match err.not_found_kind() {
10226        Some(sley_core::NotFoundKind::Object { .. }) => GitError::object_kind_not_found_in(
10227            oid,
10228            expected,
10229            MissingObjectContext::WorktreeMaterialize,
10230        ),
10231        _ => err,
10232    }
10233}
10234
10235fn missing_kind_for_type(object_type: ObjectType) -> MissingObjectKind {
10236    match object_type {
10237        ObjectType::Blob => MissingObjectKind::Blob,
10238        ObjectType::Tree => MissingObjectKind::Tree,
10239        ObjectType::Commit => MissingObjectKind::Commit,
10240        ObjectType::Tag => MissingObjectKind::Tag,
10241    }
10242}
10243
10244fn read_commit(db: &FileObjectDatabase, format: ObjectFormat, oid: &ObjectId) -> Result<Commit> {
10245    let object = read_expected_object(db, oid, ObjectType::Commit)?;
10246    Commit::parse(format, &object.body)
10247}
10248
/// The `(mode, oid)` pair recorded for a tracked path, as read from the index
/// or from a tree entry.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TrackedEntry {
    /// File mode of the entry (`0o160000` marks a gitlink).
    mode: u32,
    /// Id of the object the entry points at.
    oid: ObjectId,
}
10254
/// git's racy-git stat cache: the stage-0 index entries keyed by path (so the
/// worktree walk can reuse a cached oid when a file's stat shows it is unchanged
/// since it was staged) plus the index *file's* own mtime, which git uses as the
/// racy-clean reference timestamp.
///
/// SAFETY INVARIANT: trusting a cached oid by stat alone is only sound because
/// every code path that stamps a worktree stat onto an index entry also hashed
/// that exact file content (see `index_entry_from_metadata`), while tree-sourced
/// restores (reset --mixed / stash / sparse) leave the stat zeroed
/// (`restored_head_index_entry`). So a non-zero, non-racy stat match implies the
/// cached oid is the file's true content. When that does not hold we fall through
/// to a full read+filter+hash, so a modified file is never reported clean.
#[derive(Debug, Clone, Default)]
struct IndexStatCache {
    /// Stage-0 index entries, keyed by their path bytes.
    entries: HashMap<Vec<u8>, IndexEntry>,
    /// The index file's modification time as `(seconds, nanoseconds)`, or `None`
    /// when it could not be determined. Used as git's racy-clean reference.
    index_mtime: Option<(u64, u64)>,
}
10274
10275impl IndexStatCache {
10276    /// Builds the cache from an already-parsed index plus the path of the index
10277    /// file on disk (whose mtime becomes the racy-clean reference). Only stage-0
10278    /// entries are retained; higher merge stages never describe a worktree file.
10279    fn from_index(index: &Index, index_path: &Path) -> Self {
10280        let index_mtime = fs::metadata(index_path)
10281            .ok()
10282            .and_then(|metadata| file_mtime_parts(&metadata));
10283        Self::from_index_mtime(index, index_mtime)
10284    }
10285
10286    fn from_index_mtime(index: &Index, index_mtime: Option<(u64, u64)>) -> Self {
10287        IndexStatCache {
10288            entries: stage0_index_entries(index),
10289            index_mtime,
10290        }
10291    }
10292
10293    fn from_index_mtime_only(index_mtime: Option<(u64, u64)>) -> Self {
10294        IndexStatCache {
10295            entries: HashMap::new(),
10296            index_mtime,
10297        }
10298    }
10299
10300    /// Whether `entry` is "racily clean" in git's sense: its cached mtime is not
10301    /// strictly older than the index file's mtime, so a same-timestamp write
10302    /// could have changed the content without moving the stat. Such entries must
10303    /// always be re-hashed.
10304    ///
10305    /// Conservative by construction: if the index mtime is unknown, or either
10306    /// side's mtime is zero (e.g. a tree-sourced entry whose stat was left
10307    /// zeroed), this returns `true` so the caller re-hashes rather than trusting
10308    /// a stat we cannot prove safe.
10309    fn is_racily_clean(&self, entry: &IndexEntry) -> bool {
10310        let Some(index_mtime) = self.index_mtime else {
10311            return true;
10312        };
10313        if index_mtime == (0, 0) {
10314            return true;
10315        }
10316        let entry_mtime = (
10317            u64::from(entry.mtime_seconds),
10318            u64::from(entry.mtime_nanoseconds),
10319        );
10320        if entry_mtime == (0, 0) {
10321            return true;
10322        }
10323        // Racy unless the index was written strictly after the entry's mtime.
10324        index_mtime <= entry_mtime
10325    }
10326
10327    fn is_racily_clean_ref(&self, entry: &IndexEntryRef<'_>) -> bool {
10328        let Some(index_mtime) = self.index_mtime else {
10329            return true;
10330        };
10331        if index_mtime == (0, 0) {
10332            return true;
10333        }
10334        let entry_mtime = (
10335            u64::from(entry.mtime_seconds),
10336            u64::from(entry.mtime_nanoseconds),
10337        );
10338        if entry_mtime == (0, 0) {
10339            return true;
10340        }
10341        index_mtime <= entry_mtime
10342    }
10343
10344    /// Whether the index has a stage-0 entry for `git_path` (i.e. the path is
10345    /// tracked). Used to skip hashing untracked worktree files.
10346    fn contains(&self, git_path: &[u8]) -> bool {
10347        self.entries.contains_key(git_path)
10348    }
10349
10350    fn tracked_entry(&self, git_path: &[u8]) -> Option<TrackedEntry> {
10351        self.entries.get(git_path).map(|entry| TrackedEntry {
10352            mode: entry.mode,
10353            oid: entry.oid,
10354        })
10355    }
10356
10357    /// Returns the cached [`TrackedEntry`] for `git_path` (reusing its stored
10358    /// oid, so the caller can SKIP reading, filtering, and hashing the file) only
10359    /// when the worktree file is provably unchanged since it was staged: a
10360    /// stage-0 entry exists, its recorded mode matches the file's current mode
10361    /// (catching pure `chmod`s that do not move mtime), the size+mtime stat
10362    /// check passes, and the entry is not racily clean. Otherwise returns `None`
10363    /// and the caller hashes the file as usual.
10364    fn reuse_tracked_entry(
10365        &self,
10366        git_path: &[u8],
10367        worktree_metadata: &fs::Metadata,
10368    ) -> Option<TrackedEntry> {
10369        let entry = self.entries.get(git_path)?;
10370        self.reuse_index_entry(entry, worktree_metadata)
10371    }
10372
10373    fn reuse_index_entry(
10374        &self,
10375        entry: &IndexEntry,
10376        worktree_metadata: &fs::Metadata,
10377    ) -> Option<TrackedEntry> {
10378        if entry.mode != worktree_entry_mode(worktree_metadata) {
10379            return None;
10380        }
10381        if !worktree_entry_is_uptodate(entry, worktree_metadata) {
10382            return None;
10383        }
10384        if self.is_racily_clean(entry) {
10385            return None;
10386        }
10387        Some(TrackedEntry {
10388            mode: entry.mode,
10389            oid: entry.oid,
10390        })
10391    }
10392
10393    fn reuse_index_entry_ref(
10394        &self,
10395        entry: &IndexEntryRef<'_>,
10396        worktree_metadata: &fs::Metadata,
10397    ) -> Option<TrackedEntry> {
10398        if entry.mode != worktree_entry_mode(worktree_metadata) {
10399            return None;
10400        }
10401        if !worktree_entry_ref_is_uptodate(entry, worktree_metadata) {
10402            return None;
10403        }
10404        if self.is_racily_clean_ref(entry) {
10405            return None;
10406        }
10407        Some(TrackedEntry {
10408            mode: entry.mode,
10409            oid: entry.oid,
10410        })
10411    }
10412
10413    /// The stage-0 gitlink (mode 160000) index entry at `git_path`, if any.
10414    fn gitlink_entry(&self, git_path: &[u8]) -> Option<&IndexEntry> {
10415        self.entries
10416            .get(git_path)
10417            .filter(|entry| entry.mode == 0o160000)
10418    }
10419}
10420
10421fn read_index_entries(
10422    git_dir: &Path,
10423    format: ObjectFormat,
10424) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
10425    let db = FileObjectDatabase::from_git_dir(git_dir, format);
10426    Ok(read_index_entries_with_stat_cache(git_dir, format, &db)?.0)
10427}
10428
10429fn resolve_head_tree_oid(
10430    git_dir: &Path,
10431    format: ObjectFormat,
10432    db: &FileObjectDatabase,
10433) -> Result<Option<ObjectId>> {
10434    let Some(commit_oid) = resolve_head_commit_oid(git_dir, format)? else {
10435        return Ok(None);
10436    };
10437    let object = read_expected_object(db, &commit_oid, ObjectType::Commit)?;
10438    let commit = Commit::parse_ref(format, &object.body)?;
10439    Ok(Some(commit.tree))
10440}
10441
10442fn resolve_head_commit_oid(git_dir: &Path, format: ObjectFormat) -> Result<Option<ObjectId>> {
10443    let refs = FileRefStore::new(git_dir, format);
10444    sley_refs::resolve_ref_peeled(&refs, "HEAD")
10445}
10446
10447fn status_entry_is_untracked_or_ignored(entry: &ShortStatusEntry) -> bool {
10448    matches!((entry.index, entry.worktree), (b'?', b'?') | (b'!', b'!'))
10449}
10450
10451fn checkout_switch_head_symbolic(
10452    refs: &FileRefStore,
10453    branch_ref: String,
10454    committer: Vec<u8>,
10455    branch: &str,
10456    old_oid: Option<ObjectId>,
10457    new_oid: Option<ObjectId>,
10458) -> Result<()> {
10459    // Reflog "from" side: the previous branch's short name, or the commit id
10460    // when HEAD was detached (git's `checkout: moving from X to Y` shape,
10461    // which `@{-N}` resolution parses).
10462    let from = match refs.read_ref("HEAD") {
10463        Ok(Some(RefTarget::Symbolic(name))) => name
10464            .strip_prefix("refs/heads/")
10465            .unwrap_or(&name)
10466            .to_string(),
10467        Ok(Some(RefTarget::Direct(oid))) => oid.to_hex(),
10468        _ => "HEAD".to_string(),
10469    };
10470    let mut tx = refs.transaction();
10471    let reflog = match (old_oid, new_oid) {
10472        (Some(old_oid), Some(new_oid)) => Some(ReflogEntry {
10473            old_oid,
10474            new_oid,
10475            committer,
10476            message: format!("checkout: moving from {from} to {branch}").into_bytes(),
10477        }),
10478        _ => None,
10479    };
10480    tx.update(RefUpdate {
10481        name: "HEAD".into(),
10482        expected: None,
10483        new: RefTarget::Symbolic(branch_ref),
10484        reflog,
10485    });
10486    tx.commit()
10487}
10488
10489fn cache_tree_is_valid(tree: &CacheTree) -> bool {
10490    if tree.entry_count < 0 || tree.oid.is_none() {
10491        return false;
10492    }
10493    tree.subtrees
10494        .iter()
10495        .all(|child| cache_tree_is_valid(&child.tree))
10496}
10497
10498fn head_matches_index_from_cache_tree(
10499    index: &Index,
10500    format: ObjectFormat,
10501    head_tree_oid: &ObjectId,
10502    stage0_entry_count: usize,
10503) -> Result<bool> {
10504    let cache_tree = match index.cache_tree(format) {
10505        Ok(Some(cache_tree)) => cache_tree,
10506        Ok(None) | Err(_) => return Ok(false),
10507    };
10508    if !cache_tree_is_valid(&cache_tree) {
10509        return Ok(false);
10510    }
10511    let Some(root_oid) = cache_tree.oid.as_ref() else {
10512        return Ok(false);
10513    };
10514    if root_oid != head_tree_oid {
10515        return Ok(false);
10516    }
10517    Ok(cache_tree.entry_count as usize == stage0_entry_count)
10518}
10519
10520fn head_matches_borrowed_index_from_cache_tree(
10521    index: &BorrowedIndex<'_>,
10522    format: ObjectFormat,
10523    head_tree_oid: &ObjectId,
10524    stage0_entry_count: usize,
10525) -> Result<bool> {
10526    let cache_tree = match index.cache_tree(format) {
10527        Ok(Some(cache_tree)) => cache_tree,
10528        Ok(None) | Err(_) => return Ok(false),
10529    };
10530    if !cache_tree_is_valid(&cache_tree) {
10531        return Ok(false);
10532    }
10533    let Some(root_oid) = cache_tree.oid.as_ref() else {
10534        return Ok(false);
10535    };
10536    if root_oid != head_tree_oid {
10537        return Ok(false);
10538    }
10539    Ok(cache_tree.entry_count as usize == stage0_entry_count)
10540}
10541
10542/// Parses the index a single time and returns both the path -> [`TrackedEntry`]
10543/// map used for status comparisons AND the [`IndexStatCache`] used to short-cut
10544/// the worktree walk, avoiding a second parse of the same file.
10545fn read_index_entries_with_stat_cache(
10546    git_dir: &Path,
10547    format: ObjectFormat,
10548    db: &FileObjectDatabase,
10549) -> Result<(BTreeMap<Vec<u8>, TrackedEntry>, IndexStatCache, bool)> {
10550    let (index, stat_cache, head_matches_index) = read_index_with_stat_cache(git_dir, format, db)?;
10551    let tracked = index_entries_from_index(index);
10552    Ok((tracked, stat_cache, head_matches_index))
10553}
10554
10555fn index_entries_from_index(index: Index) -> BTreeMap<Vec<u8>, TrackedEntry> {
10556    index
10557        .entries
10558        .into_iter()
10559        .filter(|entry| entry.stage() == Stage::Normal)
10560        .map(|entry| {
10561            (
10562                entry.path.into_bytes(),
10563                TrackedEntry {
10564                    mode: entry.mode,
10565                    oid: entry.oid,
10566                },
10567            )
10568        })
10569        .collect()
10570}
10571
10572fn read_index_with_stat_cache(
10573    git_dir: &Path,
10574    format: ObjectFormat,
10575    db: &FileObjectDatabase,
10576) -> Result<(Index, IndexStatCache, bool)> {
10577    read_index_with_stat_cache_entries(git_dir, format, db, true)
10578}
10579
10580fn read_index_with_stat_cache_entries(
10581    git_dir: &Path,
10582    format: ObjectFormat,
10583    db: &FileObjectDatabase,
10584    include_entries: bool,
10585) -> Result<(Index, IndexStatCache, bool)> {
10586    let index_path = repository_index_path(git_dir);
10587    let index_metadata = match fs::metadata(&index_path) {
10588        Ok(metadata) => metadata,
10589        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
10590            return Ok((
10591                Index {
10592                    version: 2,
10593                    entries: Vec::new(),
10594                    extensions: Vec::new(),
10595                    checksum: None,
10596                },
10597                IndexStatCache::default(),
10598                false,
10599            ));
10600        }
10601        Err(err) => return Err(err.into()),
10602    };
10603    let index = Index::parse(&fs::read(&index_path)?, format)?;
10604    let index_mtime = file_mtime_parts(&index_metadata);
10605    let stage0_entry_count = index
10606        .entries
10607        .iter()
10608        .filter(|entry| index_entry_stage(entry) == 0)
10609        .count();
10610    let stat_cache = if include_entries {
10611        IndexStatCache::from_index_mtime(&index, index_mtime)
10612    } else {
10613        IndexStatCache::from_index_mtime_only(index_mtime)
10614    };
10615    let head_matches_index = match resolve_head_tree_oid(git_dir, format, db)? {
10616        Some(head_tree_oid) => {
10617            head_matches_index_from_cache_tree(&index, format, &head_tree_oid, stage0_entry_count)?
10618        }
10619        None => false,
10620    };
10621    Ok((index, stat_cache, head_matches_index))
10622}
10623
10624fn head_tree_entries(
10625    git_dir: &Path,
10626    format: ObjectFormat,
10627    db: &FileObjectDatabase,
10628) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
10629    let refs = FileRefStore::new(git_dir, format);
10630    let Some(head) = refs.read_ref("HEAD")? else {
10631        return Ok(BTreeMap::new());
10632    };
10633    let commit_oid = match head {
10634        RefTarget::Direct(oid) => Some(oid),
10635        RefTarget::Symbolic(name) => match refs.read_ref(&name)? {
10636            Some(RefTarget::Direct(oid)) => Some(oid),
10637            _ => None,
10638        },
10639    };
10640    let Some(commit_oid) = commit_oid else {
10641        return Ok(BTreeMap::new());
10642    };
10643    let object = read_expected_object(db, &commit_oid, ObjectType::Commit)?;
10644    let commit = Commit::parse_ref(format, &object.body)?;
10645    let mut entries = BTreeMap::new();
10646    collect_tree_entries(db, format, &commit.tree, &mut entries)?;
10647    Ok(entries)
10648}
10649
10650fn tree_entries(
10651    db: &FileObjectDatabase,
10652    format: ObjectFormat,
10653    tree_oid: &ObjectId,
10654) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
10655    let mut entries = BTreeMap::new();
10656    collect_tree_entries(db, format, tree_oid, &mut entries)?;
10657    Ok(entries)
10658}
10659
10660/// Flatten a tree's blob leaves into `entries`, keyed by full path.
10661///
10662/// Delegates to the canonical [`sley_diff_merge::flatten_tree`] (the local
10663/// recursive flattener was a byte-identical copy) and adapts its
10664/// `(mode, oid)` tuples into this module's [`TrackedEntry`]. Entries already
10665/// present in `entries` are overwritten, matching the previous insert-based
10666/// behaviour.
10667fn collect_tree_entries(
10668    db: &FileObjectDatabase,
10669    format: ObjectFormat,
10670    tree_oid: &ObjectId,
10671    entries: &mut BTreeMap<Vec<u8>, TrackedEntry>,
10672) -> Result<()> {
10673    for (path, (mode, oid)) in sley_diff_merge::flatten_tree(db, format, tree_oid)? {
10674        entries.insert(path, TrackedEntry { mode, oid });
10675    }
10676    Ok(())
10677}
10678
10679/// Like a full worktree walk, but accepts the index's [`IndexStatCache`] so the
10680/// walk can reuse a cached oid for files that are provably unchanged since they
10681/// were staged, skipping the read+filter+hash for those paths. Passing `None`
10682/// hashes every file when no stat cache is supplied.
10683fn worktree_entries_with_stat_cache(
10684    worktree_root: &Path,
10685    git_dir: &Path,
10686    format: ObjectFormat,
10687    stat_cache: Option<&IndexStatCache>,
10688    tracked_paths: Option<&BTreeSet<Vec<u8>>>,
10689    ignores: Option<&mut IgnoreMatcher>,
10690) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
10691    Ok(worktree_entries_with_submodule_dirt(
10692        worktree_root,
10693        git_dir,
10694        format,
10695        stat_cache,
10696        tracked_paths,
10697        ignores,
10698    )?
10699    .0)
10700}
10701
/// Result of a worktree walk, as a pair:
///
/// 1. worktree entries keyed by repo path;
/// 2. the dirt mask ([`DIRTY_SUBMODULE_MODIFIED`] /
///    [`DIRTY_SUBMODULE_UNTRACKED`]) for every tracked gitlink path whose
///    submodule working tree is dirty. Clean submodules have no entry here.
type WorktreeEntriesWithDirt = (BTreeMap<Vec<u8>, TrackedEntry>, BTreeMap<Vec<u8>, u8>);
10706
/// Status worktree snapshot, as a triple:
///
/// 1. recorded worktree entries (tracked and untracked) keyed by repo path;
/// 2. dirt masks for tracked gitlink paths whose submodule worktree is dirty;
/// 3. the set of tracked paths that were observed in the worktree.
type StatusWorktreeSnapshot = (
    BTreeMap<Vec<u8>, TrackedEntry>,
    BTreeMap<Vec<u8>, u8>,
    HashSet<Vec<u8>>,
);
10714
10715/// Like [`worktree_entries_with_stat_cache`], but also reports, for every
10716/// tracked gitlink path whose submodule working tree is dirty, the dirt mask
10717/// ([`DIRTY_SUBMODULE_MODIFIED`] / [`DIRTY_SUBMODULE_UNTRACKED`]).
10718fn worktree_entries_with_submodule_dirt(
10719    worktree_root: &Path,
10720    git_dir: &Path,
10721    format: ObjectFormat,
10722    stat_cache: Option<&IndexStatCache>,
10723    tracked_paths: Option<&BTreeSet<Vec<u8>>>,
10724    ignores: Option<&mut IgnoreMatcher>,
10725) -> Result<WorktreeEntriesWithDirt> {
10726    let mut entries = BTreeMap::new();
10727    let mut submodule_dirt_map = BTreeMap::new();
10728    let mut tracked_presence = HashSet::new();
10729    // Worktree blobs are compared to the index by OID, so they must be passed
10730    // through the clean filter (core.autocrlf / .gitattributes) first -- exactly
10731    // as `git add` would store them. With no filter configured this is an exact
10732    // passthrough, so unfiltered repositories see identical OIDs.
10733    let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
10734    // Seed the matcher with the repo-wide sources only; each directory's
10735    // `.gitattributes` is folded in by `collect_worktree_entries` as it descends,
10736    // so the worktree is read exactly once (a separate full-tree attribute pass was
10737    // a second traversal of every directory).
10738    let mut attr_matcher = AttributeMatcher::from_worktree_base(worktree_root);
10739    let attr_requested = filter_attribute_names();
10740    let mut context = WorktreeEntriesWalk {
10741        git_dir,
10742        format,
10743        config: &config,
10744        matcher: &mut attr_matcher,
10745        requested: &attr_requested,
10746        stat_cache,
10747        tracked_paths,
10748        ignores,
10749        entries: &mut entries,
10750        submodule_dirt: &mut submodule_dirt_map,
10751        tracked_presence: &mut tracked_presence,
10752        record_clean_tracked: true,
10753    };
10754    collect_worktree_entries(&mut context, worktree_root, &[])?;
10755    Ok((entries, submodule_dirt_map))
10756}
10757
10758fn status_worktree_entries_with_submodule_dirt(
10759    worktree_root: &Path,
10760    git_dir: &Path,
10761    format: ObjectFormat,
10762    stat_cache: &IndexStatCache,
10763    tracked_paths: Option<&BTreeSet<Vec<u8>>>,
10764    ignores: Option<&mut IgnoreMatcher>,
10765) -> Result<StatusWorktreeSnapshot> {
10766    let mut entries = BTreeMap::new();
10767    let mut submodule_dirt_map = BTreeMap::new();
10768    let mut tracked_presence = HashSet::new();
10769    let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
10770    let mut attr_matcher = AttributeMatcher::from_worktree_base(worktree_root);
10771    let attr_requested = filter_attribute_names();
10772    let mut context = WorktreeEntriesWalk {
10773        git_dir,
10774        format,
10775        config: &config,
10776        matcher: &mut attr_matcher,
10777        requested: &attr_requested,
10778        stat_cache: Some(stat_cache),
10779        tracked_paths,
10780        ignores,
10781        entries: &mut entries,
10782        submodule_dirt: &mut submodule_dirt_map,
10783        tracked_presence: &mut tracked_presence,
10784        record_clean_tracked: false,
10785    };
10786    collect_worktree_entries(&mut context, worktree_root, &[])?;
10787    Ok((entries, submodule_dirt_map, tracked_presence))
10788}
10789
10790fn worktree_entry_for_git_path(
10791    worktree_root: &Path,
10792    git_dir: &Path,
10793    format: ObjectFormat,
10794    git_path: &[u8],
10795    expected_oid: &ObjectId,
10796    expected_mode: u32,
10797    stat_cache: Option<&IndexStatCache>,
10798) -> Result<Option<TrackedEntry>> {
10799    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
10800    let metadata = match fs::symlink_metadata(&absolute) {
10801        Ok(metadata) => metadata,
10802        Err(err)
10803            if matches!(
10804                err.kind(),
10805                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
10806            ) =>
10807        {
10808            return Ok(None);
10809        }
10810        Err(err) => return Err(err.into()),
10811    };
10812
10813    if expected_mode == 0o160000 {
10814        if !metadata.is_dir() {
10815            return Ok(Some(TrackedEntry {
10816                mode: worktree_entry_mode(&metadata),
10817                oid: ObjectId::null(format),
10818            }));
10819        }
10820        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(*expected_oid);
10821        return Ok(Some(TrackedEntry {
10822            mode: 0o160000,
10823            oid,
10824        }));
10825    }
10826
10827    if metadata.is_dir() {
10828        return Ok(Some(TrackedEntry {
10829            mode: worktree_entry_mode(&metadata),
10830            oid: ObjectId::null(format),
10831        }));
10832    }
10833
10834    if !(metadata.is_file() || metadata.file_type().is_symlink()) {
10835        return Ok(Some(TrackedEntry {
10836            mode: worktree_entry_mode(&metadata),
10837            oid: ObjectId::null(format),
10838        }));
10839    }
10840
10841    if let Some(tracked) =
10842        stat_cache.and_then(|cache| cache.reuse_tracked_entry(git_path, &metadata))
10843    {
10844        return Ok(Some(tracked));
10845    }
10846
10847    let mode = worktree_entry_mode(&metadata);
10848    let body = if metadata.file_type().is_symlink() {
10849        symlink_target_bytes(&absolute)?
10850    } else {
10851        let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
10852        let body = fs::read(&absolute)?;
10853        apply_clean_filter(worktree_root, git_dir, &config, git_path, &body)?
10854    };
10855    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
10856    Ok(Some(TrackedEntry { mode, oid }))
10857}
10858
10859fn worktree_entry_for_index_entry_with_attributes(
10860    worktree_root: &Path,
10861    git_dir: &Path,
10862    format: ObjectFormat,
10863    index_entry: &IndexEntry,
10864    stat_cache: &IndexStatCache,
10865    clean_filter: &mut Option<TrackedOnlyCleanFilter>,
10866) -> Result<Option<TrackedEntry>> {
10867    let git_path = index_entry.path.as_bytes();
10868    let expected_mode = index_entry.mode;
10869    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
10870    let metadata = match fs::symlink_metadata(&absolute) {
10871        Ok(metadata) => metadata,
10872        Err(err)
10873            if matches!(
10874                err.kind(),
10875                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
10876            ) =>
10877        {
10878            return Ok(None);
10879        }
10880        Err(err) => return Err(err.into()),
10881    };
10882    let file_type = metadata.file_type();
10883
10884    if expected_mode == 0o160000 {
10885        if !file_type.is_dir() {
10886            return Ok(Some(TrackedEntry {
10887                mode: worktree_entry_mode(&metadata),
10888                oid: ObjectId::null(format),
10889            }));
10890        }
10891        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(index_entry.oid);
10892        return Ok(Some(TrackedEntry {
10893            mode: 0o160000,
10894            oid,
10895        }));
10896    }
10897
10898    if file_type.is_dir() {
10899        return Ok(Some(TrackedEntry {
10900            mode: worktree_entry_mode(&metadata),
10901            oid: ObjectId::null(format),
10902        }));
10903    }
10904
10905    if !(file_type.is_file() || file_type.is_symlink()) {
10906        return Ok(Some(TrackedEntry {
10907            mode: worktree_entry_mode(&metadata),
10908            oid: ObjectId::null(format),
10909        }));
10910    }
10911
10912    if let Some(tracked) = stat_cache.reuse_index_entry(index_entry, &metadata) {
10913        return Ok(Some(tracked));
10914    }
10915
10916    let mode = worktree_entry_mode(&metadata);
10917    let body = if file_type.is_symlink() {
10918        symlink_target_bytes(&absolute)?
10919    } else {
10920        let body = fs::read(&absolute)?;
10921        let clean_filter = tracked_only_clean_filter(clean_filter, worktree_root, git_dir);
10922        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
10923        let checks =
10924            clean_filter
10925                .matcher
10926                .attributes_for_path(git_path, &clean_filter.requested, false);
10927        apply_clean_filter_with_attributes(&clean_filter.config, &checks, git_path, &body)?
10928    };
10929    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
10930    Ok(Some(TrackedEntry { mode, oid }))
10931}
10932
10933fn worktree_entry_for_index_entry_ref_with_attributes(
10934    worktree_root: &Path,
10935    git_dir: &Path,
10936    format: ObjectFormat,
10937    index_entry: &IndexEntryRef<'_>,
10938    stat_cache: &IndexStatCache,
10939    clean_filter: &mut Option<TrackedOnlyCleanFilter>,
10940) -> Result<Option<TrackedEntry>> {
10941    let git_path = index_entry.path;
10942    let expected_mode = index_entry.mode;
10943    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
10944    let metadata = match fs::symlink_metadata(&absolute) {
10945        Ok(metadata) => metadata,
10946        Err(err)
10947            if matches!(
10948                err.kind(),
10949                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
10950            ) =>
10951        {
10952            return Ok(None);
10953        }
10954        Err(err) => return Err(err.into()),
10955    };
10956    let file_type = metadata.file_type();
10957
10958    if expected_mode == 0o160000 {
10959        if !file_type.is_dir() {
10960            return Ok(Some(TrackedEntry {
10961                mode: worktree_entry_mode(&metadata),
10962                oid: ObjectId::null(format),
10963            }));
10964        }
10965        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(index_entry.oid);
10966        return Ok(Some(TrackedEntry {
10967            mode: 0o160000,
10968            oid,
10969        }));
10970    }
10971
10972    if file_type.is_dir() {
10973        return Ok(Some(TrackedEntry {
10974            mode: worktree_entry_mode(&metadata),
10975            oid: ObjectId::null(format),
10976        }));
10977    }
10978
10979    if !(file_type.is_file() || file_type.is_symlink()) {
10980        return Ok(Some(TrackedEntry {
10981            mode: worktree_entry_mode(&metadata),
10982            oid: ObjectId::null(format),
10983        }));
10984    }
10985
10986    if let Some(tracked) = stat_cache.reuse_index_entry_ref(index_entry, &metadata) {
10987        return Ok(Some(tracked));
10988    }
10989
10990    let mode = worktree_entry_mode(&metadata);
10991    let body = if file_type.is_symlink() {
10992        symlink_target_bytes(&absolute)?
10993    } else {
10994        let body = fs::read(&absolute)?;
10995        let clean_filter = tracked_only_clean_filter(clean_filter, worktree_root, git_dir);
10996        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
10997        let checks =
10998            clean_filter
10999                .matcher
11000                .attributes_for_path(git_path, &clean_filter.requested, false);
11001        apply_clean_filter_with_attributes(&clean_filter.config, &checks, git_path, &body)?
11002    };
11003    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
11004    Ok(Some(TrackedEntry { mode, oid }))
11005}
11006
/// Lazily-built state for clean-filtering individual tracked paths without
/// walking the whole worktree.
struct TrackedOnlyCleanFilter {
    /// Repository configuration handed to the clean filter.
    config: GitConfig,
    /// Attribute matcher, seeded from the worktree base and extended with
    /// per-directory patterns as paths are looked up.
    matcher: AttributeMatcher,
    /// Attribute names requested from `matcher` for each path.
    requested: Vec<Vec<u8>>,
    /// Directories (as repo paths; empty for the root) whose attribute
    /// patterns have already been read, so each is read at most once.
    attribute_dirs: BTreeSet<Vec<u8>>,
}
11013
11014impl TrackedOnlyCleanFilter {
11015    fn read_attributes_for_path(&mut self, worktree_root: &Path, git_path: &[u8]) -> Result<()> {
11016        self.read_attribute_dir(worktree_root, &[])?;
11017        let mut prefix = Vec::new();
11018        let mut parts = git_path.split(|byte| *byte == b'/').peekable();
11019        while let Some(part) = parts.next() {
11020            if parts.peek().is_none() {
11021                break;
11022            }
11023            if !prefix.is_empty() {
11024                prefix.push(b'/');
11025            }
11026            prefix.extend_from_slice(part);
11027            self.read_attribute_dir(worktree_root, &prefix)?;
11028        }
11029        Ok(())
11030    }
11031
11032    fn read_attribute_dir(&mut self, worktree_root: &Path, git_path: &[u8]) -> Result<()> {
11033        if !self.attribute_dirs.insert(git_path.to_vec()) {
11034            return Ok(());
11035        }
11036        let dir = if git_path.is_empty() {
11037            worktree_root.to_path_buf()
11038        } else {
11039            worktree_root.join(repo_path_to_os_path(git_path)?)
11040        };
11041        read_dir_attribute_patterns(worktree_root, &dir, &mut self.matcher)
11042    }
11043}
11044
11045fn tracked_only_clean_filter<'a>(
11046    clean_filter: &'a mut Option<TrackedOnlyCleanFilter>,
11047    worktree_root: &Path,
11048    git_dir: &Path,
11049) -> &'a mut TrackedOnlyCleanFilter {
11050    if clean_filter.is_none() {
11051        *clean_filter = Some(TrackedOnlyCleanFilter {
11052            config: sley_config::read_repo_config(git_dir, None).unwrap_or_default(),
11053            matcher: AttributeMatcher::from_worktree_base(worktree_root),
11054            requested: filter_attribute_names(),
11055            attribute_dirs: BTreeSet::new(),
11056        });
11057    }
11058    clean_filter
11059        .as_mut()
11060        .expect("tracked-only clean filter initialized")
11061}
11062
11063fn tracked_only_clean_filter_with_config<'a>(
11064    clean_filter: &'a mut Option<TrackedOnlyCleanFilter>,
11065    worktree_root: &Path,
11066    config: &GitConfig,
11067) -> &'a mut TrackedOnlyCleanFilter {
11068    if clean_filter.is_none() {
11069        *clean_filter = Some(TrackedOnlyCleanFilter {
11070            config: config.clone(),
11071            matcher: AttributeMatcher::from_worktree_base(worktree_root),
11072            requested: filter_attribute_names(),
11073            attribute_dirs: BTreeSet::new(),
11074        });
11075    }
11076    clean_filter
11077        .as_mut()
11078        .expect("tracked-only clean filter initialized")
11079}
11080
/// Shared state threaded through the recursive worktree walk
/// (`collect_worktree_entries`): borrowed inputs plus the output collections
/// the walk fills in.
struct WorktreeEntriesWalk<'a> {
    /// Repository git directory; the walk skips any path equal to it.
    git_dir: &'a Path,
    /// Object format used for hashing blobs and for null oids.
    format: ObjectFormat,
    /// Repository configuration handed to the clean filter.
    config: &'a GitConfig,
    /// Attribute matcher, extended with each directory's patterns as the walk
    /// descends.
    matcher: &'a mut AttributeMatcher,
    /// Attribute names requested from `matcher` for each file.
    requested: &'a [Vec<u8>],
    /// Index stat cache; when present it enables oid reuse for unchanged
    /// files and tells tracked paths apart from untracked ones.
    stat_cache: Option<&'a IndexStatCache>,
    /// When present, restricts the walk to these paths and the directories
    /// that may contain them.
    tracked_paths: Option<&'a BTreeSet<Vec<u8>>>,
    /// Optional ignore matcher, extended per directory during the walk.
    ignores: Option<&'a mut IgnoreMatcher>,
    /// Output: worktree entries keyed by repo path.
    entries: &'a mut BTreeMap<Vec<u8>, TrackedEntry>,
    /// Dirt masks for tracked gitlink paths whose submodule worktree is dirty.
    submodule_dirt: &'a mut BTreeMap<Vec<u8>, u8>,
    /// Output: tracked paths that were seen in the worktree.
    tracked_presence: &'a mut HashSet<Vec<u8>>,
    /// When `false`, a tracked entry equal to what the stat cache holds is
    /// not written to `entries` (it is still noted in `tracked_presence`).
    record_clean_tracked: bool,
}
11096
11097impl WorktreeEntriesWalk<'_> {
11098    fn mark_tracked_present(&mut self, git_path: &[u8]) {
11099        self.tracked_presence.insert(git_path.to_vec());
11100    }
11101
11102    fn tracked_entry_for(&self, git_path: &[u8]) -> Option<TrackedEntry> {
11103        self.stat_cache
11104            .and_then(|cache| cache.tracked_entry(git_path))
11105    }
11106
11107    fn should_record_tracked_entry(&self, git_path: &[u8], entry: &TrackedEntry) -> bool {
11108        self.record_clean_tracked
11109            || self
11110                .tracked_entry_for(git_path)
11111                .is_none_or(|tracked| tracked != *entry)
11112    }
11113}
11114
11115fn git_path_append_component(parent: &[u8], component: &std::ffi::OsStr) -> Vec<u8> {
11116    let component = os_str_component_bytes(component);
11117    let separator = usize::from(!parent.is_empty());
11118    let mut path = Vec::with_capacity(parent.len() + separator + component.len());
11119    if !parent.is_empty() {
11120        path.extend_from_slice(parent);
11121        path.push(b'/');
11122    }
11123    path.extend_from_slice(component.as_ref());
11124    path
11125}
11126
11127fn git_path_push_component(path: &mut Vec<u8>, component: &std::ffi::OsStr) -> usize {
11128    let original_len = path.len();
11129    let component = os_str_component_bytes(component);
11130    if !path.is_empty() {
11131        path.push(b'/');
11132    }
11133    path.extend_from_slice(component.as_ref());
11134    original_len
11135}
11136
/// Raw bytes of a path component. On Unix an `OsStr` is already a byte
/// string, so the bytes are borrowed without copying.
#[cfg(unix)]
fn os_str_component_bytes(component: &std::ffi::OsStr) -> Cow<'_, [u8]> {
    let raw: &[u8] = std::os::unix::ffi::OsStrExt::as_bytes(component);
    Cow::from(raw)
}
11143
/// Bytes of a path component on non-Unix targets: the component is converted
/// to a string lossily and that string's bytes are returned as an owned copy.
#[cfg(not(unix))]
fn os_str_component_bytes(component: &std::ffi::OsStr) -> Cow<'_, [u8]> {
    let lossy: String = component.to_string_lossy().into_owned();
    Cow::Owned(lossy.into_bytes())
}
11148
/// Recursively walks `dir` (whose repo path is `dir_git_path`, empty for the
/// worktree root) and records what it finds into `context`.
///
/// For each directory the walk first folds that directory's attribute
/// patterns (and, when an ignore matcher is present, its ignore patterns)
/// into the matchers, then visits every entry:
///
/// * `.git` entries and the repository's own git dir are skipped;
/// * ignored entries are skipped, except that an ignored directory is still
///   descended into when `tracked_paths` may contain something below it;
/// * a directory the stat cache knows as a gitlink is recorded as a single
///   mode-160000 entry and never descended into;
/// * a nested repository boundary is recorded as a single mode-040000 entry
///   with a null oid;
/// * other directories are recursed into (subject to `tracked_paths`);
/// * regular files and symlinks are recorded, reusing the stat cache's oid
///   where it allows, using a null oid for paths the stat cache does not
///   contain, and hashing the (clean-filtered) content otherwise;
/// * any other file type is left out.
///
/// I/O errors from reading directories, metadata or file contents are
/// propagated.
fn collect_worktree_entries(
    context: &mut WorktreeEntriesWalk<'_>,
    dir: &Path,
    dir_git_path: &[u8],
) -> Result<()> {
    if is_same_path(dir, context.git_dir) {
        return Ok(());
    }
    // Fold this directory's `.gitattributes` into the matcher before processing its
    // files, so lookups for files here (and below) see it. This is what lets the
    // walk read the tree once instead of doing a separate full-tree attribute pass.
    read_dir_attribute_patterns_for_base(dir, dir_git_path, context.matcher)?;
    if let Some(ignores) = context.ignores.as_deref_mut() {
        read_dir_ignore_patterns_for_base(dir, dir_git_path, ignores)?;
    }
    for entry in fs::read_dir(dir)? {
        let entry = entry?;
        let file_name = entry.file_name();
        let path = entry.path();
        if is_dot_git_entry(&path) {
            continue;
        }
        if is_same_path(&path, context.git_dir) {
            continue;
        }
        let metadata = entry.metadata()?;
        let git_path = git_path_append_component(dir_git_path, &file_name);
        if context
            .ignores
            .as_ref()
            .is_some_and(|ignores| ignores.is_ignored(&git_path, metadata.is_dir()))
        {
            // An ignored directory is still walked when a tracked path may
            // live underneath it; everything else that is ignored is dropped.
            if metadata.is_dir()
                && context.tracked_paths.is_some_and(|tracked_paths| {
                    tracked_paths_may_contain(tracked_paths, &git_path)
                })
            {
                collect_worktree_entries(context, &path, &git_path)?;
            }
            continue;
        }
        if metadata.is_dir() {
            // A directory staged as a gitlink (mode 160000) is opaque: the walk
            // never descends into it. Its worktree "content" is the commit the
            // embedded repository has checked out (upstream ce_compare_gitlink):
            // a populated submodule reports its HEAD (plus a dirt mask when its
            // own tree has modified/untracked content); an unpopulated
            // directory — no repository, or no commit checked out — always
            // matches the staged oid.
            if let Some(index_entry) = context
                .stat_cache
                .and_then(|cache| cache.gitlink_entry(&git_path))
            {
                context.mark_tracked_present(&git_path);
                let oid = sley_diff_merge::gitlink_head_oid(&path, context.format)
                    .unwrap_or(index_entry.oid);
                let dirt = submodule_dirt(&path);
                if dirt != 0 {
                    context.submodule_dirt.insert(git_path.clone(), dirt);
                }
                let tracked = TrackedEntry {
                    mode: 0o160000,
                    oid,
                };
                // A dirty submodule is always recorded, even when its HEAD
                // matches the staged oid.
                if dirt != 0 || context.should_record_tracked_entry(&git_path, &tracked) {
                    context.entries.insert(git_path, tracked);
                }
                continue;
            }
            if is_nested_repository_boundary(&path) {
                if let Some(tracked_paths) = context.tracked_paths
                    && !tracked_paths_may_contain(tracked_paths, &git_path)
                {
                    continue;
                }
                // Recorded as one directory entry with a null oid; the walk
                // does not descend into it.
                context.entries.insert(
                    git_path,
                    TrackedEntry {
                        mode: 0o040000,
                        oid: ObjectId::null(context.format),
                    },
                );
                continue;
            }
            if let Some(tracked_paths) = context.tracked_paths
                && !tracked_paths_may_contain(tracked_paths, &git_path)
            {
                continue;
            }
            collect_worktree_entries(context, &path, &git_path)?;
        } else if metadata.is_file() || metadata.file_type().is_symlink() {
            if let Some(tracked_paths) = context.tracked_paths
                && !tracked_paths.contains(&git_path)
            {
                continue;
            }
            let entry_mode = worktree_entry_mode(&metadata);
            // git's racy-git stat shortcut: when the index's cached stat proves
            // this file is unchanged since it was staged, reuse the staged oid
            // and skip the read+filter+hash entirely. `reuse_tracked_entry`
            // returns `Some` ONLY for a non-racy size+mtime+mode match, so a
            // modified file always falls through to the full hash below and is
            // never silently reported clean.
            if let Some(tracked) = context
                .stat_cache
                .and_then(|cache| cache.reuse_tracked_entry(&git_path, &metadata))
            {
                context.mark_tracked_present(&git_path);
                if context.record_clean_tracked {
                    context.entries.insert(git_path, tracked);
                }
                continue;
            }
            // A file absent from the index is untracked: status and the
            // index-vs-worktree diff report it by *presence* (`??` / nothing), never
            // by content, so computing its oid is wasted work — git never hashes
            // untracked files. Record presence with a null oid and skip the
            // read+filter+hash. Without a stat cache we cannot tell tracked from
            // untracked, so fall through and hash as before.
            if context
                .stat_cache
                .is_some_and(|cache| !cache.contains(&git_path))
            {
                context.entries.insert(
                    git_path,
                    TrackedEntry {
                        mode: entry_mode,
                        oid: ObjectId::null(context.format),
                    },
                );
                continue;
            }
            let body = if metadata.file_type().is_symlink() {
                // The blob for a symlink is the raw link target; clean filters
                // never apply because git treats symlink content as opaque.
                symlink_target_bytes(&path)?
            } else {
                let body = fs::read(&path)?;
                // Resolve this path's attributes against the prebuilt matcher (a cheap
                // pattern match) and apply the clean filter -- no per-file matcher
                // rebuild. With no attributes/autocrlf configured this is an exact
                // passthrough, so the stored OID is unchanged.
                let checks =
                    context
                        .matcher
                        .attributes_for_path(&git_path, context.requested, false);
                apply_clean_filter_with_attributes(context.config, &checks, &git_path, &body)?
            };
            let oid = EncodedObject::new(ObjectType::Blob, body).object_id(context.format)?;
            let tracked = TrackedEntry {
                mode: entry_mode,
                oid,
            };
            // With a stat cache, reaching this point means the path is in the
            // index (untracked paths were handled above); without one, every
            // hashed file is recorded unconditionally.
            if context
                .stat_cache
                .is_some_and(|cache| cache.contains(&git_path))
            {
                context.mark_tracked_present(&git_path);
                if context.should_record_tracked_entry(&git_path, &tracked) {
                    context.entries.insert(git_path, tracked);
                }
            } else {
                context.entries.insert(git_path, tracked);
            }
        }
    }
    Ok(())
}
11317
/// Reports whether `directory` is itself a tracked path or is the parent of at
/// least one tracked path.
///
/// `tracked_paths` is ordered, so every path below `directory` forms one
/// contiguous run starting at the first key that is `>= "<directory>/"`.
/// Inspecting that single key is therefore enough to answer the question.
fn tracked_paths_may_contain(tracked_paths: &BTreeSet<Vec<u8>>, directory: &[u8]) -> bool {
    use std::ops::Bound;

    if tracked_paths.contains(directory) {
        return true;
    }
    // `<directory>/` is the smallest possible key for anything inside it.
    let child_prefix = [directory, &b"/"[..]].concat();
    let first_candidate = tracked_paths
        .range::<[u8], _>((Bound::Included(child_prefix.as_slice()), Bound::Unbounded))
        .next();
    match first_candidate {
        Some(candidate) => candidate.starts_with(&child_prefix),
        None => false,
    }
}
11333
/// Compares two paths component by component, which is the same notion of
/// equality `Path`'s `==` uses (so `a/b` and `a/b/` are the same path).
fn is_same_path(left: &Path, right: &Path) -> bool {
    Iterator::eq(left.components(), right.components())
}
11337
/// Returns `true` when the last component of `path` is exactly `.git`.
///
/// Git never reports a `.git` entry as untracked content, whatever its depth:
/// the repository's own `.git`, a submodule's gitlink file, or the `.git`
/// directory of an embedded repository.
fn is_dot_git_entry(path: &Path) -> bool {
    path.file_name().is_some_and(|name| name == ".git")
}
11344
11345/// Whether `path` is a directory containing an embedded repository's `.git`
11346/// *directory*, or a `.git` file whose `gitdir:` pointer resolves to an
11347/// existing directory (a submodule worktree). Git treats both as a repository
11348/// boundary (listing the directory as `dir/`); an *invalid* `.git` file (no
11349/// resolvable `gitdir:` target) is not a boundary — Git descends into the
11350/// directory and lists its other untracked contents normally.
11351fn is_nested_repository_boundary(path: &Path) -> bool {
11352    if path.join(".git").is_dir() {
11353        return true;
11354    }
11355    sley_diff_merge::gitlink_git_dir(path).is_some()
11356}
11357
/// Returns `true` when `path` is the `.git` directory of a repository embedded
/// somewhere below `root`, or lies inside such a directory.
///
/// The top-level `root/.git` is deliberately not counted, and a path outside
/// `root` is never considered embedded.
fn is_embedded_git_internals(root: &Path, path: &Path) -> bool {
    let relative = match path.strip_prefix(root) {
        Ok(relative) => relative,
        Err(_) => return false,
    };
    // `parent` is always the directory that contains the component under test.
    let mut parent = PathBuf::from(root);
    for component in relative.components() {
        let names_dot_git = match component {
            std::path::Component::Normal(name) => name == ".git",
            _ => false,
        };
        // The filesystem is only consulted for `.git` components below the root.
        if names_dot_git && parent != root && parent.join(".git").is_dir() {
            return true;
        }
        parent.push(component);
    }
    false
}
11375
11376fn worktree_entry_mode(metadata: &fs::Metadata) -> u32 {
11377    if metadata.file_type().is_symlink() {
11378        0o120000
11379    } else if metadata.is_dir() {
11380        0o040000
11381    } else {
11382        file_mode(metadata)
11383    }
11384}
11385
11386fn worktree_path(root: &Path, path: &[u8]) -> Result<PathBuf> {
11387    let text = std::str::from_utf8(path).map_err(|err| GitError::InvalidPath(err.to_string()))?;
11388    let relative = PathBuf::from(text);
11389    if relative.is_absolute()
11390        || relative.components().any(|component| {
11391            matches!(
11392                component,
11393                std::path::Component::ParentDir | std::path::Component::Prefix(_)
11394            )
11395        })
11396    {
11397        return Err(GitError::InvalidPath(format!(
11398            "invalid worktree path {text}"
11399        )));
11400    }
11401    Ok(root.join(relative))
11402}
11403
11404fn remove_worktree_file(root: &Path, path: &[u8]) -> Result<()> {
11405    let file = worktree_path(root, path)?;
11406    if !file.exists() {
11407        return Ok(());
11408    }
11409    if file.is_dir() {
11410        // A tracked path that is a directory on disk is a gitlink: upstream
11411        // checkout/reset never recurses into a submodule's working tree. It
11412        // rmdirs the path when empty (remove_scheduled_dirs) and leaves a
11413        // populated submodule in place.
11414        match fs::remove_dir(&file) {
11415            Ok(()) => prune_empty_parents(root, file.parent())?,
11416            Err(err) if err.kind() == std::io::ErrorKind::DirectoryNotEmpty => {}
11417            Err(err) => return Err(err.into()),
11418        }
11419        return Ok(());
11420    }
11421    fs::remove_file(&file)?;
11422    prune_empty_parents(root, file.parent())?;
11423    Ok(())
11424}
11425
11426fn prune_empty_parents(root: &Path, mut dir: Option<&Path>) -> Result<()> {
11427    while let Some(path) = dir {
11428        if path == root {
11429            break;
11430        }
11431        match fs::remove_dir(path) {
11432            Ok(()) => dir = path.parent(),
11433            Err(err) if err.kind() == std::io::ErrorKind::NotFound => dir = path.parent(),
11434            Err(err) if err.kind() == std::io::ErrorKind::DirectoryNotEmpty => break,
11435            Err(err) => return Err(err.into()),
11436        }
11437    }
11438    Ok(())
11439}
11440
11441fn git_tree_entry_cmp(
11442    left_name: &[u8],
11443    left_mode: u32,
11444    right_name: &[u8],
11445    right_mode: u32,
11446) -> Ordering {
11447    let shared = left_name.len().min(right_name.len());
11448    let name_order = left_name[..shared].cmp(&right_name[..shared]);
11449    if name_order != Ordering::Equal {
11450        return name_order;
11451    }
11452    let left_end = left_name.len() == shared;
11453    let right_end = right_name.len() == shared;
11454    match (left_end, right_end) {
11455        (true, true) => Ordering::Equal,
11456        (true, false) => tree_name_terminator(left_mode).cmp(&right_name[shared]),
11457        (false, true) => left_name[shared].cmp(&tree_name_terminator(right_mode)),
11458        (false, false) => Ordering::Equal,
11459    }
11460}
11461
/// Byte that conceptually follows an entry name when ordering tree entries:
/// `/` for a tree (`040000`), NUL for every other mode.
fn tree_name_terminator(mode: u32) -> u8 {
    match mode {
        0o040000 => b'/',
        _ => 0,
    }
}
11465
/// Unix: reports `100755` when any execute bit (owner, group or other) is set
/// in the file's permissions, and `100644` otherwise.
#[cfg(unix)]
fn file_mode(metadata: &fs::Metadata) -> u32 {
    use std::os::unix::fs::PermissionsExt;

    let executable = metadata.permissions().mode() & 0o111 != 0;
    if executable { 0o100755 } else { 0o100644 }
}
11475
/// Non-Unix fallback: the metadata is ignored and every file is reported as a
/// regular, non-executable blob (`100644`).
#[cfg(not(unix))]
fn file_mode(_metadata: &fs::Metadata) -> u32 {
    0o100644
}
11480
11481/// The blob content git stores for a symlink: the raw bytes of the link target
11482/// exactly as `readlink(2)` returns them. On Unix the target is an opaque byte
11483/// string, so we take the `OsStr` bytes verbatim (no UTF-8 round-trip, no path
11484/// re-componentization that could rewrite separators).
11485#[cfg(unix)]
11486fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
11487    use std::os::unix::ffi::OsStrExt;
11488    let target = fs::read_link(path)?;
11489    Ok(target.as_os_str().as_bytes().to_vec())
11490}
11491
/// Non-Unix variant: reads the link target, converts it to text with
/// `to_string_lossy`, and rewrites every `\` to `/` before returning the bytes.
///
/// Propagates the I/O error from `fs::read_link`.
#[cfg(not(unix))]
fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
    let target = fs::read_link(path)?;
    // git normalizes symlink targets to forward slashes on platforms whose
    // native separator is `\`.
    Ok(target.to_string_lossy().replace('\\', "/").into_bytes())
}
11499
11500fn git_path_bytes(path: &Path) -> Result<Vec<u8>> {
11501    if path.components().any(|component| {
11502        matches!(
11503            component,
11504            std::path::Component::ParentDir | std::path::Component::Prefix(_)
11505        )
11506    }) {
11507        return Err(GitError::InvalidPath(format!(
11508            "invalid index path {}",
11509            path.display()
11510        )));
11511    }
11512    Ok(path
11513        .components()
11514        .filter_map(|component| match component {
11515            std::path::Component::Normal(value) => Some(value.to_string_lossy().into_owned()),
11516            _ => None,
11517        })
11518        .collect::<Vec<_>>()
11519        .join("/")
11520        .into_bytes())
11521}
11522
11523fn repo_path_to_os_path(path: &[u8]) -> Result<PathBuf> {
11524    #[cfg(unix)]
11525    {
11526        use std::os::unix::ffi::OsStrExt;
11527
11528        Ok(PathBuf::from(std::ffi::OsStr::from_bytes(path)))
11529    }
11530
11531    #[cfg(not(unix))]
11532    {
11533        let path = std::str::from_utf8(path)
11534            .map_err(|_| GitError::InvalidPath("index path is not utf8".into()))?;
11535        Ok(path.split('/').collect())
11536    }
11537}
11538
11539fn git_path_to_relative_path(path: &[u8]) -> Result<PathBuf> {
11540    let path = std::str::from_utf8(path)
11541        .map_err(|err| GitError::InvalidPath(format!("invalid utf-8 index path: {err}")))?;
11542    Ok(path.split('/').collect())
11543}
11544
/// Returns `true` when the textual form of `path` ends with a path separator.
///
/// Any separator the platform accepts counts (`std::path::is_separator`): only
/// `/` on Unix, and both `/` and `\` on Windows, so `dir/` is recognised there
/// too. The check is on the raw text because `Path` comparisons and components
/// ignore a trailing separator.
fn path_has_trailing_separator(path: &Path) -> bool {
    path.as_os_str()
        .to_string_lossy()
        .ends_with(std::path::is_separator)
}
11550
11551#[cfg(test)]
11552mod tests {
11553    use super::*;
11554    use sley_odb::ObjectReader;
11555    use std::sync::atomic::{AtomicU64, Ordering};
11556
11557    static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
11558
    /// Writes `HEAD` through `write_metadata_file_atomic` and checks the file
    /// contents, the reported path/length/mtime, and that no `HEAD.lock` is
    /// left behind.
    #[test]
    fn atomic_metadata_writer_writes_and_reports_stat() {
        let root = temp_root();
        // Note: `.git` is never created here, so a successful write also means
        // the writer coped with a missing parent directory.
        let path = root.join(".git").join("HEAD");

        let result = write_metadata_file_atomic(
            &path,
            b"ref: refs/heads/main\n",
            AtomicMetadataWriteOptions::default(),
        )
        .expect("write metadata");

        assert_eq!(
            fs::read(&path).expect("read metadata"),
            b"ref: refs/heads/main\n"
        );
        assert_eq!(result.path, path);
        assert_eq!(result.len, b"ref: refs/heads/main\n".len() as u64);
        assert!(result.mtime.is_some());
        // The sibling lock file must be gone once the write has completed.
        assert!(!path.with_file_name("HEAD.lock").exists());
        fs::remove_dir_all(root).expect("test operation should succeed");
    }
11581
    /// With a pre-existing `HEAD.lock`, the write must fail with
    /// `GitError::Transaction` and leave both `HEAD` and the lock file untouched.
    #[test]
    fn atomic_metadata_writer_existing_lock_preserves_original() {
        let root = temp_root();
        let git_dir = root.join(".git");
        fs::create_dir_all(&git_dir).expect("create git dir");
        let path = git_dir.join("HEAD");
        let lock = git_dir.join("HEAD.lock");
        fs::write(&path, b"ref: refs/heads/main\n").expect("write original");
        // Simulate another writer holding the lock.
        fs::write(&lock, b"held\n").expect("write lock");

        let err = write_metadata_file_atomic(
            &path,
            b"ref: refs/heads/other\n",
            AtomicMetadataWriteOptions::default(),
        )
        .expect_err("held lock must fail");

        assert!(matches!(err, GitError::Transaction(_)));
        assert_eq!(
            fs::read(&path).expect("read original"),
            b"ref: refs/heads/main\n"
        );
        assert_eq!(fs::read(&lock).expect("read lock"), b"held\n");
        fs::remove_dir_all(root).expect("test operation should succeed");
    }
11607
11608    // --- `ls-files --eol` stat/attr helpers (mirror convert.c) ---------------
11609
    /// Pins the label `convert_stats_ascii` returns for each line-ending shape:
    /// `none`, `lf`, `crlf`, `mixed`, and `-text` for binary content.
    #[test]
    fn convert_stats_ascii_classifies_eol_content() {
        assert_eq!(convert_stats_ascii(b""), "none");
        assert_eq!(convert_stats_ascii(b"abc"), "none");
        assert_eq!(convert_stats_ascii(b"a\nb\n"), "lf");
        assert_eq!(convert_stats_ascii(b"a\r\nb\r\n"), "crlf");
        assert_eq!(convert_stats_ascii(b"a\r\nb\n"), "mixed");
        // A lone CR makes the content binary (-text), matching git.
        assert_eq!(convert_stats_ascii(b"a\rb"), "-text");
        // A NUL byte is binary.
        assert_eq!(convert_stats_ascii(b"a\0b\n"), "-text");
        // A trailing ^Z (EOF) is not counted as non-printable.
        assert_eq!(convert_stats_ascii(b"abc\n\x1a"), "lf");
    }
11624
11625    fn attr_check(name: &[u8], state: Option<AttributeState>) -> AttributeCheck {
11626        AttributeCheck {
11627            attribute: name.to_vec(),
11628            state,
11629        }
11630    }
11631
    /// Pins the attr-column text `convert_attr_ascii` produces for each
    /// combination of the `text` and `eol` attributes exercised below.
    #[test]
    fn convert_attr_ascii_matches_git_attr_action() {
        // No attributes at all: empty attr field.
        assert_eq!(convert_attr_ascii(&[]), "");
        // text (set) -> "text"; -text (unset) -> "-text".
        assert_eq!(
            convert_attr_ascii(&[attr_check(b"text", Some(AttributeState::Set))]),
            "text"
        );
        assert_eq!(
            convert_attr_ascii(&[attr_check(b"text", Some(AttributeState::Unset))]),
            "-text"
        );
        // text=auto -> "text=auto"; with eol=crlf/lf the AUTO variants.
        assert_eq!(
            convert_attr_ascii(&[attr_check(
                b"text",
                Some(AttributeState::Value(b"auto".to_vec()))
            )]),
            "text=auto"
        );
        assert_eq!(
            convert_attr_ascii(&[
                attr_check(b"text", Some(AttributeState::Value(b"auto".to_vec()))),
                attr_check(b"eol", Some(AttributeState::Value(b"crlf".to_vec()))),
            ]),
            "text=auto eol=crlf"
        );
        assert_eq!(
            convert_attr_ascii(&[
                attr_check(b"text", Some(AttributeState::Value(b"auto".to_vec()))),
                attr_check(b"eol", Some(AttributeState::Value(b"lf".to_vec()))),
            ]),
            "text=auto eol=lf"
        );
        // eol=crlf/lf alone (no text) forces text + the eol direction.
        assert_eq!(
            convert_attr_ascii(&[attr_check(
                b"eol",
                Some(AttributeState::Value(b"crlf".to_vec()))
            )]),
            "text eol=crlf"
        );
        assert_eq!(
            convert_attr_ascii(&[attr_check(
                b"eol",
                Some(AttributeState::Value(b"lf".to_vec()))
            )]),
            "text eol=lf"
        );
        // -text overrides any eol attribute (binary wins).
        assert_eq!(
            convert_attr_ascii(&[
                attr_check(b"text", Some(AttributeState::Unset)),
                attr_check(b"eol", Some(AttributeState::Value(b"crlf".to_vec()))),
            ]),
            "-text"
        );
    }
11691
    /// Checks `ContentFilterPlan::will_convert_lf_to_crlf` for the two CRLF
    /// plans built below: the `Auto` plan refuses content that already holds a
    /// CR, while the `Text` plan converts whenever a naked LF is present.
    #[test]
    fn smudge_safety_guard_skips_irreversible_autocrlf() {
        // text=auto eol=crlf (AUTO_CRLF): convert pure-LF, but leave content
        // alone when it already has a CR or CRLF, or is binary.
        let auto = ContentFilterPlan {
            text: TextDecision::Auto,
            eol: EolConversion::Crlf,
            driver: None,
        };
        assert!(auto.will_convert_lf_to_crlf(b"a\nb\n"));
        assert!(!auto.will_convert_lf_to_crlf(b"a\r\nb\n")); // has CRLF
        assert!(!auto.will_convert_lf_to_crlf(b"a\nb\rc")); // lone CR (binary)
        assert!(!auto.will_convert_lf_to_crlf(b"abc")); // no naked LF

        // text eol=crlf (TEXT_CRLF): no safety guard — always convert naked LF
        // even when a CR/CRLF is already present.
        let text = ContentFilterPlan {
            text: TextDecision::Text,
            eol: EolConversion::Crlf,
            driver: None,
        };
        assert!(text.will_convert_lf_to_crlf(b"a\r\nb\nc\n"));
        assert!(!text.will_convert_lf_to_crlf(b"a\r\nb\r\n")); // no naked LF
    }
11716
11717    /// Build an in-memory ignore matcher from raw `.gitignore` lines (no disk).
11718    fn ignore_matcher(patterns: &[&[u8]]) -> IgnoreMatcher {
11719        let mut matcher = IgnoreMatcher::default();
11720        let owned: Vec<Vec<u8>> = patterns.iter().map(|p| p.to_vec()).collect();
11721        matcher.extend_patterns(&owned);
11722        matcher
11723    }
11724
    /// For literal, `*suffix` and `prefix*` patterns, checks both the match
    /// results and the `MatchKind` that `classify_ignore_pattern` assigns.
    #[test]
    fn ignore_match_kind_fast_paths_match_the_wildcard_engine() {
        // Literal: exact basename anywhere; not a superstring.
        let matcher = ignore_matcher(&[b"Pods"]);
        assert!(matcher.is_ignored(b"a/b/Pods", true));
        assert!(matcher.is_ignored(b"Pods", false));
        assert!(!matcher.is_ignored(b"Pods_not", false));
        assert!(matches!(
            classify_ignore_pattern(b"Pods"),
            MatchKind::Literal
        ));

        // Suffix `*.log`: basename ending in `.log` at any depth.
        let matcher = ignore_matcher(&[b"*.log"]);
        assert!(matcher.is_ignored(b"x.log", false));
        assert!(matcher.is_ignored(b"a/b/x.log", false));
        assert!(matcher.is_ignored(b".log", false));
        assert!(!matcher.is_ignored(b"x.logx", false));
        assert!(matches!(
            classify_ignore_pattern(b"*.log"),
            MatchKind::Suffix
        ));

        // Prefix `build*`: basename starting with `build`.
        let matcher = ignore_matcher(&[b"build*"]);
        assert!(matcher.is_ignored(b"buildfoo", false));
        assert!(matcher.is_ignored(b"a/build", false));
        assert!(!matcher.is_ignored(b"xbuild", false));
        assert!(matches!(
            classify_ignore_pattern(b"build*"),
            MatchKind::Prefix
        ));
    }
11758
    /// Patterns with a leading `/` must match only at the matcher base, never
    /// inside a subdirectory.
    #[test]
    fn ignore_anchored_suffix_does_not_cross_slash() {
        // `/*.log` is anchored: matches `.log` files only at the matcher base,
        // never in a subdirectory — the slash guard in `match_segment`.
        let matcher = ignore_matcher(&[b"/*.log"]);
        assert!(matcher.is_ignored(b"x.log", false));
        assert!(!matcher.is_ignored(b"sub/x.log", false));

        // Anchored literal likewise only matches at root.
        let matcher = ignore_matcher(&[b"/foo"]);
        assert!(matcher.is_ignored(b"foo", false));
        assert!(!matcher.is_ignored(b"a/foo", false));
    }
11772
    /// An anchored, directory-only glob (`/tmp-*/`) ignores a matching root
    /// directory and the files below it, but not a nested directory of the same
    /// name nor a plain file with that name.
    #[test]
    fn ignore_anchored_directory_glob_matches_root_directory() {
        let matcher = ignore_matcher(&[b"/tmp-*/"]);
        assert!(matcher.is_ignored(b"tmp-info-only", true));
        assert!(matcher.is_ignored(b"tmp-info-only/file.txt", false));
        assert!(!matcher.is_ignored(b"nested/tmp-info-only", true));
        // Second argument `false`: the same name queried as a non-directory.
        assert!(!matcher.is_ignored(b"tmp-info-only", false));
    }
11781
    /// A negated directory-only glob re-includes directories but leaves the
    /// files inside them ignored unless a non-directory negation also matches.
    #[test]
    fn ignore_negated_directory_glob_does_not_reinclude_files() {
        // t0008-ignores "directories and ** matches": a negated directory-only
        // pattern re-includes *directories* but never the *files* inside them
        // (git: re-including a dir with `!dir/` still needs an explicit
        // `!dir/*` to reach its files). Verified against git 2.54 check-ignore:
        //   data/file              -> data/**           (ignored)
        //   data/data1/file1       -> data/**           (ignored, NOT !data/**/)
        //   data/data1/file1.txt   -> !data/**/*.txt    (re-included)
        //   data/data1   (dir)     -> !data/**/         (re-included)
        let matcher = ignore_matcher(&[b"data/**", b"!data/**/", b"!data/**/*.txt"]);
        // Files stay ignored: `!data/**/` must not win the file leaf scan.
        assert!(matcher.is_ignored(b"data/file", false));
        assert!(matcher.is_ignored(b"data/data1/file1", false));
        assert!(matcher.is_ignored(b"data/data2/file2", false));
        // `.txt` files are re-included by the explicit non-dir negation.
        assert!(!matcher.is_ignored(b"data/data1/file1.txt", false));
        assert!(!matcher.is_ignored(b"data/data2/file2.txt", false));
        // Directories ARE re-included by `!data/**/` (the directory-glob gain
        // from `fix: match git status ignored directory globs`).
        assert!(!matcher.is_ignored(b"data/data1", true));
        assert!(!matcher.is_ignored(b"data/data2", true));
    }
11805
    /// A leading `**/` behaves like a plain basename pattern when the tail has
    /// no slash, and like a trailing-path match at any depth when it does.
    #[test]
    fn ignore_double_star_prefix_collapses_to_basename() {
        // `**/X` ≡ `X` for slash-free X (verified against `git check-ignore`).
        let matcher = ignore_matcher(&[b"**/Pods"]);
        assert!(matcher.is_ignored(b"a/b/Pods", true));
        assert!(matcher.is_ignored(b"Pods", true));
        assert!(!matcher.is_ignored(b"Pods_not", false));

        let matcher = ignore_matcher(&[b"**/*.jks"]);
        assert!(matcher.is_ignored(b"x.jks", false));
        assert!(matcher.is_ignored(b"a/deep/y.jks", false));
        assert!(!matcher.is_ignored(b"x.jksx", false));

        // `**/A/B` keeps a slash in the tail, so it stays a real glob and must
        // match the trailing path at any depth.
        let matcher = ignore_matcher(&[b"**/Flutter/ephemeral"]);
        assert!(matcher.is_ignored(b"Flutter/ephemeral", true));
        assert!(matcher.is_ignored(b"a/Flutter/ephemeral", true));
        assert!(!matcher.is_ignored(b"Flutter/other", true));
    }
11826
    /// Patterns with character classes, `?`, or more than one `*` must be
    /// classified as `MatchKind::Glob` and still match correctly.
    #[test]
    fn ignore_complex_globs_still_use_the_engine() {
        let matcher = ignore_matcher(&[b"*.[Cc]ache"]);
        assert!(matcher.is_ignored(b"x.cache", false));
        assert!(matcher.is_ignored(b"x.Cache", false));
        assert!(!matcher.is_ignored(b"x.xache", false));
        assert!(matches!(
            classify_ignore_pattern(b"*.[Cc]ache"),
            MatchKind::Glob
        ));

        // `?` matches exactly one character: neither zero nor two.
        let matcher = ignore_matcher(&[b"Icon?"]);
        assert!(matcher.is_ignored(b"IconA", false));
        assert!(!matcher.is_ignored(b"Icon", false));
        assert!(!matcher.is_ignored(b"IconAB", false));

        // Multi-star is not a simple prefix/suffix.
        assert!(matches!(
            classify_ignore_pattern(b"app.*.symbols"),
            MatchKind::Glob
        ));
        assert!(matches!(classify_ignore_pattern(b"a*b*c"), MatchKind::Glob));
    }
11850
    /// A later negated pattern must still override an earlier fast-path match.
    #[test]
    fn ignore_negation_still_applies_after_fast_paths() {
        // Last match wins: a negated literal un-ignores a suffix-matched file.
        let matcher = ignore_matcher(&[b"*.log", b"!keep.log"]);
        assert!(matcher.is_ignored(b"a/x.log", false));
        assert!(!matcher.is_ignored(b"a/keep.log", false));
    }
11858
    /// Reading a blob that is absent from an empty object database must yield a
    /// typed not-found error carrying the object id, the `Blob` kind, and the
    /// `WorktreeMaterialize` context.
    #[test]
    fn read_expected_object_missing_blob_exposes_oid_and_kind() {
        let root = temp_root();
        let git_dir = root.join(".git");
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
        // Nothing was ever written to `objects`, so this id cannot be found.
        let missing = ObjectId::empty_blob(ObjectFormat::Sha1);

        let err = read_expected_object(&db, &missing, ObjectType::Blob)
            .expect_err("missing blob should error");
        let kind = err.not_found_kind().expect("typed not found");
        assert_eq!(kind.object_id(), Some(missing));
        assert_eq!(kind.missing_object_kind(), Some(MissingObjectKind::Blob));
        assert_eq!(
            kind.missing_object_context(),
            Some(MissingObjectContext::WorktreeMaterialize)
        );
        fs::remove_dir_all(root).expect("test operation should succeed");
    }
11878
    /// Adds one worktree file with `add_paths_to_index` and checks that the
    /// written SHA-1 index parses and lists that path first.
    #[test]
    fn update_index_adds_file_entry_and_blob() {
        let root = temp_root();
        let git_dir = root.join(".git");
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
        fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
        let result = add_paths_to_index(
            &root,
            &git_dir,
            ObjectFormat::Sha1,
            &[PathBuf::from("hello.txt")],
        )
        .expect("test operation should succeed");
        assert_eq!(result.entries, 1);
        // `git_dir` is passed by value here; it is not used again afterwards.
        let index = Index::parse_v2_sha1(
            &fs::read(repository_index_path(git_dir)).expect("test operation should succeed"),
        )
        .expect("test operation should succeed");
        assert_eq!(index.entries[0].path, b"hello.txt");
        fs::remove_dir_all(root).expect("test operation should succeed");
    }
11900
    /// Runs the add-to-index and write-tree path with `ObjectFormat::Sha256`
    /// and checks that the index entry, the tree id, and the stored tree object
    /// all come out in that format.
    #[test]
    fn update_index_and_write_tree_support_sha256() {
        let root = temp_root();
        let git_dir = root.join(".git");
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
        fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
        let result = add_paths_to_index(
            &root,
            &git_dir,
            ObjectFormat::Sha256,
            &[PathBuf::from("hello.txt")],
        )
        .expect("test operation should succeed");
        assert_eq!(result.entries, 1);

        let index = Index::parse(
            &fs::read(repository_index_path(&git_dir)).expect("test operation should succeed"),
            ObjectFormat::Sha256,
        )
        .expect("test operation should succeed");
        assert_eq!(index.entries[0].path, b"hello.txt");
        assert_eq!(index.entries[0].oid.format(), ObjectFormat::Sha256);

        let tree_oid = write_tree_from_index(&git_dir, ObjectFormat::Sha256)
            .expect("test operation should succeed");
        assert_eq!(tree_oid.format(), ObjectFormat::Sha256);
        // The tree must be readable back from the object database.
        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
        let tree = odb
            .read_object(&tree_oid)
            .expect("test operation should succeed");
        assert_eq!(tree.object_type, ObjectType::Tree);
        fs::remove_dir_all(root).expect("test operation should succeed");
    }
11934
    /// Stages a top-level file and a file in a subdirectory, writes the tree,
    /// and checks that the returned id reads back as a tree object.
    #[test]
    fn write_tree_from_index_writes_nested_tree_objects() {
        let root = temp_root();
        let git_dir = root.join(".git");
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
        fs::create_dir_all(root.join("src")).expect("test operation should succeed");
        fs::write(root.join("README.md"), b"readme\n").expect("test operation should succeed");
        fs::write(root.join("src").join("lib.rs"), b"pub fn demo() {}\n")
            .expect("test operation should succeed");
        let result = add_paths_to_index(
            &root,
            &git_dir,
            ObjectFormat::Sha1,
            &[PathBuf::from("README.md"), PathBuf::from("src/lib.rs")],
        )
        .expect("test operation should succeed");
        assert_eq!(result.entries, 2);
        let tree_oid = write_tree_from_index(&git_dir, ObjectFormat::Sha1)
            .expect("test operation should succeed");
        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
        let tree = odb
            .read_object(&tree_oid)
            .expect("test operation should succeed");
        assert_eq!(tree.object_type, ObjectType::Tree);
        fs::remove_dir_all(root).expect("test operation should succeed");
    }
11961
    /// With one staged file and one unstaged file, `short_status` must report
    /// `A  hello.txt` followed by `?? extra.txt`.
    #[test]
    fn short_status_reports_added_and_untracked_paths() {
        let root = temp_root();
        let git_dir = root.join(".git");
        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
        fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
        fs::write(root.join("extra.txt"), b"extra\n").expect("test operation should succeed");
        // Only `hello.txt` is staged; `extra.txt` stays untracked.
        add_paths_to_index(
            &root,
            &git_dir,
            ObjectFormat::Sha1,
            &[PathBuf::from("hello.txt")],
        )
        .expect("test operation should succeed");
        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
            .expect("test operation should succeed");
        assert_eq!(
            status
                .iter()
                .map(ShortStatusEntry::line)
                .collect::<Vec<_>>(),
            vec!["A  hello.txt", "?? extra.txt"]
        );
        fs::remove_dir_all(root).expect("test operation should succeed");
    }
11987
    /// `worktree_root_for_git_dir` must return `Ok(None)` for a git dir whose
    /// config sets `core.bare = true`, even when the directory is named `.git`.
    #[test]
    fn worktree_root_is_none_for_bare_repository() {
        // A bare git_dir (basename `.git`) with `core.bare = true` must resolve to
        // `Ok(None)` rather than falling through to the "parent of .git" case.
        let root = temp_root();
        let git_dir = root.join(".git");
        fs::create_dir_all(&git_dir).expect("create bare git dir");
        // Hermetic minimal config — do not depend on host gitconfig.
        fs::write(git_dir.join("config"), b"[core]\n\tbare = true\n").expect("write bare config");

        assert_eq!(
            worktree_root_for_git_dir(&git_dir).expect("resolve bare worktree root"),
            None,
            "a bare repository has no working tree"
        );

        fs::remove_dir_all(root).expect("test operation should succeed");
    }
12006
    /// `worktree_root_for_git_dir` must return the parent directory for a
    /// `.git` dir whose config sets `core.bare = false`.
    #[test]
    fn worktree_root_is_parent_for_non_bare_dot_git() {
        // A non-bare `.git` directory (no core.bare / core.bare = false) still
        // resolves to its parent — the ordinary non-bare layout.
        let root = temp_root();
        let work = root.join("work");
        let git_dir = work.join(".git");
        fs::create_dir_all(&git_dir).expect("create non-bare git dir");
        fs::write(git_dir.join("config"), b"[core]\n\tbare = false\n")
            .expect("write non-bare config");

        assert_eq!(
            worktree_root_for_git_dir(&git_dir).expect("resolve non-bare worktree root"),
            Some(work.clone()),
            "a non-bare .git dir resolves to its parent"
        );

        fs::remove_dir_all(root).expect("test operation should succeed");
    }
12026
12027    fn temp_root() -> PathBuf {
12028        let path = std::env::temp_dir().join(format!(
12029            "sley-worktree-{}-{}",
12030            std::process::id(),
12031            TEMP_COUNTER.fetch_add(1, Ordering::Relaxed)
12032        ));
12033        fs::create_dir_all(&path).expect("test operation should succeed");
12034        path
12035    }
12036
12037    fn index_entry_for<'a>(index: &'a Index, path: &[u8]) -> &'a IndexEntry {
12038        index
12039            .entries
12040            .iter()
12041            .find(|entry| entry.path == path)
12042            .unwrap_or_else(|| panic!("missing index entry for {}", String::from_utf8_lossy(path)))
12043    }
12044
12045    fn read_index(git_dir: &Path) -> Index {
12046        Index::parse(
12047            &fs::read(repository_index_path(git_dir)).expect("test operation should succeed"),
12048            ObjectFormat::Sha1,
12049        )
12050        .expect("test operation should succeed")
12051    }
12052
12053    /// Stages `paths` from the worktree, writes their tree, wraps it in a commit
12054    /// object, and points `refs/heads/main` + `HEAD` at it. Returns the commit
12055    /// id. After this call the index reflects the committed tree.
12056    fn build_commit(root: &Path, git_dir: &Path, paths: &[&str]) -> ObjectId {
12057        let path_bufs = paths.iter().map(PathBuf::from).collect::<Vec<_>>();
12058        add_paths_to_index(root, git_dir, ObjectFormat::Sha1, &path_bufs)
12059            .expect("test operation should succeed");
12060        let tree = write_tree_from_index(git_dir, ObjectFormat::Sha1)
12061            .expect("test operation should succeed");
12062        let mut body = Vec::new();
12063        body.extend_from_slice(format!("tree {tree}\n").as_bytes());
12064        body.extend_from_slice(b"author Test <test@example.com> 0 +0000\n");
12065        body.extend_from_slice(b"committer Test <test@example.com> 0 +0000\n");
12066        body.extend_from_slice(b"\n");
12067        body.extend_from_slice(b"sparse fixture\n");
12068        let odb = FileObjectDatabase::from_git_dir(git_dir, ObjectFormat::Sha1);
12069        let commit = odb
12070            .write_object(EncodedObject::new(ObjectType::Commit, body))
12071            .expect("test operation should succeed");
12072        let refs = FileRefStore::new(git_dir, ObjectFormat::Sha1);
12073        let mut tx = refs.transaction();
12074        tx.update(RefUpdate {
12075            name: "refs/heads/main".into(),
12076            expected: None,
12077            new: RefTarget::Direct(commit),
12078            reflog: None,
12079        });
12080        tx.update(RefUpdate {
12081            name: "HEAD".into(),
12082            expected: None,
12083            new: RefTarget::Symbolic("refs/heads/main".into()),
12084            reflog: None,
12085        });
12086        tx.commit().expect("test operation should succeed");
12087        commit
12088    }
12089
12090    fn full_sparse(patterns: &[&[u8]]) -> SparseCheckout {
12091        SparseCheckout {
12092            patterns: patterns.iter().map(|pattern| pattern.to_vec()).collect(),
12093            sparse_index: false,
12094        }
12095    }
12096
12097    #[test]
12098    fn apply_sparse_checkout_full_mode_skips_out_of_cone_paths() {
12099        let root = temp_root();
12100        let git_dir = root.join(".git");
12101        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
12102        fs::create_dir_all(root.join("in")).expect("test operation should succeed");
12103        fs::create_dir_all(root.join("out")).expect("test operation should succeed");
12104        fs::write(root.join("in").join("keep.txt"), b"keep\n")
12105            .expect("test operation should succeed");
12106        fs::write(root.join("out").join("drop.txt"), b"drop\n")
12107            .expect("test operation should succeed");
12108        fs::write(root.join("top.txt"), b"top\n").expect("test operation should succeed");
12109        build_commit(&root, &git_dir, &["in/keep.txt", "out/drop.txt", "top.txt"]);
12110
12111        // Full (non-cone) pattern: keep only the `in/` subtree.
12112        let sparse = full_sparse(&[b"/in/"]);
12113        let result = apply_sparse_checkout_with_mode(
12114            &root,
12115            &git_dir,
12116            ObjectFormat::Sha1,
12117            &sparse,
12118            SparseCheckoutMode::Full,
12119        )
12120        .expect("test operation should succeed");
12121
12122        assert!(root.join("in").join("keep.txt").exists());
12123        assert!(!root.join("out").join("drop.txt").exists());
12124        assert!(!root.join("top.txt").exists());
12125        assert!(result.materialized.contains(&b"in/keep.txt".to_vec()));
12126        assert!(result.skipped.contains(&b"out/drop.txt".to_vec()));
12127        assert!(result.skipped.contains(&b"top.txt".to_vec()));
12128
12129        let index = read_index(&git_dir);
12130        assert!(!index_entry_skip_worktree(index_entry_for(
12131            &index,
12132            b"in/keep.txt"
12133        )));
12134        assert!(index_entry_skip_worktree(index_entry_for(
12135            &index,
12136            b"out/drop.txt"
12137        )));
12138        assert!(index_entry_skip_worktree(index_entry_for(
12139            &index, b"top.txt"
12140        )));
12141        // Out-of-cone entries are preserved in the index, just not on disk.
12142        assert_eq!(index.entries.len(), 3);
12143        fs::remove_dir_all(root).expect("test operation should succeed");
12144    }
12145
12146    #[test]
12147    fn apply_sparse_checkout_toggle_rematerializes() {
12148        let root = temp_root();
12149        let git_dir = root.join(".git");
12150        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
12151        fs::create_dir_all(root.join("a")).expect("test operation should succeed");
12152        fs::create_dir_all(root.join("b")).expect("test operation should succeed");
12153        fs::write(root.join("a").join("file.txt"), b"a\n").expect("test operation should succeed");
12154        fs::write(root.join("b").join("file.txt"), b"b\n").expect("test operation should succeed");
12155        build_commit(&root, &git_dir, &["a/file.txt", "b/file.txt"]);
12156
12157        // First narrow to `a/`.
12158        apply_sparse_checkout_with_mode(
12159            &root,
12160            &git_dir,
12161            ObjectFormat::Sha1,
12162            &full_sparse(&[b"/a/"]),
12163            SparseCheckoutMode::Full,
12164        )
12165        .expect("test operation should succeed");
12166        assert!(root.join("a").join("file.txt").exists());
12167        assert!(!root.join("b").join("file.txt").exists());
12168        let index = read_index(&git_dir);
12169        assert!(index_entry_skip_worktree(index_entry_for(
12170            &index,
12171            b"b/file.txt"
12172        )));
12173
12174        // Now switch the cone to `b/`: `a/` must leave, `b/` must come back with
12175        // the correct content, and the skip-worktree bits must flip.
12176        apply_sparse_checkout_with_mode(
12177            &root,
12178            &git_dir,
12179            ObjectFormat::Sha1,
12180            &full_sparse(&[b"/b/"]),
12181            SparseCheckoutMode::Full,
12182        )
12183        .expect("test operation should succeed");
12184        assert!(!root.join("a").join("file.txt").exists());
12185        assert!(root.join("b").join("file.txt").exists());
12186        assert_eq!(
12187            fs::read(root.join("b").join("file.txt")).expect("test operation should succeed"),
12188            b"b\n"
12189        );
12190        let index = read_index(&git_dir);
12191        assert!(index_entry_skip_worktree(index_entry_for(
12192            &index,
12193            b"a/file.txt"
12194        )));
12195        assert!(!index_entry_skip_worktree(index_entry_for(
12196            &index,
12197            b"b/file.txt"
12198        )));
12199        fs::remove_dir_all(root).expect("test operation should succeed");
12200    }
12201
12202    #[test]
12203    fn apply_sparse_checkout_cone_mode_matches_directory_prefixes() {
12204        let root = temp_root();
12205        let git_dir = root.join(".git");
12206        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
12207        fs::create_dir_all(root.join("kept").join("nested"))
12208            .expect("test operation should succeed");
12209        fs::create_dir_all(root.join("other")).expect("test operation should succeed");
12210        fs::write(root.join("kept").join("a.txt"), b"a\n").expect("test operation should succeed");
12211        fs::write(root.join("kept").join("nested").join("b.txt"), b"b\n")
12212            .expect("test operation should succeed");
12213        fs::write(root.join("other").join("c.txt"), b"c\n").expect("test operation should succeed");
12214        fs::write(root.join("root.txt"), b"r\n").expect("test operation should succeed");
12215        build_commit(
12216            &root,
12217            &git_dir,
12218            &["kept/a.txt", "kept/nested/b.txt", "other/c.txt", "root.txt"],
12219        );
12220
12221        // Standard cone patterns: top-level files plus the whole `kept/` tree.
12222        let sparse = SparseCheckout {
12223            patterns: vec![b"/*".to_vec(), b"!/*/".to_vec(), b"/kept/".to_vec()],
12224            sparse_index: false,
12225        };
12226        // Auto mode should detect cone shape on its own.
12227        assert!(patterns_are_cone(&sparse.patterns));
12228        apply_sparse_checkout(&root, &git_dir, ObjectFormat::Sha1, &sparse)
12229            .expect("test operation should succeed");
12230
12231        assert!(root.join("root.txt").exists());
12232        assert!(root.join("kept").join("a.txt").exists());
12233        assert!(root.join("kept").join("nested").join("b.txt").exists());
12234        assert!(!root.join("other").join("c.txt").exists());
12235
12236        let index = read_index(&git_dir);
12237        assert!(!index_entry_skip_worktree(index_entry_for(
12238            &index,
12239            b"root.txt"
12240        )));
12241        assert!(!index_entry_skip_worktree(index_entry_for(
12242            &index,
12243            b"kept/a.txt"
12244        )));
12245        assert!(!index_entry_skip_worktree(index_entry_for(
12246            &index,
12247            b"kept/nested/b.txt"
12248        )));
12249        assert!(index_entry_skip_worktree(index_entry_for(
12250            &index,
12251            b"other/c.txt"
12252        )));
12253        fs::remove_dir_all(root).expect("test operation should succeed");
12254    }
12255
12256    #[test]
12257    fn apply_sparse_checkout_honors_preexisting_skip_worktree_via_idempotence() {
12258        let root = temp_root();
12259        let git_dir = root.join(".git");
12260        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
12261        fs::create_dir_all(root.join("in")).expect("test operation should succeed");
12262        fs::create_dir_all(root.join("out")).expect("test operation should succeed");
12263        fs::write(root.join("in").join("keep.txt"), b"keep\n")
12264            .expect("test operation should succeed");
12265        fs::write(root.join("out").join("drop.txt"), b"drop\n")
12266            .expect("test operation should succeed");
12267        build_commit(&root, &git_dir, &["in/keep.txt", "out/drop.txt"]);
12268
12269        let sparse = full_sparse(&[b"/in/"]);
12270        apply_sparse_checkout_with_mode(
12271            &root,
12272            &git_dir,
12273            ObjectFormat::Sha1,
12274            &sparse,
12275            SparseCheckoutMode::Full,
12276        )
12277        .expect("test operation should succeed");
12278        assert!(!root.join("out").join("drop.txt").exists());
12279
12280        // Re-applying the same spec is a no-op: the already-skipped file stays
12281        // absent and the bit stays set (we do not resurrect it).
12282        let result = apply_sparse_checkout_with_mode(
12283            &root,
12284            &git_dir,
12285            ObjectFormat::Sha1,
12286            &sparse,
12287            SparseCheckoutMode::Full,
12288        )
12289        .expect("test operation should succeed");
12290        assert!(!root.join("out").join("drop.txt").exists());
12291        assert!(root.join("in").join("keep.txt").exists());
12292        assert!(result.skipped.contains(&b"out/drop.txt".to_vec()));
12293        let index = read_index(&git_dir);
12294        assert!(index_entry_skip_worktree(index_entry_for(
12295            &index,
12296            b"out/drop.txt"
12297        )));
12298        fs::remove_dir_all(root).expect("test operation should succeed");
12299    }
12300
12301    #[test]
12302    fn checkout_detached_sparse_only_writes_in_cone_paths() {
12303        let root = temp_root();
12304        let git_dir = root.join(".git");
12305        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
12306        fs::create_dir_all(root.join("keep")).expect("test operation should succeed");
12307        fs::create_dir_all(root.join("skip")).expect("test operation should succeed");
12308        fs::write(root.join("keep").join("a.txt"), b"a\n").expect("test operation should succeed");
12309        fs::write(root.join("skip").join("b.txt"), b"b\n").expect("test operation should succeed");
12310        let commit = build_commit(&root, &git_dir, &["keep/a.txt", "skip/b.txt"]);
12311
12312        // The worktree is clean and matches the commit. A sparse checkout must
12313        // keep the in-cone file and evict the out-of-cone one.
12314        let sparse = full_sparse(&[b"/keep/"]);
12315        let result = checkout_detached_sparse(
12316            &root,
12317            &git_dir,
12318            ObjectFormat::Sha1,
12319            &commit,
12320            b"Test <test@example.com> 0 +0000".to_vec(),
12321            b"checkout".to_vec(),
12322            &sparse,
12323        )
12324        .expect("test operation should succeed");
12325        assert_eq!(result.files, 2);
12326
12327        assert!(root.join("keep").join("a.txt").exists());
12328        assert_eq!(
12329            fs::read(root.join("keep").join("a.txt")).expect("test operation should succeed"),
12330            b"a\n"
12331        );
12332        assert!(!root.join("skip").join("b.txt").exists());
12333
12334        let index = read_index(&git_dir);
12335        assert_eq!(index.entries.len(), 2);
12336        assert!(!index_entry_skip_worktree(index_entry_for(
12337            &index,
12338            b"keep/a.txt"
12339        )));
12340        let skipped = index_entry_for(&index, b"skip/b.txt");
12341        assert!(index_entry_skip_worktree(skipped));
12342        // The skipped entry still carries the committed blob id and mode.
12343        assert_eq!(skipped.mode, 0o100644);
12344        fs::remove_dir_all(root).expect("test operation should succeed");
12345    }
12346
12347    // ----- content filtering: EOL / autocrlf + clean/smudge drivers -----
12348
12349    /// Build a [`GitConfig`] from raw config text.
12350    fn config_from(text: &str) -> GitConfig {
12351        GitConfig::parse(text.as_bytes()).expect("test operation should succeed")
12352    }
12353
12354    /// Conformance grid for git's `output_eol(crlf_action)` decision table
12355    /// (convert.c) on the smudge side, exercised across the same
12356    /// attr × autocrlf × eol × content matrix as upstream t0027/t0026.
12357    ///
12358    /// Each row asserts the smudge output for a representative content shape.
12359    /// The cases that historically under-converted are the non-`auto` `text`
12360    /// paths (the auto-only safety guard must NOT fire) and the
12361    /// `autocrlf=true overrides core.eol` precedence rows.
12362    #[test]
12363    fn smudge_output_eol_decision_table() {
12364        // Naked-LF-only blob (the canonical "should gain CRLF" case).
12365        const LF: &[u8] = b"a\nb\nc\n";
12366        // Mixed CRLF + naked LF: a non-auto crlf action converts the naked LFs
12367        // to CRLF (whole file becomes CRLF); an auto action leaves it untouched.
12368        const CRLF_MIX_LF: &[u8] = b"a\r\nb\nc\r\n";
12369        // Naked LF plus a lone CR: non-auto converts LFs, keeping the lone CR.
12370        const LF_MIX_CR: &[u8] = b"a\nb\rc\n";
12371
12372        let smudge = |cfg: &str, attrline: Option<&[u8]>, input: &[u8]| -> Vec<u8> {
12373            let config = config_from(cfg);
12374            let checks = match attrline {
12375                Some(line) => {
12376                    let mut matcher = AttributeMatcher::default();
12377                    read_attribute_patterns_from_bytes(line, &mut matcher, &[]);
12378                    matcher.attributes_for_path(b"f.txt", &filter_attribute_names(), false)
12379                }
12380                None => Vec::new(),
12381            };
12382            apply_smudge_filter_with_attributes(&config, &checks, b"f.txt", input)
12383                .expect("smudge must succeed")
12384        };
12385
12386        // --- attr=text (CRLF_TEXT_*): non-auto, the safety guard must not fire.
12387        // text + eol=crlf => CRLF_TEXT_CRLF: every naked LF gains CR.
12388        let attr_text_crlf: &[u8] = b"*.txt text eol=crlf";
12389        for cfg in [
12390            "[core]\n\tautocrlf = false\n\teol = lf\n",
12391            "[core]\n\tautocrlf = false\n\teol = crlf\n",
12392            "[core]\n\tautocrlf = true\n\teol = lf\n",
12393            "[core]\n\tautocrlf = input\n",
12394        ] {
12395            assert_eq!(
12396                smudge(cfg, Some(attr_text_crlf), LF),
12397                b"a\r\nb\r\nc\r\n",
12398                "text eol=crlf must add CR to naked LF (cfg={cfg:?})"
12399            );
12400            assert_eq!(
12401                smudge(cfg, Some(attr_text_crlf), CRLF_MIX_LF),
12402                b"a\r\nb\r\nc\r\n",
12403                "text eol=crlf must convert mixed content fully (cfg={cfg:?})"
12404            );
12405            assert_eq!(
12406                smudge(cfg, Some(attr_text_crlf), LF_MIX_CR),
12407                b"a\r\nb\rc\r\n",
12408                "text eol=crlf keeps the lone CR but adds CR to naked LF (cfg={cfg:?})"
12409            );
12410        }
12411
12412        // --- attr=text, no eol attr: CRLF_TEXT, resolved by text_eol_is_crlf().
12413        // autocrlf=true wins over core.eol=lf (the precedence fix).
12414        assert_eq!(
12415            smudge("[core]\n\tautocrlf = true\n\teol = lf\n", Some(b"*.txt text"), LF),
12416            b"a\r\nb\r\nc\r\n",
12417            "autocrlf=true must override core.eol=lf for plain text attr"
12418        );
12419        // autocrlf unset, core.eol=crlf => CRLF.
12420        assert_eq!(
12421            smudge("[core]\n\teol = crlf\n", Some(b"*.txt text"), LF),
12422            b"a\r\nb\r\nc\r\n",
12423            "core.eol=crlf adds CR to naked LF for plain text attr"
12424        );
12425        // autocrlf unset, core.eol=lf (and native LF on this host) => no CR.
12426        assert_eq!(
12427            smudge("[core]\n\teol = lf\n", Some(b"*.txt text"), LF),
12428            LF,
12429            "core.eol=lf leaves naked LF untouched on smudge"
12430        );
12431        // text + autocrlf=input => CRLF_TEXT_INPUT: no CR on smudge.
12432        assert_eq!(
12433            smudge("[core]\n\tautocrlf = input\n", Some(b"*.txt text"), LF),
12434            LF,
12435            "autocrlf=input overrides core.eol; no CR on smudge"
12436        );
12437
12438        // --- attr=text=auto (CRLF_AUTO_*): the safety guard DOES fire.
12439        // auto + autocrlf=true + naked-LF-only => convert.
12440        assert_eq!(
12441            smudge("[core]\n\tautocrlf = true\n", Some(b"*.txt text=auto"), LF),
12442            b"a\r\nb\r\nc\r\n",
12443            "text=auto converts a clean naked-LF file"
12444        );
12445        // auto + already has a CR/CRLF => leave untouched (irreversible guard).
12446        assert_eq!(
12447            smudge("[core]\n\tautocrlf = true\n", Some(b"*.txt text=auto"), CRLF_MIX_LF),
12448            CRLF_MIX_LF,
12449            "text=auto must not touch content that already has CRLF"
12450        );
12451        assert_eq!(
12452            smudge("[core]\n\tautocrlf = true\n", Some(b"*.txt text=auto"), LF_MIX_CR),
12453            LF_MIX_CR,
12454            "text=auto must not touch content that already has a lone CR"
12455        );
12456
12457        // --- no attr, autocrlf=true => CRLF_AUTO_CRLF (auto guard applies).
12458        assert_eq!(
12459            smudge("[core]\n\tautocrlf = true\n\teol = lf\n", None, LF),
12460            b"a\r\nb\r\nc\r\n",
12461            "autocrlf=true (no attr) converts clean naked-LF and overrides core.eol=lf"
12462        );
12463        // --- no attr, autocrlf=false => CRLF_BINARY: never convert.
12464        assert_eq!(
12465            smudge("[core]\n\teol = crlf\n", None, LF),
12466            LF,
12467            "no attr + autocrlf=false leaves content untouched even with core.eol=crlf"
12468        );
12469        // --- -text (CRLF_BINARY): never convert regardless of config.
12470        assert_eq!(
12471            smudge("[core]\n\tautocrlf = true\n", Some(b"*.txt -text"), LF),
12472            LF,
12473            "-text is binary: never convert"
12474        );
12475    }
12476
12477    /// Resolve attribute checks against an on-disk `.gitattributes` in `root`.
12478    fn attrs(root: &Path, path: &[u8]) -> Vec<AttributeCheck> {
12479        filter_attribute_checks(root, path).expect("test operation should succeed")
12480    }
12481
12482    #[test]
12483    fn standard_attribute_matcher_matches_per_path_lookup() {
12484        let root = temp_root();
12485        fs::create_dir_all(root.join(".git").join("info")).expect("test operation should succeed");
12486        fs::create_dir_all(root.join("src").join("nested")).expect("test operation should succeed");
12487        fs::write(root.join(".gitattributes"), b"*.rs diff=rust\n")
12488            .expect("test operation should succeed");
12489        fs::write(
12490            root.join("src").join(".gitattributes"),
12491            b"*.rs diff=python\n",
12492        )
12493        .expect("test operation should succeed");
12494        fs::write(
12495            root.join(".git").join("info").join("attributes"),
12496            b"src/nested/*.rs diff=java\n",
12497        )
12498        .expect("test operation should succeed");
12499
12500        let requested = vec![b"diff".to_vec()];
12501        let path = b"src/nested/file.rs";
12502        let per_path = standard_attributes_for_path(&root, path, &requested, false)
12503            .expect("test operation should succeed");
12504        let matcher = StandardAttributeMatcher::from_worktree_root(&root)
12505            .expect("test operation should succeed");
12506        assert_eq!(
12507            matcher.attributes_for_path(path, &requested, false),
12508            per_path
12509        );
12510
12511        fs::remove_dir_all(root).expect("test operation should succeed");
12512    }
12513
12514    #[test]
12515    fn filter_attribute_lookup_reads_only_path_chain() {
12516        let root = temp_root();
12517        fs::create_dir_all(root.join(".git").join("info")).expect("test operation should succeed");
12518        fs::create_dir_all(root.join("src").join("nested")).expect("test operation should succeed");
12519        fs::create_dir_all(root.join("sibling")).expect("test operation should succeed");
12520        fs::write(root.join(".gitattributes"), b"*.txt text\n")
12521            .expect("test operation should succeed");
12522        fs::write(root.join("src").join(".gitattributes"), b"*.txt -text\n")
12523            .expect("test operation should succeed");
12524        fs::write(
12525            root.join("sibling").join(".gitattributes"),
12526            b"*.txt eol=crlf\n",
12527        )
12528        .expect("test operation should succeed");
12529        fs::write(
12530            root.join(".git").join("info").join("attributes"),
12531            b"src/nested/*.txt eol=lf\n",
12532        )
12533        .expect("test operation should succeed");
12534
12535        let path = b"src/nested/file.txt";
12536        let full = standard_attributes_for_path(&root, path, &filter_attribute_names(), false)
12537            .expect("test operation should succeed");
12538        assert_eq!(filter_attribute_checks(&root, path).unwrap(), full);
12539
12540        fs::remove_dir_all(root).expect("test operation should succeed");
12541    }
12542
12543    #[test]
12544    fn crlf_to_lf_collapses_only_pairs() {
12545        assert_eq!(
12546            convert_crlf_to_lf_cow(Cow::Borrowed(b"a\r\nb\r\n")).as_ref(),
12547            b"a\nb\n"
12548        );
12549        // A lone CR (no following LF) is preserved.
12550        assert_eq!(
12551            convert_crlf_to_lf_cow(Cow::Borrowed(b"a\rb")).as_ref(),
12552            b"a\rb"
12553        );
12554        // An already-LF stream is unchanged.
12555        assert!(matches!(
12556            convert_crlf_to_lf_cow(Cow::Borrowed(b"a\nb\n")),
12557            Cow::Borrowed(_)
12558        ));
12559    }
12560
12561    #[test]
12562    fn lf_to_crlf_does_not_double_convert() {
12563        assert_eq!(convert_lf_to_crlf(b"a\nb\n"), b"a\r\nb\r\n");
12564        // Existing CRLF is left intact (no extra CR added).
12565        assert_eq!(convert_lf_to_crlf(b"a\r\nb\r\n"), b"a\r\nb\r\n");
12566    }
12567
12568    #[test]
12569    fn autocrlf_round_trip_clean_then_smudge() {
12570        // autocrlf=true: worktree CRLF -> blob LF on clean, blob LF -> worktree
12571        // CRLF on smudge.
12572        let config = config_from("[core]\n\tautocrlf = true\n");
12573        let checks: Vec<AttributeCheck> = Vec::new();
12574        let worktree = b"line1\r\nline2\r\n";
12575        let blob = apply_clean_filter_with_attributes(&config, &checks, b"file.txt", worktree)
12576            .expect("test operation should succeed");
12577        assert_eq!(blob, b"line1\nline2\n", "clean must normalize CRLF to LF");
12578        let restored = apply_smudge_filter_with_attributes(&config, &checks, b"file.txt", &blob)
12579            .expect("test operation should succeed");
12580        assert_eq!(
12581            restored, worktree,
12582            "smudge must restore CRLF from the LF blob"
12583        );
12584    }
12585
12586    #[test]
12587    fn conv_flags_from_config_matches_git_defaults() {
12588        // Unset core.safecrlf defaults to WARN (git's global_conv_flags_eol).
12589        assert_eq!(ConvFlags::from_config(&config_from("")), ConvFlags::Warn);
12590        assert_eq!(
12591            ConvFlags::from_config(&config_from("[core]\n\tsafecrlf = warn\n")),
12592            ConvFlags::Warn
12593        );
12594        assert_eq!(
12595            ConvFlags::from_config(&config_from("[core]\n\tsafecrlf = WARN\n")),
12596            ConvFlags::Warn
12597        );
12598        assert_eq!(
12599            ConvFlags::from_config(&config_from("[core]\n\tsafecrlf = true\n")),
12600            ConvFlags::Die
12601        );
12602        assert_eq!(
12603            ConvFlags::from_config(&config_from("[core]\n\tsafecrlf = false\n")),
12604            ConvFlags::Off
12605        );
12606    }
12607
12608    #[test]
12609    fn safecrlf_warn_does_not_change_clean_bytes() {
12610        // The warning is purely additive: byte output is identical whether
12611        // safecrlf is off or warn.
12612        let config = config_from("[core]\n\tautocrlf = true\n");
12613        let checks: Vec<AttributeCheck> = Vec::new();
12614        let worktree = b"a\nb\nc\n";
12615        let plain = apply_clean_filter_with_attributes(&config, &checks, b"f.txt", worktree)
12616            .expect("clean");
12617        let warned = apply_clean_filter_with_attributes_cow_safecrlf(
12618            &config,
12619            &checks,
12620            b"f.txt",
12621            worktree,
12622            ConvFlags::Warn,
12623            SafeCrlfIndexBlob::None,
12624        )
12625        .expect("clean with safecrlf")
12626        .into_owned();
12627        assert_eq!(plain, warned, "safecrlf must not alter the cleaned bytes");
12628    }
12629
12630    #[test]
12631    fn safecrlf_die_errors_on_lf_to_crlf_round_trip() {
12632        // autocrlf=true on a pure-LF file: checkout would add CRLF, so the
12633        // round-trip is irreversible and safecrlf=true dies (exit 128).
12634        let config = config_from("[core]\n\tautocrlf = true\n");
12635        let checks: Vec<AttributeCheck> = Vec::new();
12636        let err = apply_clean_filter_with_attributes_cow_safecrlf(
12637            &config,
12638            &checks,
12639            b"f.txt",
12640            b"a\nb\n",
12641            ConvFlags::Die,
12642            SafeCrlfIndexBlob::None,
12643        )
12644        .expect_err("die must error");
12645        assert!(matches!(err, GitError::Exit(128)));
12646    }
12647
12648    #[test]
12649    fn safecrlf_die_errors_on_crlf_to_lf_round_trip() {
12650        // autocrlf=input on a CRLF file: clean strips CRLF and checkout never
12651        // restores it, so safecrlf=true dies.
12652        let config = config_from("[core]\n\tautocrlf = input\n");
12653        let checks: Vec<AttributeCheck> = Vec::new();
12654        let err = apply_clean_filter_with_attributes_cow_safecrlf(
12655            &config,
12656            &checks,
12657            b"f.txt",
12658            b"a\r\nb\r\n",
12659            ConvFlags::Die,
12660            SafeCrlfIndexBlob::None,
12661        )
12662        .expect_err("die must error");
12663        assert!(matches!(err, GitError::Exit(128)));
12664    }
12665
12666    #[test]
12667    fn safecrlf_reversible_round_trip_does_not_warn_or_die() {
12668        // A CRLF file under autocrlf=true survives the round trip (clean to LF,
12669        // smudge back to CRLF), so even safecrlf=true is silent.
12670        let config = config_from("[core]\n\tautocrlf = true\n");
12671        let checks: Vec<AttributeCheck> = Vec::new();
12672        let out = apply_clean_filter_with_attributes_cow_safecrlf(
12673            &config,
12674            &checks,
12675            b"f.txt",
12676            b"a\r\nb\r\n",
12677            ConvFlags::Die,
12678            SafeCrlfIndexBlob::None,
12679        )
12680        .expect("reversible round trip must not die");
12681        assert_eq!(out.as_ref(), b"a\nb\n");
12682    }
12683
12684    #[test]
12685    fn safecrlf_binary_content_is_silent() {
12686        // autocrlf=true with NUL-containing (binary) content: no conversion and
12687        // no warning/die, mirroring git's early-return in crlf_to_git.
12688        let config = config_from("[core]\n\tautocrlf = true\n");
12689        let checks: Vec<AttributeCheck> = Vec::new();
12690        let body: &[u8] = b"a\nb\0c\n";
12691        let out = apply_clean_filter_with_attributes_cow_safecrlf(
12692            &config,
12693            &checks,
12694            b"f.bin",
12695            body,
12696            ConvFlags::Die,
12697            SafeCrlfIndexBlob::None,
12698        )
12699        .expect("binary content must not die");
12700        assert_eq!(out.as_ref(), body, "binary content is never converted");
12701    }
12702
12703    #[test]
12704    fn safecrlf_off_is_silent_even_on_irreversible_round_trip() {
12705        let config = config_from("[core]\n\tautocrlf = true\n");
12706        let checks: Vec<AttributeCheck> = Vec::new();
12707        let out = apply_clean_filter_with_attributes_cow_safecrlf(
12708            &config,
12709            &checks,
12710            b"f.txt",
12711            b"a\nb\n",
12712            ConvFlags::Off,
12713            SafeCrlfIndexBlob::None,
12714        )
12715        .expect("safecrlf=off never errors");
12716        // autocrlf=true does not convert on clean (only smudge), so bytes pass through.
12717        assert_eq!(out.as_ref(), b"a\nb\n");
12718    }
12719
12720    #[test]
12721    fn autocrlf_input_normalizes_on_clean_but_not_smudge() {
12722        // autocrlf=input: clean normalizes to LF, smudge leaves LF as-is.
12723        let config = config_from("[core]\n\tautocrlf = input\n");
12724        let checks: Vec<AttributeCheck> = Vec::new();
12725        let blob = apply_clean_filter_with_attributes(&config, &checks, b"file.txt", b"a\r\nb\r\n")
12726            .expect("test operation should succeed");
12727        assert_eq!(blob, b"a\nb\n");
12728        let smudged = apply_smudge_filter_with_attributes(&config, &checks, b"file.txt", &blob)
12729            .expect("test operation should succeed");
12730        assert_eq!(
12731            smudged, b"a\nb\n",
12732            "input mode must not add carriage returns"
12733        );
12734    }
12735
12736    #[test]
12737    fn eol_crlf_attribute_drives_conversion_without_config() {
12738        // No core.autocrlf; the `eol=crlf` attribute alone forces conversion.
12739        let config = config_from("");
12740        let checks = vec![AttributeCheck {
12741            attribute: b"eol".to_vec(),
12742            state: Some(AttributeState::Value(b"crlf".to_vec())),
12743        }];
12744        let blob = apply_clean_filter_with_attributes(&config, &checks, b"a.txt", b"x\r\ny\r\n")
12745            .expect("test operation should succeed");
12746        assert_eq!(blob, b"x\ny\n");
12747        let smudged = apply_smudge_filter_with_attributes(&config, &checks, b"a.txt", &blob)
12748            .expect("test operation should succeed");
12749        assert_eq!(smudged, b"x\r\ny\r\n");
12750    }
12751
12752    #[test]
12753    fn binary_attribute_disables_eol_conversion() {
12754        // `-text` (binary) must leave CRLF/NUL content untouched in both
12755        // directions even when autocrlf=true.
12756        let config = config_from("[core]\n\tautocrlf = true\n");
12757        let checks = vec![AttributeCheck {
12758            attribute: b"text".to_vec(),
12759            state: Some(AttributeState::Unset),
12760        }];
12761        let content = b"\x00\x01\r\n\x02\r\n".to_vec();
12762        let blob = apply_clean_filter_with_attributes(&config, &checks, b"data.bin", &content)
12763            .expect("test operation should succeed");
12764        assert_eq!(blob, content, "binary file must not be CRLF-normalized");
12765        let smudged = apply_smudge_filter_with_attributes(&config, &checks, b"data.bin", &blob)
12766            .expect("test operation should succeed");
12767        assert_eq!(
12768            smudged, content,
12769            "binary file must not gain carriage returns"
12770        );
12771    }
12772
12773    #[test]
12774    fn autocrlf_auto_skips_binary_looking_content() {
12775        // text=auto (via autocrlf) must not convert content that contains NUL.
12776        let config = config_from("[core]\n\tautocrlf = true\n");
12777        let checks: Vec<AttributeCheck> = Vec::new();
12778        let content = b"a\r\n\x00b\r\n".to_vec();
12779        let blob = apply_clean_filter_with_attributes(&config, &checks, b"f", &content)
12780            .expect("test operation should succeed");
12781        assert_eq!(blob, content, "binary-looking content stays untouched");
12782    }
12783
12784    #[test]
12785    fn autocrlf_via_add_and_checkout_round_trips() {
12786        // End-to-end: a CRLF worktree file is stored as an LF blob by the
12787        // filtered add path, and restored as CRLF by the filtered checkout.
12788        let root = temp_root();
12789        let git_dir = root.join(".git");
12790        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
12791        let config = config_from("[core]\n\tautocrlf = true\n");
12792
12793        fs::write(root.join("crlf.txt"), b"alpha\r\nbeta\r\n")
12794            .expect("test operation should succeed");
12795        add_paths_to_index_filtered(
12796            &root,
12797            &git_dir,
12798            ObjectFormat::Sha1,
12799            &[PathBuf::from("crlf.txt")],
12800            &config,
12801        )
12802        .expect("test operation should succeed");
12803
12804        // The stored blob must be LF-normalized.
12805        let index = read_index(&git_dir);
12806        let entry = index_entry_for(&index, b"crlf.txt");
12807        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
12808        let blob = odb
12809            .read_object(&entry.oid)
12810            .expect("test operation should succeed");
12811        assert_eq!(blob.body, b"alpha\nbeta\n");
12812
12813        // Commit and point HEAD at it, then re-checkout with smudge filtering.
12814        let tree = write_tree_from_index(&git_dir, ObjectFormat::Sha1)
12815            .expect("test operation should succeed");
12816        let mut body = Vec::new();
12817        body.extend_from_slice(format!("tree {tree}\n").as_bytes());
12818        body.extend_from_slice(b"author T <t@e> 0 +0000\ncommitter T <t@e> 0 +0000\n\nm\n");
12819        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
12820        let commit = odb
12821            .write_object(EncodedObject::new(ObjectType::Commit, body))
12822            .expect("test operation should succeed");
12823        let refs = FileRefStore::new(&git_dir, ObjectFormat::Sha1);
12824        let mut tx = refs.transaction();
12825        tx.update(RefUpdate {
12826            name: "HEAD".into(),
12827            expected: None,
12828            new: RefTarget::Direct(commit),
12829            reflog: None,
12830        });
12831        tx.commit().expect("test operation should succeed");
12832
12833        // Make the worktree match the committed (LF) blob so the tree is clean
12834        // for checkout; `short_status`/`worktree_entries` compare by content
12835        // hash and are not filter-aware. Checkout will then smudge it to CRLF.
12836        fs::write(root.join("crlf.txt"), b"alpha\nbeta\n").expect("test operation should succeed");
12837        checkout_detached_filtered(
12838            &root,
12839            &git_dir,
12840            ObjectFormat::Sha1,
12841            &commit,
12842            b"T <t@e> 0 +0000".to_vec(),
12843            b"co".to_vec(),
12844            &config,
12845        )
12846        .expect("test operation should succeed");
12847        assert_eq!(
12848            fs::read(root.join("crlf.txt")).expect("test operation should succeed"),
12849            b"alpha\r\nbeta\r\n",
12850            "checkout must restore CRLF line endings"
12851        );
12852        fs::remove_dir_all(root).expect("test operation should succeed");
12853    }
12854
12855    #[test]
12856    fn driver_filter_clean_and_smudge_transform_both_directions() {
12857        // filter=case: clean upper-cases (worktree -> blob), smudge lower-cases
12858        // (blob -> worktree).
12859        let config =
12860            config_from("[filter \"case\"]\n\tclean = tr a-z A-Z\n\tsmudge = tr A-Z a-z\n");
12861        let checks = vec![AttributeCheck {
12862            attribute: b"filter".to_vec(),
12863            state: Some(AttributeState::Value(b"case".to_vec())),
12864        }];
12865        let blob = apply_clean_filter_with_attributes(&config, &checks, b"f.txt", b"Hello World")
12866            .expect("test operation should succeed");
12867        assert_eq!(blob, b"HELLO WORLD", "clean driver must upper-case");
12868        let worktree =
12869            apply_smudge_filter_with_attributes(&config, &checks, b"f.txt", b"HELLO WORLD")
12870                .expect("test operation should succeed");
12871        assert_eq!(worktree, b"hello world", "smudge driver must lower-case");
12872    }
12873
12874    #[test]
12875    fn driver_filter_resolved_from_gitattributes_file() {
12876        // The filter name is read from a real `.gitattributes`, the commands from
12877        // config; exercises the public worktree-rooted entry points.
12878        let root = temp_root();
12879        let git_dir = root.join(".git");
12880        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
12881        fs::write(root.join(".gitattributes"), b"*.dat filter=rot\n")
12882            .expect("test operation should succeed");
12883        let config =
12884            config_from("[filter \"rot\"]\n\tclean = sed s/a/b/g\n\tsmudge = sed s/b/a/g\n");
12885        // Clean reads attributes from the live worktree `.gitattributes`.
12886        let blob = apply_clean_filter(&root, &git_dir, &config, b"x.dat", b"banana")
12887            .expect("test operation should succeed");
12888        assert_eq!(blob, b"bbnbnb");
12889        // Smudge reads attributes from the index (the worktree file may not
12890        // exist yet during checkout), so stage `.gitattributes` first.
12891        add_paths_to_index(
12892            &root,
12893            &git_dir,
12894            ObjectFormat::Sha1,
12895            &[PathBuf::from(".gitattributes")],
12896        )
12897        .expect("test operation should succeed");
12898        let smudged = apply_smudge_filter(
12899            &root,
12900            &git_dir,
12901            ObjectFormat::Sha1,
12902            &config,
12903            b"x.dat",
12904            &blob,
12905        )
12906        .expect("test operation should succeed");
12907        // sed s/b/a/g is not a perfect inverse, but verifies the smudge command
12908        // ran on the blob bytes.
12909        assert_eq!(smudged, b"aanana");
12910        fs::remove_dir_all(root).expect("test operation should succeed");
12911    }
12912
12913    #[test]
12914    fn required_filter_failure_is_fatal() {
12915        // A required filter whose command fails must surface an error.
12916        let config = config_from("[filter \"boom\"]\n\tclean = false\n\trequired = true\n");
12917        let checks = vec![AttributeCheck {
12918            attribute: b"filter".to_vec(),
12919            state: Some(AttributeState::Value(b"boom".to_vec())),
12920        }];
12921        let err = apply_clean_filter_with_attributes(&config, &checks, b"f", b"data")
12922            .expect_err("required filter failure must error");
12923        assert!(matches!(err, GitError::Command(_)), "got {err:?}");
12924    }
12925
12926    #[test]
12927    fn required_filter_missing_command_is_fatal() {
12928        // required=true but no clean command for this direction is also fatal.
12929        let config = config_from("[filter \"need\"]\n\tsmudge = cat\n\trequired = true\n");
12930        let checks = vec![AttributeCheck {
12931            attribute: b"filter".to_vec(),
12932            state: Some(AttributeState::Value(b"need".to_vec())),
12933        }];
12934        let err = apply_clean_filter_with_attributes(&config, &checks, b"f", b"data")
12935            .expect_err("required filter without a clean command must error");
12936        assert!(matches!(err, GitError::Command(_)), "got {err:?}");
12937    }
12938
12939    #[test]
12940    fn non_required_filter_failure_passes_through() {
12941        // A non-required filter that fails must pass the content through
12942        // unchanged rather than erroring.
12943        let config = config_from("[filter \"opt\"]\n\tclean = false\n");
12944        let checks = vec![AttributeCheck {
12945            attribute: b"filter".to_vec(),
12946            state: Some(AttributeState::Value(b"opt".to_vec())),
12947        }];
12948        let out = apply_clean_filter_with_attributes(&config, &checks, b"f", b"keepme")
12949            .expect("test operation should succeed");
12950        assert_eq!(
12951            out, b"keepme",
12952            "optional filter failure passes content through"
12953        );
12954    }
12955
12956    #[test]
12957    fn filter_with_no_command_is_noop() {
12958        // filter=name with no configured commands and not required is ignored.
12959        let config = config_from("");
12960        let checks = vec![AttributeCheck {
12961            attribute: b"filter".to_vec(),
12962            state: Some(AttributeState::Value(b"ghost".to_vec())),
12963        }];
12964        let out = apply_clean_filter_with_attributes(&config, &checks, b"f", b"unchanged")
12965            .expect("test operation should succeed");
12966        assert_eq!(out, b"unchanged");
12967    }
12968
12969    #[test]
12970    fn driver_and_eol_compose_on_clean_and_smudge() {
12971        // filter=case + autocrlf=true: clean runs the driver then CRLF->LF;
12972        // smudge runs LF->CRLF then the driver.
12973        let config = config_from(
12974            "[core]\n\tautocrlf = true\n[filter \"case\"]\n\tclean = tr a-z A-Z\n\tsmudge = tr A-Z a-z\n",
12975        );
12976        let checks = vec![
12977            AttributeCheck {
12978                attribute: b"filter".to_vec(),
12979                state: Some(AttributeState::Value(b"case".to_vec())),
12980            },
12981            AttributeCheck {
12982                attribute: b"text".to_vec(),
12983                state: Some(AttributeState::Set),
12984            },
12985        ];
12986        let blob = apply_clean_filter_with_attributes(&config, &checks, b"f.txt", b"ab\r\ncd\r\n")
12987            .expect("test operation should succeed");
12988        assert_eq!(blob, b"AB\nCD\n", "clean: upper-case then CRLF->LF");
12989        let worktree = apply_smudge_filter_with_attributes(&config, &checks, b"f.txt", &blob)
12990            .expect("test operation should succeed");
12991        assert_eq!(
12992            worktree, b"ab\r\ncd\r\n",
12993            "smudge: LF->CRLF then lower-case"
12994        );
12995    }
12996
12997    #[test]
12998    fn attrs_helper_reads_filter_from_disk() {
12999        let root = temp_root();
13000        fs::write(root.join(".gitattributes"), b"*.txt text\n*.bin -text\n")
13001            .expect("test operation should succeed");
13002        let text = attrs(&root, b"a.txt");
13003        assert!(
13004            text.iter()
13005                .any(|c| c.attribute == b"text" && c.state == Some(AttributeState::Set))
13006        );
13007        let bin = attrs(&root, b"a.bin");
13008        assert!(
13009            bin.iter()
13010                .any(|c| c.attribute == b"text" && c.state == Some(AttributeState::Unset))
13011        );
13012        fs::remove_dir_all(root).expect("test operation should succeed");
13013    }
13014
13015    /// Builds a stat cache holding a single stage-0 entry whose size+mtime match
13016    /// `file`'s real metadata, with the index-file mtime placed strictly after
13017    /// the entry mtime so the entry reads as non-racy by default. The entry's oid
13018    /// is `oid` and its mode is `mode`.
13019    fn stat_cache_for(file: &Path, oid: ObjectId, mode: u32) -> (IndexStatCache, IndexEntry) {
13020        let metadata = fs::metadata(file).expect("test operation should succeed");
13021        let mut entry = index_entry_from_metadata(b"f.txt".to_vec(), oid, &metadata);
13022        entry.mode = mode;
13023        let index_mtime = Some((u64::from(entry.mtime_seconds) + 10, 0));
13024        let mut entries = HashMap::new();
13025        entries.insert(entry.path.as_bytes().to_vec(), entry.clone());
13026        (
13027            IndexStatCache {
13028                entries,
13029                index_mtime,
13030            },
13031            entry,
13032        )
13033    }
13034
13035    #[test]
13036    fn reuse_tracked_entry_only_reuses_clean_non_racy_match() {
13037        let root = temp_root();
13038        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
13039        let file = root.join("f.txt");
13040        let metadata = fs::metadata(&file).expect("test operation should succeed");
13041        let real_mode = file_mode(&metadata);
13042        let oid = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec())
13043            .object_id(ObjectFormat::Sha1)
13044            .expect("test operation should succeed");
13045
13046        // Clean, non-racy, matching stat + mode -> reuse the cached oid.
13047        let (cache, _) = stat_cache_for(&file, oid, real_mode);
13048        let reused = cache.reuse_tracked_entry(b"f.txt", &metadata);
13049        assert_eq!(
13050            reused,
13051            Some(TrackedEntry {
13052                mode: real_mode,
13053                oid,
13054            }),
13055            "a clean non-racy stat+mode match must reuse the staged oid"
13056        );
13057
13058        // No stage-0 entry for the path -> must hash.
13059        assert_eq!(
13060            cache.reuse_tracked_entry(b"other.txt", &metadata),
13061            None,
13062            "a path with no cached entry must fall through to hashing"
13063        );
13064
13065        // Size differs from the file -> must hash.
13066        let (mut size_cache, mut shrunk) = stat_cache_for(&file, oid, real_mode);
13067        shrunk.size = shrunk.size.saturating_sub(1);
13068        size_cache.entries.insert(shrunk.path.to_vec(), shrunk);
13069        assert_eq!(
13070            size_cache.reuse_tracked_entry(b"f.txt", &metadata),
13071            None,
13072            "a size mismatch must fall through to hashing"
13073        );
13074
13075        // Mode differs (e.g. a chmod that did not move mtime) -> must hash.
13076        let (mode_cache, _) = stat_cache_for(&file, oid, 0o100755);
13077        assert_eq!(
13078            mode_cache.reuse_tracked_entry(b"f.txt", &metadata),
13079            None,
13080            "a mode mismatch must fall through to hashing"
13081        );
13082
13083        // Racily clean (index mtime not strictly after the entry mtime) -> hash.
13084        let (mut racy_cache, entry) = stat_cache_for(&file, oid, real_mode);
13085        racy_cache.index_mtime = Some((
13086            u64::from(entry.mtime_seconds),
13087            u64::from(entry.mtime_nanoseconds),
13088        ));
13089        assert_eq!(
13090            racy_cache.reuse_tracked_entry(b"f.txt", &metadata),
13091            None,
13092            "a racily-clean entry must always be re-hashed"
13093        );
13094
13095        // Unknown index mtime is treated as racy -> hash.
13096        let (mut unknown_cache, _) = stat_cache_for(
13097            &file,
13098            EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec())
13099                .object_id(ObjectFormat::Sha1)
13100                .expect("test operation should succeed"),
13101            real_mode,
13102        );
13103        unknown_cache.index_mtime = None;
13104        assert_eq!(
13105            unknown_cache.reuse_tracked_entry(b"f.txt", &metadata),
13106            None,
13107            "an unknown index mtime must be treated conservatively as racy"
13108        );
13109
13110        fs::remove_dir_all(root).expect("test operation should succeed");
13111    }
13112
13113    #[test]
13114    fn index_stat_probe_cache_serves_many_paths_from_one_index_parse() {
13115        let root = temp_root();
13116        let git_dir = root.join(".git");
13117        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
13118        fs::write(root.join("a.txt"), b"alpha\n").expect("test operation should succeed");
13119        fs::write(root.join("b.txt"), b"bravo\n").expect("test operation should succeed");
13120        build_commit(&root, &git_dir, &["a.txt", "b.txt"]);
13121
13122        let cache = IndexStatProbeCache::from_repository_index(&git_dir, ObjectFormat::Sha1)
13123            .expect("probe cache");
13124        assert_eq!(cache.len(), 2);
13125        assert!(cache.contains_git_path(b"a.txt"));
13126        assert!(cache.contains_git_path(b"b.txt"));
13127        let a = cache.probe_for_git_path(b"a.txt").expect("a probe");
13128        let b = cache.probe_for_git_path(b"b.txt").expect("b probe");
13129        assert_eq!(a.entry().path, b"a.txt");
13130        assert_eq!(b.entry().path, b"b.txt");
13131        assert_eq!(a.index_mtime(), cache.index_mtime());
13132        assert_eq!(b.index_mtime(), cache.index_mtime());
13133        assert!(
13134            cache.probe_for_git_path(b"missing.txt").is_none(),
13135            "missing paths should not allocate probes"
13136        );
13137
13138        let one_shot =
13139            IndexStatProbe::from_repository_index(&git_dir, ObjectFormat::Sha1, b"a.txt")
13140                .expect("legacy one-shot probe")
13141                .expect("a probe");
13142        assert_eq!(one_shot.entry().path, b"a.txt");
13143        assert_eq!(one_shot.index_mtime(), cache.index_mtime());
13144
13145        fs::remove_dir_all(root).expect("test operation should succeed");
13146    }
13147
13148    #[test]
13149    fn short_status_detects_same_length_content_change() {
13150        let root = temp_root();
13151        let git_dir = root.join(".git");
13152        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
13153        fs::write(root.join("f.txt"), b"aaaa\n").expect("test operation should succeed");
13154        build_commit(&root, &git_dir, &["f.txt"]);
13155        // Overwrite with the SAME byte length but different content. Right after
13156        // staging the entry is racily clean (index mtime >= entry mtime), so the
13157        // stat shortcut must not be trusted and the change must surface as M.
13158        fs::write(root.join("f.txt"), b"bbbb\n").expect("test operation should succeed");
13159        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
13160            .expect("test operation should succeed");
13161        assert_eq!(
13162            status
13163                .iter()
13164                .map(ShortStatusEntry::line)
13165                .collect::<Vec<_>>(),
13166            vec![" M f.txt"],
13167            "a same-length content change must be reported modified"
13168        );
13169        fs::remove_dir_all(root).expect("test operation should succeed");
13170    }
13171
13172    #[test]
13173    fn short_status_clean_after_byte_identical_rewrite() {
13174        let root = temp_root();
13175        let git_dir = root.join(".git");
13176        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
13177        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
13178        build_commit(&root, &git_dir, &["f.txt"]);
13179        // Rewrite with byte-identical content; the mtime moves so the stat
13180        // shortcut declines to reuse and the fallback hash proves it clean.
13181        std::thread::sleep(std::time::Duration::from_millis(20));
13182        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
13183        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
13184            .expect("test operation should succeed");
13185        assert!(
13186            status.is_empty(),
13187            "a byte-identical rewrite must be clean via the fallback hash, got {status:?}"
13188        );
13189        fs::remove_dir_all(root).expect("test operation should succeed");
13190    }
13191
13192    #[test]
13193    fn short_status_trusts_stat_cache_and_skips_rehash() {
13194        let root = temp_root();
13195        let git_dir = root.join(".git");
13196        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
13197        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
13198        build_commit(&root, &git_dir, &["f.txt"]);
13199
13200        // Plant a BOGUS oid in the stage-0 entry while preserving its size+mtime,
13201        // so a real re-hash of the (unchanged) worktree file would NOT match it.
13202        let index_path = repository_index_path(&git_dir);
13203        let mut index = read_index(&git_dir);
13204        let bogus = ObjectId::from_hex(ObjectFormat::Sha1, &"0".repeat(40))
13205            .expect("test operation should succeed");
13206        let real_oid = index_entry_for(&index, b"f.txt").oid;
13207        assert_ne!(
13208            real_oid, bogus,
13209            "fixture oid should differ from the bogus oid"
13210        );
13211        index
13212            .entries
13213            .iter_mut()
13214            .find(|entry| entry.path == b"f.txt")
13215            .expect("test operation should succeed")
13216            .oid = bogus.clone();
13217        fs::write(
13218            &index_path,
13219            index
13220                .write(ObjectFormat::Sha1)
13221                .expect("test operation should succeed"),
13222        )
13223        .expect("test operation should succeed");
13224
13225        // Make the index file STRICTLY newer than the entry mtime (non-racy) by
13226        // waiting past one-second filesystem granularity and rewriting it, so the
13227        // racy-clean guard does not force a re-hash.
13228        std::thread::sleep(std::time::Duration::from_millis(1100));
13229        fs::write(
13230            &index_path,
13231            fs::read(&index_path).expect("test operation should succeed"),
13232        )
13233        .expect("test operation should succeed");
13234
13235        // The file is unchanged on disk, so a trusted stat reuses the bogus index
13236        // oid for the worktree entry: worktree-oid == index-oid == bogus, so the
13237        // WORKTREE column is clean. Had status re-hashed the file, the real oid
13238        // would differ from the bogus index oid and the worktree column would be
13239        // 'M'. (The index-vs-HEAD column is 'M' because we corrupted the index
13240        // oid away from HEAD; that is expected and not what this test asserts.)
13241        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
13242            .expect("test operation should succeed");
13243        let entry = status
13244            .iter()
13245            .find(|entry| entry.path == b"f.txt")
13246            .expect("f.txt should appear (its index oid now differs from HEAD)");
13247        assert_eq!(
13248            entry.worktree, b' ',
13249            "non-racy stat match must trust the cached oid (no re-hash); worktree column was {}",
13250            entry.worktree as char
13251        );
13252        assert_eq!(
13253            entry.index_oid.as_ref(),
13254            Some(&bogus),
13255            "the worktree entry must have reused the planted bogus index oid, not the real hash"
13256        );
13257
13258        fs::remove_dir_all(root).expect("test operation should succeed");
13259    }
13260
13261    #[test]
13262    fn worktree_entry_state_detects_same_size_content_change() {
13263        let root = temp_root();
13264        let git_dir = root.join(".git");
13265        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
13266        fs::write(root.join("f.txt"), b"aaaa\n").expect("test operation should succeed");
13267        build_commit(&root, &git_dir, &["f.txt"]);
13268        let index = read_index(&git_dir);
13269        let entry = index_entry_for(&index, b"f.txt").clone();
13270        let probe = IndexStatProbe::from_index_entry_and_index_path(
13271            entry.clone(),
13272            repository_index_path(&git_dir),
13273        );
13274
13275        fs::write(root.join("f.txt"), b"bbbb\n").expect("test operation should succeed");
13276        let state = worktree_entry_state(
13277            &root,
13278            &git_dir,
13279            ObjectFormat::Sha1,
13280            Path::new("f.txt"),
13281            &entry.oid,
13282            entry.mode,
13283            Some(&probe),
13284        )
13285        .expect("test operation should succeed");
13286        assert_eq!(state, WorktreeEntryState::Modified);
13287
13288        fs::remove_dir_all(root).expect("test operation should succeed");
13289    }
13290
13291    #[test]
13292    fn worktree_entry_state_reports_deleted_for_missing_and_parent_not_directory() {
13293        let root = temp_root();
13294        let git_dir = root.join(".git");
13295        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
13296        fs::create_dir_all(root.join("dir")).expect("test operation should succeed");
13297        fs::write(root.join("dir").join("f.txt"), b"hello\n")
13298            .expect("test operation should succeed");
13299        build_commit(&root, &git_dir, &["dir/f.txt"]);
13300        let index = read_index(&git_dir);
13301        let entry = index_entry_for(&index, b"dir/f.txt").clone();
13302
13303        fs::remove_file(root.join("dir").join("f.txt")).expect("test operation should succeed");
13304        let missing = worktree_entry_state_by_git_path(
13305            &root,
13306            &git_dir,
13307            ObjectFormat::Sha1,
13308            b"dir/f.txt",
13309            &entry.oid,
13310            entry.mode,
13311            None,
13312        )
13313        .expect("test operation should succeed");
13314        assert_eq!(missing, WorktreeEntryState::Deleted);
13315
13316        fs::remove_dir(root.join("dir")).expect("test operation should succeed");
13317        fs::write(root.join("dir"), b"not a directory").expect("test operation should succeed");
13318        let parent_not_directory = worktree_entry_state_by_git_path(
13319            &root,
13320            &git_dir,
13321            ObjectFormat::Sha1,
13322            b"dir/f.txt",
13323            &entry.oid,
13324            entry.mode,
13325            None,
13326        )
13327        .expect("test operation should succeed");
13328        assert_eq!(parent_not_directory, WorktreeEntryState::Deleted);
13329
13330        fs::remove_dir_all(root).expect("test operation should succeed");
13331    }
13332
13333    #[test]
13334    fn worktree_entry_state_trusts_clean_non_racy_probe() {
13335        let root = temp_root();
13336        let git_dir = root.join(".git");
13337        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
13338        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
13339        build_commit(&root, &git_dir, &["f.txt"]);
13340        let index_path = repository_index_path(&git_dir);
13341        let mut index = read_index(&git_dir);
13342        let bogus = ObjectId::from_hex(ObjectFormat::Sha1, &"1".repeat(40))
13343            .expect("test operation should succeed");
13344        index
13345            .entries
13346            .iter_mut()
13347            .find(|entry| entry.path == b"f.txt")
13348            .expect("test operation should succeed")
13349            .oid = bogus;
13350        fs::write(
13351            &index_path,
13352            index
13353                .write(ObjectFormat::Sha1)
13354                .expect("test operation should succeed"),
13355        )
13356        .expect("test operation should succeed");
13357        std::thread::sleep(std::time::Duration::from_millis(1100));
13358        fs::write(
13359            &index_path,
13360            fs::read(&index_path).expect("test operation should succeed"),
13361        )
13362        .expect("test operation should succeed");
13363        let index = read_index(&git_dir);
13364        let entry = index_entry_for(&index, b"f.txt").clone();
13365        let probe = IndexStatProbe::from_index_entry_and_index_path(
13366            entry.clone(),
13367            repository_index_path(&git_dir),
13368        );
13369
13370        let state = worktree_entry_state(
13371            &root,
13372            &git_dir,
13373            ObjectFormat::Sha1,
13374            Path::new("f.txt"),
13375            &entry.oid,
13376            entry.mode,
13377            Some(&probe),
13378        )
13379        .expect("test operation should succeed");
13380        assert_eq!(
13381            state,
13382            WorktreeEntryState::Clean,
13383            "a non-racy stat match must be enough to prove this path clean"
13384        );
13385
13386        fs::remove_dir_all(root).expect("test operation should succeed");
13387    }
13388
13389    #[test]
13390    fn worktree_entry_state_rehashes_racy_probe() {
13391        let root = temp_root();
13392        let git_dir = root.join(".git");
13393        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
13394        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
13395        build_commit(&root, &git_dir, &["f.txt"]);
13396        let index = read_index(&git_dir);
13397        let mut entry = index_entry_for(&index, b"f.txt").clone();
13398        entry.oid = ObjectId::from_hex(ObjectFormat::Sha1, &"2".repeat(40))
13399            .expect("test operation should succeed");
13400        let probe = IndexStatProbe::from_index_entry(
13401            entry.clone(),
13402            Some((
13403                u64::from(entry.mtime_seconds),
13404                u64::from(entry.mtime_nanoseconds),
13405            )),
13406        );
13407
13408        let state = worktree_entry_state(
13409            &root,
13410            &git_dir,
13411            ObjectFormat::Sha1,
13412            Path::new("f.txt"),
13413            &entry.oid,
13414            entry.mode,
13415            Some(&probe),
13416        )
13417        .expect("test operation should succeed");
13418        assert_eq!(
13419            state,
13420            WorktreeEntryState::Modified,
13421            "a racily-clean stat match must fall through to hashing"
13422        );
13423
13424        fs::remove_dir_all(root).expect("test operation should succeed");
13425    }
13426
13427    #[cfg(unix)]
13428    #[test]
13429    fn worktree_entry_state_detects_chmod_only_change() {
13430        use std::os::unix::fs::PermissionsExt;
13431
13432        let root = temp_root();
13433        let git_dir = root.join(".git");
13434        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
13435        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
13436        build_commit(&root, &git_dir, &["f.txt"]);
13437        let index = read_index(&git_dir);
13438        let entry = index_entry_for(&index, b"f.txt").clone();
13439
13440        let file = root.join("f.txt");
13441        let mut permissions = fs::metadata(&file)
13442            .expect("test operation should succeed")
13443            .permissions();
13444        permissions.set_mode(permissions.mode() | 0o111);
13445        fs::set_permissions(&file, permissions).expect("test operation should succeed");
13446        let state = worktree_entry_state(
13447            &root,
13448            &git_dir,
13449            ObjectFormat::Sha1,
13450            Path::new("f.txt"),
13451            &entry.oid,
13452            entry.mode,
13453            None,
13454        )
13455        .expect("test operation should succeed");
13456        assert_eq!(state, WorktreeEntryState::Modified);
13457
13458        fs::remove_dir_all(root).expect("test operation should succeed");
13459    }
13460
13461    #[cfg(unix)]
13462    #[test]
13463    fn worktree_entry_state_detects_symlink_target_change() {
13464        use std::os::unix::fs::symlink;
13465
13466        let root = temp_root();
13467        let git_dir = root.join(".git");
13468        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
13469        symlink("one", root.join("link")).expect("test operation should succeed");
13470        build_commit(&root, &git_dir, &["link"]);
13471        let index = read_index(&git_dir);
13472        let entry = index_entry_for(&index, b"link").clone();
13473
13474        fs::remove_file(root.join("link")).expect("test operation should succeed");
13475        symlink("two", root.join("link")).expect("test operation should succeed");
13476        let state = worktree_entry_state(
13477            &root,
13478            &git_dir,
13479            ObjectFormat::Sha1,
13480            Path::new("link"),
13481            &entry.oid,
13482            entry.mode,
13483            None,
13484        )
13485        .expect("test operation should succeed");
13486        assert_eq!(state, WorktreeEntryState::Modified);
13487
13488        fs::remove_dir_all(root).expect("test operation should succeed");
13489    }
13490
13491    #[test]
13492    fn worktree_entry_state_treats_present_unpopulated_gitlink_directory_as_clean() {
13493        let root = temp_root();
13494        let git_dir = root.join(".git");
13495        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
13496        fs::create_dir_all(root.join("submodule")).expect("test operation should succeed");
13497        let oid = ObjectId::from_hex(ObjectFormat::Sha1, &"3".repeat(40))
13498            .expect("test operation should succeed");
13499
13500        let state = worktree_entry_state(
13501            &root,
13502            &git_dir,
13503            ObjectFormat::Sha1,
13504            Path::new("submodule"),
13505            &oid,
13506            0o160000,
13507            None,
13508        )
13509        .expect("test operation should succeed");
13510        assert_eq!(state, WorktreeEntryState::Clean);
13511
13512        fs::remove_dir_all(root).expect("test operation should succeed");
13513    }
13514
13515    #[test]
13516    fn short_status_empty_on_unborn_repository() {
13517        let root = temp_root();
13518        let git_dir = root.join(".git");
13519        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
13520        fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n")
13521            .expect("test operation should succeed");
13522        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
13523            .expect("test operation should succeed");
13524        assert!(
13525            status.is_empty(),
13526            "an unborn repository with an empty worktree must be clean, got {status:?}"
13527        );
13528        fs::remove_dir_all(root).expect("test operation should succeed");
13529    }
13530
13531    #[test]
13532    fn untracked_paths_skips_embedded_git_internals() {
13533        let root = temp_root();
13534        let git_dir = root.join(".git");
13535        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
13536        fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n")
13537            .expect("test operation should succeed");
13538        let nested = root.join("not-a-submodule");
13539        fs::create_dir_all(nested.join(".git")).expect("test operation should succeed");
13540        fs::write(nested.join(".git/HEAD"), "ref: refs/heads/main\n")
13541            .expect("test operation should succeed");
13542        fs::write(nested.join("file.txt"), b"inside\n").expect("test operation should succeed");
13543        let paths = untracked_paths(&root, &git_dir, ObjectFormat::Sha1)
13544            .expect("test operation should succeed");
13545        assert!(
13546            paths.iter().any(|path| path == b"not-a-submodule/"),
13547            "embedded repository directory should be listed, got {paths:?}"
13548        );
13549        assert!(
13550            !paths
13551                .iter()
13552                .any(|path| path.starts_with(b"not-a-submodule/.git")),
13553            "embedded .git internals must not be listed, got {paths:?}"
13554        );
13555        fs::remove_dir_all(root).expect("test operation should succeed");
13556    }
13557
13558    #[cfg(unix)]
13559    #[test]
13560    fn untracked_paths_lists_symlink() {
13561        use std::os::unix::fs::symlink;
13562
13563        let root = temp_root();
13564        let git_dir = root.join(".git");
13565        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
13566        fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n")
13567            .expect("test operation should succeed");
13568        fs::write(root.join("target.txt"), b"target\n").expect("test operation should succeed");
13569        symlink(root.join("target.txt"), root.join("path1")).expect("create symlink");
13570        let paths = untracked_paths(&root, &git_dir, ObjectFormat::Sha1)
13571            .expect("test operation should succeed");
13572        assert!(
13573            paths.contains(&b"path1".to_vec()),
13574            "untracked symlink must be listed, got {paths:?}"
13575        );
13576        fs::remove_dir_all(root).expect("test operation should succeed");
13577    }
13578}