Skip to main content

sley_worktree/
lib.rs

1use sley_config::GitConfig;
2use sley_core::{
3    BString, GitError, MissingObjectContext, MissingObjectKind, ObjectFormat, ObjectId, RepoPath,
4    Result,
5};
6use sley_index::{
7    BorrowedIndex, CacheTree, Index, IndexEntry, IndexEntryRef, SPARSE_DIR_MODE, Stage,
8};
9use sley_object::{Commit, EncodedObject, ObjectType, Tree, TreeEntry, tree_entry_object_type};
10use sley_odb::{FileObjectDatabase, ObjectPresenceChecker, ObjectReader, ObjectWriter};
11use sley_refs::{FileRefStore, RefTarget, RefUpdate, ReflogEntry, branch_ref_name};
12use std::borrow::Cow;
13use std::cell::RefCell;
14use std::cmp::Ordering;
15use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
16use std::io::Write;
17use std::path::{Path, PathBuf};
18use std::process::{Command, Stdio};
19use std::sync::{Mutex, OnceLock};
20use std::time::{Instant, UNIX_EPOCH};
21use std::{env, fs};
22
/// One classification reported by a worktree scan (see
/// [`WorktreeScanner::status`]).
///
/// Every variant except [`WorktreeStatus::Clean`] carries the [`RepoPath`] it
/// refers to.
// NOTE(review): the exact condition behind each variant is decided by the
// `WorktreeScanner` implementations, none of which are visible from here —
// confirm before relying on the variant wording below.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WorktreeStatus {
    /// Nothing to report; carries no path.
    Clean,
    /// The carried path is reported as modified.
    Modified(RepoPath),
    /// The carried path is reported as added.
    Added(RepoPath),
    /// The carried path is reported as deleted.
    Deleted(RepoPath),
    /// The carried path is reported as untracked.
    Untracked(RepoPath),
}
31
/// A source of [`WorktreeStatus`] records for a worktree.
pub trait WorktreeScanner {
    /// Produce the status records for this scanner, or the error that
    /// prevented the scan.
    fn status(&self) -> Result<Vec<WorktreeStatus>>;
}
35
/// A sparse-checkout definition: the raw pattern list plus the sparse-index
/// toggle.
///
/// How `patterns` are interpreted is selected separately through
/// [`SparseCheckoutMode`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SparseCheckout {
    /// The sparse-checkout patterns as raw bytes, one element per pattern.
    pub patterns: Vec<Vec<u8>>,
    /// Sparse-index toggle.
    // NOTE(review): presumably mirrors git's `index.sparse` setting — confirm
    // against the code that consumes this flag.
    pub sparse_index: bool,
}
41
/// Selects how the patterns in a [`SparseCheckout`] are interpreted when
/// deciding which index paths are "in cone" (kept in the worktree).
///
/// * [`SparseCheckoutMode::Full`] interprets the patterns exactly like
///   `.gitignore` lines (full pattern matching, including `*`, `?`, `**`,
///   character classes, anchoring with a leading `/`, directory-only `/`
///   suffixes, and `!` negation). A path is *included* when the last pattern
///   that matches it is not negated. This mirrors upstream Git's non-cone
///   `core.sparseCheckout` behaviour.
/// * [`SparseCheckoutMode::Cone`] interprets the patterns as the restricted
///   directory-prefix form Git emits for `core.sparseCheckoutCone`: a literal
///   `/*` (top-level files), the recursive-parent guard `!/*/`, and anchored
///   directory patterns such as `/dir/` (everything under `dir/`) plus the
///   parent guards `/dir/*` and `!/dir/*/`. Matching is purely prefix based,
///   so glob metacharacters are treated literally.
/// * [`SparseCheckoutMode::Auto`] inspects the patterns and uses cone matching
///   when every pattern fits the cone grammar above, otherwise full matching.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SparseCheckoutMode {
    /// Pick cone or full matching by inspecting the patterns. This is the
    /// `Default` variant.
    #[default]
    Auto,
    /// Always use full (`.gitignore`-style) pattern matching.
    Full,
    /// Always use cone (directory-prefix) matching.
    Cone,
}
66
/// Outcome of applying a sparse-checkout definition, split by what happened
/// to each affected path. Paths are raw bytes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ApplySparseResult {
    /// Paths whose worktree file was (re)materialized because they are in cone.
    pub materialized: Vec<Vec<u8>>,
    /// Paths that were taken out of the worktree because they are out of cone;
    /// their index entry now has the skip-worktree bit set.
    pub skipped: Vec<Vec<u8>>,
    /// Out-of-cone paths whose worktree file was *not* up to date with the index
    /// and was therefore left in place (and its skip-worktree bit left clear),
    /// matching git's data-loss-avoiding behavior. The caller surfaces these as
    /// git's "The following paths are not up to date …" warning. Sorted by path.
    pub not_up_to_date: Vec<Vec<u8>>,
}
80
/// Summary returned by the index-updating entry points in this file (for
/// example [`add_paths_to_index`] and [`update_index_paths`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UpdateIndexResult {
    /// An entry count.
    // NOTE(review): presumably the number of entries in the index after the
    // update — the code that fills this in is not visible here; confirm.
    pub entries: usize,
    /// Object ids associated with the update.
    // NOTE(review): presumably the blobs written or staged by this call —
    // confirm against the implementation.
    pub updated: Vec<ObjectId>,
}
86
/// The action taken (or to be taken) for one tracked path, identified by its
/// raw path bytes.
// NOTE(review): the producers of this enum are outside the visible part of the
// file; the variant meanings below follow the variant names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AddUpdateTrackedAction {
    /// The path is added/updated in the index.
    Add(Vec<u8>),
    /// The path is removed from the index.
    Remove(Vec<u8>),
}
92
/// Result of trying to process one exact tracked path.
// NOTE(review): the function returning this type is not visible here; the
// descriptions below follow the variant names and payload types — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AddExactTrackedPathResult {
    /// The path was handled; the payload is the resulting action, or `None`
    /// when no action was needed.
    Handled(Option<AddUpdateTrackedAction>),
    /// The path could not be handled this way; the caller is expected to fall
    /// back to another code path.
    Unsupported,
}
98
/// An index entry described explicitly by mode, object id, path and stage,
/// rather than derived from a worktree file.
// NOTE(review): presumably the payload of `git update-index --cacheinfo` /
// `--index-info` — confirm against the consumers of this type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CacheInfoEntry {
    /// File mode bits for the entry.
    pub mode: u32,
    /// Object id the entry points at.
    pub oid: ObjectId,
    /// Repository-relative path as raw bytes.
    pub path: Vec<u8>,
    /// Index stage number for the entry.
    pub stage: u16,
}
106
/// One record of an index-info style instruction stream: either an entry to
/// add or a path to remove.
// NOTE(review): presumably one parsed line of `git update-index --index-info`
// input — the parser is not visible here; confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexInfoRecord {
    /// Add the carried [`CacheInfoEntry`] to the index.
    Add(CacheInfoEntry),
    /// Remove `path` (raw bytes) from the index.
    Remove { path: Vec<u8> },
}
112
/// Batch-wide options for the `git add`-style callers that apply one uniform
/// mode to every path. The positional `add`/`remove`/`force_remove`/`info_only`/
/// `chmod` fields describe that uniform mode; `ignore_skip_worktree_entries` is
/// a genuine whole-invocation toggle (it is not positional in git either).
///
/// `git update-index <flag> <path>...` does NOT use this for its per-path mode —
/// it builds [`UpdateIndexPath`] values directly, each carrying the sticky mode
/// in effect when that path was parsed. See [`UpdateIndexPath`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct UpdateIndexOptions {
    /// Uniform `--add` mode for every path in the batch.
    pub add: bool,
    /// Uniform `--remove` mode for every path in the batch.
    pub remove: bool,
    /// Uniform `--force-remove` mode for every path in the batch.
    pub force_remove: bool,
    /// Uniform `--chmod` mode: `Some(true)` for `+x`, `Some(false)` for `-x`,
    /// `None` when no chmod was requested.
    pub chmod: Option<bool>,
    /// Uniform `--info-only` mode for every path in the batch.
    pub info_only: bool,
    /// Whole-invocation toggle; unlike the fields above it is not copied into
    /// the per-path [`UpdateIndexPathMode`].
    pub ignore_skip_worktree_entries: bool,
}
130
131impl UpdateIndexOptions {
132    /// The uniform per-path mode this batch applies to every path.
133    fn path_mode(&self) -> UpdateIndexPathMode {
134        UpdateIndexPathMode {
135            add: self.add,
136            remove: self.remove,
137            force_remove: self.force_remove,
138            info_only: self.info_only,
139            chmod: self.chmod,
140        }
141    }
142}
143
/// The *mode* that was active at the point a positional `update-index` path
/// was seen on the command line. It travels with the path inside
/// [`UpdateIndexPath`].
///
/// git's `update-index` processes argv left-to-right with `parse_options_step`
/// (`PARSE_OPT_STOP_AT_NON_OPTION`): the mode flags `--add`/`--remove`/
/// `--force-remove`/`--info-only`/`--chmod` set sticky global state, and each
/// non-option path is handed to `update_one()` under whatever state is in
/// effect *at that point*. So `--add foo --force-remove bar` ADDs `foo` and
/// FORCE-REMOVEs `bar` — the flags are positional, not global. We mirror that
/// by snapshotting the mode onto each path as it is parsed, rather than
/// applying one batch-wide `UpdateIndexOptions` to every path.
///
/// `--chmod=(+|-)x` is likewise sticky (`--chmod=+x A --chmod=-x B` flips A
/// executable and B non-executable). Each path reports its action
/// (`add '<p>'`, `remove '<p>'`, `chmod (+|-)x '<p>'`) inline under `--verbose`,
/// interleaved in command-line order — which is why the mode must travel with
/// the path.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct UpdateIndexPathMode {
    /// `--add` was in effect for this path.
    pub add: bool,
    /// `--remove` was in effect for this path.
    pub remove: bool,
    /// `--force-remove` was in effect for this path.
    pub force_remove: bool,
    /// `--info-only` was in effect for this path.
    pub info_only: bool,
    /// `--chmod=+x` → `Some(true)`, `--chmod=-x` → `Some(false)`, else `None`.
    pub chmod: Option<bool>,
}
170
/// A single path to stage together with the [`UpdateIndexPathMode`] that
/// applies to it.
///
/// Built either per path by the positional `git update-index` parser, or in
/// bulk with one uniform mode by `ordered_paths_from_plain`.
#[derive(Debug, Clone)]
pub struct UpdateIndexPath {
    /// The path as given by the caller.
    pub path: PathBuf,
    /// The mode under which `path` is processed.
    pub mode: UpdateIndexPathMode,
}
176
/// Options for writing the index out as a tree object. The `Default` value is
/// `missing_ok: false`, `prefix: None`.
// NOTE(review): presumably these mirror `git write-tree --missing-ok` and
// `--prefix=<dir>` — the consumer is not visible here; confirm.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct WriteTreeOptions {
    /// Whether missing objects are tolerated.
    pub missing_ok: bool,
    /// Optional path prefix (raw bytes) restricting which subtree is written.
    pub prefix: Option<Vec<u8>>,
}
182
/// One owned short-status record: two status-code bytes, the path, and the
/// mode/object-id details known for it.
///
/// [`ShortStatusEntry::as_row`] gives the borrowed [`ShortStatusRow`] view of
/// the same data; [`ShortStatusEntry::line`] renders it as `XY path`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ShortStatusEntry {
    /// First status-code byte (the index column), rendered first by `line`.
    pub index: u8,
    /// Second status-code byte (the worktree column), rendered second by `line`.
    pub worktree: u8,
    /// Repository-relative path as raw bytes.
    pub path: Vec<u8>,
    /// Mode of the path in `HEAD`, when known.
    pub head_mode: Option<u32>,
    /// Mode of the path in the index, when known.
    pub index_mode: Option<u32>,
    /// Mode of the path in the worktree, when known.
    pub worktree_mode: Option<u32>,
    /// Object id of the path in `HEAD`, when known.
    pub head_oid: Option<ObjectId>,
    /// Object id of the path in the index, when known.
    pub index_oid: Option<ObjectId>,
    /// For a tracked gitlink (submodule) path: how the submodule's working
    /// state differs from the staged gitlink. `None` for ordinary paths.
    pub submodule: Option<SubmoduleStatus>,
}
197
/// Borrowed, `Copy` view of a short-status record: the same fields as
/// [`ShortStatusEntry`], with the path borrowed for `'a` instead of owned.
///
/// `to_owned_entry` converts it into an owned [`ShortStatusEntry`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ShortStatusRow<'a> {
    /// First status-code byte (the index column), rendered first by `line`.
    pub index: u8,
    /// Second status-code byte (the worktree column), rendered second by `line`.
    pub worktree: u8,
    /// Repository-relative path as raw bytes, borrowed from the producer.
    pub path: &'a [u8],
    /// Mode of the path in `HEAD`, when known.
    pub head_mode: Option<u32>,
    /// Mode of the path in the index, when known.
    pub index_mode: Option<u32>,
    /// Mode of the path in the worktree, when known.
    pub worktree_mode: Option<u32>,
    /// Object id of the path in `HEAD`, when known.
    pub head_oid: Option<ObjectId>,
    /// Object id of the path in the index, when known.
    pub index_oid: Option<ObjectId>,
    /// For a tracked gitlink (submodule) path: how the submodule's working
    /// state differs from the staged gitlink. `None` for ordinary paths.
    pub submodule: Option<SubmoduleStatus>,
}
212
/// Signal returned by a streaming callback to say whether the producer should
/// keep emitting records.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StreamControl {
    /// Keep streaming. This is the `Default` variant.
    #[default]
    Continue,
    /// Stop streaming; the consumer has everything it needs.
    Stop,
}

impl StreamControl {
    /// `true` exactly when the callback asked the producer to stop.
    fn is_stop(self) -> bool {
        self == Self::Stop
    }
}
225
/// Submodule-specific change detail for a status entry, mirroring upstream's
/// `wt_status_change_data` trio: `new_submodule_commits` plus the
/// `DIRTY_SUBMODULE_MODIFIED`/`DIRTY_SUBMODULE_UNTRACKED` dirty bits.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct SubmoduleStatus {
    /// The submodule's checked-out HEAD differs from the staged gitlink oid.
    pub new_commits: bool,
    /// The submodule has staged or unstaged changes to tracked files.
    pub modified_content: bool,
    /// The submodule has untracked files.
    pub untracked_content: bool,
}

impl SubmoduleStatus {
    /// Whether at least one of the three change flags is set.
    pub fn any(&self) -> bool {
        // Exhaustive destructuring: adding a flag to the struct forces this
        // method to be revisited.
        let Self {
            new_commits,
            modified_content,
            untracked_content,
        } = *self;
        new_commits || modified_content || untracked_content
    }
}
244
/// Dirt-mask bit (bit 0) meaning the submodule has staged or unstaged changes
/// to tracked files (upstream `DIRTY_SUBMODULE_MODIFIED`).
pub const DIRTY_SUBMODULE_MODIFIED: u8 = 1 << 0;
/// Dirt-mask bit (bit 1) meaning the submodule has untracked files
/// (upstream `DIRTY_SUBMODULE_UNTRACKED`).
pub const DIRTY_SUBMODULE_UNTRACKED: u8 = 1 << 1;
251
252/// Inspect the working state of the submodule whose worktree is at `sub_root`
253/// and report its dirt mask: [`DIRTY_SUBMODULE_MODIFIED`] for staged/unstaged
254/// changes to tracked files, [`DIRTY_SUBMODULE_UNTRACKED`] for untracked
255/// files. Returns 0 for a clean submodule — and for a directory that is not a
256/// populated repository at all (upstream treats an unpopulated gitlink as
257/// always unchanged). The native equivalent of upstream's
258/// `is_submodule_modified()` (which runs `git status --porcelain=2` inside the
259/// submodule and classifies `?` lines as untracked, everything else as
260/// modified).
261pub fn submodule_dirt(sub_root: &Path) -> u8 {
262    let Some(git_dir) = sley_diff_merge::gitlink_git_dir(sub_root) else {
263        return 0;
264    };
265    let Ok(config) = sley_config::read_repo_config(&git_dir, None) else {
266        return 0;
267    };
268    let Ok(format) = config.repository_object_format() else {
269        return 0;
270    };
271    let mut dirt = 0;
272    let status_result = stream_short_status_with_options(
273        sub_root,
274        &git_dir,
275        format,
276        ShortStatusOptions {
277            include_ignored: false,
278            ignored_mode: StatusIgnoredMode::Traditional,
279            untracked_mode: StatusUntrackedMode::Normal,
280        },
281        |entry| {
282            if entry.index == b'?' && entry.worktree == b'?' {
283                dirt |= DIRTY_SUBMODULE_UNTRACKED;
284            } else {
285                dirt |= DIRTY_SUBMODULE_MODIFIED;
286            }
287            let complete = DIRTY_SUBMODULE_MODIFIED | DIRTY_SUBMODULE_UNTRACKED;
288            Ok(if dirt == complete {
289                StreamControl::Stop
290            } else {
291                StreamControl::Continue
292            })
293        },
294    );
295    if status_result.is_err() {
296        return 0;
297    }
298    dirt
299}
300
/// How untracked paths are reported by the short-status machinery (see
/// [`ShortStatusOptions::untracked_mode`]).
// NOTE(review): presumably mirrors `git status --untracked-files=<mode>`
// (`all` / `normal` / `no`) — the status walker is not visible here; confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StatusUntrackedMode {
    /// The `Default` variant.
    #[default]
    All,
    /// The mode `submodule_dirt` uses when inspecting a submodule.
    Normal,
    /// The third mode; by its name, untracked paths are not reported.
    None,
}
308
/// How ignored paths are reported by the short-status machinery (see
/// [`ShortStatusOptions::ignored_mode`]).
// NOTE(review): presumably mirrors `git status --ignored=<traditional|matching>`
// — the status walker is not visible here; confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StatusIgnoredMode {
    /// The `Default` variant.
    #[default]
    Traditional,
    /// The alternative, "matching" reporting mode.
    Matching,
}
315
/// Options for the short-status walk (`stream_short_status_with_options`).
///
/// The derived `Default` is `include_ignored: false`,
/// `ignored_mode: StatusIgnoredMode::Traditional`,
/// `untracked_mode: StatusUntrackedMode::All`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct ShortStatusOptions {
    /// Whether ignored paths are included in the output.
    pub include_ignored: bool,
    /// How ignored paths are reported.
    // NOTE(review): presumably only consulted when `include_ignored` is set —
    // confirm in the status walker.
    pub ignored_mode: StatusIgnoredMode,
    /// How untracked paths are reported.
    pub untracked_mode: StatusUntrackedMode,
}
322
/// The worktree state of one tracked path relative to an expected index/tree
/// entry.
// NOTE(review): presumably the result type of `worktree_entry_state` /
// `worktree_entry_state_by_git_path`, which are referenced by the
// `IndexStatProbe` docs but defined outside the visible part of the file —
// confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WorktreeEntryState {
    /// The path exists in the worktree and matches the expected mode/object id.
    Clean,
    /// The path exists, but its type, mode, filtered content, symlink target, or
    /// gitlink HEAD differs from the expected entry.
    Modified,
    /// The path, or one of its parents, is missing from the worktree.
    Deleted,
}
335
/// Durability options for an atomic metadata-file write. Both flags default
/// to `false`.
// NOTE(review): the writer that consumes these options is not visible here;
// the field descriptions follow the field names — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct AtomicMetadataWriteOptions {
    /// Request an fsync of the written file.
    pub fsync_file: bool,
    /// Request an fsync of the directory containing the file.
    pub fsync_dir: bool,
}
341
/// What an atomic metadata-file write reports back about the file it wrote.
// NOTE(review): the writer that produces this value is not visible here; the
// field descriptions follow the field names and types — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AtomicMetadataWriteResult {
    /// Path of the written file.
    pub path: PathBuf,
    /// Length of the written file in bytes.
    pub len: u64,
    /// Modification time of the written file, when available.
    // NOTE(review): presumably `(seconds, nanoseconds)`, like the index mtime
    // pairs used elsewhere in this file — confirm.
    pub mtime: Option<(u64, u64)>,
}
348
/// Stage-0 index stat data that can prove a worktree path clean without
/// re-reading and re-hashing it.
///
/// This is the public carrier for sley's racy-git shortcut. Callers that already
/// parsed `.git/index` can build a probe from the matching [`IndexEntry`] and
/// the index file's mtime, then pass it to [`worktree_entry_state`] or
/// [`worktree_entry_state_by_git_path`]. The probe is trusted only when its path,
/// mode, and object id match the expected entry and the cached stat is not
/// racily clean; otherwise the helper falls back to the same content hashing
/// path used by [`stream_short_status_with_options`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexStatProbe {
    /// The parsed index entry whose cached stat data backs the probe.
    entry: IndexEntry,
    /// Mtime of the index file as `(seconds, nanoseconds)`, used as the
    /// racy-clean reference timestamp; `None` when it is not known.
    index_mtime: Option<(u64, u64)>,
}
364
/// Reusable stage-0 index stat probes for many worktree paths.
///
/// Prefer this over repeated [`IndexStatProbe::from_repository_index`] calls
/// when an embedder needs to verify many paths. It parses `.git/index` once,
/// records the index file mtime used for racy-git checks, and serves cheap
/// per-path probes from memory.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct IndexStatProbeCache {
    /// Stage-0 index entries keyed by their raw path bytes.
    entries: HashMap<Vec<u8>, IndexEntry>,
    /// Mtime of the index file the entries came from, handed to every probe
    /// served by this cache; `None` when it is not known.
    index_mtime: Option<(u64, u64)>,
}
376
377impl IndexStatProbe {
378    /// Build a probe from a parsed stage-0 index entry and the index file's mtime
379    /// split as `(seconds, nanoseconds)`.
380    pub fn from_index_entry(entry: IndexEntry, index_mtime: Option<(u64, u64)>) -> Self {
381        Self { entry, index_mtime }
382    }
383
384    /// Build a probe from a parsed index entry and the path of the index file on
385    /// disk, using that file's mtime as the racy-clean reference timestamp.
386    pub fn from_index_entry_and_index_path(
387        entry: IndexEntry,
388        index_path: impl AsRef<Path>,
389    ) -> Self {
390        let index_mtime = fs::metadata(index_path.as_ref())
391            .ok()
392            .and_then(|metadata| file_mtime_parts(&metadata));
393        Self { entry, index_mtime }
394    }
395
396    /// Read this repository's index and return a probe for `git_path` when a
397    /// stage-0 entry exists.
398    ///
399    /// For repeated lookups prefer [`IndexStatProbeCache::from_repository_index`]
400    /// and [`IndexStatProbeCache::probe_for_git_path`]. This one-shot helper
401    /// keeps a small process-local cache for back-to-back calls against an
402    /// unchanged index, but the explicit cache makes ownership and invalidation
403    /// clearer for high-volume embedders.
404    pub fn from_repository_index(
405        git_dir: impl AsRef<Path>,
406        format: ObjectFormat,
407        git_path: &[u8],
408    ) -> Result<Option<Self>> {
409        let index_path = repository_index_path(git_dir);
410        cached_repository_index_stat_probe(&index_path, format, git_path)
411    }
412
413    /// The parsed index entry this probe was built from.
414    pub fn entry(&self) -> &IndexEntry {
415        &self.entry
416    }
417
418    /// The index file mtime used as the racy-clean reference timestamp.
419    pub fn index_mtime(&self) -> Option<(u64, u64)> {
420        self.index_mtime
421    }
422
423    fn stat_cache_for(
424        &self,
425        git_path: &[u8],
426        expected_oid: &ObjectId,
427        expected_mode: u32,
428    ) -> Option<IndexStatCache> {
429        if index_entry_stage(&self.entry) != 0
430            || self.entry.path.as_bytes() != git_path
431            || self.entry.oid != *expected_oid
432            || self.entry.mode != expected_mode
433        {
434            return None;
435        }
436        let mut entries = HashMap::new();
437        entries.insert(git_path.to_vec(), self.entry.clone());
438        Some(IndexStatCache {
439            entries,
440            index_mtime: self.index_mtime,
441        })
442    }
443}
444
445impl IndexStatProbeCache {
446    /// Build a reusable probe cache from an already parsed index and index-file
447    /// mtime.
448    pub fn from_index(index: &Index, index_mtime: Option<(u64, u64)>) -> Self {
449        Self {
450            entries: stage0_index_entries(index),
451            index_mtime,
452        }
453    }
454
455    /// Read this repository's index once and build reusable stat probes.
456    ///
457    /// A missing index returns an empty cache, matching the one-shot helper's
458    /// `Ok(None)` result for every path.
459    pub fn from_repository_index(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Result<Self> {
460        let index_path = repository_index_path(git_dir);
461        read_index_stat_probe_cache(&index_path, format)
462    }
463
464    /// Return a per-path probe for a stage-0 entry, if present.
465    pub fn probe_for_git_path(&self, git_path: &[u8]) -> Option<IndexStatProbe> {
466        self.entries
467            .get(git_path)
468            .cloned()
469            .map(|entry| IndexStatProbe {
470                entry,
471                index_mtime: self.index_mtime,
472            })
473    }
474
475    /// Whether this cache has a stage-0 entry for `git_path`.
476    pub fn contains_git_path(&self, git_path: &[u8]) -> bool {
477        self.entries.contains_key(git_path)
478    }
479
480    /// Number of stage-0 entries in the cache.
481    pub fn len(&self) -> usize {
482        self.entries.len()
483    }
484
485    /// Whether the cache has no stage-0 entries.
486    pub fn is_empty(&self) -> bool {
487        self.entries.is_empty()
488    }
489
490    /// The index file mtime used as the racy-clean reference timestamp.
491    pub fn index_mtime(&self) -> Option<(u64, u64)> {
492        self.index_mtime
493    }
494}
495
/// The value held in the process-local probe cache: the probes parsed from one
/// index file plus the identity that decides whether they may be reused.
///
/// `cached_repository_index_stat_probe` reuses `probes` only when the path,
/// object format, file length and file mtime of the requested index all equal
/// the values recorded here.
#[derive(Clone)]
struct CachedRepositoryIndexStatProbes {
    /// Path of the index file the probes were parsed from.
    index_path: PathBuf,
    /// Object format the index was parsed with.
    format: ObjectFormat,
    /// Length of the index file when it was parsed.
    len: u64,
    /// Mtime of the index file when it was parsed, if available.
    mtime: Option<(u64, u64)>,
    /// The parsed stage-0 probes.
    probes: IndexStatProbeCache,
}
504
/// Process-wide, single-slot cache used by
/// `cached_repository_index_stat_probe` (and therefore by
/// [`IndexStatProbe::from_repository_index`]).
///
/// The `OnceLock` is initialised lazily on first use; the inner `Option` is
/// `None` until an index has been parsed and is reset to `None` when the index
/// file is found to be missing. Only the most recently parsed index is kept.
static REPOSITORY_INDEX_STAT_PROBES: OnceLock<Mutex<Option<CachedRepositoryIndexStatProbes>>> =
    OnceLock::new();
507
508fn cached_repository_index_stat_probe(
509    index_path: &Path,
510    format: ObjectFormat,
511    git_path: &[u8],
512) -> Result<Option<IndexStatProbe>> {
513    let metadata = match fs::metadata(index_path) {
514        Ok(metadata) => metadata,
515        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
516            if let Some(cache) = REPOSITORY_INDEX_STAT_PROBES.get()
517                && let Ok(mut guard) = cache.lock()
518            {
519                *guard = None;
520            }
521            return Ok(None);
522        }
523        Err(err) => return Err(err.into()),
524    };
525    let len = metadata.len();
526    let mtime = file_mtime_parts(&metadata);
527    let cache = REPOSITORY_INDEX_STAT_PROBES.get_or_init(|| Mutex::new(None));
528    if let Ok(guard) = cache.lock()
529        && let Some(cached) = guard.as_ref()
530        && cached.index_path == index_path
531        && cached.format == format
532        && cached.len == len
533        && cached.mtime == mtime
534    {
535        return Ok(cached.probes.probe_for_git_path(git_path));
536    }
537
538    let probes = read_index_stat_probe_cache_with_metadata(index_path, format, mtime)?;
539    let probe = probes.probe_for_git_path(git_path);
540    if let Ok(mut guard) = cache.lock() {
541        *guard = Some(CachedRepositoryIndexStatProbes {
542            index_path: index_path.to_path_buf(),
543            format,
544            len,
545            mtime,
546            probes: probes.clone(),
547        });
548    }
549    Ok(probe)
550}
551
552fn read_index_stat_probe_cache(
553    index_path: &Path,
554    format: ObjectFormat,
555) -> Result<IndexStatProbeCache> {
556    let metadata = match fs::metadata(index_path) {
557        Ok(metadata) => metadata,
558        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
559            return Ok(IndexStatProbeCache::default());
560        }
561        Err(err) => return Err(err.into()),
562    };
563    read_index_stat_probe_cache_with_metadata(index_path, format, file_mtime_parts(&metadata))
564}
565
566fn read_index_stat_probe_cache_with_metadata(
567    index_path: &Path,
568    format: ObjectFormat,
569    index_mtime: Option<(u64, u64)>,
570) -> Result<IndexStatProbeCache> {
571    let bytes = fs::read(index_path)?;
572    let index = Index::parse(&bytes, format)?;
573    Ok(IndexStatProbeCache::from_index(&index, index_mtime))
574}
575
576fn stage0_index_entries(index: &Index) -> HashMap<Vec<u8>, IndexEntry> {
577    let mut entries = HashMap::new();
578    for entry in &index.entries {
579        if index_entry_stage(entry) == 0 {
580            entries.insert(entry.path.as_bytes().to_vec(), entry.clone());
581        }
582    }
583    entries
584}
585
/// Summary of a completed checkout.
// NOTE(review): the checkout routine that fills this in is not visible here;
// the field descriptions follow the field names — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CheckoutResult {
    /// Name of the branch that was checked out.
    pub branch: String,
    /// Object id that was checked out.
    pub oid: ObjectId,
    /// A file count for the checkout.
    pub files: usize,
}
592
/// Summary of a restore operation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RestoreResult {
    /// A count of restored items.
    // NOTE(review): presumably the number of paths restored — confirm against
    // the restore routine, which is not visible here.
    pub restored: usize,
}
597
/// Summary of a remove operation (see [`RemoveOptions`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RemoveResult {
    /// The removed paths as raw bytes.
    // NOTE(review): whether this also lists paths that a dry run *would*
    // remove is not visible from here — confirm.
    pub removed: Vec<Vec<u8>>,
}
602
/// Outcome of moving one source to one destination (see [`MoveOptions`]).
// NOTE(review): the move routine is not visible here; the field descriptions
// follow the field names and types — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MoveResult {
    /// Source path as raw bytes.
    pub source: Vec<u8>,
    /// Destination path as raw bytes.
    pub destination: Vec<u8>,
    /// Whether this move was skipped rather than performed.
    pub skipped: bool,
    /// Message of a fatal error, if one occurred.
    pub fatal: Option<String>,
    /// Per-path breakdown of the move.
    // NOTE(review): presumably one element per file when a directory is
    // moved — confirm.
    pub details: Vec<MoveDetail>,
}
611
/// One element of [`MoveResult::details`]: a single source/destination pair
/// and whether it was skipped.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MoveDetail {
    /// Source path as raw bytes.
    pub source: Vec<u8>,
    /// Destination path as raw bytes.
    pub destination: Vec<u8>,
    /// Whether this pair was skipped rather than moved.
    pub skipped: bool,
}
618
/// Locate the index file for the repository whose git directory is `git_dir`.
///
/// The `GIT_INDEX_FILE` environment variable wins whenever it is set and its
/// value is used verbatim; otherwise the path is `<git_dir>/index`.
pub fn repository_index_path(git_dir: impl AsRef<Path>) -> PathBuf {
    match env::var_os("GIT_INDEX_FILE") {
        Some(overridden) => PathBuf::from(overridden),
        None => git_dir.as_ref().join("index"),
    }
}
624
625pub fn read_repository_index(
626    git_dir: impl AsRef<Path>,
627    format: ObjectFormat,
628) -> Result<Option<Index>> {
629    let index_path = repository_index_path(git_dir);
630    if !index_path.exists() {
631        return Ok(None);
632    }
633    Ok(Some(Index::parse(&fs::read(index_path)?, format)?))
634}
635
636fn empty_index() -> Index {
637    Index {
638        version: 2,
639        entries: Vec::new(),
640        extensions: Vec::new(),
641        checksum: None,
642    }
643}
644
645/// Resolve the working-tree root for a repository identified by its git
646/// directory, returning `Ok(None)` for a bare repository.
647///
648/// This is the repository-intrinsic worktree resolution (it does *not* consult
649/// `GIT_WORK_TREE`/`GIT_DIR` or CLI overrides — those are the caller's job):
650///
651/// 0. if `core.bare` is true the repository is bare and `Ok(None)` is returned
652///    immediately — `core.bare` takes precedence, so a bare repo ignores
653///    `core.worktree` and the `.git`-parent fallback;
654/// 1. otherwise, a `core.worktree` setting in `<git_dir>/config` (absolute, or
655///    relative to the git directory), canonicalised;
656/// 2. otherwise, for a linked worktree (a git directory that has both a
657///    `commondir` and a `gitdir` administrative file), the directory containing
658///    the worktree's `.git` link, canonicalised;
659/// 3. otherwise, when the git directory is a `.git` directory, its parent (the
660///    ordinary non-bare layout) — returned verbatim, not canonicalised;
661/// 4. otherwise the repository is bare and `Ok(None)` is returned.
662///
663/// `Ok(None)` means specifically "bare" (case 0 or case 4). A [`GitError::Io`] is
664/// returned if a path that should exist cannot be canonicalised, and a
665/// [`GitError::InvalidPath`] if a `.git` directory has no parent (a malformed
666/// layout).
667pub fn worktree_root_for_git_dir(git_dir: &Path) -> Result<Option<PathBuf>> {
668    if let Ok(config) = sley_config::read_repo_config(git_dir, None) {
669        // A bare repository has no working tree, and `core.bare` takes precedence:
670        // a bare repo ignores `core.worktree`. Check it before any worktree
671        // resolution so a bare `.git`-named directory does not fall through to the
672        // "parent of .git" case below.
673        if config.get_bool("core", None, "bare") == Some(true) {
674            return Ok(None);
675        }
676        if let Some(worktree) = config.get("core", None, "worktree") {
677            let worktree = PathBuf::from(worktree);
678            let worktree = if worktree.is_absolute() {
679                worktree
680            } else {
681                git_dir.join(worktree)
682            };
683            return fs::canonicalize(worktree)
684                .map(Some)
685                .map_err(|err| GitError::Io(err.to_string()));
686        }
687    }
688    if git_dir.join("commondir").is_file() {
689        let gitdir_file = git_dir.join("gitdir");
690        if gitdir_file.is_file() {
691            let value = fs::read_to_string(&gitdir_file)?;
692            let worktree_git_file = resolve_worktree_admin_path(git_dir, value.trim());
693            if let Some(worktree) = worktree_git_file.parent() {
694                return fs::canonicalize(worktree)
695                    .map(Some)
696                    .map_err(|err| GitError::Io(err.to_string()));
697            }
698        }
699    }
700    if git_dir.file_name().and_then(|name| name.to_str()) != Some(".git") {
701        return Ok(None);
702    }
703    git_dir
704        .parent()
705        .map(Path::to_path_buf)
706        .map(Some)
707        .ok_or_else(|| GitError::InvalidPath("git dir has no parent worktree".into()))
708}
709
/// Turn a path read from a git-directory administrative file (e.g. the
/// `gitdir` link of a linked worktree) into a usable path: an absolute
/// `value` is returned unchanged, a relative one is resolved against
/// `admin_dir`.
fn resolve_worktree_admin_path(admin_dir: &Path, value: &str) -> PathBuf {
    // `Path::join` already has exactly these semantics: joining an absolute
    // path replaces the base instead of appending to it.
    admin_dir.join(value)
}
721
/// Report whether the repository at `git_dir` is shallow, i.e. whether it has
/// a `shallow` file recording grafted commit boundaries (`git clone --depth`).
///
/// Any failure to stat `<git_dir>/shallow` counts as "not shallow".
pub fn is_shallow_repository(git_dir: &Path) -> bool {
    let shallow_file = git_dir.join("shallow");
    fs::metadata(shallow_file).is_ok()
}
727
/// Options for a remove operation (result type: [`RemoveResult`]).
// NOTE(review): presumably these mirror `git rm`'s `-r`, `--cached`, `-f`,
// `-n` and `--ignore-unmatch` — the remove routine is not visible here;
// confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RemoveOptions {
    /// Allow removing directories recursively.
    pub recursive: bool,
    /// Operate on the index only.
    pub cached: bool,
    /// Override safety checks.
    pub force: bool,
    /// Report what would happen without doing it.
    pub dry_run: bool,
    /// Do not treat unmatched paths as an error.
    pub ignore_unmatch: bool,
}
736
/// Options for a move operation (result type: [`MoveResult`]).
// NOTE(review): presumably these mirror `git mv`'s `-f`, `-n` and `-k` — the
// move routine is not visible here; confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MoveOptions {
    /// Override safety checks.
    pub force: bool,
    /// Report what would happen without doing it.
    pub dry_run: bool,
    /// Skip moves that would fail instead of aborting.
    pub skip_errors: bool,
}
743
744impl ShortStatusEntry {
745    pub fn as_row(&self) -> ShortStatusRow<'_> {
746        ShortStatusRow {
747            index: self.index,
748            worktree: self.worktree,
749            path: &self.path,
750            head_mode: self.head_mode,
751            index_mode: self.index_mode,
752            worktree_mode: self.worktree_mode,
753            head_oid: self.head_oid,
754            index_oid: self.index_oid,
755            submodule: self.submodule,
756        }
757    }
758
759    pub fn line(&self) -> String {
760        format!(
761            "{}{} {}",
762            self.index as char,
763            self.worktree as char,
764            String::from_utf8_lossy(&self.path)
765        )
766    }
767}
768
769impl ShortStatusRow<'_> {
770    pub fn to_owned_entry(self) -> ShortStatusEntry {
771        ShortStatusEntry {
772            index: self.index,
773            worktree: self.worktree,
774            path: self.path.to_vec(),
775            head_mode: self.head_mode,
776            index_mode: self.index_mode,
777            worktree_mode: self.worktree_mode,
778            head_oid: self.head_oid,
779            index_oid: self.index_oid,
780            submodule: self.submodule,
781        }
782    }
783
784    pub fn line(&self) -> String {
785        format!(
786            "{}{} {}",
787            self.index as char,
788            self.worktree as char,
789            String::from_utf8_lossy(self.path)
790        )
791    }
792}
793
794pub fn add_paths_to_index(
795    worktree_root: impl AsRef<Path>,
796    git_dir: impl AsRef<Path>,
797    format: ObjectFormat,
798    paths: &[PathBuf],
799) -> Result<UpdateIndexResult> {
800    update_index_paths(
801        worktree_root,
802        git_dir,
803        format,
804        paths,
805        UpdateIndexOptions {
806            add: true,
807            remove: false,
808            force_remove: false,
809            chmod: None,
810            info_only: false,
811            ignore_skip_worktree_entries: false,
812        },
813    )
814}
815
816pub fn update_index_paths(
817    worktree_root: impl AsRef<Path>,
818    git_dir: impl AsRef<Path>,
819    format: ObjectFormat,
820    paths: &[PathBuf],
821    options: UpdateIndexOptions,
822) -> Result<UpdateIndexResult> {
823    let git_dir = git_dir.as_ref();
824    let index = read_repository_index(git_dir, format)?.unwrap_or_else(empty_index);
825    update_index_paths_with_index(worktree_root, git_dir, format, index, paths, options)
826}
827
828pub fn update_index_paths_with_index(
829    worktree_root: impl AsRef<Path>,
830    git_dir: impl AsRef<Path>,
831    format: ObjectFormat,
832    index: Index,
833    paths: &[PathBuf],
834    options: UpdateIndexOptions,
835) -> Result<UpdateIndexResult> {
836    let ordered = ordered_paths_from_plain(paths, options);
837    update_index_paths_impl(
838        worktree_root.as_ref(),
839        git_dir.as_ref(),
840        format,
841        index,
842        &ordered,
843        options,
844        None,
845        false,
846    )
847}
848
849/// Stamp a single uniform mode (from a batch-wide [`UpdateIndexOptions`]) onto
850/// every path. Used by the `git add`-style callers that genuinely apply one
851/// mode to all paths; the positional `git update-index <flag> <path>...` path
852/// instead snapshots a distinct mode per path in the CLI parse walk.
853fn ordered_paths_from_plain(
854    paths: &[PathBuf],
855    options: UpdateIndexOptions,
856) -> Vec<UpdateIndexPath> {
857    let mode = options.path_mode();
858    paths
859        .iter()
860        .map(|path| UpdateIndexPath {
861            path: path.clone(),
862            mode,
863        })
864        .collect()
865}
866
867/// Stage an ordered list of paths, each carrying its own `--chmod` state, and
868/// (under `verbose`) print the `add`/`remove`/`chmod` action lines inline in
869/// command-line order. This is the entry point `git update-index <path>...`
870/// uses so that `--chmod=+x A --chmod=-x B --verbose` produces the interleaved
871/// `add 'A'` / `chmod +x 'A'` / `add 'B'` / `chmod -x 'B'` output git emits.
872pub fn update_index_ordered_paths_filtered(
873    worktree_root: impl AsRef<Path>,
874    git_dir: impl AsRef<Path>,
875    format: ObjectFormat,
876    paths: &[UpdateIndexPath],
877    options: UpdateIndexOptions,
878    config: &GitConfig,
879    verbose: bool,
880) -> Result<UpdateIndexResult> {
881    let git_dir = git_dir.as_ref();
882    let index = read_repository_index(git_dir, format)?.unwrap_or_else(empty_index);
883    update_index_ordered_paths_filtered_with_index(
884        worktree_root,
885        git_dir,
886        format,
887        index,
888        paths,
889        options,
890        config,
891        verbose,
892    )
893}
894
895pub fn update_index_ordered_paths_filtered_with_index(
896    worktree_root: impl AsRef<Path>,
897    git_dir: impl AsRef<Path>,
898    format: ObjectFormat,
899    index: Index,
900    paths: &[UpdateIndexPath],
901    options: UpdateIndexOptions,
902    config: &GitConfig,
903    verbose: bool,
904) -> Result<UpdateIndexResult> {
905    update_index_paths_impl(
906        worktree_root.as_ref(),
907        git_dir.as_ref(),
908        format,
909        index,
910        paths,
911        options,
912        Some(config),
913        verbose,
914    )
915}
916
917/// Like [`add_paths_to_index`], but runs the configured content filters
918/// (`core.autocrlf`/`text`/`eol` EOL conversion and `filter.<name>.clean`
919/// drivers) on each file's contents before hashing it into the object store.
920///
921/// `config` is the repository config used to resolve the filters; pass the
922/// parsed `<git_dir>/config` (the orchestrator typically already has this).
923pub fn add_paths_to_index_filtered(
924    worktree_root: impl AsRef<Path>,
925    git_dir: impl AsRef<Path>,
926    format: ObjectFormat,
927    paths: &[PathBuf],
928    config: &GitConfig,
929) -> Result<UpdateIndexResult> {
930    update_index_paths_filtered(
931        worktree_root,
932        git_dir,
933        format,
934        paths,
935        UpdateIndexOptions {
936            add: true,
937            remove: false,
938            force_remove: false,
939            chmod: None,
940            info_only: false,
941            ignore_skip_worktree_entries: false,
942        },
943        config,
944    )
945}
946
947/// Like [`update_index_paths`], but applies the clean-side content filters (see
948/// [`apply_clean_filter`]) to file contents before they are hashed/written.
949pub fn update_index_paths_filtered(
950    worktree_root: impl AsRef<Path>,
951    git_dir: impl AsRef<Path>,
952    format: ObjectFormat,
953    paths: &[PathBuf],
954    options: UpdateIndexOptions,
955    config: &GitConfig,
956) -> Result<UpdateIndexResult> {
957    let git_dir = git_dir.as_ref();
958    let index = read_repository_index(git_dir, format)?.unwrap_or_else(empty_index);
959    update_index_paths_filtered_with_index(
960        worktree_root,
961        git_dir,
962        format,
963        index,
964        paths,
965        options,
966        config,
967    )
968}
969
970pub fn update_index_paths_filtered_with_index(
971    worktree_root: impl AsRef<Path>,
972    git_dir: impl AsRef<Path>,
973    format: ObjectFormat,
974    index: Index,
975    paths: &[PathBuf],
976    options: UpdateIndexOptions,
977    config: &GitConfig,
978) -> Result<UpdateIndexResult> {
979    let ordered = ordered_paths_from_plain(paths, options);
980    update_index_paths_impl(
981        worktree_root.as_ref(),
982        git_dir.as_ref(),
983        format,
984        index,
985        &ordered,
986        options,
987        Some(config),
988        false,
989    )
990}
991
992pub fn add_update_all_tracked_filtered(
993    worktree_root: impl AsRef<Path>,
994    git_dir: impl AsRef<Path>,
995    format: ObjectFormat,
996    clean_config: &GitConfig,
997) -> Result<Vec<AddUpdateTrackedAction>> {
998    let worktree_root = worktree_root.as_ref();
999    let git_dir = git_dir.as_ref();
1000    let index_path = repository_index_path(git_dir);
1001    if !index_path.exists() {
1002        return Ok(Vec::new());
1003    }
1004    let mut index = Index::parse(&fs::read(&index_path)?, format)?;
1005    let index_mtime = fs::metadata(&index_path)
1006        .ok()
1007        .and_then(|metadata| file_mtime_parts(&metadata));
1008    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
1009    let prechecks = tracked_only_non_clean_prechecks_parallel(worktree_root, &index, &stat_cache)?;
1010    if prechecks.is_empty() {
1011        return Ok(Vec::new());
1012    }
1013
1014    let pending = prechecks
1015        .into_iter()
1016        .map(|precheck| match precheck {
1017            TrackedOnlyPrecheck::Deleted(idx) => {
1018                (precheck, index.entries[idx].path.as_bytes().to_vec())
1019            }
1020            TrackedOnlyPrecheck::Slow(idx) => {
1021                (precheck, index.entries[idx].path.as_bytes().to_vec())
1022            }
1023        })
1024        .collect::<Vec<_>>();
1025    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
1026    let mut actions = Vec::new();
1027    let mut index_dirty = false;
1028    let mut clean_filter = None;
1029    for (precheck, path) in pending {
1030        match precheck {
1031            TrackedOnlyPrecheck::Deleted(_) => {
1032                if remove_index_entries_with_path(&mut index.entries, &path) {
1033                    actions.push(AddUpdateTrackedAction::Remove(path));
1034                    index_dirty = true;
1035                }
1036            }
1037            TrackedOnlyPrecheck::Slow(_) => {
1038                let (action, dirty) = add_update_tracked_path(
1039                    worktree_root,
1040                    git_dir,
1041                    format,
1042                    Some(clean_config),
1043                    &odb,
1044                    &stat_cache,
1045                    &mut clean_filter,
1046                    &mut index,
1047                    &path,
1048                )?;
1049                index_dirty |= dirty;
1050                if let Some(action) = action {
1051                    actions.push(action);
1052                }
1053            }
1054        }
1055    }
1056
1057    if index_dirty {
1058        normalize_index_version_for_extended_flags(&mut index);
1059        index.extensions = index_extensions_without_cache_tree(&index.extensions);
1060        fs::write(index_path, index.write(format)?)?;
1061    }
1062    Ok(actions)
1063}
1064
/// Fast path for staging one already-tracked path: the entry is located in
/// the raw index bytes, rebuilt from the worktree file, and patched back in
/// place instead of parsing and re-serializing the whole index.
///
/// Outcomes:
///
/// * `Unsupported` — this fast path declines the request (presumably so the
///   caller can fall back to a slower route — confirm against callers). That
///   happens when the index file is missing, the path has no single patchable
///   entry, the extension area cannot be walked, the entry is not a plain
///   stage-0 / non-skip-worktree / non-gitlink entry, the worktree path is
///   missing while `ignore_removal` is `false`, the worktree path is a
///   directory or some other non-file/non-symlink, or the rebuilt entry
///   cannot be written over the old one.
/// * `Handled(None)` — nothing to report: the path is missing and
///   `ignore_removal` is `true`, the stat cache says the entry is current,
///   the rebuilt entry is identical, or only stat fields changed (in that
///   last case the index file is still rewritten).
/// * `Handled(Some(Add(path)))` — the entry's object id or mode changed and
///   the index file was rewritten.
///
/// `config_parameters_env` is forwarded to `sley_config::read_repo_config`
/// when resolving the clean filters for a regular file.
///
/// # Errors
///
/// Propagates I/O errors other than the "not found" cases described above,
/// raw index parse errors, attribute/filter errors, and object-store write
/// errors.
pub fn add_exact_tracked_path_from_disk(
    worktree_root: impl AsRef<Path>,
    git_dir: impl AsRef<Path>,
    format: ObjectFormat,
    git_path: &[u8],
    ignore_removal: bool,
    config_parameters_env: Option<&str>,
) -> Result<AddExactTrackedPathResult> {
    let worktree_root = worktree_root.as_ref();
    let git_dir = git_dir.as_ref();
    let index_path = repository_index_path(git_dir);
    // The index metadata is kept for its mtime, which seeds the stat cache below.
    let index_metadata = match fs::metadata(&index_path) {
        Ok(metadata) => metadata,
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
            return Ok(AddExactTrackedPathResult::Unsupported);
        }
        Err(err) => return Err(err.into()),
    };
    let mut index_bytes = fs::read(&index_path)?;
    // Locate the single raw entry for `git_path`; bail out unless it and the
    // surrounding index layout are simple enough to patch in place.
    let Some(raw) = raw_exact_index_entry(&index_bytes, format, git_path)? else {
        return Ok(AddExactTrackedPathResult::Unsupported);
    };
    if !raw_exact_entry_can_patch(&raw, git_path) {
        return Ok(AddExactTrackedPathResult::Unsupported);
    }
    if !raw_index_extensions_are_filterable(&index_bytes, raw.entries_end, raw.checksum_offset) {
        return Ok(AddExactTrackedPathResult::Unsupported);
    }

    let entry = raw.entry.clone();
    if entry.stage() != Stage::Normal || index_entry_skip_worktree(&entry) || sley_index::is_gitlink(entry.mode)
    {
        return Ok(AddExactTrackedPathResult::Unsupported);
    }
    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
    // `symlink_metadata` does not follow symlinks, so a symlink is seen as itself.
    let metadata = match fs::symlink_metadata(&absolute) {
        Ok(metadata) => metadata,
        Err(err)
            if matches!(
                err.kind(),
                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
            ) =>
        {
            // The path is gone from the worktree. Removal is never performed
            // here: it is either ignored or left to another code path.
            return Ok(if ignore_removal {
                AddExactTrackedPathResult::Handled(None)
            } else {
                AddExactTrackedPathResult::Unsupported
            });
        }
        Err(err) => return Err(err.into()),
    };
    let file_type = metadata.file_type();
    if metadata.is_dir() || !(file_type.is_file() || file_type.is_symlink()) {
        return Ok(AddExactTrackedPathResult::Unsupported);
    }
    let index_mtime = file_mtime_parts(&index_metadata);
    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
    // Stat data still matches the entry: nothing to stage.
    if stat_cache.reuse_index_entry(&entry, &metadata).is_some() {
        return Ok(AddExactTrackedPathResult::Handled(None));
    }

    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
    let is_symlink = file_type.is_symlink();
    // Symlinks are stored as their target bytes; regular files go through the
    // clean-side filters first.
    let body = if is_symlink {
        symlink_target_bytes(&absolute)?
    } else {
        let body = fs::read(&absolute)?;
        // Resolve the effective config WITH command-line `-c` / `--config-env`
        // overrides folded in (e.g. upstream t0027's `git -c core.autocrlf=true
        // add`); the plain repo-config reader would drop them and the fast path
        // would convert/warn against the wrong EOL policy.
        let config =
            sley_config::read_repo_config(git_dir, config_parameters_env).unwrap_or_default();
        let mut clean_filter = None;
        let clean_filter =
            tracked_only_clean_filter_with_config(&mut clean_filter, worktree_root, &config);
        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
        let checks =
            clean_filter
                .matcher
                .attributes_for_path(git_path, &clean_filter.requested, false);
        // git's index update folds in `global_conv_flags_eol`, so `git add`
        // emits the `core.safecrlf` round-trip warning (default: warn). The
        // current index blob (`entry.oid`) drives the auto-crlf
        // `has_crlf_in_index` decision. Mirror the slow `add_update_tracked_path`
        // path here so the exact-patch fast path does not silently drop the
        // warning (upstream t0020 'safecrlf: print warning only once').
        let conv_flags = ConvFlags::from_config(&clean_filter.config);
        let index_blob = match conv_flags {
            ConvFlags::Off => SafeCrlfIndexBlob::None,
            _ => SafeCrlfIndexBlob::Lookup {
                odb: &odb,
                oid: entry.oid,
            },
        };
        apply_clean_filter_with_attributes_cow_safecrlf(
            &clean_filter.config,
            &checks,
            git_path,
            &body,
            conv_flags,
            index_blob,
        )?
        .into_owned()
    };
    let object = EncodedObject::new(ObjectType::Blob, body);
    let oid = object.object_id(format)?;
    // Only write the blob when its id differs from the one already indexed.
    if oid != entry.oid {
        odb.write_object(object)?;
    }

    let mut updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
    if is_symlink {
        // Symlink entries are always recorded with mode 120000.
        updated_entry.mode = 0o120000;
    }
    if updated_entry == entry {
        return Ok(AddExactTrackedPathResult::Handled(None));
    }
    if !raw_updated_entry_can_patch(&entry, &updated_entry, git_path) {
        return Ok(AddExactTrackedPathResult::Unsupported);
    }
    patch_raw_index_entry(&mut index_bytes, format, &raw, &updated_entry)?;
    fs::write(index_path, index_bytes)?;
    // Stat-only refreshes rewrite the index but are not reported as an `Add`.
    let changed = updated_entry.oid != entry.oid || updated_entry.mode != entry.mode;
    Ok(AddExactTrackedPathResult::Handled(
        changed.then(|| AddUpdateTrackedAction::Add(git_path.to_vec())),
    ))
}
1193
1194pub fn add_exact_tracked_path_with_index(
1195    worktree_root: impl AsRef<Path>,
1196    git_dir: impl AsRef<Path>,
1197    format: ObjectFormat,
1198    mut index: Index,
1199    git_path: &[u8],
1200) -> Result<Option<AddUpdateTrackedAction>> {
1201    let worktree_root = worktree_root.as_ref();
1202    let git_dir = git_dir.as_ref();
1203    let range = index_entries_path_range(&index.entries, git_path);
1204    if range.len() != 1 {
1205        return Ok(None);
1206    }
1207    let entry = &index.entries[range.start];
1208    if entry.stage() != Stage::Normal || index_entry_skip_worktree(entry) {
1209        return Ok(None);
1210    }
1211    let index_path = repository_index_path(git_dir);
1212    let index_mtime = fs::metadata(&index_path)
1213        .ok()
1214        .and_then(|metadata| file_mtime_parts(&metadata));
1215    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
1216    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
1217    let mut clean_filter = None;
1218    let (action, dirty) = add_update_tracked_path(
1219        worktree_root,
1220        git_dir,
1221        format,
1222        None,
1223        &odb,
1224        &stat_cache,
1225        &mut clean_filter,
1226        &mut index,
1227        git_path,
1228    )?;
1229    if dirty {
1230        normalize_index_version_for_extended_flags(&mut index);
1231        index.extensions = index_extensions_without_cache_tree(&index.extensions);
1232        fs::write(index_path, index.write(format)?)?;
1233    }
1234    Ok(action)
1235}
1236
/// A single index entry located inside the raw on-disk index bytes, together
/// with the byte offsets needed to patch it in place (see
/// `raw_exact_index_entry` and `patch_raw_index_entry`).
struct RawExactIndexEntry {
    /// Index format version read from the file header.
    version: u32,
    /// The entry as decoded from the raw bytes.
    entry: IndexEntry,
    /// Byte offset of the entry's first field within the index file.
    entry_start: usize,
    /// Byte offset just past the last entry (padding included); the
    /// extension area, if any, starts here.
    entries_end: usize,
    /// Byte offset of the trailing checksum (file length minus hash length).
    checksum_offset: usize,
}
1244
/// Scan the raw index file `bytes` for the one entry whose path equals
/// `git_path`, without building a full `Index`.
///
/// Returns `Ok(None)` when the index version is not 2 or 3, when the path is
/// absent, or when more than one entry carries the path. On success the
/// result records the decoded entry plus the offsets of the entry, the end of
/// the entry table and the trailing checksum.
///
/// # Errors
///
/// Returns `GitError::InvalidFormat` when the file is too short, the trailing
/// checksum does not match the content, the `DIRC` signature is missing, or
/// an entry is truncated/unterminated. Digest and object-id decoding errors
/// are propagated.
fn raw_exact_index_entry(
    bytes: &[u8],
    format: ObjectFormat,
    git_path: &[u8],
) -> Result<Option<RawExactIndexEntry>> {
    let hash_len = format.raw_len();
    // 12-byte header (signature, version, entry count) plus the trailing checksum.
    if bytes.len() < 12 + hash_len {
        return Err(GitError::InvalidFormat("index header too short".into()));
    }
    // Verify the trailing checksum over everything that precedes it.
    let checksum_offset = bytes.len() - hash_len;
    let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
    let expected_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
    if actual_checksum != expected_checksum {
        return Err(GitError::InvalidFormat(format!(
            "index checksum mismatch: expected {expected_checksum}, got {actual_checksum}"
        )));
    }
    if &bytes[..4] != b"DIRC" {
        return Err(GitError::InvalidFormat("missing DIRC signature".into()));
    }
    let version = u32_from_be(&bytes[4..8]);
    // Only versions 2 and 3 are walked here; anything else is "not handled"
    // rather than an error.
    if !(2..=3).contains(&version) {
        return Ok(None);
    }
    let count = u32_from_be(&bytes[8..12]) as usize;
    let mut offset = 12;
    let mut found = None;
    for _ in 0..count {
        // Fixed part of an entry: 40 bytes of stat fields, the object id, and
        // a 16-bit flags word.
        let entry_header_len = 40 + hash_len + 2;
        if checksum_offset.saturating_sub(offset) < entry_header_len {
            return Err(GitError::InvalidFormat("truncated index entry".into()));
        }
        let start = offset;
        let oid_start = offset + 40;
        let oid_end = oid_start + hash_len;
        let flags = u16_from_be(&bytes[oid_end..oid_end + 2]);
        offset = oid_end + 2;
        // An optional second 16-bit flags word follows when the extended bit is set.
        let flags_extended = if flags & INDEX_FLAG_EXTENDED != 0 {
            if checksum_offset.saturating_sub(offset) < 2 {
                return Err(GitError::InvalidFormat(
                    "truncated index extended flags".into(),
                ));
            }
            let flags_extended = u16_from_be(&bytes[offset..offset + 2]);
            offset += 2;
            flags_extended
        } else {
            0
        };
        // The path runs up to (not including) its NUL terminator.
        let path_start = offset;
        while bytes.get(offset).copied() != Some(0) {
            offset += 1;
            if offset >= checksum_offset {
                return Err(GitError::InvalidFormat("unterminated index path".into()));
            }
        }
        let path = &bytes[path_start..offset];
        // Step over the NUL, then over padding until the entry length
        // (measured from `start`) is a multiple of 8.
        offset += 1;
        while (offset - start) % 8 != 0 {
            offset += 1;
            if offset > checksum_offset {
                return Err(GitError::InvalidFormat("truncated index padding".into()));
            }
        }
        if path == git_path {
            // A second entry for the same path means this is not a simple
            // single-entry case; give up on the raw route.
            if found.is_some() {
                return Ok(None);
            }
            let oid = ObjectId::from_raw(format, &bytes[oid_start..oid_end])?;
            found = Some(RawExactIndexEntry {
                version,
                entry: IndexEntry {
                    ctime_seconds: u32_from_be(&bytes[start..start + 4]),
                    ctime_nanoseconds: u32_from_be(&bytes[start + 4..start + 8]),
                    mtime_seconds: u32_from_be(&bytes[start + 8..start + 12]),
                    mtime_nanoseconds: u32_from_be(&bytes[start + 12..start + 16]),
                    dev: u32_from_be(&bytes[start + 16..start + 20]),
                    ino: u32_from_be(&bytes[start + 20..start + 24]),
                    mode: u32_from_be(&bytes[start + 24..start + 28]),
                    uid: u32_from_be(&bytes[start + 28..start + 32]),
                    gid: u32_from_be(&bytes[start + 32..start + 36]),
                    size: u32_from_be(&bytes[start + 36..start + 40]),
                    oid,
                    flags,
                    flags_extended,
                    path: BString::from(path),
                },
                entry_start: start,
                // Filled in once the whole entry table has been walked.
                entries_end: 0,
                checksum_offset,
            });
        } else if found.is_none() && path > git_path {
            // Early exit: relies on entries being stored in ascending path
            // order, so the target cannot appear after a greater path.
            return Ok(None);
        }
    }
    // After a hit the scan still runs to the end so that `offset` marks where
    // the entry table stops and the extensions begin.
    if let Some(mut found) = found {
        found.entries_end = offset;
        Ok(Some(found))
    } else {
        Ok(None)
    }
}
1347
1348fn raw_exact_entry_can_patch(raw: &RawExactIndexEntry, git_path: &[u8]) -> bool {
1349    raw.version == 2
1350        && raw.entry.flags_extended == 0
1351        && raw.entry.flags & INDEX_FLAG_EXTENDED == 0
1352        && raw.entry.flags == index_flags(git_path.len(), 0)
1353        && raw.entry.path.as_bytes() == git_path
1354}
1355
1356fn raw_updated_entry_can_patch(
1357    previous: &IndexEntry,
1358    updated: &IndexEntry,
1359    git_path: &[u8],
1360) -> bool {
1361    updated.path.as_bytes() == git_path
1362        && updated.flags_extended == 0
1363        && updated.flags & INDEX_FLAG_EXTENDED == 0
1364        && updated.flags == previous.flags
1365}
1366
/// Check that the extension area of a raw index file can be walked record by
/// record, so that individual extensions can later be filtered out.
///
/// The area spans `entries_end..checksum_offset` of `bytes`. Each record is a
/// 4-byte signature, a 4-byte big-endian payload size, and the payload. The
/// function returns `true` only when the records tile the area exactly: no
/// partial header, and no payload running past `checksum_offset`. An empty
/// area (`entries_end >= checksum_offset`) is trivially walkable.
///
/// This is a validator, so it never panics: offsets that do not fit inside
/// `bytes` make it return `false` instead of indexing out of bounds.
fn raw_index_extensions_are_filterable(
    bytes: &[u8],
    entries_end: usize,
    checksum_offset: usize,
) -> bool {
    let mut offset = entries_end;
    while offset < checksum_offset {
        // A record needs a full 8-byte header before the checksum.
        if checksum_offset - offset < 8 {
            return false;
        }
        // `offset + 8 <= checksum_offset` was just established, so the
        // additions below cannot overflow; `get` covers a `checksum_offset`
        // that lies beyond the buffer.
        let Some(size_field) = bytes.get(offset + 4..offset + 8) else {
            return false;
        };
        let size = u32::from_be_bytes([size_field[0], size_field[1], size_field[2], size_field[3]])
            as usize;
        let Some(end) = (offset + 8).checked_add(size) else {
            return false;
        };
        if end > checksum_offset {
            return false;
        }
        offset = end;
    }
    true
}
1391
1392fn patch_raw_index_entry(
1393    bytes: &mut Vec<u8>,
1394    format: ObjectFormat,
1395    raw: &RawExactIndexEntry,
1396    entry: &IndexEntry,
1397) -> Result<()> {
1398    let hash_len = format.raw_len();
1399    let start = raw.entry_start;
1400    bytes[start..start + 4].copy_from_slice(&entry.ctime_seconds.to_be_bytes());
1401    bytes[start + 4..start + 8].copy_from_slice(&entry.ctime_nanoseconds.to_be_bytes());
1402    bytes[start + 8..start + 12].copy_from_slice(&entry.mtime_seconds.to_be_bytes());
1403    bytes[start + 12..start + 16].copy_from_slice(&entry.mtime_nanoseconds.to_be_bytes());
1404    bytes[start + 16..start + 20].copy_from_slice(&entry.dev.to_be_bytes());
1405    bytes[start + 20..start + 24].copy_from_slice(&entry.ino.to_be_bytes());
1406    bytes[start + 24..start + 28].copy_from_slice(&entry.mode.to_be_bytes());
1407    bytes[start + 28..start + 32].copy_from_slice(&entry.uid.to_be_bytes());
1408    bytes[start + 32..start + 36].copy_from_slice(&entry.gid.to_be_bytes());
1409    bytes[start + 36..start + 40].copy_from_slice(&entry.size.to_be_bytes());
1410    bytes[start + 40..start + 40 + hash_len].copy_from_slice(entry.oid.as_bytes());
1411    bytes[start + 40 + hash_len..start + 40 + hash_len + 2]
1412        .copy_from_slice(&entry.flags.to_be_bytes());
1413
1414    let mut extension_offset = raw.entries_end;
1415    let mut removed_cache_tree = false;
1416    let mut rewritten = Vec::new();
1417    while extension_offset < raw.checksum_offset {
1418        let signature = &bytes[extension_offset..extension_offset + 4];
1419        let size = u32_from_be(&bytes[extension_offset + 4..extension_offset + 8]) as usize;
1420        let end = extension_offset + 8 + size;
1421        if signature == b"TREE" {
1422            removed_cache_tree = true;
1423        } else {
1424            rewritten.extend_from_slice(&bytes[extension_offset..end]);
1425        }
1426        extension_offset = end;
1427    }
1428
1429    if removed_cache_tree {
1430        bytes.truncate(raw.entries_end);
1431        bytes.extend_from_slice(&rewritten);
1432        let checksum = sley_core::digest_bytes(format, bytes)?;
1433        bytes.extend_from_slice(checksum.as_bytes());
1434    } else {
1435        let checksum = sley_core::digest_bytes(format, &bytes[..raw.checksum_offset])?;
1436        bytes[raw.checksum_offset..raw.checksum_offset + hash_len]
1437            .copy_from_slice(checksum.as_bytes());
1438    }
1439    Ok(())
1440}
1441
/// Decode the first four bytes of `bytes` as a big-endian `u32`.
///
/// Any bytes after the fourth are ignored. Panics if `bytes` holds fewer
/// than four bytes.
fn u32_from_be(bytes: &[u8]) -> u32 {
    (u32::from(bytes[0]) << 24)
        | (u32::from(bytes[1]) << 16)
        | (u32::from(bytes[2]) << 8)
        | u32::from(bytes[3])
}
1445
/// Decode the first two bytes of `bytes` as a big-endian `u16`.
///
/// Any bytes after the second are ignored. Panics if `bytes` holds fewer
/// than two bytes.
fn u16_from_be(bytes: &[u8]) -> u16 {
    (u16::from(bytes[0]) << 8) | u16::from(bytes[1])
}
1449
/// Bring the index entry for one tracked path up to date with the worktree.
///
/// Returns `(action, dirty)`: `action` is the `Add`/`Remove` to report (if
/// any) and `dirty` says whether `index.entries` was modified. The two can
/// differ: a stat-only refresh modifies the entry (`dirty == true`) without
/// producing an action.
///
/// Behaviour by case:
///
/// * No entry for the path, or its first entry is not stage 0: nothing to do.
/// * Path missing from the worktree: all entries for the path are removed and
///   a `Remove` is reported.
/// * Path is a directory: only gitlink entries are refreshed (object id taken
///   from `sley_diff_merge::gitlink_head_oid`, keeping the current id when
///   that lookup yields nothing); other entries are left alone.
/// * Path is neither a regular file nor a symlink: nothing to do.
/// * Stat cache says the entry is still current: nothing to do.
/// * Otherwise the content (symlink target, or file bytes run through the
///   clean filters) is hashed, written to `odb` if its id is new, and the
///   entry is replaced.
///
/// `clean_config` selects how the lazily-initialised `clean_filter` is built:
/// from the given config, or via `tracked_only_clean_filter` from `git_dir`
/// when `None`.
///
/// # Errors
///
/// Propagates I/O errors other than the "missing path" kinds, path
/// conversion errors, attribute/filter errors, and object-store errors.
fn add_update_tracked_path(
    worktree_root: &Path,
    git_dir: &Path,
    format: ObjectFormat,
    clean_config: Option<&GitConfig>,
    odb: &FileObjectDatabase,
    stat_cache: &IndexStatCache,
    clean_filter: &mut Option<TrackedOnlyCleanFilter>,
    index: &mut Index,
    git_path: &[u8],
) -> Result<(Option<AddUpdateTrackedAction>, bool)> {
    let range = index_entries_path_range(&index.entries, git_path);
    if range.is_empty() {
        return Ok((None, false));
    }
    // Cloned so the entry can be compared against after `index` is mutated.
    let entry = index.entries[range.start].clone();
    if entry.stage() != Stage::Normal {
        return Ok((None, false));
    }
    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
    // `symlink_metadata` does not follow symlinks, so a symlink is seen as itself.
    let metadata = match fs::symlink_metadata(&absolute) {
        Ok(metadata) => metadata,
        Err(err)
            if matches!(
                err.kind(),
                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
            ) =>
        {
            // The path no longer exists in the worktree: drop it from the index.
            if remove_index_entries_with_path(&mut index.entries, git_path) {
                return Ok((
                    Some(AddUpdateTrackedAction::Remove(git_path.to_vec())),
                    true,
                ));
            }
            return Ok((None, false));
        }
        Err(err) => return Err(err.into()),
    };
    if metadata.is_dir() {
        // A directory is only meaningful here when the entry is a gitlink.
        if !sley_index::is_gitlink(entry.mode) {
            return Ok((None, false));
        }
        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(entry.oid);
        let mut updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
        updated_entry.mode = sley_index::GITLINK_MODE;
        // Only an oid/mode change is reported; a stat-only change still dirties the index.
        let changed = updated_entry.oid != entry.oid || updated_entry.mode != entry.mode;
        if updated_entry != entry {
            replace_index_entries_with_entry(&mut index.entries, updated_entry);
            return Ok((
                changed.then(|| AddUpdateTrackedAction::Add(git_path.to_vec())),
                true,
            ));
        }
        return Ok((None, false));
    }
    if !(metadata.is_file() || metadata.file_type().is_symlink()) {
        return Ok((None, false));
    }
    // Stat data still matches the entry: nothing to stage.
    if stat_cache.reuse_index_entry(&entry, &metadata).is_some() {
        return Ok((None, false));
    }

    let is_symlink = metadata.file_type().is_symlink();
    // Symlinks are stored as their target bytes; regular files go through the
    // clean-side filters first.
    let body = if is_symlink {
        symlink_target_bytes(&absolute)?
    } else {
        let body = fs::read(&absolute)?;
        let clean_filter = match clean_config {
            Some(config) => {
                tracked_only_clean_filter_with_config(clean_filter, worktree_root, config)
            }
            None => tracked_only_clean_filter(clean_filter, worktree_root, git_dir),
        };
        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
        let checks =
            clean_filter
                .matcher
                .attributes_for_path(git_path, &clean_filter.requested, false);
        // git's `add -u` index update folds in `global_conv_flags_eol`, so emit
        // the `core.safecrlf` round-trip warning (default: warn). The current
        // index blob (`entry.oid`) drives the auto-crlf `has_crlf_in_index`
        // decision.
        let conv_flags = ConvFlags::from_config(&clean_filter.config);
        let index_blob = match conv_flags {
            ConvFlags::Off => SafeCrlfIndexBlob::None,
            _ => SafeCrlfIndexBlob::Lookup {
                odb,
                oid: entry.oid,
            },
        };
        apply_clean_filter_with_attributes_cow_safecrlf(
            &clean_filter.config,
            &checks,
            git_path,
            &body,
            conv_flags,
            index_blob,
        )?
        .into_owned()
    };
    let object = EncodedObject::new(ObjectType::Blob, body);
    let oid = object.object_id(format)?;
    // Only write the blob when its id differs from the one already indexed.
    if oid != entry.oid {
        odb.write_object(object)?;
    }
    let mut updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
    if is_symlink {
        // Symlink entries are always recorded with mode 120000.
        updated_entry.mode = 0o120000;
    }
    // Only an oid/mode change is reported; a stat-only change still dirties the index.
    let changed = updated_entry.oid != entry.oid || updated_entry.mode != entry.mode;
    if updated_entry != entry {
        replace_index_entries_with_entry(&mut index.entries, updated_entry);
        return Ok((
            changed.then(|| AddUpdateTrackedAction::Add(git_path.to_vec())),
            true,
        ));
    }
    Ok((None, false))
}
1569
/// Strategy `update_index_paths_impl` uses to obtain gitattributes when
/// content filters are enabled.
enum UpdateIndexCleanFilter {
    /// A matcher built once from the whole worktree root; chosen for large
    /// batches, where the up-front cost is amortized over many paths.
    Full(AttributeMatcher),
    /// No shared matcher; chosen for small batches so that only each path's
    /// own `.gitattributes` chain has to be read.
    PathLocal,
}
1574
1575fn index_entries_path_range(entries: &[IndexEntry], path: &[u8]) -> std::ops::Range<usize> {
1576    let mut start = match entries.binary_search_by(|entry| entry.path.as_bytes().cmp(path)) {
1577        Ok(index) => index,
1578        Err(insert) => return insert..insert,
1579    };
1580    while start > 0 && entries[start - 1].path.as_bytes() == path {
1581        start -= 1;
1582    }
1583    let mut end = start;
1584    while end < entries.len() && entries[end].path.as_bytes() == path {
1585        end += 1;
1586    }
1587    start..end
1588}
1589
1590fn remove_index_entries_with_path(entries: &mut Vec<IndexEntry>, path: &[u8]) -> bool {
1591    let range = index_entries_path_range(entries, path);
1592    if range.is_empty() {
1593        return false;
1594    }
1595    entries.drain(range);
1596    true
1597}
1598
1599/// Remove every index entry whose path lives *under* `name/` (a strict
1600/// directory-prefix collision). Mirrors git's `has_file_name`
1601/// (read-cache.c): when a *file* entry `a/b` is being added, any entry
1602/// `a/b/...` already in the index would produce a tree that records `a/b`
1603/// both as a blob and as a tree — `write-tree` would emit a malformed tree.
1604/// Entries are sorted by path, so the conflicting children form a contiguous
1605/// run immediately after `name`'s insertion point.
1606fn remove_index_entries_under_dir(entries: &mut Vec<IndexEntry>, name: &[u8]) {
1607    let start = match entries.binary_search_by(|entry| entry.path.as_bytes().cmp(name)) {
1608        Ok(found) => found + 1,
1609        Err(insert) => insert,
1610    };
1611    let mut end = start;
1612    while end < entries.len() {
1613        let candidate = entries[end].path.as_bytes();
1614        // `candidate` is under `name/` iff it is strictly longer, shares the
1615        // `name` prefix, and the next byte is the path separator.
1616        if candidate.len() > name.len()
1617            && candidate[name.len()] == b'/'
1618            && candidate[..name.len()] == *name
1619        {
1620            end += 1;
1621        } else {
1622            break;
1623        }
1624    }
1625    if end > start {
1626        entries.drain(start..end);
1627    }
1628}
1629
1630/// Remove any *file* entry that is a strict directory-prefix of `name` (e.g.
1631/// when adding `a/b/c`, drop a file entry `a/b` or `a`). Mirrors git's
1632/// `has_dir_name` (read-cache.c): such an entry would make the resulting tree
1633/// record the prefix both as a blob and as the directory containing `name`.
1634/// We walk every parent directory of `name`, longest first; the moment a
1635/// real subdirectory already exists at a prefix, no shorter prefix can
1636/// conflict, so we stop early (git's "already matches the sub-directory"
1637/// trivial optimization).
1638fn remove_index_dir_name_conflicts(entries: &mut Vec<IndexEntry>, name: &[u8]) {
1639    let mut slash = name.len();
1640    // Walk back over each '/' (longest parent dir first) until the path has no
1641    // more components.
1642    while let Some(pos) = name[..slash].iter().rposition(|&byte| byte == b'/') {
1643        slash = pos;
1644        let prefix = &name[..slash];
1645        match entries.binary_search_by(|entry| entry.path.as_bytes().cmp(prefix)) {
1646            Ok(found) => {
1647                // A file entry sits exactly at this directory prefix — drop it.
1648                entries.remove(found);
1649            }
1650            Err(insert) => {
1651                // No file at `prefix`. If a child `prefix/...` already exists,
1652                // the directory is established and nothing at this prefix (or
1653                // any shorter one) can conflict; stop.
1654                if insert < entries.len() {
1655                    let candidate = entries[insert].path.as_bytes();
1656                    if candidate.len() > prefix.len()
1657                        && candidate[prefix.len()] == b'/'
1658                        && candidate[..prefix.len()] == *prefix
1659                    {
1660                        break;
1661                    }
1662                }
1663            }
1664        }
1665    }
1666}
1667
1668fn replace_index_entries_with_entry(entries: &mut Vec<IndexEntry>, entry: IndexEntry) {
1669    let path = entry.path.as_bytes().to_vec();
1670    // Enforce directory/file replacement *before* computing the insert
1671    // position: git's `add_index_entry_with_check` removes the conflicting
1672    // entries, then recomputes where the new entry lands. Adding the entry
1673    // as a file drops any `path/...` children; adding it drops any file that
1674    // is a directory-prefix of `path`. Skipping this leaves a D/F-corrupt
1675    // index that `write-tree` turns into a malformed tree.
1676    remove_index_entries_under_dir(entries, &path);
1677    remove_index_dir_name_conflicts(entries, &path);
1678    let range = index_entries_path_range(entries, &path);
1679    if range.is_empty() {
1680        entries.insert(range.start, entry);
1681    } else {
1682        entries.splice(range, [entry]);
1683    }
1684}
1685
/// Stages every path in `paths` into `index` and writes the result to the
/// repository's index file.
///
/// Paths are handled strictly in order, each according to its own
/// `UpdateIndexPath::mode`:
///
/// * `force_remove`: the path's entries are removed via
///   `remove_index_entries_with_path` without looking at the worktree.
/// * If any existing entry for the path is flagged by
///   `index_entry_skip_worktree`, the path is skipped; with `remove` set (and
///   `options.ignore_skip_worktree_entries` unset) its entries are drained
///   first.
/// * Missing on disk (`NotFound` / `NotADirectory` from `lstat`): removed from
///   the index when `remove` is set, otherwise a fatal error.
/// * Not yet in the index and `add` unset: fatal error.
/// * Directory: staged as a gitlink when `gitlink_head_oid` yields a commit,
///   otherwise a fatal error; `chmod` on a gitlink is a fatal error.
/// * Symlink: the link target bytes become the blob and the mode is `120000`;
///   `chmod` on a symlink is a fatal error.
/// * Regular file: the file content becomes the blob, passed through the clean
///   filter when `clean_config` is `Some`.
///
/// Blobs are written to the object database unless the path's `info_only` is
/// set, in which case only the object id is computed.
///
/// When `verbose` is true the collected `add` / `remove` / `chmod` report
/// lines are printed to stdout after the index file has been written.
///
/// # Errors
///
/// Fatal conditions print a diagnostic to stderr and return
/// `GitError::Exit(128)`. I/O, path and object-database errors are propagated.
/// Every early return happens before the index file is written, so the
/// on-disk index is not modified by a failed batch (blobs already written to
/// the object database remain).
///
/// # Returns
///
/// The number of entries in the resulting index and the object ids staged, in
/// processing order.
fn update_index_paths_impl(
    worktree_root: &Path,
    git_dir: &Path,
    format: ObjectFormat,
    mut index: Index,
    paths: &[UpdateIndexPath],
    options: UpdateIndexOptions,
    clean_config: Option<&GitConfig>,
    verbose: bool,
) -> Result<UpdateIndexResult> {
    let index_path = repository_index_path(git_dir);
    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
    // For small batches, read only each path's `.gitattributes` chain; a
    // whole-worktree matcher can dominate `add -u` when only a few files are
    // dirty in a huge checkout. Large batches still amortize the full matcher.
    let clean_filter = match clean_config {
        Some(_) if paths.len() >= 64 => Some(UpdateIndexCleanFilter::Full(
            AttributeMatcher::from_worktree_root(worktree_root)?,
        )),
        Some(_) => Some(UpdateIndexCleanFilter::PathLocal),
        None => None,
    };
    // git's index-update path (object-file.c `get_conv_flags`) folds in
    // `global_conv_flags_eol`, so `git add`/`commit` emit the `core.safecrlf`
    // round-trip warning (default: warn). It only applies when content filters
    // run at all (i.e. when we have a config).
    let conv_flags = clean_config.map_or(ConvFlags::Off, ConvFlags::from_config);
    let requested_filter_attrs = filter_attribute_names();
    // `updated` collects the staged object ids; `reports` collects the
    // `--verbose` lines, which are only printed once the index is on disk.
    let mut updated = Vec::new();
    let mut reports: Vec<String> = Vec::new();
    for update_path in paths {
        let path = &update_path.path;
        // Each path carries the sticky mode that was in effect when it was
        // parsed on the command line (git processes argv left-to-right). Read
        // the action from the path's own mode, NOT a batch-wide flag, so
        // `--add foo --force-remove bar` adds foo and force-removes bar.
        let path_mode = update_path.mode;
        let path_chmod = path_mode.chmod;
        // Normalize to an absolute path, then to the repo-relative byte path
        // that keys the index; anything outside the worktree is rejected.
        let absolute = if path.is_absolute() {
            path.clone()
        } else {
            worktree_root.join(path)
        };
        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
        })?;
        let git_path = git_path_bytes(relative)?;
        if path_mode.force_remove {
            remove_index_entries_with_path(&mut index.entries, &git_path);
            // git's update_one() reports `remove` for a --force-remove path.
            reports.push(format!("remove '{}'", String::from_utf8_lossy(&git_path)));
            continue;
        }
        // Skip-worktree entries are never re-staged from disk; at most they
        // are dropped when `--remove` was requested for the path.
        let existing_range = index_entries_path_range(&index.entries, &git_path);
        if index.entries[existing_range.clone()]
            .iter()
            .any(index_entry_skip_worktree)
        {
            if path_mode.remove && !options.ignore_skip_worktree_entries {
                index.entries.drain(existing_range);
            }
            continue;
        }
        // lstat (not stat): a symlink must be inspected as the link itself, never
        // followed to its target. `Path::exists`/`fs::metadata` both stat through
        // the link, which makes a symlink-to-directory look like a directory
        // (fs::read then fails with "Is a directory") and a symlink-to-file get
        // staged with the target's content + a regular-file mode. git stages a
        // symlink as mode 120000 whose blob is the link target string, regardless
        // of what (if anything) the target resolves to.
        let symlink_metadata = match fs::symlink_metadata(&absolute) {
            Ok(metadata) => Some(metadata),
            // ENOTDIR (a leading path component is now a file, e.g. staging the
            // stale `a/b/c` entry after `a/b` became a regular file in a D/F
            // flip) means the path no longer exists as a file — git's lstat
            // returns ENOTDIR here and treats it exactly like ENOENT. Fold both
            // into the "missing" arm so the `--remove` path drops the stale
            // entry instead of aborting the whole add with an I/O error.
            Err(err)
                if matches!(
                    err.kind(),
                    std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
                ) =>
            {
                None
            }
            Err(err) => return Err(err.into()),
        };
        let Some(metadata) = symlink_metadata else {
            if path_mode.remove {
                remove_index_entries_with_path(&mut index.entries, &git_path);
                // git's update_one() unconditionally reports `add '<path>'`
                // after process_path(), even when the missing file was removed
                // from the index via the `--remove` (not --force-remove) path.
                reports.push(format!("add '{}'", String::from_utf8_lossy(&git_path)));
                continue;
            }
            print_update_index_path_error(&git_path, "does not exist and --remove not passed");
            return Err(GitError::Exit(128));
        };
        // A path with no index entry yet may only be staged under `--add`.
        if !path_mode.add && index_entries_path_range(&index.entries, &git_path).is_empty() {
            print_update_index_path_error(
                &git_path,
                "cannot add to the index - missing --add option?",
            );
            return Err(GitError::Exit(128));
        }
        if metadata.is_dir() {
            // A directory is stageable only as a gitlink: when it is an
            // embedded repository with a commit checked out, git records a
            // mode-160000 entry whose oid is that commit (no object is
            // written). Otherwise it errors — with upstream's exact messages
            // for the embedded-repo-without-commit and plain-directory cases
            // (object-file.c index_path / builtin/update-index.c
            // process_directory).
            let display = String::from_utf8_lossy(&git_path).into_owned();
            let has_dot_git = absolute.join(".git").exists();
            let Some(head_oid) = sley_diff_merge::gitlink_head_oid(&absolute, format) else {
                if has_dot_git {
                    eprintln!("error: '{display}' does not have a commit checked out");
                } else {
                    eprintln!("error: {display}: is a directory - add files inside instead");
                }
                eprintln!("fatal: Unable to process path {display}");
                return Err(GitError::Exit(128));
            };
            // A gitlink has no executable bit to flip, so any chmod is fatal.
            if path_chmod.is_some() {
                eprintln!(
                    "fatal: git update-index: cannot chmod {}x '{display}'",
                    if path_chmod == Some(true) { '+' } else { '-' },
                );
                return Err(GitError::Exit(128));
            }
            let mut entry = index_entry_from_metadata(git_path.clone(), head_oid, &metadata);
            entry.mode = sley_index::GITLINK_MODE;
            reports.push(format!("add '{display}'"));
            replace_index_entries_with_entry(&mut index.entries, entry);
            updated.push(head_oid);
            continue;
        }
        let is_symlink = metadata.file_type().is_symlink();
        let body = if is_symlink {
            // The blob is the raw link target bytes; clean filters never apply to
            // a symlink (git treats it as binary content, not a text path).
            symlink_target_bytes(&absolute)?
        } else {
            let body = fs::read(&absolute)?;
            // The safecrlf auto-crlf decision needs the path's *current* index
            // blob (git's `has_crlf_in_index`); the stage-0 entry, if any, has it.
            let index_blob = match conv_flags {
                ConvFlags::Off => SafeCrlfIndexBlob::None,
                _ => stage0_oid_in_range(&index.entries, existing_range.clone()).map_or(
                    SafeCrlfIndexBlob::None,
                    |oid| SafeCrlfIndexBlob::Lookup { odb: &odb, oid },
                ),
            };
            match (clean_config, &clean_filter) {
                (Some(config), Some(UpdateIndexCleanFilter::Full(matcher))) => {
                    // Identical to `apply_clean_filter`, but reuses the batch's
                    // matcher instead of rebuilding it (and re-walking the tree)
                    // for this path.
                    let checks =
                        matcher.attributes_for_path(&git_path, &requested_filter_attrs, false);
                    apply_clean_filter_with_attributes_cow_safecrlf(
                        config, &checks, &git_path, &body, conv_flags, index_blob,
                    )?
                    .into_owned()
                }
                (Some(config), Some(UpdateIndexCleanFilter::PathLocal)) => {
                    let checks = filter_attribute_checks(worktree_root, &git_path)?;
                    apply_clean_filter_with_attributes_cow_safecrlf(
                        config, &checks, &git_path, &body, conv_flags, index_blob,
                    )?
                    .into_owned()
                }
                // No config means no filtering: the bytes on disk are staged.
                _ => body,
            }
        };
        // `info_only` records the object id in the index without storing the
        // blob in the object database.
        let object = EncodedObject::new(ObjectType::Blob, body);
        let oid = if path_mode.info_only {
            object.object_id(format)?
        } else {
            odb.write_object(object)?
        };
        let mut entry = index_entry_from_metadata(git_path.clone(), oid, &metadata);
        if is_symlink {
            entry.mode = 0o120000;
        }
        // git's update_one() reports `add` for every staged path (whether the
        // entry is new or an update), then chmod_path() reports the chmod after.
        reports.push(format!("add '{}'", String::from_utf8_lossy(&git_path)));
        if let Some(executable) = path_chmod {
            // git's chmod_path() refuses to flip the executable bit on anything
            // that is not a regular file (a symlink/gitlink has no such bit). It
            // writes the blob first, then errors with this exact message and
            // leaves the index untouched.
            if is_symlink {
                eprintln!(
                    "fatal: git update-index: cannot chmod {}x '{}'",
                    if executable { '+' } else { '-' },
                    String::from_utf8_lossy(&git_path)
                );
                return Err(GitError::Exit(128));
            }
            entry.mode = if executable { 0o100755 } else { 0o100644 };
            reports.push(format!(
                "chmod {}x '{}'",
                if executable { '+' } else { '-' },
                String::from_utf8_lossy(&git_path)
            ));
        }
        replace_index_entries_with_entry(&mut index.entries, entry);
        updated.push(oid);
    }
    // Persist the batch. The extensions are replaced by the output of
    // `index_extensions_without_cache_tree` — presumably because the cached
    // tree no longer describes the modified entries (confirm against helper).
    normalize_index_version_for_extended_flags(&mut index);
    index.extensions = index_extensions_without_cache_tree(&index.extensions);
    fs::write(index_path, index.write(format)?)?;
    // Reports are emitted only after the index write succeeded, under one
    // stdout lock.
    if verbose {
        let mut stdout = std::io::stdout().lock();
        for line in &reports {
            writeln!(stdout, "{line}")?;
        }
        stdout.flush()?;
    }
    Ok(UpdateIndexResult {
        entries: index.entries.len(),
        updated,
    })
}
1915
/// Refreshes the cached stat information of stage-0 index entries against the
/// worktree (the `update-index --refresh` / `--really-refresh` operation).
///
/// Behaviour:
///
/// * With no index file, returns immediately with zero entries.
/// * When `paths` is empty, `really_refresh` is false and no entry carries the
///   assume-unchanged flag, the work is delegated to
///   `refresh_all_index_paths_parallel`.
/// * Otherwise every stage-0 entry is visited serially. Assume-unchanged
///   entries are skipped unless `really_refresh` is set, in which case the
///   flag is cleared. An entry whose worktree file cannot be stat'ed is
///   reported as needing an update unless `ignore_missing` is set. Gitlinks
///   are judged by `sley_index::gitlink_stat_verdict` alone. Regular files are
///   first offered to the stat cache and only re-hashed when that fails.
/// * An entry that hashes differently (or whose mode differs) is reported as
///   needing an update; it is rewritten in the index only when its path was
///   explicitly listed in `paths`.
///
/// "Needs update" messages are suppressed when `quiet` is true. The index file
/// is rewritten only if some entry actually changed.
///
/// # Errors
///
/// Returns `GitError::Exit(1)` when at least one entry needs an update and
/// `quiet` is false. `GitError::InvalidPath` is returned for a path outside
/// `worktree_root`; I/O and parse errors are propagated.
///
/// # Returns
///
/// The number of entries in the index; `updated` is always empty.
pub fn refresh_index_paths(
    worktree_root: impl AsRef<Path>,
    git_dir: impl AsRef<Path>,
    format: ObjectFormat,
    paths: &[PathBuf],
    quiet: bool,
    ignore_missing: bool,
    really_refresh: bool,
) -> Result<UpdateIndexResult> {
    let worktree_root = worktree_root.as_ref();
    let git_dir = git_dir.as_ref();
    let index_path = repository_index_path(git_dir);
    if !index_path.exists() {
        return Ok(UpdateIndexResult {
            entries: 0,
            updated: Vec::new(),
        });
    }
    let mut index = Index::parse(&fs::read(&index_path)?, format)?;
    // git's `update-index --refresh` trusts the cached stat: a stage-0 entry
    // whose size+mtime still match the worktree file (and is not racily clean) is
    // known unchanged, so its content is NOT re-read or re-hashed
    // (read-cache.c `refresh_cache_ent` → `ie_match_stat`). Without this shortcut
    // sley re-hashed every tracked file on every refresh — the 3.2x slowdown in
    // sley#27. We build the cache from the same parsed index + the index file's
    // own mtime (the racy-clean reference) so no extra parse is needed.
    let index_mtime = fs::metadata(&index_path)
        .ok()
        .and_then(|metadata| file_mtime_parts(&metadata));
    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
    // Normalize the requested paths to repo-relative byte paths; the first
    // path outside the worktree aborts the whole call.
    let selected_paths = paths
        .iter()
        .map(|path| {
            let absolute = if path.is_absolute() {
                path.clone()
            } else {
                worktree_root.join(path)
            };
            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
            })?;
            git_path_bytes(relative)
        })
        .collect::<Result<Vec<_>>>()?;
    let selected_paths = selected_paths.into_iter().collect::<BTreeSet<_>>();
    // Whole-index refresh with nothing special to honour: take the parallel
    // path. Assume-unchanged entries and `really_refresh` need the serial loop
    // below because it is the one that inspects and clears that flag.
    if selected_paths.is_empty()
        && !really_refresh
        && !index
            .entries
            .iter()
            .any(|entry| entry.flags & INDEX_FLAG_ASSUME_UNCHANGED != 0)
    {
        return refresh_all_index_paths_parallel(
            worktree_root,
            &index_path,
            format,
            index,
            stat_cache,
            quiet,
            ignore_missing,
        );
    }
    // `needs_update` drives the exit status; `index_dirty` decides whether the
    // index file has to be rewritten at all.
    let mut needs_update = false;
    let mut index_dirty = false;
    for entry in &mut index.entries {
        // Only stage-0 entries are refreshed; other stages are left alone.
        if index_entry_stage(entry) != 0 {
            continue;
        }
        let selected_for_update =
            !selected_paths.is_empty() && selected_paths.contains(entry.path.as_bytes());
        if entry.flags & INDEX_FLAG_ASSUME_UNCHANGED != 0 {
            if !really_refresh {
                continue;
            }
            // `really_refresh` drops the assume-unchanged bit and then
            // refreshes the entry like any other.
            entry.flags &= !INDEX_FLAG_ASSUME_UNCHANGED;
            index_dirty = true;
        }
        let absolute = worktree_root.join(repo_path_to_os_path(entry.path.as_bytes())?);
        // NOTE(review): `fs::metadata` follows symlinks, whereas staging in
        // `update_index_paths_impl` deliberately uses `fs::symlink_metadata`.
        // Confirm that tracked symlink entries are meant to be compared
        // against the link target here.
        let Ok(metadata) = fs::metadata(&absolute) else {
            if ignore_missing {
                continue;
            }
            if !quiet {
                print_update_index_needs_update(entry.path.as_bytes());
            }
            needs_update = true;
            continue;
        };
        // git's `refresh_cache_ent` runs `ie_match_stat`, whose `S_IFGITLINK`
        // arm never re-reads content: a gitlink whose worktree path is a
        // directory is up to date (an unpopulated/HEAD-matching submodule), so
        // `--refresh` leaves it untouched and silent. Only a gitlink that is no
        // longer a directory (replaced by a file, or removed) is `TYPE_CHANGED`.
        // This is the single `sley_index::gitlink_stat_verdict` rule; without it
        // the `!is_file()` guard below mis-flagged every populated submodule as
        // "needs update". The populated-HEAD comparison is deliberately left to
        // status/diff (the unpopulated default is clean).
        if sley_index::is_gitlink(entry.mode) {
            match sley_index::gitlink_stat_verdict(&metadata) {
                sley_index::GitlinkStatVerdict::Populated => continue,
                sley_index::GitlinkStatVerdict::TypeChanged => {
                    if !quiet {
                        print_update_index_needs_update(entry.path.as_bytes());
                    }
                    needs_update = true;
                    continue;
                }
            }
        }
        // Anything that is not a regular file at this point cannot match a
        // blob entry.
        if !metadata.is_file() {
            if !quiet {
                print_update_index_needs_update(entry.path.as_bytes());
            }
            needs_update = true;
            continue;
        }
        // Stat shortcut: when the cached stat proves the file is unchanged since
        // it was staged, its content hashes to the cached oid by construction
        // (see `IndexStatCache`'s safety invariant). Skip the read+hash and just
        // refresh the stat fields from current metadata — byte-identical to the
        // clean arm below, since the oid stamped is the cached one and the
        // metadata is the same one that re-stamp would read.
        if stat_cache.reuse_index_entry(entry, &metadata).is_some() {
            continue;
        }
        // Slow path: hash the worktree content (nothing is written to the
        // object database) and compare it with the recorded entry.
        let body = fs::read(&absolute)?;
        let object = EncodedObject::new(ObjectType::Blob, body);
        let oid = object.object_id(format)?;
        if oid != entry.oid || file_mode(&metadata) != entry.mode {
            if !quiet {
                print_update_index_needs_update(entry.path.as_bytes());
            }
            needs_update = true;
            // A changed entry is only re-stamped when its path was named
            // explicitly; otherwise the index keeps the old oid.
            if selected_for_update {
                let updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
                if updated_entry != *entry {
                    *entry = updated_entry;
                    index_dirty = true;
                }
            }
            continue;
        }
        // Content and mode match: refresh the stat fields so the next run can
        // take the stat shortcut.
        let updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
        if updated_entry != *entry {
            *entry = updated_entry;
            index_dirty = true;
        }
    }
    if index_dirty {
        fs::write(&index_path, index.write(format)?)?;
    }
    // `quiet` suppresses the non-zero exit as well as the messages.
    if needs_update && !quiet {
        return Err(GitError::Exit(1));
    }
    Ok(UpdateIndexResult {
        entries: index.entries.len(),
        updated: Vec::new(),
    })
}
2075
2076fn refresh_all_index_paths_parallel(
2077    worktree_root: &Path,
2078    index_path: &Path,
2079    format: ObjectFormat,
2080    mut index: Index,
2081    stat_cache: IndexStatCache,
2082    quiet: bool,
2083    ignore_missing: bool,
2084) -> Result<UpdateIndexResult> {
2085    let prechecks = tracked_only_non_clean_prechecks_parallel(worktree_root, &index, &stat_cache)?;
2086    let mut needs_update = false;
2087    let mut index_dirty = false;
2088    for precheck in prechecks {
2089        match precheck {
2090            TrackedOnlyPrecheck::Deleted(idx) => {
2091                if ignore_missing {
2092                    continue;
2093                }
2094                if !quiet {
2095                    print_update_index_needs_update(index.entries[idx].path.as_bytes());
2096                }
2097                needs_update = true;
2098            }
2099            TrackedOnlyPrecheck::Slow(idx) => {
2100                let entry = &mut index.entries[idx];
2101                let path = entry.path.as_bytes().to_vec();
2102                let absolute = worktree_root.join(repo_path_to_os_path(&path)?);
2103                let Ok(metadata) = fs::metadata(&absolute) else {
2104                    if ignore_missing {
2105                        continue;
2106                    }
2107                    if !quiet {
2108                        print_update_index_needs_update(&path);
2109                    }
2110                    needs_update = true;
2111                    continue;
2112                };
2113                // Gitlink: never re-read; a directory on disk is up to date (the
2114                // single `sley_index::gitlink_stat_verdict` rule, matching the
2115                // serial path above). Only a type-changed gitlink needs update.
2116                if sley_index::is_gitlink(entry.mode) {
2117                    match sley_index::gitlink_stat_verdict(&metadata) {
2118                        sley_index::GitlinkStatVerdict::Populated => continue,
2119                        sley_index::GitlinkStatVerdict::TypeChanged => {
2120                            if !quiet {
2121                                print_update_index_needs_update(&path);
2122                            }
2123                            needs_update = true;
2124                            continue;
2125                        }
2126                    }
2127                }
2128                if !metadata.is_file() {
2129                    if !quiet {
2130                        print_update_index_needs_update(&path);
2131                    }
2132                    needs_update = true;
2133                    continue;
2134                }
2135                if stat_cache.reuse_index_entry(entry, &metadata).is_some() {
2136                    continue;
2137                }
2138                let body = fs::read(&absolute)?;
2139                let object = EncodedObject::new(ObjectType::Blob, body);
2140                let oid = object.object_id(format)?;
2141                if oid != entry.oid || file_mode(&metadata) != entry.mode {
2142                    if !quiet {
2143                        print_update_index_needs_update(&path);
2144                    }
2145                    needs_update = true;
2146                    continue;
2147                }
2148                let updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
2149                if updated_entry != *entry {
2150                    *entry = updated_entry;
2151                    index_dirty = true;
2152                }
2153            }
2154        }
2155    }
2156    if index_dirty {
2157        fs::write(index_path, index.write(format)?)?;
2158    }
2159    if needs_update && !quiet {
2160        return Err(GitError::Exit(1));
2161    }
2162    Ok(UpdateIndexResult {
2163        entries: index.entries.len(),
2164        updated: Vec::new(),
2165    })
2166}
2167
2168pub fn update_index_again(
2169    worktree_root: impl AsRef<Path>,
2170    git_dir: impl AsRef<Path>,
2171    format: ObjectFormat,
2172    paths: &[PathBuf],
2173    options: UpdateIndexOptions,
2174) -> Result<UpdateIndexResult> {
2175    let worktree_root = worktree_root.as_ref();
2176    let git_dir = git_dir.as_ref();
2177    let index_path = repository_index_path(git_dir);
2178    if !index_path.exists() {
2179        return Ok(UpdateIndexResult {
2180            entries: 0,
2181            updated: Vec::new(),
2182        });
2183    }
2184    let index = Index::parse(&fs::read(&index_path)?, format)?;
2185    let db = FileObjectDatabase::from_git_dir(git_dir, format);
2186    let head_entries = head_tree_entries(git_dir, format, &db)?;
2187    let selected_paths = selected_git_paths(worktree_root, paths)?;
2188    let mut again_paths = Vec::new();
2189    for entry in &index.entries {
2190        if index_entry_stage(entry) != 0 {
2191            continue;
2192        }
2193        if !selected_paths.is_empty() && !git_path_selected(entry.path.as_bytes(), &selected_paths)
2194        {
2195            continue;
2196        }
2197        let differs_from_head = match head_entries.get(entry.path.as_bytes()) {
2198            Some(head_entry) => head_entry.oid != entry.oid || head_entry.mode != entry.mode,
2199            None => true,
2200        };
2201        if differs_from_head {
2202            again_paths.push(worktree_root.join(repo_path_to_os_path(entry.path.as_bytes())?));
2203        }
2204    }
2205    if again_paths.is_empty() {
2206        return Ok(UpdateIndexResult {
2207            entries: index.entries.len(),
2208            updated: Vec::new(),
2209        });
2210    }
2211    update_index_paths(worktree_root, git_dir, format, &again_paths, options)
2212}
2213
2214pub fn set_index_assume_unchanged_paths(
2215    worktree_root: impl AsRef<Path>,
2216    git_dir: impl AsRef<Path>,
2217    format: ObjectFormat,
2218    paths: &[PathBuf],
2219    assume_unchanged: bool,
2220) -> Result<UpdateIndexResult> {
2221    let worktree_root = worktree_root.as_ref();
2222    let git_dir = git_dir.as_ref();
2223    let index_path = repository_index_path(git_dir);
2224    let mut index = if index_path.exists() {
2225        Index::parse(&fs::read(&index_path)?, format)?
2226    } else {
2227        Index {
2228            version: 2,
2229            entries: Vec::new(),
2230            extensions: Vec::new(),
2231            checksum: None,
2232        }
2233    };
2234    let selected_paths = paths
2235        .iter()
2236        .map(|path| {
2237            let absolute = if path.is_absolute() {
2238                path.clone()
2239            } else {
2240                worktree_root.join(path)
2241            };
2242            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2243                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2244            })?;
2245            git_path_bytes(relative)
2246        })
2247        .collect::<Result<Vec<_>>>()?;
2248    for path in selected_paths {
2249        if let Some(entry) = index.entries.iter_mut().find(|entry| entry.path == path) {
2250            if assume_unchanged {
2251                entry.flags |= INDEX_FLAG_ASSUME_UNCHANGED;
2252            } else {
2253                entry.flags &= !INDEX_FLAG_ASSUME_UNCHANGED;
2254            }
2255        }
2256    }
2257    normalize_index_version_for_extended_flags(&mut index);
2258    fs::write(index_path, index.write(format)?)?;
2259    Ok(UpdateIndexResult {
2260        entries: index.entries.len(),
2261        updated: Vec::new(),
2262    })
2263}
2264
2265fn selected_git_paths(worktree_root: &Path, paths: &[PathBuf]) -> Result<BTreeSet<Vec<u8>>> {
2266    paths
2267        .iter()
2268        .map(|path| {
2269            let absolute = if path.is_absolute() {
2270                path.clone()
2271            } else {
2272                worktree_root.join(path)
2273            };
2274            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2275                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2276            })?;
2277            git_path_bytes(relative)
2278        })
2279        .collect()
2280}
2281
2282fn git_path_selected(path: &[u8], selected_paths: &BTreeSet<Vec<u8>>) -> bool {
2283    selected_paths
2284        .iter()
2285        .any(|selected| path == selected || index_entry_is_under_path(path, selected))
2286}
2287
2288pub fn set_index_skip_worktree_paths(
2289    worktree_root: impl AsRef<Path>,
2290    git_dir: impl AsRef<Path>,
2291    format: ObjectFormat,
2292    paths: &[PathBuf],
2293    skip_worktree: bool,
2294) -> Result<UpdateIndexResult> {
2295    let worktree_root = worktree_root.as_ref();
2296    let git_dir = git_dir.as_ref();
2297    let index_path = repository_index_path(git_dir);
2298    let mut index = if index_path.exists() {
2299        Index::parse(&fs::read(&index_path)?, format)?
2300    } else {
2301        Index {
2302            version: 2,
2303            entries: Vec::new(),
2304            extensions: Vec::new(),
2305            checksum: None,
2306        }
2307    };
2308    let selected_paths = paths
2309        .iter()
2310        .map(|path| {
2311            let absolute = if path.is_absolute() {
2312                path.clone()
2313            } else {
2314                worktree_root.join(path)
2315            };
2316            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2317                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2318            })?;
2319            git_path_bytes(relative)
2320        })
2321        .collect::<Result<Vec<_>>>()?;
2322    for path in selected_paths {
2323        if let Some(entry) = index.entries.iter_mut().find(|entry| entry.path == path) {
2324            if skip_worktree {
2325                entry.flags |= INDEX_FLAG_EXTENDED;
2326                entry.flags_extended |= INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
2327            } else {
2328                entry.flags_extended &= !INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
2329                if entry.flags_extended == 0 {
2330                    entry.flags &= !INDEX_FLAG_EXTENDED;
2331                }
2332            }
2333        }
2334    }
2335    normalize_index_version_for_extended_flags(&mut index);
2336    fs::write(index_path, index.write(format)?)?;
2337    Ok(UpdateIndexResult {
2338        entries: index.entries.len(),
2339        updated: Vec::new(),
2340    })
2341}
2342
2343pub fn set_index_fsmonitor_valid_paths(
2344    worktree_root: impl AsRef<Path>,
2345    git_dir: impl AsRef<Path>,
2346    format: ObjectFormat,
2347    paths: &[PathBuf],
2348    _fsmonitor_valid: bool,
2349) -> Result<UpdateIndexResult> {
2350    let worktree_root = worktree_root.as_ref();
2351    let git_dir = git_dir.as_ref();
2352    let index_path = repository_index_path(git_dir);
2353    let index = if index_path.exists() {
2354        Index::parse(&fs::read(&index_path)?, format)?
2355    } else {
2356        Index {
2357            version: 2,
2358            entries: Vec::new(),
2359            extensions: Vec::new(),
2360            checksum: None,
2361        }
2362    };
2363    let selected_paths = paths
2364        .iter()
2365        .map(|path| {
2366            let absolute = if path.is_absolute() {
2367                path.clone()
2368            } else {
2369                worktree_root.join(path)
2370            };
2371            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2372                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2373            })?;
2374            git_path_bytes(relative)
2375        })
2376        .collect::<Result<Vec<_>>>()?;
2377    for path in selected_paths {
2378        if !index.entries.iter().any(|entry| entry.path == path) {
2379            eprintln!(
2380                "fatal: Unable to mark file {}",
2381                String::from_utf8_lossy(&path)
2382            );
2383            return Err(GitError::Exit(128));
2384        }
2385    }
2386    Ok(UpdateIndexResult {
2387        entries: index.entries.len(),
2388        updated: Vec::new(),
2389    })
2390}
2391
2392pub fn set_index_version(
2393    git_dir: impl AsRef<Path>,
2394    format: ObjectFormat,
2395    version: u32,
2396    verbose: bool,
2397) -> Result<UpdateIndexResult> {
2398    if !matches!(version, 2..=4) {
2399        return Err(GitError::Unsupported(format!(
2400            "update-index currently supports --index-version 2, 3, or 4, got {version}"
2401        )));
2402    }
2403    let git_dir = git_dir.as_ref();
2404    let index_path = repository_index_path(git_dir);
2405    let mut index = if index_path.exists() {
2406        Index::parse(&fs::read(&index_path)?, format)?
2407    } else {
2408        Index {
2409            version: 2,
2410            entries: Vec::new(),
2411            extensions: Vec::new(),
2412            checksum: None,
2413        }
2414    };
2415    // git reports the transition unconditionally under --verbose, even when the
2416    // requested version equals the current one ("was 4, set to 4").
2417    let previous = index.version;
2418    if verbose {
2419        println!("index-version: was {previous}, set to {version}");
2420    }
2421    index.version = version;
2422    normalize_index_version_for_extended_flags(&mut index);
2423    fs::write(index_path, index.write(format)?)?;
2424    Ok(UpdateIndexResult {
2425        entries: index.entries.len(),
2426        updated: Vec::new(),
2427    })
2428}
2429
2430pub fn force_write_index(
2431    git_dir: impl AsRef<Path>,
2432    format: ObjectFormat,
2433) -> Result<UpdateIndexResult> {
2434    let git_dir = git_dir.as_ref();
2435    let index_path = repository_index_path(git_dir);
2436    let mut index = if index_path.exists() {
2437        Index::parse(&fs::read(&index_path)?, format)?
2438    } else {
2439        Index {
2440            version: 2,
2441            entries: Vec::new(),
2442            extensions: Vec::new(),
2443            checksum: None,
2444        }
2445    };
2446    normalize_index_version_for_extended_flags(&mut index);
2447    fs::write(index_path, index.write(format)?)?;
2448    Ok(UpdateIndexResult {
2449        entries: index.entries.len(),
2450        updated: Vec::new(),
2451    })
2452}
2453
/// Returns a copy of the raw index extension area with every `TREE` record
/// removed.
///
/// Each record is a 4-byte signature, a big-endian `u32` payload length, and
/// the payload itself. If the buffer ends in a truncated header or a payload
/// that overruns the buffer, the whole result is discarded and an empty vector
/// is returned.
fn index_extensions_without_cache_tree(extensions: &[u8]) -> Vec<u8> {
    const HEADER_LEN: usize = 8;

    let mut kept = Vec::new();
    let mut cursor = 0;
    while cursor < extensions.len() {
        let rest = &extensions[cursor..];
        if rest.len() < HEADER_LEN {
            // Not enough bytes left for signature + length.
            return Vec::new();
        }
        let payload_len = u32::from_be_bytes([rest[4], rest[5], rest[6], rest[7]]) as usize;
        let record_len = HEADER_LEN + payload_len;
        if record_len > rest.len() {
            // Declared payload runs past the end of the buffer.
            return Vec::new();
        }
        let record = &rest[..record_len];
        if &record[..4] != b"TREE" {
            kept.extend_from_slice(record);
        }
        cursor += record_len;
    }
    kept
}
2479
2480pub fn update_index_cacheinfo(
2481    git_dir: impl AsRef<Path>,
2482    format: ObjectFormat,
2483    entries: &[CacheInfoEntry],
2484    add: bool,
2485    verbose: bool,
2486) -> Result<UpdateIndexResult> {
2487    let git_dir = git_dir.as_ref();
2488    let index_path = repository_index_path(git_dir);
2489    let mut index = if index_path.exists() {
2490        Index::parse(&fs::read(&index_path)?, format)?
2491    } else {
2492        Index {
2493            version: 2,
2494            entries: Vec::new(),
2495            extensions: Vec::new(),
2496            checksum: None,
2497        }
2498    };
2499    let mut updated = Vec::new();
2500    let mut reports: Vec<String> = Vec::new();
2501    for cacheinfo in entries {
2502        if !add
2503            && !index
2504                .entries
2505                .iter()
2506                .any(|existing| existing.path == cacheinfo.path)
2507        {
2508            let path = String::from_utf8_lossy(&cacheinfo.path);
2509            eprintln!("error: {path}: cannot add to the index - missing --add option?");
2510            eprintln!("fatal: git update-index: --cacheinfo cannot add {path}");
2511            return Err(GitError::Exit(128));
2512        }
2513        let flags = index_flags(cacheinfo.path.len(), cacheinfo.stage);
2514        let entry = IndexEntry {
2515            ctime_seconds: 0,
2516            ctime_nanoseconds: 0,
2517            mtime_seconds: 0,
2518            mtime_nanoseconds: 0,
2519            dev: 0,
2520            ino: 0,
2521            mode: cacheinfo.mode,
2522            uid: 0,
2523            gid: 0,
2524            size: 0,
2525            oid: cacheinfo.oid,
2526            flags,
2527            flags_extended: 0,
2528            path: BString::from(cacheinfo.path.as_slice()),
2529        };
2530        index.entries.retain(|existing| {
2531            existing.path != cacheinfo.path || index_entry_stage(existing) != cacheinfo.stage
2532        });
2533        index.entries.push(entry);
2534        updated.push(cacheinfo.oid);
2535        // git's add_cacheinfo() calls report("add '%s'") *after* the entry is
2536        // staged, regardless of whether the subsequent index write succeeds.
2537        reports.push(format!(
2538            "add '{}'",
2539            String::from_utf8_lossy(&cacheinfo.path)
2540        ));
2541    }
2542    index
2543        .entries
2544        .sort_by(|left, right| left.path.cmp(&right.path));
2545    // git refuses to write an index entry whose object id is the null oid:
2546    // do_write_index() emits `error: cache entry has null sha1: <path>` and
2547    // returns nonzero, leaving the on-disk index untouched. The verbose `add`
2548    // line has already been printed by then.
2549    let null_entry = index.entries.iter().find(|entry| entry.oid.is_null());
2550    if let Some(entry) = null_entry {
2551        if verbose {
2552            flush_update_index_reports(&reports)?;
2553        }
2554        eprintln!(
2555            "error: cache entry has null sha1: {}",
2556            String::from_utf8_lossy(&entry.path)
2557        );
2558        return Err(GitError::Exit(128));
2559    }
2560    fs::write(index_path, index.write(format)?)?;
2561    if verbose {
2562        flush_update_index_reports(&reports)?;
2563    }
2564    Ok(UpdateIndexResult {
2565        entries: index.entries.len(),
2566        updated,
2567    })
2568}
2569
2570fn flush_update_index_reports(reports: &[String]) -> Result<()> {
2571    let mut stdout = std::io::stdout().lock();
2572    for line in reports {
2573        writeln!(stdout, "{line}")?;
2574    }
2575    stdout.flush()?;
2576    Ok(())
2577}
2578
2579pub fn update_index_index_info(
2580    git_dir: impl AsRef<Path>,
2581    format: ObjectFormat,
2582    records: &[IndexInfoRecord],
2583) -> Result<UpdateIndexResult> {
2584    let git_dir = git_dir.as_ref();
2585    let index_path = repository_index_path(git_dir);
2586    let mut index = if index_path.exists() {
2587        Index::parse(&fs::read(&index_path)?, format)?
2588    } else {
2589        Index {
2590            version: 2,
2591            entries: Vec::new(),
2592            extensions: Vec::new(),
2593            checksum: None,
2594        }
2595    };
2596    let mut updated = Vec::new();
2597    for record in records {
2598        match record {
2599            IndexInfoRecord::Remove { path } => {
2600                index.entries.retain(|existing| existing.path != *path);
2601            }
2602            IndexInfoRecord::Add(cacheinfo) => {
2603                let flags = index_flags(cacheinfo.path.len(), cacheinfo.stage);
2604                let entry = IndexEntry {
2605                    ctime_seconds: 0,
2606                    ctime_nanoseconds: 0,
2607                    mtime_seconds: 0,
2608                    mtime_nanoseconds: 0,
2609                    dev: 0,
2610                    ino: 0,
2611                    mode: cacheinfo.mode,
2612                    uid: 0,
2613                    gid: 0,
2614                    size: 0,
2615                    oid: cacheinfo.oid,
2616                    flags,
2617                    flags_extended: 0,
2618                    path: BString::from(cacheinfo.path.as_slice()),
2619                };
2620                if cacheinfo.stage == 0 {
2621                    index
2622                        .entries
2623                        .retain(|existing| existing.path != cacheinfo.path);
2624                } else {
2625                    index.entries.retain(|existing| {
2626                        existing.path != cacheinfo.path
2627                            || index_entry_stage(existing) != cacheinfo.stage
2628                    });
2629                }
2630                index.entries.push(entry);
2631                updated.push(cacheinfo.oid);
2632            }
2633        }
2634    }
2635    index.entries.sort_by(|left, right| {
2636        left.path
2637            .cmp(&right.path)
2638            .then_with(|| index_entry_stage(left).cmp(&index_entry_stage(right)))
2639    });
2640    fs::write(index_path, index.write(format)?)?;
2641    Ok(UpdateIndexResult {
2642        entries: index.entries.len(),
2643        updated,
2644    })
2645}
2646
/// Packs an index entry's `flags` word from its path length and stage.
///
/// The low 12 bits hold the path length, saturated at `0xfff`; bits 12-13
/// hold the low two bits of `stage`. All other bits are zero.
fn index_flags(path_len: usize, stage: u16) -> u16 {
    let name_length: u16 = match path_len {
        0..=0x0ffe => path_len as u16,
        _ => 0x0fff,
    };
    let stage_bits = (stage & 0b11) << 12;
    stage_bits | name_length
}
2650
/// Bit 15 of an entry's `flags` word.
/// NOTE(review): presumably git's assume-unchanged bit; its uses are outside this section — confirm.
const INDEX_FLAG_ASSUME_UNCHANGED: u16 = 1 << 15;
/// Bit 14 of an entry's `flags` word; set when `flags_extended` is in use.
const INDEX_FLAG_EXTENDED: u16 = 1 << 14;
/// Bit 14 of an entry's `flags_extended` word; marks the entry skip-worktree.
const INDEX_EXTENDED_FLAG_SKIP_WORKTREE: u16 = 1 << 14;
2654
2655fn normalize_index_version_for_extended_flags(index: &mut Index) {
2656    let has_extended_flags = index
2657        .entries
2658        .iter()
2659        .any(|entry| entry.flags & INDEX_FLAG_EXTENDED != 0 || entry.flags_extended != 0);
2660    if has_extended_flags && index.version < 3 {
2661        index.version = 3;
2662    } else if !has_extended_flags && index.version == 3 {
2663        index.version = 2;
2664    }
2665}
2666
2667fn index_entry_stage(entry: &IndexEntry) -> u16 {
2668    (entry.flags >> 12) & 0x3
2669}
2670
2671/// The oid of the stage-0 entry in `range` (the path's currently-tracked blob),
2672/// if any. Used by the safecrlf check to fetch `has_crlf_in_index`.
2673fn stage0_oid_in_range(entries: &[IndexEntry], range: std::ops::Range<usize>) -> Option<ObjectId> {
2674    entries[range]
2675        .iter()
2676        .find(|entry| index_entry_stage(entry) == 0)
2677        .map(|entry| entry.oid)
2678}
2679
2680fn index_entry_skip_worktree(entry: &IndexEntry) -> bool {
2681    entry.flags & INDEX_FLAG_EXTENDED != 0
2682        && entry.flags_extended & INDEX_EXTENDED_FLAG_SKIP_WORKTREE != 0
2683}
2684
/// Prints the two-line `error:` / `fatal:` diagnostic for a path that could
/// not be processed. The path is rendered lossily as UTF-8 and both lines go
/// to stderr.
fn print_update_index_path_error(path: &[u8], message: &str) {
    let shown = String::from_utf8_lossy(path);
    eprintln!("error: {shown}: {message}");
    eprintln!("fatal: Unable to process path {shown}");
}
2690
/// Prints `<path>: needs update` to stdout, rendering the path lossily as
/// UTF-8.
fn print_update_index_needs_update(path: &[u8]) {
    let shown = String::from_utf8_lossy(path);
    println!("{shown}: needs update");
}
2695
2696pub fn write_tree_from_index(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Result<ObjectId> {
2697    write_tree_from_index_with_options(git_dir, format, WriteTreeOptions::default())
2698}
2699
2700pub fn write_tree_from_index_with_odb(
2701    git_dir: impl AsRef<Path>,
2702    format: ObjectFormat,
2703    odb: &FileObjectDatabase,
2704) -> Result<ObjectId> {
2705    write_tree_from_index_with_options_and_odb(
2706        git_dir.as_ref(),
2707        format,
2708        WriteTreeOptions::default(),
2709        odb,
2710    )
2711}
2712
2713pub fn write_tree_from_index_with_options(
2714    git_dir: impl AsRef<Path>,
2715    format: ObjectFormat,
2716    options: WriteTreeOptions,
2717) -> Result<ObjectId> {
2718    let git_dir = git_dir.as_ref();
2719    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
2720    write_tree_from_index_with_options_and_odb(git_dir, format, options, &odb)
2721}
2722
2723fn write_tree_from_index_with_options_and_odb(
2724    git_dir: &Path,
2725    format: ObjectFormat,
2726    options: WriteTreeOptions,
2727    odb: &FileObjectDatabase,
2728) -> Result<ObjectId> {
2729    let index_path = repository_index_path(git_dir);
2730    // A repository with no index file yet (fresh init, nothing staged) is an
2731    // empty index: `git write-tree` / `git commit --allow-empty` produce the
2732    // empty tree rather than erroring.
2733    let index_bytes = match fs::read(&index_path) {
2734        Ok(bytes) => bytes,
2735        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
2736            let mut checker = odb.presence_checker();
2737            let empty: &[WriteTreeEntry<'_>] = &[];
2738            return write_tree_entries_stream(
2739                empty,
2740                b"",
2741                None,
2742                odb,
2743                &mut checker,
2744                options.missing_ok,
2745            );
2746        }
2747        Err(err) => return Err(err.into()),
2748    };
2749    let mut checker = odb.presence_checker();
2750    match BorrowedIndex::parse(&index_bytes, format) {
2751        Ok(index) => write_tree_from_borrowed_index(&index, format, &options, odb, &mut checker),
2752        Err(GitError::Unsupported(_)) => {
2753            let index = Index::parse(&index_bytes, format)?;
2754            write_tree_from_owned_index(&index, format, &options, odb, &mut checker)
2755        }
2756        Err(err) => Err(err),
2757    }
2758}
2759
2760fn write_tree_from_borrowed_index(
2761    index: &BorrowedIndex<'_>,
2762    format: ObjectFormat,
2763    options: &WriteTreeOptions,
2764    odb: &FileObjectDatabase,
2765    checker: &mut ObjectPresenceChecker,
2766) -> Result<ObjectId> {
2767    let cache_tree = if options.prefix.is_none() {
2768        index.cache_tree(format).ok().flatten()
2769    } else {
2770        None
2771    };
2772    if options.prefix.is_none() && !index.entries.iter().any(|entry| entry.is_intent_to_add()) {
2773        return write_tree_entries_stream(
2774            &index.entries,
2775            b"",
2776            cache_tree.as_ref(),
2777            odb,
2778            checker,
2779            options.missing_ok,
2780        );
2781    }
2782    // intent-to-add entries (`git add -N`, `git reset -N`) are placeholders that do
2783    // NOT belong in a written tree — git's cache_tree_update skips CE_INTENT_TO_ADD.
2784    // Drop them before building, so `write-tree` succeeds and the tree omits them
2785    // (their empty-blob oid is also typically absent from the odb).
2786    let entries = write_tree_entries_for_prefix(
2787        index
2788            .entries
2789            .iter()
2790            .filter(|entry| !entry.is_intent_to_add()),
2791        options.prefix.as_deref(),
2792    )?;
2793    write_tree_entries_stream(
2794        &entries,
2795        b"",
2796        cache_tree.as_ref(),
2797        odb,
2798        checker,
2799        options.missing_ok,
2800    )
2801}
2802
2803fn write_tree_from_owned_index(
2804    index: &Index,
2805    format: ObjectFormat,
2806    options: &WriteTreeOptions,
2807    odb: &FileObjectDatabase,
2808    checker: &mut ObjectPresenceChecker,
2809) -> Result<ObjectId> {
2810    let cache_tree = if options.prefix.is_none() {
2811        index.cache_tree(format).ok().flatten()
2812    } else {
2813        None
2814    };
2815    if options.prefix.is_none() && !index.entries.iter().any(|entry| entry.is_intent_to_add()) {
2816        return write_tree_entries_stream(
2817            &index.entries,
2818            b"",
2819            cache_tree.as_ref(),
2820            odb,
2821            checker,
2822            options.missing_ok,
2823        );
2824    }
2825    let entries = write_tree_entries_for_prefix(
2826        index
2827            .entries
2828            .iter()
2829            .filter(|entry| !entry.is_intent_to_add()),
2830        options.prefix.as_deref(),
2831    )?;
2832    write_tree_entries_stream(
2833        &entries,
2834        b"",
2835        cache_tree.as_ref(),
2836        odb,
2837        checker,
2838        options.missing_ok,
2839    )
2840}
2841
/// Lightweight, copyable view of one entry handed to the tree writer.
///
/// `write_tree_entries_for_prefix` produces these; when a prefix is applied,
/// `path` is the part of the original entry path below that prefix.
#[derive(Clone, Copy)]
struct WriteTreeEntry<'a> {
    /// Entry path as raw bytes, borrowed from the source entry.
    path: &'a [u8],
    /// Mode copied from the source entry.
    mode: u32,
    /// Object id copied from the source entry.
    oid: ObjectId,
}
2848
/// Accessors the tree writer needs from an index entry.
///
/// Implemented for `IndexEntry`, `IndexEntryRef` and `WriteTreeEntry` so the
/// tree-writing routines can be generic over all three representations.
trait WriteTreeIndexEntry {
    /// The entry's path as raw bytes.
    fn write_tree_path(&self) -> &[u8];
    /// The entry's mode.
    fn write_tree_mode(&self) -> u32;
    /// The entry's object id.
    fn write_tree_oid(&self) -> ObjectId;
}
2854
/// Exposes an owned `IndexEntry` to the tree writer.
impl WriteTreeIndexEntry for IndexEntry {
    /// Borrows the path bytes of the owned entry.
    fn write_tree_path(&self) -> &[u8] {
        self.path.as_bytes()
    }

    /// Returns the entry's `mode` field.
    fn write_tree_mode(&self) -> u32 {
        self.mode
    }

    /// Returns a copy of the entry's `oid` field.
    fn write_tree_oid(&self) -> ObjectId {
        self.oid
    }
}
2868
/// Exposes a borrowed `IndexEntryRef` to the tree writer.
impl WriteTreeIndexEntry for IndexEntryRef<'_> {
    /// Returns the borrowed path bytes.
    fn write_tree_path(&self) -> &[u8] {
        self.path
    }

    /// Returns the entry's `mode` field.
    fn write_tree_mode(&self) -> u32 {
        self.mode
    }

    /// Returns a copy of the entry's `oid` field.
    fn write_tree_oid(&self) -> ObjectId {
        self.oid
    }
}
2882
/// Lets already-projected `WriteTreeEntry` values flow through the same
/// generic tree-writing code as raw index entries.
impl WriteTreeIndexEntry for WriteTreeEntry<'_> {
    /// Returns the borrowed path bytes.
    fn write_tree_path(&self) -> &[u8] {
        self.path
    }

    /// Returns the entry's `mode` field.
    fn write_tree_mode(&self) -> u32 {
        self.mode
    }

    /// Returns a copy of the entry's `oid` field.
    fn write_tree_oid(&self) -> ObjectId {
        self.oid
    }
}
2896
2897fn write_tree_entries_for_prefix<'a, E>(
2898    entries: impl IntoIterator<Item = &'a E>,
2899    prefix: Option<&[u8]>,
2900) -> Result<Vec<WriteTreeEntry<'a>>>
2901where
2902    E: WriteTreeIndexEntry + 'a,
2903{
2904    let Some(prefix) = prefix else {
2905        return Ok(entries
2906            .into_iter()
2907            .map(|entry| WriteTreeEntry {
2908                path: entry.write_tree_path(),
2909                mode: entry.write_tree_mode(),
2910                oid: entry.write_tree_oid(),
2911            })
2912            .collect());
2913    };
2914    let trimmed_len = prefix
2915        .iter()
2916        .rposition(|byte| *byte != b'/')
2917        .map(|idx| idx + 1)
2918        .unwrap_or(0);
2919    let trimmed = &prefix[..trimmed_len];
2920    if trimmed.is_empty() {
2921        return Ok(entries
2922            .into_iter()
2923            .map(|entry| WriteTreeEntry {
2924                path: entry.write_tree_path(),
2925                mode: entry.write_tree_mode(),
2926                oid: entry.write_tree_oid(),
2927            })
2928            .collect());
2929    }
2930    let mut prefixed = Vec::new();
2931    for entry in entries {
2932        let Some(remainder) = entry.write_tree_path().strip_prefix(trimmed) else {
2933            continue;
2934        };
2935        let Some(stripped) = remainder.strip_prefix(b"/") else {
2936            continue;
2937        };
2938        if stripped.is_empty() {
2939            continue;
2940        }
2941        prefixed.push(WriteTreeEntry {
2942            path: stripped,
2943            mode: entry.write_tree_mode(),
2944            oid: entry.write_tree_oid(),
2945        });
2946    }
2947    if prefixed.is_empty() {
2948        eprintln!(
2949            "fatal: git-write-tree: prefix {} not found",
2950            String::from_utf8_lossy(prefix)
2951        );
2952        return Err(GitError::Exit(128));
2953    }
2954    Ok(prefixed)
2955}
2956
2957fn write_tree_entries_stream<E>(
2958    entries: &[E],
2959    prefix: &[u8],
2960    cache_tree: Option<&CacheTree>,
2961    odb: &FileObjectDatabase,
2962    checker: &mut ObjectPresenceChecker,
2963    missing_ok: bool,
2964) -> Result<ObjectId>
2965where
2966    E: WriteTreeIndexEntry,
2967{
2968    if let Some(oid) = valid_cache_tree_oid(cache_tree, entries.len()) {
2969        return Ok(oid);
2970    }
2971
2972    let mut tree_entries = Vec::new();
2973    let mut index = 0usize;
2974    while index < entries.len() {
2975        let entry = &entries[index];
2976        let path = entry.write_tree_path();
2977        let Some(remainder) = path.strip_prefix(prefix) else {
2978            return Err(GitError::InvalidPath(format!(
2979                "invalid index path {}",
2980                String::from_utf8_lossy(path)
2981            )));
2982        };
2983        if remainder.is_empty() || remainder[0] == b'/' {
2984            return Err(GitError::InvalidPath(format!(
2985                "invalid index path {}",
2986                String::from_utf8_lossy(path)
2987            )));
2988        }
2989
2990        if let Some(slash) = remainder.iter().position(|byte| *byte == b'/') {
2991            let name = &remainder[..slash];
2992            if name.is_empty() {
2993                return Err(GitError::InvalidPath(format!(
2994                    "invalid index path {}",
2995                    String::from_utf8_lossy(path)
2996                )));
2997            }
2998            let start = index;
2999            let child_cache = cache_tree.and_then(|tree| {
3000                tree.subtrees
3001                    .iter()
3002                    .find(|child| child.name.as_slice() == name)
3003                    .map(|child| &child.tree)
3004            });
3005            if let Some(cached_count) = valid_cache_tree_entry_count(child_cache) {
3006                let end = start.saturating_add(cached_count);
3007                if cached_count > 0
3008                    && end <= entries.len()
3009                    && same_tree_component(entries[end - 1].write_tree_path(), prefix, name)?
3010                    && (end == entries.len()
3011                        || !same_tree_component(entries[end].write_tree_path(), prefix, name)?)
3012                {
3013                    index = end;
3014                } else {
3015                    index += 1;
3016                    while index < entries.len()
3017                        && same_tree_component(entries[index].write_tree_path(), prefix, name)?
3018                    {
3019                        index += 1;
3020                    }
3021                }
3022            } else {
3023                index += 1;
3024                while index < entries.len()
3025                    && same_tree_component(entries[index].write_tree_path(), prefix, name)?
3026                {
3027                    index += 1;
3028                }
3029            }
3030            if let Some(oid) = valid_cache_tree_oid(child_cache, index - start) {
3031                tree_entries.push(TreeEntry {
3032                    mode: 0o040000,
3033                    name: BString::from(name),
3034                    oid,
3035                });
3036                continue;
3037            }
3038            let mut child_prefix = Vec::with_capacity(prefix.len() + name.len() + 1);
3039            child_prefix.extend_from_slice(prefix);
3040            child_prefix.extend_from_slice(name);
3041            child_prefix.push(b'/');
3042            let oid = write_tree_entries_stream(
3043                &entries[start..index],
3044                &child_prefix,
3045                child_cache,
3046                odb,
3047                checker,
3048                missing_ok,
3049            )?;
3050            tree_entries.push(TreeEntry {
3051                mode: 0o040000,
3052                name: BString::from(name),
3053                oid,
3054            });
3055            continue;
3056        }
3057
3058        let mode = entry.write_tree_mode();
3059        let oid = entry.write_tree_oid();
3060        if !missing_ok && !sley_index::is_gitlink(mode) && !checker.contains(&oid)? {
3061            eprintln!(
3062                "error: invalid object {:o} {} for '{}'",
3063                mode,
3064                oid,
3065                String::from_utf8_lossy(path)
3066            );
3067            eprintln!("fatal: git-write-tree: error building trees");
3068            return Err(GitError::Exit(128));
3069        }
3070        tree_entries.push(TreeEntry {
3071            mode,
3072            name: BString::from(remainder),
3073            oid,
3074        });
3075        index += 1;
3076    }
3077
3078    tree_entries.sort_by(|left, right| {
3079        git_tree_entry_cmp(
3080            left.name.as_bytes(),
3081            left.mode,
3082            right.name.as_bytes(),
3083            right.mode,
3084        )
3085    });
3086    odb.write_object(EncodedObject::new(
3087        ObjectType::Tree,
3088        Tree {
3089            entries: tree_entries,
3090        }
3091        .write(),
3092    ))
3093}
3094
3095fn valid_cache_tree_oid(tree: Option<&CacheTree>, entry_count: usize) -> Option<ObjectId> {
3096    let tree = tree?;
3097    if valid_cache_tree_entry_count(Some(tree))? != entry_count {
3098        return None;
3099    }
3100    tree.oid
3101}
3102
3103fn valid_cache_tree_entry_count(tree: Option<&CacheTree>) -> Option<usize> {
3104    let tree = tree?;
3105    if tree.entry_count < 0 || tree.oid.is_none() {
3106        return None;
3107    }
3108    Some(tree.entry_count as usize)
3109}
3110
3111fn same_tree_component(path: &[u8], prefix: &[u8], name: &[u8]) -> Result<bool> {
3112    let Some(remainder) = path.strip_prefix(prefix) else {
3113        return Err(GitError::InvalidPath(format!(
3114            "invalid index path {}",
3115            String::from_utf8_lossy(path)
3116        )));
3117    };
3118    Ok(remainder.starts_with(name) && remainder.get(name.len()) == Some(&b'/'))
3119}
3120
3121pub fn stream_short_status<F>(
3122    worktree_root: impl AsRef<Path>,
3123    git_dir: impl AsRef<Path>,
3124    format: ObjectFormat,
3125    emit: F,
3126) -> Result<()>
3127where
3128    F: for<'a> FnMut(ShortStatusRow<'a>) -> Result<StreamControl>,
3129{
3130    stream_short_status_with_options(
3131        worktree_root,
3132        git_dir,
3133        format,
3134        ShortStatusOptions::default(),
3135        emit,
3136    )
3137}
3138
3139pub fn short_status_count(
3140    worktree_root: impl AsRef<Path>,
3141    git_dir: impl AsRef<Path>,
3142    format: ObjectFormat,
3143) -> Result<usize> {
3144    short_status_count_with_options(
3145        worktree_root,
3146        git_dir,
3147        format,
3148        ShortStatusOptions::default(),
3149    )
3150}
3151
3152pub fn short_status_count_with_options(
3153    worktree_root: impl AsRef<Path>,
3154    git_dir: impl AsRef<Path>,
3155    format: ObjectFormat,
3156    options: ShortStatusOptions,
3157) -> Result<usize> {
3158    let worktree_root = worktree_root.as_ref();
3159    let git_dir = git_dir.as_ref();
3160    let db = FileObjectDatabase::from_git_dir(git_dir, format);
3161    if !options.include_ignored
3162        && let Some(count) = short_status_borrowed_head_matches_index_count_if_possible(
3163            worktree_root,
3164            git_dir,
3165            format,
3166            &db,
3167            options.untracked_mode,
3168        )?
3169    {
3170        return Ok(count);
3171    }
3172    let mut count = 0usize;
3173    stream_short_status_with_options(worktree_root, git_dir, format, options, |_| {
3174        count += 1;
3175        Ok(StreamControl::Continue)
3176    })?;
3177    Ok(count)
3178}
3179
/// Counters and timings gathered while computing status, emitted as a single
/// JSON line on stderr when profiling is switched on.
#[derive(Debug, Clone, Default)]
struct StatusProfileCounters {
    fast_path_borrowed: bool,
    read_dir_calls: u64,
    dir_entries_seen: u64,
    file_type_calls: u64,
    ignore_checks: u64,
    ignore_pattern_tests: u64,
    ignore_glob_fallback_tests: u64,
    tracked_exact_hits: u64,
    tracked_dir_prefix_hits: u64,
    tracked_skip_worktree_prefix_hits: u64,
    untracked_rows: u64,
    tracked_elapsed_us: u128,
    untracked_elapsed_us: u128,
    render_elapsed_us: u128,
    overlap_enabled: bool,
}

impl StatusProfileCounters {
    /// Profiling is on when `SLEY_STATUS_PROFILE` is set to anything other
    /// than `0`.
    fn enabled() -> bool {
        match std::env::var_os("SLEY_STATUS_PROFILE") {
            Some(value) => value != "0",
            None => false,
        }
    }

    /// Adds the directory-walk counters and the untracked elapsed time of
    /// `other` into `self`. The tracked/render timings and the two boolean
    /// flags of `self` are left untouched.
    fn merge_untracked(&mut self, other: StatusProfileCounters) {
        let StatusProfileCounters {
            read_dir_calls,
            dir_entries_seen,
            file_type_calls,
            ignore_checks,
            ignore_pattern_tests,
            ignore_glob_fallback_tests,
            tracked_exact_hits,
            tracked_dir_prefix_hits,
            tracked_skip_worktree_prefix_hits,
            untracked_rows,
            untracked_elapsed_us,
            ..
        } = other;
        self.read_dir_calls += read_dir_calls;
        self.dir_entries_seen += dir_entries_seen;
        self.file_type_calls += file_type_calls;
        self.ignore_checks += ignore_checks;
        self.ignore_pattern_tests += ignore_pattern_tests;
        self.ignore_glob_fallback_tests += ignore_glob_fallback_tests;
        self.tracked_exact_hits += tracked_exact_hits;
        self.tracked_dir_prefix_hits += tracked_dir_prefix_hits;
        self.tracked_skip_worktree_prefix_hits += tracked_skip_worktree_prefix_hits;
        self.untracked_rows += untracked_rows;
        self.untracked_elapsed_us += untracked_elapsed_us;
    }

    /// Prints all counters to stderr as one JSON object tagged with the
    /// `sley.status.profile.v1` schema.
    fn emit(&self) {
        let fields = [
            ("fast_path_borrowed", self.fast_path_borrowed.to_string()),
            ("read_dir_calls", self.read_dir_calls.to_string()),
            ("dir_entries_seen", self.dir_entries_seen.to_string()),
            ("file_type_calls", self.file_type_calls.to_string()),
            ("ignore_checks", self.ignore_checks.to_string()),
            ("ignore_pattern_tests", self.ignore_pattern_tests.to_string()),
            (
                "ignore_glob_fallback_tests",
                self.ignore_glob_fallback_tests.to_string(),
            ),
            ("tracked_exact_hits", self.tracked_exact_hits.to_string()),
            (
                "tracked_dir_prefix_hits",
                self.tracked_dir_prefix_hits.to_string(),
            ),
            (
                "tracked_skip_worktree_prefix_hits",
                self.tracked_skip_worktree_prefix_hits.to_string(),
            ),
            ("untracked_rows", self.untracked_rows.to_string()),
            ("tracked_elapsed_us", self.tracked_elapsed_us.to_string()),
            ("untracked_elapsed_us", self.untracked_elapsed_us.to_string()),
            ("render_elapsed_us", self.render_elapsed_us.to_string()),
            ("overlap_enabled", self.overlap_enabled.to_string()),
        ];
        let body = fields
            .iter()
            .map(|(key, value)| format!("\"{key}\":{value}"))
            .collect::<Vec<_>>()
            .join(",");
        eprintln!("{{\"schema\":\"sley.status.profile.v1\",{body}}}");
    }
}
3254
3255/// Compare one expected tracked entry to the worktree path named by `path`.
3256///
3257/// `path` is repository-relative and uses the platform path representation. For
3258/// callers that already carry git's byte path form, use
3259/// [`worktree_entry_state_by_git_path`].
3260pub fn worktree_entry_state(
3261    worktree_root: impl AsRef<Path>,
3262    git_dir: impl AsRef<Path>,
3263    format: ObjectFormat,
3264    path: impl AsRef<Path>,
3265    expected_oid: &ObjectId,
3266    expected_mode: u32,
3267    index_probe: Option<&IndexStatProbe>,
3268) -> Result<WorktreeEntryState> {
3269    let path = path.as_ref();
3270    if path.is_absolute() {
3271        return Err(GitError::InvalidPath(format!(
3272            "worktree entry path {} is absolute",
3273            path.display()
3274        )));
3275    }
3276    let git_path = git_path_bytes(path)?;
3277    worktree_entry_state_by_git_path(
3278        worktree_root,
3279        git_dir,
3280        format,
3281        &git_path,
3282        expected_oid,
3283        expected_mode,
3284        index_probe,
3285    )
3286}
3287
3288/// Compare one expected tracked entry to the worktree path named by a
3289/// repository-relative git path (`/` separators, raw bytes).
3290///
3291/// The comparison uses the same clean-filter, symlink-target, gitlink, and
3292/// racy-clean stat shortcut rules as [`stream_short_status_with_options`].
3293pub fn worktree_entry_state_by_git_path(
3294    worktree_root: impl AsRef<Path>,
3295    git_dir: impl AsRef<Path>,
3296    format: ObjectFormat,
3297    git_path: &[u8],
3298    expected_oid: &ObjectId,
3299    expected_mode: u32,
3300    index_probe: Option<&IndexStatProbe>,
3301) -> Result<WorktreeEntryState> {
3302    let worktree_root = worktree_root.as_ref();
3303    let git_dir = git_dir.as_ref();
3304    let stat_cache =
3305        index_probe.and_then(|probe| probe.stat_cache_for(git_path, expected_oid, expected_mode));
3306    let Some(worktree_entry) = worktree_entry_for_git_path(
3307        worktree_root,
3308        git_dir,
3309        format,
3310        git_path,
3311        expected_oid,
3312        expected_mode,
3313        stat_cache.as_ref(),
3314    )?
3315    else {
3316        return Ok(WorktreeEntryState::Deleted);
3317    };
3318    if worktree_entry.mode == expected_mode && worktree_entry.oid == *expected_oid {
3319        Ok(WorktreeEntryState::Clean)
3320    } else {
3321        Ok(WorktreeEntryState::Modified)
3322    }
3323}
3324
3325pub fn stream_short_status_with_options<F>(
3326    worktree_root: impl AsRef<Path>,
3327    git_dir: impl AsRef<Path>,
3328    format: ObjectFormat,
3329    options: ShortStatusOptions,
3330    mut emit: F,
3331) -> Result<()>
3332where
3333    F: for<'a> FnMut(ShortStatusRow<'a>) -> Result<StreamControl>,
3334{
3335    let worktree_root = worktree_root.as_ref();
3336    let git_dir = git_dir.as_ref();
3337    let db = FileObjectDatabase::from_git_dir(git_dir, format);
3338    if !options.include_ignored
3339        && let Some(()) = stream_short_status_borrowed_head_matches_index_if_possible(
3340            worktree_root,
3341            git_dir,
3342            format,
3343            &db,
3344            options.untracked_mode,
3345            &mut emit,
3346        )?
3347    {
3348        return Ok(());
3349    }
3350    for entry in collect_short_status_with_options(worktree_root, git_dir, format, options)? {
3351        if emit(entry.as_row())?.is_stop() {
3352            break;
3353        }
3354    }
3355    Ok(())
3356}
3357
3358fn collect_short_status_with_options(
3359    worktree_root: impl AsRef<Path>,
3360    git_dir: impl AsRef<Path>,
3361    format: ObjectFormat,
3362    options: ShortStatusOptions,
3363) -> Result<Vec<ShortStatusEntry>> {
3364    let worktree_root = worktree_root.as_ref();
3365    let git_dir = git_dir.as_ref();
3366    let db = FileObjectDatabase::from_git_dir(git_dir, format);
3367    if !options.include_ignored
3368        && let Some(entries) = short_status_borrowed_head_matches_index_if_possible(
3369            worktree_root,
3370            git_dir,
3371            format,
3372            &db,
3373            options.untracked_mode,
3374        )?
3375    {
3376        return Ok(entries);
3377    }
3378    // Parse the index once: the stat cache lets the worktree walk skip
3379    // re-hashing files whose stat proves they are unchanged since staging
3380    // (git's racy-git shortcut). When HEAD matches the index, the status
3381    // comparison can stream directly from the parsed index and avoid building a
3382    // second path-sorted copy of every tracked entry.
3383    let (parsed_index, stat_cache, head_matches_index) =
3384        read_index_with_stat_cache(git_dir, format, &db)?;
3385    if head_matches_index && !options.include_ignored {
3386        let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
3387        let entries = short_status_tracked_only(
3388            worktree_root,
3389            git_dir,
3390            format,
3391            &db,
3392            &parsed_index,
3393            &stat_cache,
3394            true,
3395            options.untracked_mode,
3396        );
3397        let mut entries = entries?;
3398        let untracked_paths = status_untracked_paths_from_index(
3399            worktree_root,
3400            git_dir,
3401            &parsed_index,
3402            &stat_cache,
3403            &mut ignores,
3404            options.untracked_mode,
3405            None,
3406        )?;
3407        for path in untracked_paths {
3408            entries.push(ShortStatusEntry {
3409                index: b'?',
3410                worktree: b'?',
3411                path,
3412                head_mode: None,
3413                index_mode: None,
3414                worktree_mode: None,
3415                head_oid: None,
3416                index_oid: None,
3417                submodule: None,
3418            });
3419        }
3420        return Ok(entries);
3421    }
3422    let index = index_entries_from_index(parsed_index);
3423    let head = if head_matches_index {
3424        None
3425    } else {
3426        Some(head_tree_entries(git_dir, format, &db)?)
3427    };
3428    let tracked_paths = if options.untracked_mode == StatusUntrackedMode::None {
3429        Some(index.keys().cloned().collect::<BTreeSet<_>>())
3430    } else {
3431        None
3432    };
3433    let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
3434    let (worktree, submodule_dirt_map, tracked_presence) =
3435        status_worktree_entries_with_submodule_dirt(
3436            worktree_root,
3437            git_dir,
3438            format,
3439            &stat_cache,
3440            tracked_paths.as_ref(),
3441            Some(&mut ignores),
3442        )?;
3443    let mut entries = Vec::new();
3444    if head_matches_index {
3445        collect_status_entries_head_matches_index(
3446            &index,
3447            &worktree,
3448            &tracked_presence,
3449            &submodule_dirt_map,
3450            options.untracked_mode,
3451            &mut entries,
3452        );
3453    } else if let Some(head) = head.as_ref() {
3454        collect_status_entries_with_head(
3455            StatusComparisonInputs {
3456                head,
3457                index: &index,
3458                worktree: &worktree,
3459                tracked_presence: &tracked_presence,
3460                submodule_dirt_map: &submodule_dirt_map,
3461                ignores: &ignores,
3462            },
3463            options.untracked_mode,
3464            &mut entries,
3465        );
3466    }
3467    if options.include_ignored {
3468        let ignored_paths =
3469            ignored_untracked_paths(worktree_root, git_dir, &index, &ignores, true)?;
3470        let ignored_paths: Vec<Vec<u8>> = match options.ignored_mode {
3471            StatusIgnoredMode::Matching => ignored_paths,
3472            StatusIgnoredMode::Traditional => {
3473                let mut rolled = BTreeSet::new();
3474                for path in ignored_paths {
3475                    let path = ignored_traditional_rollup_path(
3476                        worktree_root,
3477                        git_dir,
3478                        &path,
3479                        &index,
3480                        &ignores,
3481                    )?;
3482                    if ignored_traditional_path_is_empty_directory(worktree_root, &path)? {
3483                        continue;
3484                    }
3485                    rolled.insert(path);
3486                }
3487                rolled.into_iter().collect()
3488            }
3489        };
3490        for path in ignored_paths {
3491            entries.push(ShortStatusEntry {
3492                index: b'!',
3493                worktree: b'!',
3494                path,
3495                head_mode: None,
3496                index_mode: None,
3497                worktree_mode: None,
3498                head_oid: None,
3499                index_oid: None,
3500                submodule: None,
3501            });
3502        }
3503    }
3504    let untracked_paths: Vec<Vec<u8>> = match options.untracked_mode {
3505        StatusUntrackedMode::All => worktree
3506            .keys()
3507            .filter(|path| !index.contains_key(*path) && !ignores.is_ignored(path, false))
3508            .cloned()
3509            .collect(),
3510        StatusUntrackedMode::Normal => {
3511            normal_untracked_paths_from_worktree(&worktree, &index, &ignores)
3512        }
3513        StatusUntrackedMode::None => Vec::new(),
3514    };
3515    for path in untracked_paths {
3516        entries.push(ShortStatusEntry {
3517            index: b'?',
3518            worktree: b'?',
3519            path,
3520            head_mode: None,
3521            index_mode: None,
3522            worktree_mode: None,
3523            head_oid: None,
3524            index_oid: None,
3525            submodule: None,
3526        });
3527    }
3528    entries.sort_by(|left, right| {
3529        status_sort_category(left)
3530            .cmp(&status_sort_category(right))
3531            .then_with(|| left.path.cmp(&right.path))
3532    });
3533    Ok(entries)
3534}
3535
3536fn collect_status_entries_head_matches_index(
3537    index: &BTreeMap<Vec<u8>, TrackedEntry>,
3538    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
3539    tracked_presence: &HashSet<Vec<u8>>,
3540    submodule_dirt_map: &BTreeMap<Vec<u8>, u8>,
3541    untracked_mode: StatusUntrackedMode,
3542    entries: &mut Vec<ShortStatusEntry>,
3543) {
3544    for (path, index_entry) in index {
3545        let worktree_entry = worktree.get(path);
3546        let worktree_present =
3547            worktree_entry.is_some() || tracked_presence.contains(path.as_slice());
3548        let submodule = status_submodule_from_entries(
3549            path,
3550            index_entry,
3551            worktree_entry,
3552            submodule_dirt_map,
3553            untracked_mode,
3554        );
3555        let worktree_code = match worktree_entry {
3556            None if !worktree_present => b'D',
3557            Some(worktree_entry) if worktree_entry != index_entry => b'M',
3558            _ if submodule.is_some_and(|sub| sub.any()) => b'M',
3559            _ => b' ',
3560        };
3561        if worktree_code != b' ' {
3562            entries.push(ShortStatusEntry {
3563                index: b' ',
3564                worktree: worktree_code,
3565                path: path.clone(),
3566                head_mode: Some(index_entry.mode),
3567                index_mode: Some(index_entry.mode),
3568                worktree_mode: status_worktree_mode(
3569                    Some(index_entry),
3570                    worktree_entry,
3571                    worktree_present,
3572                ),
3573                head_oid: Some(index_entry.oid),
3574                index_oid: Some(index_entry.oid),
3575                submodule: submodule.filter(|sub| sub.any()),
3576            });
3577        }
3578    }
3579}
3580
/// Borrowed inputs for [`collect_status_entries_with_head`], bundled so the
/// HEAD/index/worktree comparison takes a single argument.
struct StatusComparisonInputs<'a> {
    /// Tracked entries of the HEAD tree, keyed by git path.
    head: &'a BTreeMap<Vec<u8>, TrackedEntry>,
    /// Tracked entries of the index, keyed by git path.
    index: &'a BTreeMap<Vec<u8>, TrackedEntry>,
    /// Entries found in the worktree, keyed by git path.
    worktree: &'a BTreeMap<Vec<u8>, TrackedEntry>,
    /// Paths counted as present in the worktree even when `worktree` holds no
    /// entry for them.
    tracked_presence: &'a HashSet<Vec<u8>>,
    /// Per-path submodule dirt bits (tested against the `DIRTY_SUBMODULE_*`
    /// flags).
    submodule_dirt_map: &'a BTreeMap<Vec<u8>, u8>,
    /// Ignore rules consulted for paths that are neither in HEAD nor the index.
    ignores: &'a IgnoreMatcher,
}
3589
3590fn collect_status_entries_with_head(
3591    inputs: StatusComparisonInputs<'_>,
3592    untracked_mode: StatusUntrackedMode,
3593    entries: &mut Vec<ShortStatusEntry>,
3594) {
3595    let mut paths = BTreeSet::new();
3596    paths.extend(inputs.head.keys().cloned());
3597    paths.extend(inputs.index.keys().cloned());
3598    paths.extend(
3599        inputs
3600            .worktree
3601            .keys()
3602            .filter(|path| inputs.index.contains_key(*path))
3603            .cloned(),
3604    );
3605
3606    for path in paths {
3607        let head_entry = inputs.head.get(&path);
3608        let index_entry = inputs.index.get(&path);
3609        let worktree_entry = inputs.worktree.get(&path);
3610        let worktree_present =
3611            worktree_entry.is_some() || inputs.tracked_presence.contains(path.as_slice());
3612        if head_entry.is_none()
3613            && index_entry.is_none()
3614            && worktree_entry.is_some()
3615            && inputs.ignores.is_ignored(&path, false)
3616        {
3617            continue;
3618        }
3619        let submodule = match index_entry {
3620            Some(index_entry) => status_submodule_from_entries(
3621                &path,
3622                index_entry,
3623                worktree_entry,
3624                inputs.submodule_dirt_map,
3625                untracked_mode,
3626            ),
3627            None => None,
3628        };
3629        let (index_code, worktree_code) =
3630            if head_entry.is_none() && index_entry.is_none() && worktree_entry.is_some() {
3631                (b'?', b'?')
3632            } else {
3633                let index_code = match (head_entry, index_entry) {
3634                    (None, Some(_)) => b'A',
3635                    (Some(_), None) => b'D',
3636                    (Some(left), Some(right)) if left != right => b'M',
3637                    _ => b' ',
3638                };
3639                let worktree_code = match (index_entry, worktree_entry) {
3640                    (None, Some(_)) => b'?',
3641                    (Some(_), None) if !worktree_present => b'D',
3642                    (Some(left), Some(right)) if left != right => b'M',
3643                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
3644                    _ => b' ',
3645                };
3646                (index_code, worktree_code)
3647            };
3648        if index_code != b' ' || worktree_code != b' ' {
3649            entries.push(ShortStatusEntry {
3650                index: index_code,
3651                worktree: worktree_code,
3652                path,
3653                head_mode: head_entry.map(|entry| entry.mode),
3654                index_mode: index_entry.map(|entry| entry.mode),
3655                worktree_mode: status_worktree_mode(index_entry, worktree_entry, worktree_present),
3656                head_oid: head_entry.map(|entry| entry.oid),
3657                index_oid: index_entry.map(|entry| entry.oid),
3658                submodule: submodule.filter(|sub| sub.any()),
3659            });
3660        }
3661    }
3662}
3663
3664fn status_worktree_mode(
3665    index_entry: Option<&TrackedEntry>,
3666    worktree_entry: Option<&TrackedEntry>,
3667    worktree_present: bool,
3668) -> Option<u32> {
3669    worktree_entry.map(|entry| entry.mode).or_else(|| {
3670        worktree_present
3671            .then(|| index_entry.map(|entry| entry.mode))
3672            .flatten()
3673    })
3674}
3675
3676fn status_submodule_from_entries(
3677    path: &[u8],
3678    index_entry: &TrackedEntry,
3679    worktree_entry: Option<&TrackedEntry>,
3680    submodule_dirt_map: &BTreeMap<Vec<u8>, u8>,
3681    untracked_mode: StatusUntrackedMode,
3682) -> Option<SubmoduleStatus> {
3683    let worktree_entry = worktree_entry?;
3684    if !sley_index::is_gitlink(index_entry.mode) || !sley_index::is_gitlink(worktree_entry.mode) {
3685        return None;
3686    }
3687    let dirt = submodule_dirt_map.get(path).copied().unwrap_or(0);
3688    Some(SubmoduleStatus {
3689        new_commits: index_entry.oid != worktree_entry.oid,
3690        modified_content: dirt & DIRTY_SUBMODULE_MODIFIED != 0,
3691        untracked_content: dirt & DIRTY_SUBMODULE_UNTRACKED != 0
3692            && !matches!(untracked_mode, StatusUntrackedMode::None),
3693    })
3694}
3695
3696fn short_status_tracked_only(
3697    worktree_root: &Path,
3698    git_dir: &Path,
3699    format: ObjectFormat,
3700    db: &FileObjectDatabase,
3701    index: &Index,
3702    stat_cache: &IndexStatCache,
3703    head_matches_index: bool,
3704    untracked_mode: StatusUntrackedMode,
3705) -> Result<Vec<ShortStatusEntry>> {
3706    let normal_entry_count = index
3707        .entries
3708        .iter()
3709        .filter(|entry| entry.stage() == Stage::Normal)
3710        .count();
3711    if head_matches_index && normal_entry_count >= 512 {
3712        return short_status_tracked_only_head_matches_index_parallel(
3713            worktree_root,
3714            git_dir,
3715            format,
3716            index,
3717            stat_cache,
3718            untracked_mode,
3719        );
3720    }
3721    let head = if head_matches_index {
3722        None
3723    } else {
3724        Some(head_tree_entries(git_dir, format, db)?)
3725    };
3726    if !head_matches_index && normal_entry_count >= 512 {
3727        if let Some(head) = head.as_ref() {
3728            return short_status_tracked_only_with_head_parallel(
3729                worktree_root,
3730                git_dir,
3731                format,
3732                index,
3733                stat_cache,
3734                head,
3735                untracked_mode,
3736            );
3737        }
3738    }
3739    let mut clean_filter = None;
3740    let mut entries = Vec::new();
3741    for entry in index
3742        .entries
3743        .iter()
3744        .filter(|entry| entry.stage() == Stage::Normal)
3745    {
3746        let path = entry.path.as_bytes();
3747        let index_entry = TrackedEntry {
3748            mode: entry.mode,
3749            oid: entry.oid,
3750        };
3751        let head_entry = if head_matches_index {
3752            Some(&index_entry)
3753        } else {
3754            head.as_ref().and_then(|head| head.get(path))
3755        };
3756        let worktree_entry = worktree_entry_for_index_entry_with_attributes(
3757            worktree_root,
3758            git_dir,
3759            format,
3760            entry,
3761            stat_cache,
3762            &mut clean_filter,
3763        )?;
3764        let submodule = tracked_only_submodule_status(
3765            worktree_root,
3766            path,
3767            &index_entry,
3768            worktree_entry.as_ref(),
3769            untracked_mode,
3770        )?;
3771        let index_code = match head_entry {
3772            None => b'A',
3773            Some(head_entry) if *head_entry != index_entry => b'M',
3774            _ => b' ',
3775        };
3776        let worktree_code = match worktree_entry.as_ref() {
3777            None => b'D',
3778            Some(worktree_entry) if *worktree_entry != index_entry => b'M',
3779            _ if submodule.is_some_and(|sub| sub.any()) => b'M',
3780            _ => b' ',
3781        };
3782        if index_code != b' ' || worktree_code != b' ' {
3783            entries.push(ShortStatusEntry {
3784                index: index_code,
3785                worktree: worktree_code,
3786                path: path.to_vec(),
3787                head_mode: head_entry.map(|entry| entry.mode),
3788                index_mode: Some(index_entry.mode),
3789                worktree_mode: worktree_entry.as_ref().map(|entry| entry.mode),
3790                head_oid: head_entry.map(|entry| entry.oid),
3791                index_oid: Some(index_entry.oid),
3792                submodule: submodule.filter(|sub| sub.any()),
3793            });
3794        }
3795    }
3796    if let Some(head) = head.as_ref() {
3797        let index_paths = index
3798            .entries
3799            .iter()
3800            .filter(|entry| entry.stage() == Stage::Normal)
3801            .map(|entry| entry.path.as_bytes().to_vec())
3802            .collect::<HashSet<_>>();
3803        for (path, head_entry) in head {
3804            if index_paths.contains(path.as_slice()) {
3805                continue;
3806            }
3807            entries.push(ShortStatusEntry {
3808                index: b'D',
3809                worktree: b' ',
3810                path: path.clone(),
3811                head_mode: Some(head_entry.mode),
3812                index_mode: None,
3813                worktree_mode: None,
3814                head_oid: Some(head_entry.oid),
3815                index_oid: None,
3816                submodule: None,
3817            });
3818        }
3819    }
3820    entries.sort_by(|left, right| {
3821        status_sort_category(left)
3822            .cmp(&status_sort_category(right))
3823            .then_with(|| left.path.cmp(&right.path))
3824    });
3825    Ok(entries)
3826}
3827
3828fn short_status_borrowed_head_matches_index_if_possible(
3829    worktree_root: &Path,
3830    git_dir: &Path,
3831    format: ObjectFormat,
3832    db: &FileObjectDatabase,
3833    untracked_mode: StatusUntrackedMode,
3834) -> Result<Option<Vec<ShortStatusEntry>>> {
3835    let index_path = repository_index_path(git_dir);
3836    let index_metadata = match fs::metadata(&index_path) {
3837        Ok(metadata) => metadata,
3838        Err(err)
3839            if err.kind() == std::io::ErrorKind::NotFound
3840                && matches!(untracked_mode, StatusUntrackedMode::None) =>
3841        {
3842            return Ok(Some(Vec::new()));
3843        }
3844        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
3845        Err(err) => return Err(err.into()),
3846    };
3847    let index_bytes = fs::read(&index_path)?;
3848    let borrowed = match BorrowedIndex::parse(&index_bytes, format) {
3849        Ok(index) => index,
3850        Err(GitError::Unsupported(_)) => return Ok(None),
3851        Err(err) => return Err(err),
3852    };
3853    let Some(head_tree_oid) = resolve_head_tree_oid(git_dir, format, db)? else {
3854        return Ok(None);
3855    };
3856    let stage0_entry_count = borrowed
3857        .entries
3858        .iter()
3859        .filter(|entry| entry.stage() == Stage::Normal)
3860        .count();
3861    if !head_matches_borrowed_index_from_cache_tree(
3862        &borrowed,
3863        format,
3864        &head_tree_oid,
3865        stage0_entry_count,
3866    )? {
3867        return Ok(None);
3868    }
3869
3870    let index_mtime = file_mtime_parts(&index_metadata);
3871    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
3872    let profile_enabled = StatusProfileCounters::enabled();
3873    let mut profile = profile_enabled.then(|| StatusProfileCounters {
3874        fast_path_borrowed: true,
3875        ..StatusProfileCounters::default()
3876    });
3877
3878    if matches!(untracked_mode, StatusUntrackedMode::None) {
3879        let tracked_start = Instant::now();
3880        let entries = short_status_borrowed_tracked_only_head_matches_index_parallel(
3881            worktree_root,
3882            git_dir,
3883            format,
3884            &borrowed,
3885            &stat_cache,
3886            untracked_mode,
3887        )?;
3888        if let Some(profile) = profile.as_mut() {
3889            profile.tracked_elapsed_us = tracked_start.elapsed().as_micros();
3890            profile.emit();
3891        }
3892        return Ok(Some(entries));
3893    }
3894
3895    if stage0_entry_count < 8192 {
3896        let tracked_start = Instant::now();
3897        let mut entries = short_status_borrowed_tracked_only_head_matches_index_parallel(
3898            worktree_root,
3899            git_dir,
3900            format,
3901            &borrowed,
3902            &stat_cache,
3903            untracked_mode,
3904        )?;
3905        if let Some(profile) = profile.as_mut() {
3906            profile.tracked_elapsed_us = tracked_start.elapsed().as_micros();
3907        }
3908        let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
3909        let untracked_start = Instant::now();
3910        let untracked_paths = status_untracked_paths_from_borrowed_index(
3911            worktree_root,
3912            git_dir,
3913            &borrowed,
3914            &mut ignores,
3915            untracked_mode,
3916            profile.as_mut(),
3917        )?;
3918        if let Some(profile) = profile.as_mut() {
3919            profile.untracked_elapsed_us = untracked_start.elapsed().as_micros();
3920            profile.untracked_rows = untracked_paths.len() as u64;
3921        }
3922        let render_start = Instant::now();
3923        append_untracked_status_entries(&mut entries, untracked_paths);
3924        if let Some(profile) = profile.as_mut() {
3925            profile.render_elapsed_us = render_start.elapsed().as_micros();
3926            profile.emit();
3927        }
3928        return Ok(Some(entries));
3929    }
3930
3931    if let Some(profile) = profile.as_mut() {
3932        profile.overlap_enabled = true;
3933    }
3934    if profile_enabled {
3935        let (mut entries, untracked_paths, untracked_profile) =
3936            std::thread::scope(|scope| -> Result<_> {
3937                let tracked = scope.spawn(|| {
3938                    let start = Instant::now();
3939                    short_status_borrowed_tracked_only_head_matches_index_parallel(
3940                        worktree_root,
3941                        git_dir,
3942                        format,
3943                        &borrowed,
3944                        &stat_cache,
3945                        untracked_mode,
3946                    )
3947                    .map(|entries| (entries, start.elapsed().as_micros()))
3948                });
3949                let untracked = scope.spawn(|| -> Result<(Vec<Vec<u8>>, StatusProfileCounters)> {
3950                    let mut local_profile = StatusProfileCounters::default();
3951                    let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
3952                    let start = Instant::now();
3953                    let paths = status_untracked_paths_from_borrowed_index(
3954                        worktree_root,
3955                        git_dir,
3956                        &borrowed,
3957                        &mut ignores,
3958                        untracked_mode,
3959                        Some(&mut local_profile),
3960                    )?;
3961                    local_profile.untracked_elapsed_us = start.elapsed().as_micros();
3962                    local_profile.untracked_rows = paths.len() as u64;
3963                    Ok((paths, local_profile))
3964                });
3965                let (entries, tracked_elapsed_us) = tracked
3966                    .join()
3967                    .map_err(|_| GitError::Command("status worker panicked".into()))??;
3968                let (untracked_paths, untracked_profile) = untracked
3969                    .join()
3970                    .map_err(|_| GitError::Command("status worker panicked".into()))??;
3971                if let Some(profile) = profile.as_mut() {
3972                    profile.tracked_elapsed_us = tracked_elapsed_us;
3973                }
3974                Ok((entries, untracked_paths, Some(untracked_profile)))
3975            })?;
3976        if let Some(profile) = profile.as_mut() {
3977            if let Some(untracked_profile) = untracked_profile {
3978                profile.merge_untracked(untracked_profile);
3979            }
3980        }
3981        let render_start = Instant::now();
3982        append_untracked_status_entries(&mut entries, untracked_paths);
3983        if let Some(profile) = profile.as_mut() {
3984            profile.render_elapsed_us = render_start.elapsed().as_micros();
3985            profile.emit();
3986        }
3987        return Ok(Some(entries));
3988    }
3989    let (mut entries, untracked_paths) = std::thread::scope(|scope| -> Result<_> {
3990        let tracked = scope.spawn(|| {
3991            short_status_borrowed_tracked_only_head_matches_index_parallel(
3992                worktree_root,
3993                git_dir,
3994                format,
3995                &borrowed,
3996                &stat_cache,
3997                untracked_mode,
3998            )
3999        });
4000        let untracked = scope.spawn(|| -> Result<Vec<Vec<u8>>> {
4001            let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
4002            status_untracked_paths_from_borrowed_index(
4003                worktree_root,
4004                git_dir,
4005                &borrowed,
4006                &mut ignores,
4007                untracked_mode,
4008                None,
4009            )
4010        });
4011        let entries = tracked
4012            .join()
4013            .map_err(|_| GitError::Command("status worker panicked".into()))??;
4014        let untracked_paths = untracked
4015            .join()
4016            .map_err(|_| GitError::Command("status worker panicked".into()))??;
4017        Ok((entries, untracked_paths))
4018    })?;
4019    let render_start = Instant::now();
4020    append_untracked_status_entries(&mut entries, untracked_paths);
4021    if let Some(profile) = profile.as_mut() {
4022        profile.render_elapsed_us = render_start.elapsed().as_micros();
4023        profile.emit();
4024    }
4025    Ok(Some(entries))
4026}
4027
4028fn stream_short_status_borrowed_head_matches_index_if_possible<F>(
4029    worktree_root: &Path,
4030    git_dir: &Path,
4031    format: ObjectFormat,
4032    db: &FileObjectDatabase,
4033    untracked_mode: StatusUntrackedMode,
4034    emit: &mut F,
4035) -> Result<Option<()>>
4036where
4037    F: for<'a> FnMut(ShortStatusRow<'a>) -> Result<StreamControl>,
4038{
4039    let index_path = repository_index_path(git_dir);
4040    let index_metadata = match fs::metadata(&index_path) {
4041        Ok(metadata) => metadata,
4042        Err(err)
4043            if err.kind() == std::io::ErrorKind::NotFound
4044                && matches!(untracked_mode, StatusUntrackedMode::None) =>
4045        {
4046            return Ok(Some(()));
4047        }
4048        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
4049        Err(err) => return Err(err.into()),
4050    };
4051    let index_bytes = fs::read(&index_path)?;
4052    let borrowed = match BorrowedIndex::parse(&index_bytes, format) {
4053        Ok(index) => index,
4054        Err(GitError::Unsupported(_)) => return Ok(None),
4055        Err(err) => return Err(err),
4056    };
4057    let Some(head_tree_oid) = resolve_head_tree_oid(git_dir, format, db)? else {
4058        return Ok(None);
4059    };
4060    let stage0_entry_count = borrowed
4061        .entries
4062        .iter()
4063        .filter(|entry| entry.stage() == Stage::Normal)
4064        .count();
4065    if !head_matches_borrowed_index_from_cache_tree(
4066        &borrowed,
4067        format,
4068        &head_tree_oid,
4069        stage0_entry_count,
4070    )? {
4071        return Ok(None);
4072    }
4073
4074    let index_mtime = file_mtime_parts(&index_metadata);
4075    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
4076    let profile_enabled = StatusProfileCounters::enabled();
4077    let mut profile = profile_enabled.then(|| StatusProfileCounters {
4078        fast_path_borrowed: true,
4079        ..StatusProfileCounters::default()
4080    });
4081
4082    if matches!(untracked_mode, StatusUntrackedMode::None) {
4083        let tracked_start = Instant::now();
4084        let tracked_control =
4085            stream_short_status_borrowed_tracked_only_head_matches_index_parallel(
4086                worktree_root,
4087                git_dir,
4088                format,
4089                &borrowed,
4090                &stat_cache,
4091                untracked_mode,
4092                emit,
4093            )?;
4094        if let Some(profile) = profile.as_mut() {
4095            profile.tracked_elapsed_us = tracked_start.elapsed().as_micros();
4096        }
4097        if let Some(profile) = profile.as_ref() {
4098            profile.emit();
4099        }
4100        if tracked_control.is_stop() {
4101            return Ok(Some(()));
4102        }
4103        return Ok(Some(()));
4104    }
4105
4106    if stage0_entry_count < 8192 {
4107        let tracked_start = Instant::now();
4108        let tracked_control =
4109            stream_short_status_borrowed_tracked_only_head_matches_index_parallel(
4110                worktree_root,
4111                git_dir,
4112                format,
4113                &borrowed,
4114                &stat_cache,
4115                untracked_mode,
4116                emit,
4117            )?;
4118        if let Some(profile) = profile.as_mut() {
4119            profile.tracked_elapsed_us = tracked_start.elapsed().as_micros();
4120        }
4121        if tracked_control.is_stop() {
4122            if let Some(profile) = profile.as_ref() {
4123                profile.emit();
4124            }
4125            return Ok(Some(()));
4126        }
4127        let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
4128        let untracked_start = Instant::now();
4129        stream_status_untracked_paths_from_borrowed_index(
4130            worktree_root,
4131            git_dir,
4132            &borrowed,
4133            &mut ignores,
4134            untracked_mode,
4135            profile.as_mut(),
4136            emit_untracked_status_entry(emit),
4137        )?;
4138        if let Some(profile) = profile.as_mut() {
4139            profile.untracked_elapsed_us = untracked_start.elapsed().as_micros();
4140            profile.emit();
4141        }
4142        return Ok(Some(()));
4143    }
4144
4145    if let Some(profile) = profile.as_mut() {
4146        profile.overlap_enabled = true;
4147    }
4148    let (tracked_control, untracked_paths, untracked_profile) =
4149        std::thread::scope(|scope| -> Result<_> {
4150            let untracked = scope.spawn(|| -> Result<(Vec<Vec<u8>>, StatusProfileCounters)> {
4151                let mut local_profile = StatusProfileCounters::default();
4152                let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
4153                let start = Instant::now();
4154                let paths = status_untracked_paths_from_borrowed_index(
4155                    worktree_root,
4156                    git_dir,
4157                    &borrowed,
4158                    &mut ignores,
4159                    untracked_mode,
4160                    profile_enabled.then_some(&mut local_profile),
4161                )?;
4162                local_profile.untracked_elapsed_us = start.elapsed().as_micros();
4163                local_profile.untracked_rows = paths.len() as u64;
4164                Ok((paths, local_profile))
4165            });
4166            let tracked_start = Instant::now();
4167            let tracked_control =
4168                stream_short_status_borrowed_tracked_only_head_matches_index_parallel(
4169                    worktree_root,
4170                    git_dir,
4171                    format,
4172                    &borrowed,
4173                    &stat_cache,
4174                    untracked_mode,
4175                    emit,
4176                )?;
4177            let tracked_elapsed_us = tracked_start.elapsed().as_micros();
4178            let (untracked_paths, untracked_profile) = untracked
4179                .join()
4180                .map_err(|_| GitError::Command("status worker panicked".into()))??;
4181            if let Some(profile) = profile.as_mut() {
4182                profile.tracked_elapsed_us = tracked_elapsed_us;
4183            }
4184            Ok((
4185                tracked_control,
4186                untracked_paths,
4187                profile_enabled.then_some(untracked_profile),
4188            ))
4189        })?;
4190    if tracked_control.is_stop() {
4191        if let Some(profile) = profile.as_mut()
4192            && let Some(untracked_profile) = untracked_profile
4193        {
4194            profile.merge_untracked(untracked_profile);
4195            profile.emit();
4196        }
4197        return Ok(Some(()));
4198    }
4199    if let Some(profile) = profile.as_mut()
4200        && let Some(untracked_profile) = untracked_profile
4201    {
4202        profile.merge_untracked(untracked_profile);
4203    }
4204    let render_start = Instant::now();
4205    for path in untracked_paths {
4206        let row = untracked_status_row(&path);
4207        if emit(row)?.is_stop() {
4208            break;
4209        }
4210    }
4211    if let Some(profile) = profile.as_mut() {
4212        profile.render_elapsed_us = render_start.elapsed().as_micros();
4213        profile.emit();
4214    }
4215    Ok(Some(()))
4216}
4217
4218fn short_status_borrowed_head_matches_index_count_if_possible(
4219    worktree_root: &Path,
4220    git_dir: &Path,
4221    format: ObjectFormat,
4222    db: &FileObjectDatabase,
4223    untracked_mode: StatusUntrackedMode,
4224) -> Result<Option<usize>> {
4225    let index_path = repository_index_path(git_dir);
4226    let index_metadata = match fs::metadata(&index_path) {
4227        Ok(metadata) => metadata,
4228        Err(err)
4229            if err.kind() == std::io::ErrorKind::NotFound
4230                && matches!(untracked_mode, StatusUntrackedMode::None) =>
4231        {
4232            return Ok(Some(0));
4233        }
4234        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
4235        Err(err) => return Err(err.into()),
4236    };
4237    let index_bytes = fs::read(&index_path)?;
4238    let borrowed = match BorrowedIndex::parse(&index_bytes, format) {
4239        Ok(index) => index,
4240        Err(GitError::Unsupported(_)) => return Ok(None),
4241        Err(err) => return Err(err),
4242    };
4243    let Some(head_tree_oid) = resolve_head_tree_oid(git_dir, format, db)? else {
4244        return Ok(None);
4245    };
4246    let stage0_entry_count = borrowed
4247        .entries
4248        .iter()
4249        .filter(|entry| entry.stage() == Stage::Normal)
4250        .count();
4251    if !head_matches_borrowed_index_from_cache_tree(
4252        &borrowed,
4253        format,
4254        &head_tree_oid,
4255        stage0_entry_count,
4256    )? {
4257        return Ok(None);
4258    }
4259
4260    let index_mtime = file_mtime_parts(&index_metadata);
4261    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
4262    let profile_enabled = StatusProfileCounters::enabled();
4263    let mut profile = profile_enabled.then(|| StatusProfileCounters {
4264        fast_path_borrowed: true,
4265        ..StatusProfileCounters::default()
4266    });
4267
4268    if matches!(untracked_mode, StatusUntrackedMode::None) {
4269        let tracked_start = Instant::now();
4270        let count = short_status_borrowed_tracked_only_head_matches_index_count_parallel(
4271            worktree_root,
4272            git_dir,
4273            format,
4274            &borrowed,
4275            &stat_cache,
4276            untracked_mode,
4277        )?;
4278        if let Some(profile) = profile.as_mut() {
4279            profile.tracked_elapsed_us = tracked_start.elapsed().as_micros();
4280            profile.emit();
4281        }
4282        return Ok(Some(count));
4283    }
4284
4285    if stage0_entry_count < 8192 {
4286        let tracked_start = Instant::now();
4287        let tracked_count = short_status_borrowed_tracked_only_head_matches_index_count_parallel(
4288            worktree_root,
4289            git_dir,
4290            format,
4291            &borrowed,
4292            &stat_cache,
4293            untracked_mode,
4294        )?;
4295        if let Some(profile) = profile.as_mut() {
4296            profile.tracked_elapsed_us = tracked_start.elapsed().as_micros();
4297        }
4298        let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
4299        let untracked_start = Instant::now();
4300        let untracked_count = status_untracked_count_from_borrowed_index(
4301            worktree_root,
4302            git_dir,
4303            &borrowed,
4304            &mut ignores,
4305            untracked_mode,
4306            profile.as_mut(),
4307        )?;
4308        if let Some(profile) = profile.as_mut() {
4309            profile.untracked_elapsed_us = untracked_start.elapsed().as_micros();
4310            profile.untracked_rows = untracked_count as u64;
4311            profile.emit();
4312        }
4313        return Ok(Some(tracked_count + untracked_count));
4314    }
4315
4316    if let Some(profile) = profile.as_mut() {
4317        profile.overlap_enabled = true;
4318    }
4319    let (tracked_count, untracked_count, untracked_profile) =
4320        std::thread::scope(|scope| -> Result<_> {
4321            let tracked = scope.spawn(|| {
4322                let start = Instant::now();
4323                short_status_borrowed_tracked_only_head_matches_index_count_parallel(
4324                    worktree_root,
4325                    git_dir,
4326                    format,
4327                    &borrowed,
4328                    &stat_cache,
4329                    untracked_mode,
4330                )
4331                .map(|count| (count, start.elapsed().as_micros()))
4332            });
4333            let untracked = scope.spawn(|| -> Result<(usize, StatusProfileCounters)> {
4334                let mut local_profile = StatusProfileCounters::default();
4335                let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
4336                let start = Instant::now();
4337                let count = status_untracked_count_from_borrowed_index(
4338                    worktree_root,
4339                    git_dir,
4340                    &borrowed,
4341                    &mut ignores,
4342                    untracked_mode,
4343                    profile_enabled.then_some(&mut local_profile),
4344                )?;
4345                local_profile.untracked_elapsed_us = start.elapsed().as_micros();
4346                local_profile.untracked_rows = count as u64;
4347                Ok((count, local_profile))
4348            });
4349            let (tracked_count, tracked_elapsed_us) = tracked
4350                .join()
4351                .map_err(|_| GitError::Command("status worker panicked".into()))??;
4352            let (untracked_count, untracked_profile) = untracked
4353                .join()
4354                .map_err(|_| GitError::Command("status worker panicked".into()))??;
4355            if let Some(profile) = profile.as_mut() {
4356                profile.tracked_elapsed_us = tracked_elapsed_us;
4357            }
4358            Ok((
4359                tracked_count,
4360                untracked_count,
4361                profile_enabled.then_some(untracked_profile),
4362            ))
4363        })?;
4364    if let Some(profile) = profile.as_mut() {
4365        if let Some(untracked_profile) = untracked_profile {
4366            profile.merge_untracked(untracked_profile);
4367        }
4368        profile.emit();
4369    }
4370    Ok(Some(tracked_count + untracked_count))
4371}
4372
4373fn emit_untracked_status_entry<'a, F>(
4374    emit: &'a mut F,
4375) -> impl FnMut(&[u8]) -> Result<StreamControl> + 'a
4376where
4377    F: for<'row> FnMut(ShortStatusRow<'row>) -> Result<StreamControl>,
4378{
4379    |path| emit(untracked_status_row(path))
4380}
4381
4382fn untracked_status_entry(path: Vec<u8>) -> ShortStatusEntry {
4383    ShortStatusEntry {
4384        index: b'?',
4385        worktree: b'?',
4386        path,
4387        head_mode: None,
4388        index_mode: None,
4389        worktree_mode: None,
4390        head_oid: None,
4391        index_oid: None,
4392        submodule: None,
4393    }
4394}
4395
4396fn untracked_status_row(path: &[u8]) -> ShortStatusRow<'_> {
4397    ShortStatusRow {
4398        index: b'?',
4399        worktree: b'?',
4400        path,
4401        head_mode: None,
4402        index_mode: None,
4403        worktree_mode: None,
4404        head_oid: None,
4405        index_oid: None,
4406        submodule: None,
4407    }
4408}
4409
4410fn append_untracked_status_entries(
4411    entries: &mut Vec<ShortStatusEntry>,
4412    untracked_paths: Vec<Vec<u8>>,
4413) {
4414    for path in untracked_paths {
4415        entries.push(untracked_status_entry(path));
4416    }
4417}
4418
/// A tracked index entry whose stat precheck did not come back clean.
///
/// The payload of each variant is the position of the entry in the index's
/// `entries` list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TrackedOnlyPrecheck {
    /// The precheck classified the entry as deleted.
    Deleted(usize),
    /// The precheck could not decide; callers follow up with a full
    /// comparison of the entry against the worktree.
    Slow(usize),
}
4424
/// Result of the stat precheck for a single tracked index entry.
///
/// `Clean` outcomes are dropped by the precheck collectors; the other two
/// are recorded as the matching `TrackedOnlyPrecheck` variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TrackedOnlyPrecheckOutcome {
    /// Nothing to report for the entry.
    Clean,
    /// The entry was classified as deleted.
    Deleted,
    /// The precheck was inconclusive and a full comparison is required.
    Slow,
}
4431
4432fn short_status_tracked_only_head_matches_index_parallel(
4433    worktree_root: &Path,
4434    git_dir: &Path,
4435    format: ObjectFormat,
4436    index: &Index,
4437    stat_cache: &IndexStatCache,
4438    untracked_mode: StatusUntrackedMode,
4439) -> Result<Vec<ShortStatusEntry>> {
4440    let prechecks = tracked_only_non_clean_prechecks_parallel(worktree_root, index, stat_cache)?;
4441
4442    let mut clean_filter = None;
4443    let mut entries = Vec::new();
4444    for precheck in prechecks {
4445        match precheck {
4446            TrackedOnlyPrecheck::Deleted(idx) => {
4447                let entry = &index.entries[idx];
4448                let path = entry.path.as_bytes();
4449                entries.push(ShortStatusEntry {
4450                    index: b' ',
4451                    worktree: b'D',
4452                    path: path.to_vec(),
4453                    head_mode: Some(entry.mode),
4454                    index_mode: Some(entry.mode),
4455                    worktree_mode: None,
4456                    head_oid: Some(entry.oid),
4457                    index_oid: Some(entry.oid),
4458                    submodule: None,
4459                });
4460            }
4461            TrackedOnlyPrecheck::Slow(idx) => {
4462                let entry = &index.entries[idx];
4463                let path = entry.path.as_bytes();
4464                let index_entry = TrackedEntry {
4465                    mode: entry.mode,
4466                    oid: entry.oid,
4467                };
4468                let worktree_entry = worktree_entry_for_index_entry_with_attributes(
4469                    worktree_root,
4470                    git_dir,
4471                    format,
4472                    entry,
4473                    stat_cache,
4474                    &mut clean_filter,
4475                )?;
4476                let submodule = tracked_only_submodule_status(
4477                    worktree_root,
4478                    path,
4479                    &index_entry,
4480                    worktree_entry.as_ref(),
4481                    untracked_mode,
4482                )?;
4483                let worktree_code = match worktree_entry.as_ref() {
4484                    None => b'D',
4485                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
4486                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
4487                    _ => b' ',
4488                };
4489                if worktree_code != b' ' {
4490                    entries.push(ShortStatusEntry {
4491                        index: b' ',
4492                        worktree: worktree_code,
4493                        path: path.to_vec(),
4494                        head_mode: Some(index_entry.mode),
4495                        index_mode: Some(index_entry.mode),
4496                        worktree_mode: worktree_entry.as_ref().map(|entry| entry.mode),
4497                        head_oid: Some(index_entry.oid),
4498                        index_oid: Some(index_entry.oid),
4499                        submodule: submodule.filter(|sub| sub.any()),
4500                    });
4501                }
4502            }
4503        }
4504    }
4505    entries.sort_by(|left, right| {
4506        status_sort_category(left)
4507            .cmp(&status_sort_category(right))
4508            .then_with(|| left.path.cmp(&right.path))
4509    });
4510    Ok(entries)
4511}
4512
4513fn short_status_borrowed_tracked_only_head_matches_index_parallel(
4514    worktree_root: &Path,
4515    git_dir: &Path,
4516    format: ObjectFormat,
4517    index: &BorrowedIndex<'_>,
4518    stat_cache: &IndexStatCache,
4519    untracked_mode: StatusUntrackedMode,
4520) -> Result<Vec<ShortStatusEntry>> {
4521    let prechecks =
4522        tracked_only_borrowed_non_clean_prechecks_parallel(worktree_root, index, stat_cache)?;
4523
4524    let mut clean_filter = None;
4525    let mut entries = Vec::new();
4526    for precheck in prechecks {
4527        match precheck {
4528            TrackedOnlyPrecheck::Deleted(idx) => {
4529                let entry = &index.entries[idx];
4530                entries.push(ShortStatusEntry {
4531                    index: b' ',
4532                    worktree: b'D',
4533                    path: entry.path.to_vec(),
4534                    head_mode: Some(entry.mode),
4535                    index_mode: Some(entry.mode),
4536                    worktree_mode: None,
4537                    head_oid: Some(entry.oid),
4538                    index_oid: Some(entry.oid),
4539                    submodule: None,
4540                });
4541            }
4542            TrackedOnlyPrecheck::Slow(idx) => {
4543                let entry = &index.entries[idx];
4544                let index_entry = TrackedEntry {
4545                    mode: entry.mode,
4546                    oid: entry.oid,
4547                };
4548                let worktree_entry = worktree_entry_for_index_entry_ref_with_attributes(
4549                    worktree_root,
4550                    git_dir,
4551                    format,
4552                    entry,
4553                    stat_cache,
4554                    &mut clean_filter,
4555                )?;
4556                let submodule = tracked_only_submodule_status(
4557                    worktree_root,
4558                    entry.path,
4559                    &index_entry,
4560                    worktree_entry.as_ref(),
4561                    untracked_mode,
4562                )?;
4563                let worktree_code = match worktree_entry.as_ref() {
4564                    None => b'D',
4565                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
4566                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
4567                    _ => b' ',
4568                };
4569                if worktree_code != b' ' {
4570                    entries.push(ShortStatusEntry {
4571                        index: b' ',
4572                        worktree: worktree_code,
4573                        path: entry.path.to_vec(),
4574                        head_mode: Some(index_entry.mode),
4575                        index_mode: Some(index_entry.mode),
4576                        worktree_mode: worktree_entry.as_ref().map(|entry| entry.mode),
4577                        head_oid: Some(index_entry.oid),
4578                        index_oid: Some(index_entry.oid),
4579                        submodule: submodule.filter(|sub| sub.any()),
4580                    });
4581                }
4582            }
4583        }
4584    }
4585    entries.sort_by(|left, right| {
4586        status_sort_category(left)
4587            .cmp(&status_sort_category(right))
4588            .then_with(|| left.path.cmp(&right.path))
4589    });
4590    Ok(entries)
4591}
4592
4593fn stream_short_status_borrowed_tracked_only_head_matches_index_parallel<F>(
4594    worktree_root: &Path,
4595    git_dir: &Path,
4596    format: ObjectFormat,
4597    index: &BorrowedIndex<'_>,
4598    stat_cache: &IndexStatCache,
4599    untracked_mode: StatusUntrackedMode,
4600    emit: &mut F,
4601) -> Result<StreamControl>
4602where
4603    F: for<'a> FnMut(ShortStatusRow<'a>) -> Result<StreamControl>,
4604{
4605    let prechecks =
4606        tracked_only_borrowed_non_clean_prechecks_parallel(worktree_root, index, stat_cache)?;
4607
4608    let mut clean_filter = None;
4609    for precheck in prechecks {
4610        match precheck {
4611            TrackedOnlyPrecheck::Deleted(idx) => {
4612                let entry = &index.entries[idx];
4613                if emit(ShortStatusRow {
4614                    index: b' ',
4615                    worktree: b'D',
4616                    path: entry.path,
4617                    head_mode: Some(entry.mode),
4618                    index_mode: Some(entry.mode),
4619                    worktree_mode: None,
4620                    head_oid: Some(entry.oid),
4621                    index_oid: Some(entry.oid),
4622                    submodule: None,
4623                })?
4624                .is_stop()
4625                {
4626                    return Ok(StreamControl::Stop);
4627                }
4628            }
4629            TrackedOnlyPrecheck::Slow(idx) => {
4630                let entry = &index.entries[idx];
4631                let index_entry = TrackedEntry {
4632                    mode: entry.mode,
4633                    oid: entry.oid,
4634                };
4635                let worktree_entry = worktree_entry_for_index_entry_ref_with_attributes(
4636                    worktree_root,
4637                    git_dir,
4638                    format,
4639                    entry,
4640                    stat_cache,
4641                    &mut clean_filter,
4642                )?;
4643                let submodule = tracked_only_submodule_status(
4644                    worktree_root,
4645                    entry.path,
4646                    &index_entry,
4647                    worktree_entry.as_ref(),
4648                    untracked_mode,
4649                )?;
4650                let worktree_code = match worktree_entry.as_ref() {
4651                    None => b'D',
4652                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
4653                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
4654                    _ => b' ',
4655                };
4656                if worktree_code != b' ' {
4657                    if emit(ShortStatusRow {
4658                        index: b' ',
4659                        worktree: worktree_code,
4660                        path: entry.path,
4661                        head_mode: Some(index_entry.mode),
4662                        index_mode: Some(index_entry.mode),
4663                        worktree_mode: worktree_entry.as_ref().map(|entry| entry.mode),
4664                        head_oid: Some(index_entry.oid),
4665                        index_oid: Some(index_entry.oid),
4666                        submodule: submodule.filter(|sub| sub.any()),
4667                    })?
4668                    .is_stop()
4669                    {
4670                        return Ok(StreamControl::Stop);
4671                    }
4672                }
4673            }
4674        }
4675    }
4676    Ok(StreamControl::Continue)
4677}
4678
4679fn short_status_borrowed_tracked_only_head_matches_index_count_parallel(
4680    worktree_root: &Path,
4681    git_dir: &Path,
4682    format: ObjectFormat,
4683    index: &BorrowedIndex<'_>,
4684    stat_cache: &IndexStatCache,
4685    untracked_mode: StatusUntrackedMode,
4686) -> Result<usize> {
4687    let prechecks =
4688        tracked_only_borrowed_non_clean_prechecks_parallel(worktree_root, index, stat_cache)?;
4689
4690    let mut clean_filter = None;
4691    let mut count = 0usize;
4692    for precheck in prechecks {
4693        match precheck {
4694            TrackedOnlyPrecheck::Deleted(_) => count += 1,
4695            TrackedOnlyPrecheck::Slow(idx) => {
4696                let entry = &index.entries[idx];
4697                let index_entry = TrackedEntry {
4698                    mode: entry.mode,
4699                    oid: entry.oid,
4700                };
4701                let worktree_entry = worktree_entry_for_index_entry_ref_with_attributes(
4702                    worktree_root,
4703                    git_dir,
4704                    format,
4705                    entry,
4706                    stat_cache,
4707                    &mut clean_filter,
4708                )?;
4709                let submodule = tracked_only_submodule_status(
4710                    worktree_root,
4711                    entry.path,
4712                    &index_entry,
4713                    worktree_entry.as_ref(),
4714                    untracked_mode,
4715                )?;
4716                let worktree_code = match worktree_entry.as_ref() {
4717                    None => b'D',
4718                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
4719                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
4720                    _ => b' ',
4721                };
4722                if worktree_code != b' ' {
4723                    count += 1;
4724                }
4725            }
4726        }
4727    }
4728    Ok(count)
4729}
4730
4731fn short_status_tracked_only_with_head_parallel(
4732    worktree_root: &Path,
4733    git_dir: &Path,
4734    format: ObjectFormat,
4735    index: &Index,
4736    stat_cache: &IndexStatCache,
4737    head: &BTreeMap<Vec<u8>, TrackedEntry>,
4738    untracked_mode: StatusUntrackedMode,
4739) -> Result<Vec<ShortStatusEntry>> {
4740    let prechecks = tracked_only_non_clean_prechecks_parallel(worktree_root, index, stat_cache)?;
4741    let mut precheck_cursor = 0usize;
4742    let mut clean_filter = None;
4743    let mut entries = Vec::new();
4744
4745    for (idx, entry) in index.entries.iter().enumerate() {
4746        if entry.stage() != Stage::Normal {
4747            continue;
4748        }
4749        let path = entry.path.as_bytes();
4750        let index_entry = TrackedEntry {
4751            mode: entry.mode,
4752            oid: entry.oid,
4753        };
4754        let head_entry = head.get(path);
4755        let index_code = match head_entry {
4756            None => b'A',
4757            Some(head_entry) if *head_entry != index_entry => b'M',
4758            _ => b' ',
4759        };
4760        let precheck = prechecks
4761            .get(precheck_cursor)
4762            .copied()
4763            .and_then(|precheck| {
4764                if tracked_only_precheck_index(precheck) == idx {
4765                    precheck_cursor += 1;
4766                    Some(precheck)
4767                } else {
4768                    None
4769                }
4770            });
4771        let (worktree_code, worktree_mode, submodule) = match precheck {
4772            None => (b' ', Some(index_entry.mode), None),
4773            Some(TrackedOnlyPrecheck::Deleted(_)) => (b'D', None, None),
4774            Some(TrackedOnlyPrecheck::Slow(_)) => {
4775                let worktree_entry = worktree_entry_for_index_entry_with_attributes(
4776                    worktree_root,
4777                    git_dir,
4778                    format,
4779                    entry,
4780                    stat_cache,
4781                    &mut clean_filter,
4782                )?;
4783                let submodule = tracked_only_submodule_status(
4784                    worktree_root,
4785                    path,
4786                    &index_entry,
4787                    worktree_entry.as_ref(),
4788                    untracked_mode,
4789                )?;
4790                let worktree_code = match worktree_entry.as_ref() {
4791                    None => b'D',
4792                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
4793                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
4794                    _ => b' ',
4795                };
4796                (
4797                    worktree_code,
4798                    worktree_entry.as_ref().map(|entry| entry.mode),
4799                    submodule.filter(|sub| sub.any()),
4800                )
4801            }
4802        };
4803        if index_code != b' ' || worktree_code != b' ' {
4804            entries.push(ShortStatusEntry {
4805                index: index_code,
4806                worktree: worktree_code,
4807                path: path.to_vec(),
4808                head_mode: head_entry.map(|entry| entry.mode),
4809                index_mode: Some(index_entry.mode),
4810                worktree_mode,
4811                head_oid: head_entry.map(|entry| entry.oid),
4812                index_oid: Some(index_entry.oid),
4813                submodule,
4814            });
4815        }
4816    }
4817
4818    let index_paths = index
4819        .entries
4820        .iter()
4821        .filter(|entry| entry.stage() == Stage::Normal)
4822        .map(|entry| entry.path.as_bytes().to_vec())
4823        .collect::<HashSet<_>>();
4824    for (path, head_entry) in head {
4825        if index_paths.contains(path.as_slice()) {
4826            continue;
4827        }
4828        entries.push(ShortStatusEntry {
4829            index: b'D',
4830            worktree: b' ',
4831            path: path.clone(),
4832            head_mode: Some(head_entry.mode),
4833            index_mode: None,
4834            worktree_mode: None,
4835            head_oid: Some(head_entry.oid),
4836            index_oid: None,
4837            submodule: None,
4838        });
4839    }
4840    entries.sort_by(|left, right| {
4841        status_sort_category(left)
4842            .cmp(&status_sort_category(right))
4843            .then_with(|| left.path.cmp(&right.path))
4844    });
4845    Ok(entries)
4846}
4847
4848fn tracked_only_precheck_index(precheck: TrackedOnlyPrecheck) -> usize {
4849    match precheck {
4850        TrackedOnlyPrecheck::Deleted(idx) | TrackedOnlyPrecheck::Slow(idx) => idx,
4851    }
4852}
4853
4854fn tracked_only_non_clean_prechecks_parallel(
4855    worktree_root: &Path,
4856    index: &Index,
4857    stat_cache: &IndexStatCache,
4858) -> Result<Vec<TrackedOnlyPrecheck>> {
4859    let normal_indices = index
4860        .entries
4861        .iter()
4862        .enumerate()
4863        .filter_map(|(idx, entry)| (entry.stage() == Stage::Normal).then_some(idx))
4864        .collect::<Vec<_>>();
4865    if normal_indices.is_empty() {
4866        return Ok(Vec::new());
4867    }
4868    let max_workers = std::thread::available_parallelism()
4869        .map(|count| count.get())
4870        .unwrap_or(1)
4871        .min(16);
4872    let worker_count = max_workers.min(normal_indices.len().div_ceil(512)).max(1);
4873    if worker_count == 1 {
4874        let mut prechecks = Vec::new();
4875        let mut absolute = PathBuf::new();
4876        for idx in normal_indices {
4877            let entry = &index.entries[idx];
4878            match tracked_only_stat_precheck(worktree_root, entry, stat_cache, &mut absolute)? {
4879                TrackedOnlyPrecheckOutcome::Clean => {}
4880                TrackedOnlyPrecheckOutcome::Deleted => {
4881                    prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
4882                }
4883                TrackedOnlyPrecheckOutcome::Slow => {
4884                    prechecks.push(TrackedOnlyPrecheck::Slow(idx));
4885                }
4886            }
4887        }
4888        return Ok(prechecks);
4889    }
4890    let chunk_size = normal_indices.len().div_ceil(worker_count);
4891    let mut prechecks = std::thread::scope(|scope| -> Result<Vec<TrackedOnlyPrecheck>> {
4892        let mut handles = Vec::new();
4893        for chunk in normal_indices.chunks(chunk_size) {
4894            handles.push(scope.spawn(move || -> Result<Vec<TrackedOnlyPrecheck>> {
4895                let mut prechecks = Vec::new();
4896                let mut absolute = PathBuf::new();
4897                for &idx in chunk {
4898                    let entry = &index.entries[idx];
4899                    match tracked_only_stat_precheck(
4900                        worktree_root,
4901                        entry,
4902                        stat_cache,
4903                        &mut absolute,
4904                    )? {
4905                        TrackedOnlyPrecheckOutcome::Clean => {}
4906                        TrackedOnlyPrecheckOutcome::Deleted => {
4907                            prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
4908                        }
4909                        TrackedOnlyPrecheckOutcome::Slow => {
4910                            prechecks.push(TrackedOnlyPrecheck::Slow(idx));
4911                        }
4912                    }
4913                }
4914                Ok(prechecks)
4915            }));
4916        }
4917        let mut prechecks = Vec::new();
4918        for handle in handles {
4919            let mut chunk = handle
4920                .join()
4921                .map_err(|_| GitError::Command("status worker panicked".into()))??;
4922            prechecks.append(&mut chunk);
4923        }
4924        Ok(prechecks)
4925    })?;
4926    prechecks.sort_by_key(|precheck| match precheck {
4927        TrackedOnlyPrecheck::Deleted(idx) | TrackedOnlyPrecheck::Slow(idx) => *idx,
4928    });
4929    Ok(prechecks)
4930}
4931
4932fn tracked_only_borrowed_non_clean_prechecks_parallel(
4933    worktree_root: &Path,
4934    index: &BorrowedIndex<'_>,
4935    stat_cache: &IndexStatCache,
4936) -> Result<Vec<TrackedOnlyPrecheck>> {
4937    let normal_indices = index
4938        .entries
4939        .iter()
4940        .enumerate()
4941        .filter_map(|(idx, entry)| (entry.stage() == Stage::Normal).then_some(idx))
4942        .collect::<Vec<_>>();
4943    if normal_indices.is_empty() {
4944        return Ok(Vec::new());
4945    }
4946    let max_workers = std::thread::available_parallelism()
4947        .map(|count| count.get())
4948        .unwrap_or(1)
4949        .min(16);
4950    let worker_count = max_workers.min(normal_indices.len().div_ceil(512)).max(1);
4951    if worker_count == 1 {
4952        let mut prechecks = Vec::new();
4953        let mut absolute = PathBuf::new();
4954        for idx in normal_indices {
4955            let entry = &index.entries[idx];
4956            match tracked_only_borrowed_stat_precheck(
4957                worktree_root,
4958                entry,
4959                stat_cache,
4960                &mut absolute,
4961            )? {
4962                TrackedOnlyPrecheckOutcome::Clean => {}
4963                TrackedOnlyPrecheckOutcome::Deleted => {
4964                    prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
4965                }
4966                TrackedOnlyPrecheckOutcome::Slow => {
4967                    prechecks.push(TrackedOnlyPrecheck::Slow(idx));
4968                }
4969            }
4970        }
4971        return Ok(prechecks);
4972    }
4973    let chunk_size = normal_indices.len().div_ceil(worker_count);
4974    let mut prechecks = std::thread::scope(|scope| -> Result<Vec<TrackedOnlyPrecheck>> {
4975        let mut handles = Vec::new();
4976        for chunk in normal_indices.chunks(chunk_size) {
4977            handles.push(scope.spawn(move || -> Result<Vec<TrackedOnlyPrecheck>> {
4978                let mut prechecks = Vec::new();
4979                let mut absolute = PathBuf::new();
4980                for &idx in chunk {
4981                    let entry = &index.entries[idx];
4982                    match tracked_only_borrowed_stat_precheck(
4983                        worktree_root,
4984                        entry,
4985                        stat_cache,
4986                        &mut absolute,
4987                    )? {
4988                        TrackedOnlyPrecheckOutcome::Clean => {}
4989                        TrackedOnlyPrecheckOutcome::Deleted => {
4990                            prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
4991                        }
4992                        TrackedOnlyPrecheckOutcome::Slow => {
4993                            prechecks.push(TrackedOnlyPrecheck::Slow(idx));
4994                        }
4995                    }
4996                }
4997                Ok(prechecks)
4998            }));
4999        }
5000        let mut prechecks = Vec::new();
5001        for handle in handles {
5002            let mut chunk = handle
5003                .join()
5004                .map_err(|_| GitError::Command("status worker panicked".into()))??;
5005            prechecks.append(&mut chunk);
5006        }
5007        Ok(prechecks)
5008    })?;
5009    prechecks.sort_by_key(|precheck| match precheck {
5010        TrackedOnlyPrecheck::Deleted(idx) | TrackedOnlyPrecheck::Slow(idx) => *idx,
5011    });
5012    Ok(prechecks)
5013}
5014
5015fn tracked_only_stat_precheck(
5016    worktree_root: &Path,
5017    index_entry: &IndexEntry,
5018    stat_cache: &IndexStatCache,
5019    absolute: &mut PathBuf,
5020) -> Result<TrackedOnlyPrecheckOutcome> {
5021    if sley_index::is_gitlink(index_entry.mode) {
5022        return Ok(TrackedOnlyPrecheckOutcome::Slow);
5023    }
5024    let git_path = index_entry.path.as_bytes();
5025    set_worktree_path_from_repo_path(worktree_root, git_path, absolute)?;
5026    let metadata = match fs::symlink_metadata(&absolute) {
5027        Ok(metadata) => metadata,
5028        Err(err)
5029            if matches!(
5030                err.kind(),
5031                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
5032            ) =>
5033        {
5034            return Ok(TrackedOnlyPrecheckOutcome::Deleted);
5035        }
5036        Err(err) => return Err(err.into()),
5037    };
5038    let file_type = metadata.file_type();
5039    if file_type.is_dir() || !(file_type.is_file() || file_type.is_symlink()) {
5040        return Ok(TrackedOnlyPrecheckOutcome::Slow);
5041    }
5042    if stat_cache
5043        .reuse_index_entry(index_entry, &metadata)
5044        .is_some()
5045    {
5046        Ok(TrackedOnlyPrecheckOutcome::Clean)
5047    } else {
5048        Ok(TrackedOnlyPrecheckOutcome::Slow)
5049    }
5050}
5051
5052fn tracked_only_borrowed_stat_precheck(
5053    worktree_root: &Path,
5054    index_entry: &IndexEntryRef<'_>,
5055    stat_cache: &IndexStatCache,
5056    absolute: &mut PathBuf,
5057) -> Result<TrackedOnlyPrecheckOutcome> {
5058    if sley_index::is_gitlink(index_entry.mode) {
5059        return Ok(TrackedOnlyPrecheckOutcome::Slow);
5060    }
5061    set_worktree_path_from_repo_path(worktree_root, index_entry.path, absolute)?;
5062    let metadata = match fs::symlink_metadata(&absolute) {
5063        Ok(metadata) => metadata,
5064        Err(err)
5065            if matches!(
5066                err.kind(),
5067                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
5068            ) =>
5069        {
5070            return Ok(TrackedOnlyPrecheckOutcome::Deleted);
5071        }
5072        Err(err) => return Err(err.into()),
5073    };
5074    let file_type = metadata.file_type();
5075    if file_type.is_dir() || !(file_type.is_file() || file_type.is_symlink()) {
5076        return Ok(TrackedOnlyPrecheckOutcome::Slow);
5077    }
5078    if stat_cache
5079        .reuse_index_entry_ref(index_entry, &metadata)
5080        .is_some()
5081    {
5082        Ok(TrackedOnlyPrecheckOutcome::Clean)
5083    } else {
5084        Ok(TrackedOnlyPrecheckOutcome::Slow)
5085    }
5086}
5087
5088fn set_worktree_path_from_repo_path(
5089    worktree_root: &Path,
5090    git_path: &[u8],
5091    out: &mut PathBuf,
5092) -> Result<()> {
5093    out.clear();
5094    out.push(worktree_root);
5095    push_repo_path(out, git_path)
5096}
5097
5098#[cfg(unix)]
5099fn push_repo_path(out: &mut PathBuf, path: &[u8]) -> Result<()> {
5100    use std::os::unix::ffi::OsStrExt;
5101
5102    out.push(Path::new(std::ffi::OsStr::from_bytes(path)));
5103    Ok(())
5104}
5105
/// Appends the repository path `path` to `out` (non-Unix platforms).
///
/// The path must be valid UTF-8; it is split on `/` and each component is
/// pushed separately so the platform's own separator is used.
///
/// # Errors
///
/// Returns `GitError::InvalidPath` when `path` is not UTF-8.
#[cfg(not(unix))]
fn push_repo_path(out: &mut PathBuf, path: &[u8]) -> Result<()> {
    let text = std::str::from_utf8(path)
        .map_err(|_| GitError::InvalidPath("index path is not utf8".into()))?;
    // `PathBuf::extend` pushes every yielded component in order.
    out.extend(text.split('/'));
    Ok(())
}
5115
5116fn tracked_only_submodule_status(
5117    worktree_root: &Path,
5118    path: &[u8],
5119    index_entry: &TrackedEntry,
5120    worktree_entry: Option<&TrackedEntry>,
5121    untracked_mode: StatusUntrackedMode,
5122) -> Result<Option<SubmoduleStatus>> {
5123    let Some(worktree_entry) = worktree_entry else {
5124        return Ok(None);
5125    };
5126    if !sley_index::is_gitlink(index_entry.mode) || !sley_index::is_gitlink(worktree_entry.mode) {
5127        return Ok(None);
5128    }
5129    let absolute = worktree_root.join(repo_path_to_os_path(path)?);
5130    let dirt = if absolute.is_dir() {
5131        submodule_dirt(&absolute)
5132    } else {
5133        0
5134    };
5135    Ok(Some(SubmoduleStatus {
5136        new_commits: index_entry.oid != worktree_entry.oid,
5137        modified_content: dirt & DIRTY_SUBMODULE_MODIFIED != 0,
5138        untracked_content: dirt & DIRTY_SUBMODULE_UNTRACKED != 0
5139            && !matches!(untracked_mode, StatusUntrackedMode::None),
5140    }))
5141}
5142
5143fn status_sort_category(entry: &ShortStatusEntry) -> u8 {
5144    match (entry.index, entry.worktree) {
5145        (b'?', b'?') => 1,
5146        (b'!', b'!') => 2,
5147        _ => 0,
5148    }
5149}
5150
5151pub fn untracked_paths(
5152    worktree_root: impl AsRef<Path>,
5153    git_dir: impl AsRef<Path>,
5154    format: ObjectFormat,
5155) -> Result<Vec<Vec<u8>>> {
5156    untracked_paths_with_options(
5157        worktree_root,
5158        git_dir,
5159        format,
5160        UntrackedPathOptions::default(),
5161    )
5162}
5163
/// Pathspec filter for untracked collection. Mirrors git `ls-files` pathspec
/// semantics: literal paths, recursive directory prefixes, and fnmatch globs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UntrackedPathspecFilter {
    /// Pathspec bytes. An empty path matches every candidate path.
    pub path: Vec<u8>,
    /// When set, the filter also matches any non-empty path below `path/`.
    pub recursive: bool,
    /// When set, `path` is additionally tried as a wildmatch pattern.
    pub is_glob: bool,
}
5172
/// Options controlling [`untracked_paths_with_options`].
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct UntrackedPathOptions {
    /// Walk the worktree with directory rollup instead of listing every
    /// untracked file individually.
    pub directory: bool,
    /// With `directory`: only report a rolled-up directory when a file is
    /// found inside it.
    pub no_empty_directory: bool,
    /// With `directory`: descend into, rather than roll up, a directory that
    /// contains ignored entries.
    pub preserve_ignored_directories: bool,
    /// Forwarded to the ignore matcher; presumably enables the standard
    /// exclude sources — confirm against `IgnoreMatcher::from_sources`.
    pub exclude_standard: bool,
    /// Return ignored paths instead of untracked ones.
    pub ignored_only: bool,
    /// Extra exclude patterns forwarded to the ignore matcher.
    pub exclude_patterns: Vec<Vec<u8>>,
    /// Per-directory exclude file names forwarded to the ignore matcher.
    pub exclude_per_directory: Vec<String>,
    /// Pathspec filters consulted by the `directory` walk.
    pub pathspecs: Vec<UntrackedPathspecFilter>,
}
5184
5185// The wildmatch engine and the single-item pathspec matcher now live in the
5186// shared `sley-pathspec` crate. Re-export them so existing `sley-worktree`
5187// callers (and the t3070 `ls-files` path) keep their public surface unchanged.
5188pub use sley_pathspec::{
5189    PathspecMatchMagic, WM_CASEFOLD, WM_PATHNAME, pathspec_is_glob, pathspec_item_matches,
5190    wildmatch,
5191};
5192
5193/// Whether `path` matches an `ls-files` pathspec (literal, directory prefix, or glob).
5194pub fn untracked_pathspec_matches(spec: &UntrackedPathspecFilter, path: &[u8]) -> bool {
5195    if spec.path.is_empty() {
5196        return true;
5197    }
5198    let path_no_slash = path.strip_suffix(b"/").unwrap_or(path);
5199    if path == spec.path.as_slice() || path_no_slash == spec.path.as_slice() {
5200        return true;
5201    }
5202    if spec.recursive
5203        && let Some(rest) = path
5204            .strip_prefix(spec.path.as_slice())
5205            .and_then(|rest| rest.strip_prefix(b"/"))
5206        && !rest.is_empty()
5207    {
5208        return true;
5209    }
5210    if spec.is_glob {
5211        return untracked_wildmatch(&spec.path, path)
5212            || untracked_wildmatch(&spec.path, path_no_slash);
5213    }
5214    false
5215}
5216
5217/// Whether a directory walk must descend into `parent` to satisfy active pathspecs.
5218pub fn untracked_pathspec_needs_descent(parent: &[u8], specs: &[UntrackedPathspecFilter]) -> bool {
5219    if specs.is_empty() {
5220        return false;
5221    }
5222    let parent_prefix = if parent.is_empty() {
5223        Vec::new()
5224    } else {
5225        let mut prefix = parent.to_vec();
5226        prefix.push(b'/');
5227        prefix
5228    };
5229    for spec in specs {
5230        if !parent.is_empty()
5231            && spec.path.starts_with(&parent_prefix)
5232            && spec.path.as_slice() != parent
5233        {
5234            return true;
5235        }
5236        if spec.is_glob && glob_pathspec_may_match_under(&spec.path, parent) {
5237            return true;
5238        }
5239        if spec.recursive
5240            && !parent.is_empty()
5241            && parent.starts_with(spec.path.as_slice())
5242            && parent != spec.path.as_slice()
5243        {
5244            return true;
5245        }
5246    }
5247    false
5248}
5249
5250/// Whether some pathspec selects the directory `git_path` *as a whole* (so an
5251/// untracked directory can roll up to `dir/` under `--directory`), as opposed to
5252/// only matching something strictly below it (which forces descent). A
5253/// directory-prefix pathspec covering the directory, an exact directory match, or
5254/// a glob matching the directory's own name all count; a deeper glob such as
5255/// `dir/*.c` or an exact file path inside the directory does not.
5256fn untracked_pathspec_selects_directory(
5257    specs: &[UntrackedPathspecFilter],
5258    git_path: &[u8],
5259) -> bool {
5260    specs
5261        .iter()
5262        .any(|spec| untracked_pathspec_matches(spec, git_path))
5263}
5264
5265fn glob_pathspec_may_match_under(pattern: &[u8], dir: &[u8]) -> bool {
5266    let literal_prefix = literal_prefix_before_glob(pattern);
5267    if literal_prefix.is_empty() {
5268        return true;
5269    }
5270    if dir.is_empty() {
5271        return true;
5272    }
5273    let mut dir_prefix = dir.to_vec();
5274    dir_prefix.push(b'/');
5275    if literal_prefix.starts_with(&dir_prefix) {
5276        return true;
5277    }
5278    if dir_prefix.starts_with(&literal_prefix) {
5279        return true;
5280    }
5281    literal_prefix
5282        .strip_suffix(b"/")
5283        .is_some_and(|prefix| prefix == dir)
5284}
5285
/// Returns the bytes of `pattern` that precede its first glob metacharacter
/// (`*`, `?` or `[`), or the whole pattern when it contains none.
fn literal_prefix_before_glob(pattern: &[u8]) -> Vec<u8> {
    let cut = pattern
        .iter()
        .position(|&byte| matches!(byte, b'*' | b'?' | b'['))
        .unwrap_or(pattern.len());
    pattern[..cut].to_vec()
}
5296
/// Records `git_path` in `paths` as a directory entry, i.e. with exactly one
/// trailing `/` appended when the path does not already end in one.
fn insert_untracked_directory(paths: &mut BTreeSet<Vec<u8>>, git_path: &[u8]) {
    let needs_slash = !git_path.ends_with(b"/");
    let mut directory = Vec::with_capacity(git_path.len() + usize::from(needs_slash));
    directory.extend_from_slice(git_path);
    if needs_slash {
        directory.push(b'/');
    }
    paths.insert(directory);
}
5304
/// fnmatch-style glob where `*` and `?` match any byte including `/`.
///
/// Thin wrapper over the shared `wildmatch` engine, called with no flags.
fn untracked_wildmatch(pattern: &[u8], text: &[u8]) -> bool {
    // Untracked-walk pathspec globs match with PATHMATCH semantics (`*` crosses
    // `/`), matching git's default (non-GLOB-magic) pathspec behavior. Passing
    // flags `0` means `WM_PATHNAME` is not set.
    wildmatch(pattern, text, 0)
}
5311
/// Description of the ignore pattern that decided a path, as returned by
/// [`standard_ignore_match`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IgnoreMatch {
    /// Where the pattern came from — presumably the exclude file's path;
    /// confirm against `IgnorePattern::to_match`.
    pub source: Vec<u8>,
    /// Line of the pattern within `source` — NOTE(review): confirm whether
    /// this is 1-based.
    pub line_number: usize,
    /// The pattern text.
    pub pattern: Vec<u8>,
    /// Whether the pattern ignores the path — presumably `false` for a
    /// negated (`!`) pattern; confirm.
    pub ignored: bool,
}
5319
/// State of one attribute for a path.
///
/// NOTE(review): presumably mirrors gitattributes' set (`attr`), unset
/// (`-attr`) and valued (`attr=value`) states — confirm against the matcher.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AttributeState {
    /// The attribute is set.
    Set,
    /// The attribute is explicitly unset.
    Unset,
    /// The attribute carries the given value bytes.
    Value(Vec<u8>),
}
5326
/// Result of looking up a single attribute for a path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AttributeCheck {
    /// Attribute name.
    pub attribute: Vec<u8>,
    /// Resolved state; presumably `None` when no pattern specifies the
    /// attribute — confirm against `AttributeMatcher::attributes_for_path`.
    pub state: Option<AttributeState>,
}
5332
5333pub fn untracked_paths_with_options(
5334    worktree_root: impl AsRef<Path>,
5335    git_dir: impl AsRef<Path>,
5336    format: ObjectFormat,
5337    options: UntrackedPathOptions,
5338) -> Result<Vec<Vec<u8>>> {
5339    let worktree_root = worktree_root.as_ref();
5340    let git_dir = git_dir.as_ref();
5341    let db = FileObjectDatabase::from_git_dir(git_dir, format);
5342    let (index, stat_cache, _) = read_index_entries_with_stat_cache(git_dir, format, &db)?;
5343    let ignores = IgnoreMatcher::from_sources(
5344        worktree_root,
5345        options.exclude_standard,
5346        &options.exclude_patterns,
5347        &options.exclude_per_directory,
5348    )?;
5349    if options.ignored_only {
5350        return ignored_untracked_paths(
5351            worktree_root,
5352            git_dir,
5353            &index,
5354            &ignores,
5355            options.directory,
5356        );
5357    }
5358    if options.directory {
5359        let mut paths = BTreeSet::new();
5360        collect_untracked_directory_paths(
5361            worktree_root,
5362            git_dir,
5363            worktree_root,
5364            &index,
5365            &ignores,
5366            &options,
5367            &mut paths,
5368        )?;
5369        return Ok(paths.into_iter().collect());
5370    }
5371    let worktree = worktree_entries_with_stat_cache(
5372        worktree_root,
5373        git_dir,
5374        format,
5375        Some(&stat_cache),
5376        None,
5377        None,
5378    )?;
5379    Ok(ls_files_untracked_paths_from_worktree(
5380        &worktree, &index, &ignores,
5381    ))
5382}
5383
5384/// Untracked paths for `ls-files --others` (without `--directory`): every
5385/// untracked file is listed individually, except embedded-repository boundaries
5386/// which are emitted as `dir/` to match git's non-submodule `.git` handling.
5387fn ls_files_untracked_paths_from_worktree(
5388    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
5389    index: &BTreeMap<Vec<u8>, TrackedEntry>,
5390    ignores: &IgnoreMatcher,
5391) -> Vec<Vec<u8>> {
5392    let mut paths = BTreeSet::new();
5393    for (path, entry) in worktree {
5394        if index.contains_key(path) || ignores.is_ignored(path, false) {
5395            continue;
5396        }
5397        if entry.mode == 0o040000 && entry.oid.is_null() {
5398            insert_untracked_directory(&mut paths, path);
5399            continue;
5400        }
5401        paths.insert(path.clone());
5402    }
5403    paths.into_iter().collect()
5404}
5405
5406pub fn path_matches_standard_ignore(
5407    worktree_root: impl AsRef<Path>,
5408    path: &[u8],
5409    is_dir: bool,
5410) -> Result<bool> {
5411    path_matches_ignore(worktree_root, path, is_dir, true, &[])
5412}
5413
5414pub fn standard_ignore_match(
5415    worktree_root: impl AsRef<Path>,
5416    path: &[u8],
5417    is_dir: bool,
5418) -> Result<Option<IgnoreMatch>> {
5419    let ignores = IgnoreMatcher::from_worktree_root(worktree_root.as_ref())?;
5420    Ok(ignores.match_for(path, is_dir).map(IgnorePattern::to_match))
5421}
5422
5423pub fn standard_attributes_for_path(
5424    worktree_root: impl AsRef<Path>,
5425    path: &[u8],
5426    requested: &[Vec<u8>],
5427    all: bool,
5428) -> Result<Vec<AttributeCheck>> {
5429    let matcher = AttributeMatcher::from_worktree_root(worktree_root.as_ref())?;
5430    Ok(matcher.attributes_for_path(path, requested, all))
5431}
5432
/// A reusable matcher for standard worktree attributes (global or
/// `core.attributesFile`, every in-tree `.gitattributes`, and
/// `$GIT_DIR/info/attributes`).
///
/// This is behaviourally identical to [`standard_attributes_for_path`] except
/// the attribute sources are read once and reused for each path.
pub struct StandardAttributeMatcher {
    // Matcher loaded once by `from_worktree_root`; every lookup delegates to it.
    matcher: AttributeMatcher,
}
5442
5443impl StandardAttributeMatcher {
5444    pub fn from_worktree_root(worktree_root: impl AsRef<Path>) -> Result<Self> {
5445        Ok(Self {
5446            matcher: AttributeMatcher::from_worktree_root(worktree_root.as_ref())?,
5447        })
5448    }
5449
5450    pub fn attributes_for_path(
5451        &self,
5452        path: &[u8],
5453        requested: &[Vec<u8>],
5454        all: bool,
5455    ) -> Vec<AttributeCheck> {
5456        self.matcher.attributes_for_path(path, requested, all)
5457    }
5458}
5459
5460pub fn standard_attributes_for_path_from_tree(
5461    worktree_root: impl AsRef<Path>,
5462    db: &FileObjectDatabase,
5463    format: ObjectFormat,
5464    tree_oid: &ObjectId,
5465    path: &[u8],
5466    requested: &[Vec<u8>],
5467    all: bool,
5468) -> Result<Vec<AttributeCheck>> {
5469    let mut matcher = AttributeMatcher::default();
5470    let worktree_root = worktree_root.as_ref();
5471    if !matcher.read_configured_attributes(worktree_root) {
5472        matcher.read_default_global_attributes();
5473    }
5474    collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
5475    read_attribute_patterns(
5476        worktree_root.join(".git").join("info").join("attributes"),
5477        &mut matcher,
5478        &[],
5479        b".git/info/attributes",
5480    );
5481    Ok(matcher.attributes_for_path(path, requested, all))
5482}
5483
5484pub fn standard_attributes_for_path_from_index(
5485    worktree_root: impl AsRef<Path>,
5486    git_dir: impl AsRef<Path>,
5487    format: ObjectFormat,
5488    path: &[u8],
5489    requested: &[Vec<u8>],
5490    all: bool,
5491) -> Result<Vec<AttributeCheck>> {
5492    let worktree_root = worktree_root.as_ref();
5493    let git_dir = git_dir.as_ref();
5494    let mut matcher = AttributeMatcher::default();
5495    if !matcher.read_configured_attributes(worktree_root) {
5496        matcher.read_default_global_attributes();
5497    }
5498    let db = FileObjectDatabase::from_git_dir(git_dir, format);
5499    collect_attribute_patterns_from_index(git_dir, format, &db, &mut matcher)?;
5500    read_attribute_patterns(
5501        worktree_root.join(".git").join("info").join("attributes"),
5502        &mut matcher,
5503        &[],
5504        b".git/info/attributes",
5505    );
5506    Ok(matcher.attributes_for_path(path, requested, all))
5507}
5508
5509pub fn path_matches_ignore(
5510    worktree_root: impl AsRef<Path>,
5511    path: &[u8],
5512    is_dir: bool,
5513    exclude_standard: bool,
5514    exclude_patterns: &[Vec<u8>],
5515) -> Result<bool> {
5516    path_matches_ignore_with_per_directory(
5517        worktree_root,
5518        path,
5519        is_dir,
5520        exclude_standard,
5521        exclude_patterns,
5522        &[],
5523    )
5524}
5525
5526pub fn path_matches_ignore_with_per_directory(
5527    worktree_root: impl AsRef<Path>,
5528    path: &[u8],
5529    is_dir: bool,
5530    exclude_standard: bool,
5531    exclude_patterns: &[Vec<u8>],
5532    exclude_per_directory: &[String],
5533) -> Result<bool> {
5534    let ignores = IgnoreMatcher::from_sources(
5535        worktree_root.as_ref(),
5536        exclude_standard,
5537        exclude_patterns,
5538        exclude_per_directory,
5539    )?;
5540    Ok(ignores.is_ignored(path, is_dir))
5541}
5542
5543pub fn ignored_index_entries<'a>(
5544    worktree_root: impl AsRef<Path>,
5545    entries: &'a [IndexEntry],
5546    exclude_standard: bool,
5547    exclude_patterns: &[Vec<u8>],
5548    exclude_per_directory: &[String],
5549) -> Result<Vec<&'a IndexEntry>> {
5550    let ignores = IgnoreMatcher::from_sources(
5551        worktree_root.as_ref(),
5552        exclude_standard,
5553        exclude_patterns,
5554        exclude_per_directory,
5555    )?;
5556    Ok(entries
5557        .iter()
5558        .filter(|entry| ignores.is_ignored(entry.path.as_bytes(), false))
5559        .collect())
5560}
5561
5562fn collect_untracked_directory_paths(
5563    root: &Path,
5564    git_dir: &Path,
5565    dir: &Path,
5566    index: &BTreeMap<Vec<u8>, TrackedEntry>,
5567    ignores: &IgnoreMatcher,
5568    options: &UntrackedPathOptions,
5569    paths: &mut BTreeSet<Vec<u8>>,
5570) -> Result<()> {
5571    if is_same_path(dir, git_dir) {
5572        return Ok(());
5573    }
5574    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
5575    entries.sort_by_key(|entry| entry.file_name());
5576    for entry in entries {
5577        let path = entry.path();
5578        if is_dot_git_entry(&path) {
5579            continue;
5580        }
5581        if is_embedded_git_internals(root, &path) {
5582            continue;
5583        }
5584        if is_same_path(&path, git_dir) {
5585            continue;
5586        }
5587        let metadata = entry.metadata()?;
5588        let relative = path.strip_prefix(root).map_err(|_| {
5589            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
5590        })?;
5591        let git_path = git_path_bytes(relative)?;
5592        if ignores.is_ignored(&git_path, metadata.is_dir()) {
5593            continue;
5594        }
5595        if metadata.is_dir() {
5596            if is_nested_repository_boundary(&path) {
5597                insert_untracked_directory(paths, &git_path);
5598                continue;
5599            }
5600            let has_tracked_below = index_has_path_under(index, &git_path);
5601            let needs_descent = untracked_pathspec_needs_descent(&git_path, &options.pathspecs);
5602            if has_tracked_below {
5603                collect_untracked_directory_paths(
5604                    root, git_dir, &path, index, ignores, options, paths,
5605                )?;
5606            } else if needs_descent {
5607                // A pathspec reaches into this wholly-untracked directory. Git's
5608                // `--directory` still rolls it up to `dir/` when a pathspec selects
5609                // the directory *as a whole* (a directory-prefix that covers it, or
5610                // a glob matching its name). It descends only when a pathspec
5611                // targets something strictly below it that does not select the
5612                // directory itself (e.g. a deeper glob like `dir/*.c` or an exact
5613                // file path).
5614                if untracked_pathspec_selects_directory(&options.pathspecs, &git_path) {
5615                    insert_untracked_directory(paths, &git_path);
5616                    continue;
5617                }
5618                collect_untracked_directory_paths(
5619                    root, git_dir, &path, index, ignores, options, paths,
5620                )?;
5621            } else if options.preserve_ignored_directories
5622                && directory_has_ignored(&path, root, git_dir, ignores)?
5623            {
5624                collect_untracked_directory_paths(
5625                    root, git_dir, &path, index, ignores, options, paths,
5626                )?;
5627            } else if !options.no_empty_directory
5628                || directory_has_file(&path, root, git_dir, ignores)?
5629            {
5630                insert_untracked_directory(paths, &git_path);
5631            }
5632        } else if !index.contains_key(&git_path)
5633            && (metadata.is_file() || metadata.file_type().is_symlink())
5634            && (options.pathspecs.is_empty()
5635                || options
5636                    .pathspecs
5637                    .iter()
5638                    .any(|spec| untracked_pathspec_matches(spec, &git_path)))
5639        {
5640            // A file reached here was found by descending into its parent
5641            // directory, which happens only when that directory is not eligible
5642            // for rollup (it contains tracked content, has ignored entries `-d`
5643            // must preserve, or a pathspec selects something strictly below it).
5644            // Git's `--directory` rollup is a directory-level decision made when
5645            // the whole directory matches; an individually-reached file is always
5646            // listed individually.
5647            paths.insert(git_path);
5648        }
5649    }
5650    Ok(())
5651}
5652
5653fn index_has_path_under(index: &BTreeMap<Vec<u8>, TrackedEntry>, directory: &[u8]) -> bool {
5654    // The index map is sorted, so a single range query finds whether any tracked
5655    // path lives under `directory/` in O(log n) — scanning every key was O(n) per
5656    // untracked directory (quadratic over a deep untracked tree).
5657    let mut prefix = directory.to_vec();
5658    prefix.push(b'/');
5659    index
5660        .range::<[u8], _>((
5661            std::ops::Bound::Included(prefix.as_slice()),
5662            std::ops::Bound::Unbounded,
5663        ))
5664        .next()
5665        .is_some_and(|(path, _)| path.starts_with(&prefix))
5666}
5667
5668/// Derives normal-mode untracked paths (directory rollup) from the worktree map
5669/// produced by the single status walk, avoiding a third filesystem traversal.
5670fn normal_untracked_paths_from_worktree(
5671    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
5672    index: &BTreeMap<Vec<u8>, TrackedEntry>,
5673    ignores: &IgnoreMatcher,
5674) -> Vec<Vec<u8>> {
5675    let mut paths = BTreeSet::new();
5676    for (path, entry) in worktree {
5677        if index.contains_key(path) || ignores.is_ignored(path, false) {
5678            continue;
5679        }
5680        if entry.mode == 0o040000 && entry.oid.is_null() {
5681            insert_untracked_directory(&mut paths, path);
5682            continue;
5683        }
5684        paths.insert(untracked_normal_rollup_path(path, index, ignores));
5685    }
5686    paths.into_iter().collect()
5687}
5688
5689fn status_untracked_paths_from_index(
5690    root: &Path,
5691    git_dir: &Path,
5692    index: &Index,
5693    stat_cache: &IndexStatCache,
5694    ignores: &mut IgnoreMatcher,
5695    untracked_mode: StatusUntrackedMode,
5696    profile: Option<&mut StatusProfileCounters>,
5697) -> Result<Vec<Vec<u8>>> {
5698    if matches!(untracked_mode, StatusUntrackedMode::None) {
5699        return Ok(Vec::new());
5700    }
5701    let mut paths = Vec::new();
5702    let tracked_dirs = stage0_tracked_directories(index);
5703    let tracked = IndexStatusLookup {
5704        stat_cache,
5705        tracked_dirs: &tracked_dirs,
5706    };
5707    let mut context = StatusUntrackedWalk {
5708        git_dir,
5709        tracked: &tracked,
5710        ignores,
5711        untracked_mode,
5712        profile,
5713    };
5714    collect_status_untracked_paths(&mut context, root, &[], &mut paths)?;
5715    paths.sort();
5716    paths.dedup();
5717    Ok(paths)
5718}
5719
5720fn status_untracked_paths_from_borrowed_index(
5721    root: &Path,
5722    git_dir: &Path,
5723    index: &BorrowedIndex<'_>,
5724    ignores: &mut IgnoreMatcher,
5725    untracked_mode: StatusUntrackedMode,
5726    profile: Option<&mut StatusProfileCounters>,
5727) -> Result<Vec<Vec<u8>>> {
5728    if matches!(untracked_mode, StatusUntrackedMode::None) {
5729        return Ok(Vec::new());
5730    }
5731    let mut paths = Vec::new();
5732    let tracked = BorrowedIndexLookup::new(&index.entries);
5733    let mut context = StatusUntrackedWalk {
5734        git_dir,
5735        tracked: &tracked,
5736        ignores,
5737        untracked_mode,
5738        profile,
5739    };
5740    collect_status_untracked_paths(&mut context, root, &[], &mut paths)?;
5741    paths.sort();
5742    paths.dedup();
5743    Ok(paths)
5744}
5745
5746fn stream_status_untracked_paths_from_borrowed_index<F>(
5747    root: &Path,
5748    git_dir: &Path,
5749    index: &BorrowedIndex<'_>,
5750    ignores: &mut IgnoreMatcher,
5751    untracked_mode: StatusUntrackedMode,
5752    profile: Option<&mut StatusProfileCounters>,
5753    mut emit: F,
5754) -> Result<()>
5755where
5756    F: for<'a> FnMut(&'a [u8]) -> Result<StreamControl>,
5757{
5758    if matches!(untracked_mode, StatusUntrackedMode::None) {
5759        return Ok(());
5760    }
5761    let tracked = BorrowedIndexLookup::new(&index.entries);
5762    let mut context = StatusUntrackedWalk {
5763        git_dir,
5764        tracked: &tracked,
5765        ignores,
5766        untracked_mode,
5767        profile,
5768    };
5769    stream_status_untracked_paths(&mut context, root, &[], &mut emit).map(|_| ())
5770}
5771
5772fn status_untracked_count_from_borrowed_index(
5773    root: &Path,
5774    git_dir: &Path,
5775    index: &BorrowedIndex<'_>,
5776    ignores: &mut IgnoreMatcher,
5777    untracked_mode: StatusUntrackedMode,
5778    profile: Option<&mut StatusProfileCounters>,
5779) -> Result<usize> {
5780    if matches!(untracked_mode, StatusUntrackedMode::None) {
5781        return Ok(0);
5782    }
5783    let tracked = BorrowedIndexLookup::new(&index.entries);
5784    let mut context = StatusUntrackedWalk {
5785        git_dir,
5786        tracked: &tracked,
5787        ignores,
5788        untracked_mode,
5789        profile,
5790    };
5791    let mut count = 0usize;
5792    count_status_untracked_paths(&mut context, root, &[], &mut count)?;
5793    Ok(count)
5794}
5795
/// Answers "is this path tracked?" questions for the untracked-file walkers.
///
/// Paths are repository-relative byte strings using `/` as the separator.
trait StatusTrackedLookup {
    /// Returns how `git_path` itself is tracked, or `None` when there is no
    /// entry for exactly that path.
    fn tracked_kind(&self, git_path: &[u8]) -> Option<StatusTrackedKind>;
    /// Returns how the directory `git_path` relates to tracked entries
    /// beneath it, or `None` when the lookup knows of none.
    fn tracked_directory_kind(&self, git_path: &[u8]) -> Option<StatusTrackedDirectoryKind>;
}
5800
5801#[derive(Debug, Clone, Copy, PartialEq, Eq)]
5802enum StatusTrackedKind {
5803    File,
5804    Gitlink,
5805    SkipWorktree,
5806}
5807
5808impl StatusTrackedKind {
5809    fn from_mode_and_skip(mode: u32, skip_worktree: bool) -> Self {
5810        if sley_index::is_gitlink(mode) {
5811            Self::Gitlink
5812        } else if skip_worktree {
5813            Self::SkipWorktree
5814        } else {
5815            Self::File
5816        }
5817    }
5818}
5819
/// How a directory relates to the tracked entries beneath it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StatusTrackedDirectoryKind {
    /// The directory is known to contain tracked entries. `BorrowedIndexLookup`
    /// reports this when at least one stage-0 entry under the directory is not
    /// flagged skip-worktree; `IndexStatusLookup` reports it for every
    /// directory in its tracked-directory set.
    ContainsTracked,
    /// Stage-0 entries exist under the directory, but every one of them is
    /// flagged skip-worktree.
    TrackedExcluded,
}
5825
5826struct IndexStatusLookup<'a> {
5827    stat_cache: &'a IndexStatCache,
5828    tracked_dirs: &'a HashSet<&'a [u8]>,
5829}
5830
5831impl StatusTrackedLookup for IndexStatusLookup<'_> {
5832    fn tracked_kind(&self, git_path: &[u8]) -> Option<StatusTrackedKind> {
5833        self.stat_cache.entries.get(git_path).map(|entry| {
5834            StatusTrackedKind::from_mode_and_skip(entry.mode, entry.is_skip_worktree())
5835        })
5836    }
5837
5838    fn tracked_directory_kind(&self, git_path: &[u8]) -> Option<StatusTrackedDirectoryKind> {
5839        self.tracked_dirs
5840            .contains(git_path)
5841            .then_some(StatusTrackedDirectoryKind::ContainsTracked)
5842    }
5843}
5844
5845struct BorrowedIndexLookup<'a> {
5846    entries: &'a [IndexEntryRef<'a>],
5847    tracked: HashMap<&'a [u8], StatusTrackedKind>,
5848}
5849
5850impl<'a> BorrowedIndexLookup<'a> {
5851    fn new(entries: &'a [IndexEntryRef<'a>]) -> Self {
5852        let mut tracked = HashMap::with_capacity(entries.len());
5853        for entry in entries {
5854            if entry.stage() != Stage::Normal {
5855                continue;
5856            }
5857            let path = entry.path;
5858            tracked.insert(
5859                path,
5860                StatusTrackedKind::from_mode_and_skip(entry.mode, entry.is_skip_worktree()),
5861            );
5862        }
5863        Self { entries, tracked }
5864    }
5865}
5866
5867impl StatusTrackedLookup for BorrowedIndexLookup<'_> {
5868    fn tracked_kind(&self, git_path: &[u8]) -> Option<StatusTrackedKind> {
5869        self.tracked.get(git_path).copied()
5870    }
5871
5872    fn tracked_directory_kind(&self, git_path: &[u8]) -> Option<StatusTrackedDirectoryKind> {
5873        let mut prefix = git_path.to_vec();
5874        prefix.push(b'/');
5875        let start = self
5876            .entries
5877            .partition_point(|entry| entry.path < prefix.as_slice());
5878        let mut saw_normal = false;
5879        for entry in self.entries[start..]
5880            .iter()
5881            .take_while(|entry| entry.path.starts_with(&prefix))
5882        {
5883            if entry.stage() != Stage::Normal {
5884                continue;
5885            }
5886            saw_normal = true;
5887            if !entry.is_skip_worktree() {
5888                return Some(StatusTrackedDirectoryKind::ContainsTracked);
5889            }
5890        }
5891        saw_normal.then_some(StatusTrackedDirectoryKind::TrackedExcluded)
5892    }
5893}
5894
/// Shared state threaded through the recursive untracked-path walkers.
struct StatusUntrackedWalk<'a, T: StatusTrackedLookup + ?Sized> {
    /// Location of the git directory; the walkers never descend into it.
    git_dir: &'a Path,
    /// Lookup used to decide whether a path or directory is tracked.
    tracked: &'a T,
    /// Ignore matcher; each visited directory may add patterns, which the
    /// walkers truncate away again before returning.
    ignores: &'a mut IgnoreMatcher,
    /// Selects how untracked directories are reported or recursed into.
    untracked_mode: StatusUntrackedMode,
    /// Optional profiling counters updated during the walk.
    profile: Option<&'a mut StatusProfileCounters>,
}
5902
5903fn collect_status_untracked_paths<T: StatusTrackedLookup + ?Sized>(
5904    context: &mut StatusUntrackedWalk<'_, T>,
5905    dir: &Path,
5906    dir_git_path: &[u8],
5907    paths: &mut Vec<Vec<u8>>,
5908) -> Result<()> {
5909    if is_same_path(dir, context.git_dir) {
5910        return Ok(());
5911    }
5912    let ignore_len = context.ignores.patterns.len();
5913    let entries = read_dir_entries_with_ignore_patterns(
5914        dir,
5915        dir_git_path,
5916        context.ignores,
5917        context.profile.as_deref_mut(),
5918    )?;
5919    let result = (|| -> Result<()> {
5920        let mut git_path = dir_git_path.to_vec();
5921        for entry in entries {
5922            let file_name = entry.file_name();
5923            if file_name == std::ffi::OsStr::new(".git") {
5924                continue;
5925            }
5926            let path_len = git_path_push_component(&mut git_path, &file_name);
5927            let entry_result = (|| -> Result<()> {
5928                if let Some(tracked_kind) = context.tracked.tracked_kind(&git_path) {
5929                    if let Some(profile) = context.profile.as_deref_mut() {
5930                        profile.tracked_exact_hits += 1;
5931                    }
5932                    if !matches!(context.untracked_mode, StatusUntrackedMode::All)
5933                        || tracked_kind == StatusTrackedKind::Gitlink
5934                    {
5935                        return Ok(());
5936                    }
5937                    if let Some(profile) = context.profile.as_deref_mut() {
5938                        profile.file_type_calls += 1;
5939                    }
5940                    let file_type = entry.file_type()?;
5941                    if file_type.is_dir() {
5942                        let path = entry.path();
5943                        if !is_same_path(&path, context.git_dir) {
5944                            collect_status_untracked_paths(context, &path, &git_path, paths)?;
5945                        }
5946                    }
5947                    return Ok(());
5948                }
5949                if let Some(profile) = context.profile.as_deref_mut() {
5950                    profile.file_type_calls += 1;
5951                }
5952                let file_type = entry.file_type()?;
5953                let is_dir = file_type.is_dir();
5954                if file_type.is_file() || file_type.is_symlink() {
5955                    if !context.ignores.is_ignored_profiled(
5956                        &git_path,
5957                        false,
5958                        context.profile.as_deref_mut(),
5959                    ) {
5960                        paths.push(git_path.clone());
5961                    }
5962                    return Ok(());
5963                } else if is_dir {
5964                    if context.ignores.is_ignored_profiled(
5965                        &git_path,
5966                        true,
5967                        context.profile.as_deref_mut(),
5968                    ) {
5969                        return Ok(());
5970                    }
5971                    let path = entry.path();
5972                    if is_same_path(&path, context.git_dir) {
5973                        return Ok(());
5974                    }
5975                    let tracked_directory = context.tracked.tracked_directory_kind(&git_path);
5976                    if let Some(directory_kind) = tracked_directory {
5977                        if let Some(profile) = context.profile.as_deref_mut() {
5978                            profile.tracked_dir_prefix_hits += 1;
5979                            if directory_kind == StatusTrackedDirectoryKind::TrackedExcluded {
5980                                profile.tracked_skip_worktree_prefix_hits += 1;
5981                            }
5982                        }
5983                    }
5984                    match context.untracked_mode {
5985                        StatusUntrackedMode::All => {
5986                            if tracked_directory.is_none() && is_nested_repository_boundary(&path) {
5987                                push_untracked_directory(paths, &git_path);
5988                            } else {
5989                                collect_status_untracked_paths(context, &path, &git_path, paths)?;
5990                            }
5991                        }
5992                        StatusUntrackedMode::Normal => {
5993                            if tracked_directory.is_some() {
5994                                collect_status_untracked_paths(context, &path, &git_path, paths)?;
5995                            } else if is_nested_repository_boundary(&path) {
5996                                push_untracked_directory(paths, &git_path);
5997                            } else if status_untracked_directory_has_file(
5998                                context, &path, &git_path,
5999                            )? {
6000                                push_untracked_directory(paths, &git_path);
6001                            }
6002                        }
6003                        StatusUntrackedMode::None => {}
6004                    }
6005                }
6006                Ok(())
6007            })();
6008            git_path.truncate(path_len);
6009            entry_result?;
6010        }
6011        Ok(())
6012    })();
6013    context.ignores.truncate(ignore_len);
6014    result
6015}
6016
6017fn stream_status_untracked_paths<T, F>(
6018    context: &mut StatusUntrackedWalk<'_, T>,
6019    dir: &Path,
6020    dir_git_path: &[u8],
6021    emit: &mut F,
6022) -> Result<StreamControl>
6023where
6024    T: StatusTrackedLookup + ?Sized,
6025    F: for<'a> FnMut(&'a [u8]) -> Result<StreamControl>,
6026{
6027    if is_same_path(dir, context.git_dir) {
6028        return Ok(StreamControl::Continue);
6029    }
6030    let ignore_len = context.ignores.patterns.len();
6031    let mut entries = read_dir_entries_with_ignore_patterns(
6032        dir,
6033        dir_git_path,
6034        context.ignores,
6035        context.profile.as_deref_mut(),
6036    )?;
6037    entries.sort_by_key(|entry| entry.file_name());
6038    let result = (|| -> Result<StreamControl> {
6039        let mut git_path = dir_git_path.to_vec();
6040        for entry in entries {
6041            let file_name = entry.file_name();
6042            if file_name == std::ffi::OsStr::new(".git") {
6043                continue;
6044            }
6045            let path_len = git_path_push_component(&mut git_path, &file_name);
6046            let entry_result = (|| -> Result<StreamControl> {
6047                if let Some(tracked_kind) = context.tracked.tracked_kind(&git_path) {
6048                    if let Some(profile) = context.profile.as_deref_mut() {
6049                        profile.tracked_exact_hits += 1;
6050                    }
6051                    if !matches!(context.untracked_mode, StatusUntrackedMode::All)
6052                        || tracked_kind == StatusTrackedKind::Gitlink
6053                    {
6054                        return Ok(StreamControl::Continue);
6055                    }
6056                    if let Some(profile) = context.profile.as_deref_mut() {
6057                        profile.file_type_calls += 1;
6058                    }
6059                    let file_type = entry.file_type()?;
6060                    if file_type.is_dir() {
6061                        let path = entry.path();
6062                        if !is_same_path(&path, context.git_dir) {
6063                            if stream_status_untracked_paths(context, &path, &git_path, emit)?
6064                                .is_stop()
6065                            {
6066                                return Ok(StreamControl::Stop);
6067                            }
6068                        }
6069                    }
6070                    return Ok(StreamControl::Continue);
6071                }
6072                if let Some(profile) = context.profile.as_deref_mut() {
6073                    profile.file_type_calls += 1;
6074                }
6075                let file_type = entry.file_type()?;
6076                let is_dir = file_type.is_dir();
6077                if file_type.is_file() || file_type.is_symlink() {
6078                    if !context.ignores.is_ignored_profiled(
6079                        &git_path,
6080                        false,
6081                        context.profile.as_deref_mut(),
6082                    ) {
6083                        if emit_status_untracked_path(context, &git_path, emit)?.is_stop() {
6084                            return Ok(StreamControl::Stop);
6085                        }
6086                    }
6087                    return Ok(StreamControl::Continue);
6088                } else if is_dir {
6089                    if context.ignores.is_ignored_profiled(
6090                        &git_path,
6091                        true,
6092                        context.profile.as_deref_mut(),
6093                    ) {
6094                        return Ok(StreamControl::Continue);
6095                    }
6096                    let path = entry.path();
6097                    if is_same_path(&path, context.git_dir) {
6098                        return Ok(StreamControl::Continue);
6099                    }
6100                    let tracked_directory = context.tracked.tracked_directory_kind(&git_path);
6101                    if let Some(directory_kind) = tracked_directory {
6102                        if let Some(profile) = context.profile.as_deref_mut() {
6103                            profile.tracked_dir_prefix_hits += 1;
6104                            if directory_kind == StatusTrackedDirectoryKind::TrackedExcluded {
6105                                profile.tracked_skip_worktree_prefix_hits += 1;
6106                            }
6107                        }
6108                    }
6109                    match context.untracked_mode {
6110                        StatusUntrackedMode::All => {
6111                            if tracked_directory.is_none() && is_nested_repository_boundary(&path) {
6112                                let directory_len = git_path.len();
6113                                if git_path.last() != Some(&b'/') {
6114                                    git_path.push(b'/');
6115                                }
6116                                let control =
6117                                    emit_status_untracked_path(context, &git_path, emit)?;
6118                                git_path.truncate(directory_len);
6119                                if control.is_stop() {
6120                                    return Ok(StreamControl::Stop);
6121                                }
6122                            } else {
6123                                if stream_status_untracked_paths(context, &path, &git_path, emit)?
6124                                    .is_stop()
6125                                {
6126                                    return Ok(StreamControl::Stop);
6127                                }
6128                            }
6129                        }
6130                        StatusUntrackedMode::Normal => {
6131                            if tracked_directory.is_some() {
6132                                if stream_status_untracked_paths(context, &path, &git_path, emit)?
6133                                    .is_stop()
6134                                {
6135                                    return Ok(StreamControl::Stop);
6136                                }
6137                            } else if is_nested_repository_boundary(&path)
6138                                || status_untracked_directory_has_file(context, &path, &git_path)?
6139                            {
6140                                let directory_len = git_path.len();
6141                                if git_path.last() != Some(&b'/') {
6142                                    git_path.push(b'/');
6143                                }
6144                                let control =
6145                                    emit_status_untracked_path(context, &git_path, emit)?;
6146                                git_path.truncate(directory_len);
6147                                if control.is_stop() {
6148                                    return Ok(StreamControl::Stop);
6149                                }
6150                            }
6151                        }
6152                        StatusUntrackedMode::None => {}
6153                    }
6154                }
6155                Ok(StreamControl::Continue)
6156            })();
6157            git_path.truncate(path_len);
6158            if entry_result?.is_stop() {
6159                return Ok(StreamControl::Stop);
6160            }
6161        }
6162        Ok(StreamControl::Continue)
6163    })();
6164    context.ignores.truncate(ignore_len);
6165    result
6166}
6167
6168fn count_status_untracked_paths<T: StatusTrackedLookup + ?Sized>(
6169    context: &mut StatusUntrackedWalk<'_, T>,
6170    dir: &Path,
6171    dir_git_path: &[u8],
6172    count: &mut usize,
6173) -> Result<()> {
6174    if is_same_path(dir, context.git_dir) {
6175        return Ok(());
6176    }
6177    let ignore_len = context.ignores.patterns.len();
6178    let entries = read_dir_entries_with_ignore_patterns(
6179        dir,
6180        dir_git_path,
6181        context.ignores,
6182        context.profile.as_deref_mut(),
6183    )?;
6184    let result = (|| -> Result<()> {
6185        let mut git_path = dir_git_path.to_vec();
6186        for entry in entries {
6187            let file_name = entry.file_name();
6188            if file_name == std::ffi::OsStr::new(".git") {
6189                continue;
6190            }
6191            let path_len = git_path_push_component(&mut git_path, &file_name);
6192            let entry_result = (|| -> Result<()> {
6193                if let Some(tracked_kind) = context.tracked.tracked_kind(&git_path) {
6194                    if let Some(profile) = context.profile.as_deref_mut() {
6195                        profile.tracked_exact_hits += 1;
6196                    }
6197                    if !matches!(context.untracked_mode, StatusUntrackedMode::All)
6198                        || tracked_kind == StatusTrackedKind::Gitlink
6199                    {
6200                        return Ok(());
6201                    }
6202                    if let Some(profile) = context.profile.as_deref_mut() {
6203                        profile.file_type_calls += 1;
6204                    }
6205                    let file_type = entry.file_type()?;
6206                    if file_type.is_dir() {
6207                        let path = entry.path();
6208                        if !is_same_path(&path, context.git_dir) {
6209                            count_status_untracked_paths(context, &path, &git_path, count)?;
6210                        }
6211                    }
6212                    return Ok(());
6213                }
6214                if let Some(profile) = context.profile.as_deref_mut() {
6215                    profile.file_type_calls += 1;
6216                }
6217                let file_type = entry.file_type()?;
6218                let is_dir = file_type.is_dir();
6219                if file_type.is_file() || file_type.is_symlink() {
6220                    if !context.ignores.is_ignored_profiled(
6221                        &git_path,
6222                        false,
6223                        context.profile.as_deref_mut(),
6224                    ) {
6225                        *count += 1;
6226                    }
6227                    return Ok(());
6228                } else if is_dir {
6229                    if context.ignores.is_ignored_profiled(
6230                        &git_path,
6231                        true,
6232                        context.profile.as_deref_mut(),
6233                    ) {
6234                        return Ok(());
6235                    }
6236                    let path = entry.path();
6237                    if is_same_path(&path, context.git_dir) {
6238                        return Ok(());
6239                    }
6240                    let tracked_directory = context.tracked.tracked_directory_kind(&git_path);
6241                    if let Some(directory_kind) = tracked_directory {
6242                        if let Some(profile) = context.profile.as_deref_mut() {
6243                            profile.tracked_dir_prefix_hits += 1;
6244                            if directory_kind == StatusTrackedDirectoryKind::TrackedExcluded {
6245                                profile.tracked_skip_worktree_prefix_hits += 1;
6246                            }
6247                        }
6248                    }
6249                    match context.untracked_mode {
6250                        StatusUntrackedMode::All => {
6251                            if tracked_directory.is_none() && is_nested_repository_boundary(&path) {
6252                                *count += 1;
6253                            } else {
6254                                count_status_untracked_paths(context, &path, &git_path, count)?;
6255                            }
6256                        }
6257                        StatusUntrackedMode::Normal => {
6258                            if tracked_directory.is_some() {
6259                                count_status_untracked_paths(context, &path, &git_path, count)?;
6260                            } else if is_nested_repository_boundary(&path)
6261                                || status_untracked_directory_has_file(context, &path, &git_path)?
6262                            {
6263                                *count += 1;
6264                            }
6265                        }
6266                        StatusUntrackedMode::None => {}
6267                    }
6268                }
6269                Ok(())
6270            })();
6271            git_path.truncate(path_len);
6272            entry_result?;
6273        }
6274        Ok(())
6275    })();
6276    context.ignores.truncate(ignore_len);
6277    result
6278}
6279
6280fn emit_status_untracked_path<T, F>(
6281    context: &mut StatusUntrackedWalk<'_, T>,
6282    path: &[u8],
6283    emit: &mut F,
6284) -> Result<StreamControl>
6285where
6286    T: StatusTrackedLookup + ?Sized,
6287    F: for<'a> FnMut(&'a [u8]) -> Result<StreamControl>,
6288{
6289    if let Some(profile) = context.profile.as_deref_mut() {
6290        profile.untracked_rows += 1;
6291    }
6292    emit(path)
6293}
6294
6295fn stage0_tracked_directories(index: &Index) -> HashSet<&[u8]> {
6296    let mut directories = HashSet::new();
6297    for entry in index
6298        .entries
6299        .iter()
6300        .filter(|entry| entry.stage() == Stage::Normal)
6301    {
6302        let path = entry.path.as_bytes();
6303        for (idx, byte) in path.iter().enumerate() {
6304            if *byte == b'/' && idx > 0 {
6305                directories.insert(&path[..idx]);
6306            }
6307        }
6308    }
6309    directories
6310}
6311
6312fn status_untracked_directory_has_file<T: StatusTrackedLookup + ?Sized>(
6313    context: &mut StatusUntrackedWalk<'_, T>,
6314    dir: &Path,
6315    dir_git_path: &[u8],
6316) -> Result<bool> {
6317    if is_same_path(dir, context.git_dir) {
6318        return Ok(false);
6319    }
6320    let ignore_len = context.ignores.patterns.len();
6321    let entries = read_dir_entries_with_ignore_patterns(
6322        dir,
6323        dir_git_path,
6324        context.ignores,
6325        context.profile.as_deref_mut(),
6326    )?;
6327    let result = (|| -> Result<bool> {
6328        let mut git_path = dir_git_path.to_vec();
6329        for entry in entries {
6330            let file_name = entry.file_name();
6331            if file_name == std::ffi::OsStr::new(".git") {
6332                continue;
6333            }
6334            let path_len = git_path_push_component(&mut git_path, &file_name);
6335            let entry_result = (|| -> Result<Option<bool>> {
6336                if let Some(profile) = context.profile.as_deref_mut() {
6337                    profile.file_type_calls += 1;
6338                }
6339                let file_type = entry.file_type()?;
6340                let is_dir = file_type.is_dir();
6341                if context.ignores.is_ignored_profiled(
6342                    &git_path,
6343                    is_dir,
6344                    context.profile.as_deref_mut(),
6345                ) {
6346                    return Ok(None);
6347                }
6348                if file_type.is_file() || file_type.is_symlink() {
6349                    return Ok(Some(true));
6350                }
6351                if is_dir {
6352                    let path = entry.path();
6353                    if is_same_path(&path, context.git_dir) {
6354                        return Ok(None);
6355                    }
6356                    if is_nested_repository_boundary(&path) {
6357                        return Ok(Some(true));
6358                    }
6359                    if status_untracked_directory_has_file(context, &path, &git_path)? {
6360                        return Ok(Some(true));
6361                    }
6362                }
6363                Ok(None)
6364            })();
6365            git_path.truncate(path_len);
6366            if let Some(has_file) = entry_result? {
6367                return Ok(has_file);
6368            }
6369        }
6370        Ok(false)
6371    })();
6372    context.ignores.truncate(ignore_len);
6373    result
6374}
6375
6376fn read_dir_entries_with_ignore_patterns(
6377    dir: &Path,
6378    base: &[u8],
6379    matcher: &mut IgnoreMatcher,
6380    mut profile: Option<&mut StatusProfileCounters>,
6381) -> Result<Vec<fs::DirEntry>> {
6382    let mut entries = Vec::new();
6383    let mut ignore_path = None;
6384    if let Some(profile) = profile.as_deref_mut() {
6385        profile.read_dir_calls += 1;
6386    }
6387    for entry in fs::read_dir(dir)? {
6388        let entry = entry?;
6389        if let Some(profile) = profile.as_deref_mut() {
6390            profile.dir_entries_seen += 1;
6391        }
6392        if entry.file_name() == std::ffi::OsStr::new(".gitignore") {
6393            ignore_path = Some(entry.path());
6394        }
6395        entries.push(entry);
6396    }
6397    if let Some(path) = ignore_path {
6398        let mut source = base.to_vec();
6399        if !source.is_empty() {
6400            source.push(b'/');
6401        }
6402        source.extend_from_slice(b".gitignore");
6403        read_ignore_patterns_into_matcher(path, matcher, base, &source);
6404    }
6405    Ok(entries)
6406}
6407
/// Appends `git_path`, in directory form (see [`untracked_directory_path`]),
/// to `paths`.
fn push_untracked_directory(paths: &mut Vec<Vec<u8>>, git_path: &[u8]) {
    let directory = untracked_directory_path(git_path);
    paths.push(directory);
}

/// Returns an owned copy of `git_path` that is guaranteed to end in `/`.
///
/// A path that already ends in `/` is copied unchanged; an empty path
/// becomes `/`.
fn untracked_directory_path(git_path: &[u8]) -> Vec<u8> {
    let needs_slash = !git_path.ends_with(b"/");
    let mut directory = Vec::with_capacity(git_path.len() + usize::from(needs_slash));
    directory.extend_from_slice(git_path);
    if needs_slash {
        directory.push(b'/');
    }
    directory
}
6419
6420fn untracked_normal_rollup_path(
6421    file_path: &[u8],
6422    index: &BTreeMap<Vec<u8>, TrackedEntry>,
6423    ignores: &IgnoreMatcher,
6424) -> Vec<u8> {
6425    let segments = file_path
6426        .split(|byte| *byte == b'/')
6427        .filter(|segment| !segment.is_empty())
6428        .collect::<Vec<_>>();
6429    if segments.len() <= 1 {
6430        return file_path.to_vec();
6431    }
6432    let mut prefix = Vec::new();
6433    for segment in &segments[..segments.len() - 1] {
6434        if !prefix.is_empty() {
6435            prefix.push(b'/');
6436        }
6437        prefix.extend_from_slice(segment);
6438        if index_has_path_under(index, &prefix) {
6439            break;
6440        }
6441        if !ignores.is_ignored(&prefix, true) {
6442            let mut directory = prefix;
6443            directory.push(b'/');
6444            return directory;
6445        }
6446    }
6447    file_path.to_vec()
6448}
6449
6450fn ignored_traditional_rollup_path(
6451    root: &Path,
6452    git_dir: &Path,
6453    path: &[u8],
6454    index: &BTreeMap<Vec<u8>, TrackedEntry>,
6455    ignores: &IgnoreMatcher,
6456) -> Result<Vec<u8>> {
6457    let rolled = untracked_normal_rollup_path(path, index, ignores);
6458    if rolled == path {
6459        return Ok(rolled);
6460    }
6461    let Some(directory_path) = rolled.strip_suffix(b"/") else {
6462        return Ok(rolled);
6463    };
6464    if ignores.is_ignored(directory_path, true) {
6465        return Ok(rolled);
6466    }
6467    let mut absolute = PathBuf::new();
6468    set_worktree_path_from_repo_path(root, directory_path, &mut absolute)?;
6469    if directory_has_file(&absolute, root, git_dir, ignores)? {
6470        return Ok(path.to_vec());
6471    }
6472    Ok(rolled)
6473}
6474
6475fn directory_has_file(
6476    dir: &Path,
6477    root: &Path,
6478    git_dir: &Path,
6479    ignores: &IgnoreMatcher,
6480) -> Result<bool> {
6481    if is_same_path(dir, git_dir) {
6482        return Ok(false);
6483    }
6484    for entry in fs::read_dir(dir)? {
6485        let entry = entry?;
6486        let path = entry.path();
6487        if is_dot_git_entry(&path) {
6488            continue;
6489        }
6490        if is_embedded_git_internals(root, &path) {
6491            continue;
6492        }
6493        if is_same_path(&path, git_dir) {
6494            continue;
6495        }
6496        let metadata = entry.metadata()?;
6497        let relative = path.strip_prefix(root).map_err(|_| {
6498            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
6499        })?;
6500        let git_path = git_path_bytes(relative)?;
6501        if ignores.is_ignored(&git_path, metadata.is_dir()) {
6502            continue;
6503        }
6504        if metadata.is_file() || metadata.file_type().is_symlink() {
6505            return Ok(true);
6506        }
6507        if metadata.is_dir() {
6508            if is_nested_repository_boundary(&path) {
6509                continue;
6510            }
6511            if directory_has_file(&path, root, git_dir, ignores)? {
6512                return Ok(true);
6513            }
6514        }
6515    }
6516    Ok(false)
6517}
6518
6519fn directory_has_ignored(
6520    dir: &Path,
6521    root: &Path,
6522    git_dir: &Path,
6523    ignores: &IgnoreMatcher,
6524) -> Result<bool> {
6525    if is_same_path(dir, git_dir) {
6526        return Ok(false);
6527    }
6528    for entry in fs::read_dir(dir)? {
6529        let entry = entry?;
6530        let path = entry.path();
6531        if is_dot_git_entry(&path) {
6532            continue;
6533        }
6534        if is_same_path(&path, git_dir) {
6535            continue;
6536        }
6537        let metadata = entry.metadata()?;
6538        let relative = path.strip_prefix(root).map_err(|_| {
6539            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
6540        })?;
6541        let git_path = git_path_bytes(relative)?;
6542        if ignores.is_ignored(&git_path, metadata.is_dir()) {
6543            return Ok(true);
6544        }
6545        if metadata.is_dir() && directory_has_ignored(&path, root, git_dir, ignores)? {
6546            return Ok(true);
6547        }
6548    }
6549    Ok(false)
6550}
6551
6552fn ignored_untracked_paths(
6553    root: &Path,
6554    git_dir: &Path,
6555    index: &BTreeMap<Vec<u8>, TrackedEntry>,
6556    ignores: &IgnoreMatcher,
6557    directory: bool,
6558) -> Result<Vec<Vec<u8>>> {
6559    let mut paths = BTreeSet::new();
6560    let context = IgnoredUntrackedContext {
6561        root,
6562        git_dir,
6563        index,
6564        ignores,
6565        directory,
6566    };
6567    collect_ignored_untracked_paths(&context, root, false, &mut paths)?;
6568    Ok(paths.into_iter().collect())
6569}
6570
6571fn ignored_traditional_path_is_empty_directory(root: &Path, path: &[u8]) -> Result<bool> {
6572    let Some(path) = path.strip_suffix(b"/") else {
6573        return Ok(false);
6574    };
6575    let mut absolute = PathBuf::new();
6576    set_worktree_path_from_repo_path(root, path, &mut absolute)?;
6577    match fs::read_dir(&absolute) {
6578        Ok(mut entries) => Ok(entries.next().is_none()),
6579        Err(err) if err.kind() == std::io::ErrorKind::NotADirectory => Ok(false),
6580        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
6581        Err(err) => Err(err.into()),
6582    }
6583}
6584
/// Read-only inputs shared by every level of the
/// [`collect_ignored_untracked_paths`] walk.
struct IgnoredUntrackedContext<'a> {
    /// Worktree root; entry paths are made relative to it before matching.
    root: &'a Path,
    /// Git directory; it is never descended into or reported.
    git_dir: &'a Path,
    /// Tracked entries keyed by repository-relative path.
    index: &'a BTreeMap<Vec<u8>, TrackedEntry>,
    /// Matcher used to decide whether a path is ignored.
    ignores: &'a IgnoreMatcher,
    /// When `true`, an ignored directory with no tracked paths beneath it is
    /// reported as a single `dir/` entry instead of being walked.
    directory: bool,
}
6592
6593fn collect_ignored_untracked_paths(
6594    context: &IgnoredUntrackedContext<'_>,
6595    dir: &Path,
6596    parent_ignored: bool,
6597    paths: &mut BTreeSet<Vec<u8>>,
6598) -> Result<()> {
6599    if is_same_path(dir, context.git_dir) {
6600        return Ok(());
6601    }
6602    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
6603    entries.sort_by_key(|entry| entry.file_name());
6604    for entry in entries {
6605        let path = entry.path();
6606        if is_dot_git_entry(&path) {
6607            continue;
6608        }
6609        if is_same_path(&path, context.git_dir) {
6610            continue;
6611        }
6612        let metadata = entry.metadata()?;
6613        let relative = path.strip_prefix(context.root).map_err(|_| {
6614            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
6615        })?;
6616        let git_path = git_path_bytes(relative)?;
6617        if metadata.is_dir() {
6618            let ignored = parent_ignored || context.ignores.is_ignored(&git_path, true);
6619            if ignored && !index_has_path_under(context.index, &git_path) {
6620                if context.directory {
6621                    let mut directory_path = git_path;
6622                    directory_path.push(b'/');
6623                    paths.insert(directory_path);
6624                } else {
6625                    collect_ignored_untracked_paths(context, &path, true, paths)?;
6626                }
6627            } else {
6628                if is_nested_repository_boundary(&path) {
6629                    continue;
6630                }
6631                collect_ignored_untracked_paths(context, &path, ignored, paths)?;
6632            }
6633        } else if !context.index.contains_key(&git_path)
6634            && (metadata.is_file() || metadata.file_type().is_symlink())
6635            && (parent_ignored || context.ignores.is_ignored(&git_path, false))
6636        {
6637            paths.insert(git_path);
6638        }
6639    }
6640    Ok(())
6641}
6642
/// An ordered list of ignore patterns plus an index over them.
///
/// A pattern's position in `patterns` is its identity: lookups return the
/// highest matching position, so later patterns override earlier ones.
#[derive(Debug, Default)]
struct IgnoreMatcher {
    /// Every pattern, in the order it was added.
    patterns: Vec<IgnorePattern>,
    /// Positions into `patterns`, grouped so a lookup only tests plausible
    /// candidates.
    buckets: IgnorePatternBuckets,
}
6648
/// Candidate index for [`IgnoreMatcher`]: every value is a list of positions
/// into `IgnoreMatcher::patterns`, appended in increasing order.
///
/// Buckets without `directory` in their name are probed with the basename of
/// the path being tested; the `directory` variants are probed with each
/// component produced by [`visit_directory_match_components`].
#[derive(Debug, Default)]
struct IgnorePatternBuckets {
    /// Keyed by the full pattern text.
    literal_basename: HashMap<Vec<u8>, Vec<usize>>,
    /// Keyed by the full pattern text; probed per directory component.
    directory_literal_basename: HashMap<Vec<u8>, Vec<usize>>,
    /// Keyed by the final `/`-separated component of the pattern.
    literal_path_basename: HashMap<Vec<u8>, Vec<usize>>,
    /// Keyed by the pattern's final component; probed per directory component.
    directory_literal_path_basename: HashMap<Vec<u8>, Vec<usize>>,
    /// Keyed by the final component of the pattern once a leading `**/` is
    /// removed.
    path_suffix_basename: HashMap<Vec<u8>, Vec<usize>>,
    /// Same key as `path_suffix_basename`; probed per directory component.
    directory_path_suffix_basename: HashMap<Vec<u8>, Vec<usize>>,
    /// Keyed by the pattern's final component.
    glob_path_literal_basename: HashMap<Vec<u8>, Vec<usize>>,
    /// Keyed by the pattern's final component; probed per directory component.
    glob_directory_literal_basename: HashMap<Vec<u8>, Vec<usize>>,
    /// Unkeyed; candidates are pre-filtered by a suffix test on the pattern's
    /// final component.
    glob_path_suffix_basename: Vec<usize>,
    /// Unkeyed; candidates are pre-filtered by a prefix test on the pattern's
    /// final component.
    glob_path_prefix_basename: Vec<usize>,
    /// Directory-component counterpart of `glob_path_suffix_basename`.
    glob_directory_suffix_basename: Vec<usize>,
    /// Directory-component counterpart of `glob_path_prefix_basename`.
    glob_directory_prefix_basename: Vec<usize>,
    /// Keyed by the last byte of the pattern text.
    suffix_basename: HashMap<u8, Vec<usize>>,
    /// Keyed by the first byte of the pattern text.
    prefix_basename: HashMap<u8, Vec<usize>>,
    /// Patterns with no usable key; tested on every lookup.
    other: Vec<usize>,
}
6667
6668impl IgnorePatternBuckets {
6669    fn push(&mut self, index: usize, pattern: &IgnorePattern) {
6670        match pattern.bucket_kind() {
6671            IgnoreBucketKind::LiteralBasename => self
6672                .literal_basename
6673                .entry(pattern.pattern.clone())
6674                .or_default()
6675                .push(index),
6676            IgnoreBucketKind::DirectoryLiteralBasename => self
6677                .directory_literal_basename
6678                .entry(pattern.pattern.clone())
6679                .or_default()
6680                .push(index),
6681            IgnoreBucketKind::LiteralPathBasename => self
6682                .literal_path_basename
6683                .entry(path_basename(&pattern.pattern).to_vec())
6684                .or_default()
6685                .push(index),
6686            IgnoreBucketKind::DirectoryLiteralPathBasename => self
6687                .directory_literal_path_basename
6688                .entry(path_basename(&pattern.pattern).to_vec())
6689                .or_default()
6690                .push(index),
6691            IgnoreBucketKind::PathSuffixBasename => {
6692                let suffix = pattern
6693                    .pattern
6694                    .strip_prefix(b"**/")
6695                    .unwrap_or(&pattern.pattern);
6696                self.path_suffix_basename
6697                    .entry(path_basename(suffix).to_vec())
6698                    .or_default()
6699                    .push(index);
6700            }
6701            IgnoreBucketKind::DirectoryPathSuffixBasename => {
6702                let suffix = pattern
6703                    .pattern
6704                    .strip_prefix(b"**/")
6705                    .unwrap_or(&pattern.pattern);
6706                self.directory_path_suffix_basename
6707                    .entry(path_basename(suffix).to_vec())
6708                    .or_default()
6709                    .push(index);
6710            }
6711            IgnoreBucketKind::GlobPathLiteralBasename => self
6712                .glob_path_literal_basename
6713                .entry(path_basename(&pattern.pattern).to_vec())
6714                .or_default()
6715                .push(index),
6716            IgnoreBucketKind::GlobDirectoryLiteralBasename => self
6717                .glob_directory_literal_basename
6718                .entry(path_basename(&pattern.pattern).to_vec())
6719                .or_default()
6720                .push(index),
6721            IgnoreBucketKind::GlobPathSuffixBasename => self.glob_path_suffix_basename.push(index),
6722            IgnoreBucketKind::GlobPathPrefixBasename => self.glob_path_prefix_basename.push(index),
6723            IgnoreBucketKind::GlobDirectorySuffixBasename => {
6724                self.glob_directory_suffix_basename.push(index)
6725            }
6726            IgnoreBucketKind::GlobDirectoryPrefixBasename => {
6727                self.glob_directory_prefix_basename.push(index)
6728            }
6729            IgnoreBucketKind::SuffixBasename => self
6730                .suffix_basename
6731                .entry(*pattern.pattern.last().expect("suffix literal is non-empty"))
6732                .or_default()
6733                .push(index),
6734            IgnoreBucketKind::PrefixBasename => self
6735                .prefix_basename
6736                .entry(pattern.pattern[0])
6737                .or_default()
6738                .push(index),
6739            IgnoreBucketKind::Other => self.other.push(index),
6740        }
6741    }
6742
6743    fn truncate(&mut self, len: usize) {
6744        fn truncate_indices(indices: &mut Vec<usize>, len: usize) {
6745            let keep = indices.partition_point(|index| *index < len);
6746            indices.truncate(keep);
6747        }
6748        for indices in self.literal_basename.values_mut() {
6749            truncate_indices(indices, len);
6750        }
6751        for indices in self.directory_literal_basename.values_mut() {
6752            truncate_indices(indices, len);
6753        }
6754        for indices in self.literal_path_basename.values_mut() {
6755            truncate_indices(indices, len);
6756        }
6757        for indices in self.directory_literal_path_basename.values_mut() {
6758            truncate_indices(indices, len);
6759        }
6760        for indices in self.path_suffix_basename.values_mut() {
6761            truncate_indices(indices, len);
6762        }
6763        for indices in self.directory_path_suffix_basename.values_mut() {
6764            truncate_indices(indices, len);
6765        }
6766        for indices in self.glob_path_literal_basename.values_mut() {
6767            truncate_indices(indices, len);
6768        }
6769        for indices in self.glob_directory_literal_basename.values_mut() {
6770            truncate_indices(indices, len);
6771        }
6772        truncate_indices(&mut self.glob_path_suffix_basename, len);
6773        truncate_indices(&mut self.glob_path_prefix_basename, len);
6774        truncate_indices(&mut self.glob_directory_suffix_basename, len);
6775        truncate_indices(&mut self.glob_directory_prefix_basename, len);
6776        for indices in self.suffix_basename.values_mut() {
6777            truncate_indices(indices, len);
6778        }
6779        for indices in self.prefix_basename.values_mut() {
6780            truncate_indices(indices, len);
6781        }
6782        truncate_indices(&mut self.other, len);
6783    }
6784}
6785
/// One parsed ignore (or sparse-checkout) pattern together with the
/// precomputed facts the matcher uses to test it quickly.
#[derive(Debug)]
struct IgnorePattern {
    /// Base the pattern is relative to; consulted via `base_matches`.
    /// NOTE(review): presumably the directory of the defining file — confirm.
    base: Vec<u8>,
    /// Pattern text used for bucketing and matching.
    pattern: Vec<u8>,
    /// NOTE(review): presumably the pattern line as written — confirm.
    original: Vec<u8>,
    /// Label of the source the pattern came from (e.g. `.git/info/exclude`).
    source: Vec<u8>,
    /// Line number within `source`; callers pass `0` for patterns that did
    /// not come from a file.
    line_number: usize,
    /// A matching negated pattern means the path is *not* ignored.
    negated: bool,
    /// NOTE(review): presumably set for patterns ending in `/` — confirm.
    directory_only: bool,
    /// NOTE(review): presumably set for patterns with a leading `/` — confirm.
    anchored: bool,
    /// NOTE(review): presumably whether the pattern contains `/` — confirm.
    has_slash: bool,
    /// How `pattern` should be matched against a slash-free segment. Most
    /// `.gitignore` entries are literals or simple `*.ext` / `prefix*` globs, all
    /// of which match without the allocating wildcard DP engine; only genuinely
    /// complex globs fall through to [`wildcard_path_matches`].
    match_kind: MatchKind,
    /// NOTE(review): presumably the length of the literal prefix checked by
    /// `glob_literal_prefix_matches` — confirm.
    glob_literal_prefix_len: usize,
}
6804
/// Classification of an [`IgnorePattern`] that lets common shapes skip the
/// general wildcard matcher. Literal/prefix/suffix variants match a slash-free
/// segment; [`MatchKind::PathSuffix`] handles the common `**/literal/path`
/// shape, and the remaining complex patterns defer to the full engine.
///
/// The classification is produced by [`classify_ignore_pattern`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MatchKind {
    /// No metacharacters: matches by byte equality.
    Literal,
    /// `*X` with `X` a non-empty, slash-free literal: matches a segment
    /// ending in `X`.
    Suffix,
    /// `X*` with `X` a non-empty, slash-free literal: matches a segment
    /// starting with `X`.
    Prefix,
    /// `**/X/Y` with a non-empty literal suffix: matches a path ending at
    /// `X/Y`.
    PathSuffix,
    /// Anything else: defer to [`wildcard_path_matches`].
    Glob,
}
6822
/// Returns the bytes after the last `/` in `path`, or all of `path` when it
/// contains no `/`. A path ending in `/` therefore has an empty basename.
fn path_basename(path: &[u8]) -> &[u8] {
    match path.iter().rposition(|&byte| byte == b'/') {
        Some(slash) => &path[slash + 1..],
        None => path,
    }
}
6826
/// Returns `true` when `component` contains any of the glob metacharacters
/// `*`, `?`, `[` or `\`.
fn path_component_has_glob_meta(component: &[u8]) -> bool {
    const GLOB_META: &[u8] = b"*?[\\";
    component.iter().any(|byte| GLOB_META.contains(byte))
}
6832
6833fn final_component_match_kind(pattern: &[u8]) -> MatchKind {
6834    classify_ignore_pattern(path_basename(pattern))
6835}
6836
/// Calls `visit` for every non-empty `/`-separated component of `path` that
/// names a directory.
///
/// All components before the last `/` qualify. The text after the last `/`
/// qualifies only when `is_dir` is set. Empty components (leading, trailing or
/// doubled slashes) are never visited.
fn visit_directory_match_components(
    path: &[u8],
    is_dir: bool,
    mut visit: impl FnMut(&[u8]),
) {
    let mut components = path.split(|&byte| byte == b'/').peekable();
    while let Some(component) = components.next() {
        let is_final = components.peek().is_none();
        if component.is_empty() {
            continue;
        }
        if is_dir || !is_final {
            visit(component);
        }
    }
}
6855
/// Selects which list of [`IgnorePatternBuckets`] a pattern is filed under.
///
/// Each variant corresponds to the identically named (snake_case) field of
/// [`IgnorePatternBuckets`]; see `IgnorePatternBuckets::push` for the key
/// derived from the pattern in each case.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum IgnoreBucketKind {
    /// Keyed by the whole pattern text.
    LiteralBasename,
    /// Keyed by the whole pattern text.
    DirectoryLiteralBasename,
    /// Keyed by the pattern's final component.
    LiteralPathBasename,
    /// Keyed by the pattern's final component.
    DirectoryLiteralPathBasename,
    /// Keyed by the final component after a leading `**/` is removed.
    PathSuffixBasename,
    /// Keyed by the final component after a leading `**/` is removed.
    DirectoryPathSuffixBasename,
    /// Keyed by the pattern's final component.
    GlobPathLiteralBasename,
    /// Keyed by the pattern's final component.
    GlobDirectoryLiteralBasename,
    /// Stored in an unkeyed list.
    GlobPathSuffixBasename,
    /// Stored in an unkeyed list.
    GlobPathPrefixBasename,
    /// Stored in an unkeyed list.
    GlobDirectorySuffixBasename,
    /// Stored in an unkeyed list.
    GlobDirectoryPrefixBasename,
    /// Keyed by the last byte of the pattern text.
    SuffixBasename,
    /// Keyed by the first byte of the pattern text.
    PrefixBasename,
    /// Stored in the catch-all list.
    Other,
}
6874
6875/// Classify `pattern` for [`MatchKind`]. `*X`/`X*` fast paths require the literal
6876/// part to be slash-free so that `ends_with`/`starts_with` on a single segment is
6877/// exactly equivalent to the glob (`*` never crosses `/`).
6878fn classify_ignore_pattern(pattern: &[u8]) -> MatchKind {
6879    if let Some(suffix) = pattern.strip_prefix(b"**/")
6880        && !suffix.is_empty()
6881        && !suffix
6882            .iter()
6883            .any(|byte| matches!(byte, b'*' | b'?' | b'[' | b'\\'))
6884    {
6885        return MatchKind::PathSuffix;
6886    }
6887    let stars = pattern.iter().filter(|byte| **byte == b'*').count();
6888    let other_meta = pattern
6889        .iter()
6890        .any(|byte| matches!(byte, b'?' | b'[' | b'\\'));
6891    if stars == 0 && !other_meta {
6892        return MatchKind::Literal;
6893    }
6894    if stars == 1 && !other_meta {
6895        let literal = if pattern.first() == Some(&b'*') {
6896            Some((&pattern[1..], MatchKind::Suffix))
6897        } else if pattern.last() == Some(&b'*') {
6898            Some((&pattern[..pattern.len() - 1], MatchKind::Prefix))
6899        } else {
6900            None
6901        };
6902        if let Some((literal, kind)) = literal
6903            && !literal.is_empty()
6904            && !literal.contains(&b'/')
6905        {
6906            return kind;
6907        }
6908    }
6909    MatchKind::Glob
6910}
6911
6912impl IgnoreMatcher {
6913    fn from_sources(
6914        root: &Path,
6915        exclude_standard: bool,
6916        patterns: &[Vec<u8>],
6917        per_directory: &[String],
6918    ) -> Result<Self> {
6919        let mut matcher = if exclude_standard {
6920            Self::from_worktree_root(root)?
6921        } else {
6922            Self::default()
6923        };
6924        matcher.extend_patterns(patterns);
6925        matcher.extend_per_directory_patterns(root, per_directory)?;
6926        Ok(matcher)
6927    }
6928
6929    /// Builds only the repository-wide ignore sources — `core.excludesFile` (or the
6930    /// default global) and `$GIT_DIR/info/exclude` — *without* walking the worktree
6931    /// for `.gitignore`. The caller folds each directory's `.gitignore` into the
6932    /// matcher as it descends (see [`read_dir_ignore_patterns`]), so status reads
6933    /// the tree exactly once instead of doing a separate full-tree ignore pass.
6934    fn from_worktree_base(root: &Path) -> Result<Self> {
6935        let mut matcher = Self::default();
6936        read_ignore_patterns(
6937            root.join(".git").join("info").join("exclude"),
6938            &mut matcher.patterns,
6939            &[],
6940            b".git/info/exclude",
6941        );
6942        if !read_core_excludes_file(root, &mut matcher.patterns) {
6943            read_default_global_excludes_file(&mut matcher.patterns);
6944        }
6945        matcher.rebuild_buckets();
6946        Ok(matcher)
6947    }
6948
6949    fn from_worktree_root(root: &Path) -> Result<Self> {
6950        let mut matcher = Self::default();
6951        read_ignore_patterns(
6952            root.join(".git").join("info").join("exclude"),
6953            &mut matcher.patterns,
6954            &[],
6955            b".git/info/exclude",
6956        );
6957        if !read_core_excludes_file(root, &mut matcher.patterns) {
6958            read_default_global_excludes_file(&mut matcher.patterns);
6959        }
6960        collect_per_directory_patterns(
6961            root,
6962            root,
6963            &[String::from(".gitignore")],
6964            &mut matcher.patterns,
6965        )?;
6966        matcher.rebuild_buckets();
6967        Ok(matcher)
6968    }
6969
6970    fn extend_patterns(&mut self, patterns: &[Vec<u8>]) {
6971        for pattern in patterns {
6972            self.push_raw_pattern(pattern, &[], &[], 0);
6973        }
6974    }
6975
6976    fn extend_per_directory_patterns(&mut self, root: &Path, names: &[String]) -> Result<()> {
6977        if names.is_empty() {
6978            return Ok(());
6979        }
6980        collect_per_directory_patterns(root, root, names, &mut self.patterns)?;
6981        self.rebuild_buckets();
6982        Ok(())
6983    }
6984
6985    fn is_ignored(&self, path: &[u8], is_dir: bool) -> bool {
6986        self.is_ignored_profiled(path, is_dir, None)
6987    }
6988
6989    fn match_for(&self, path: &[u8], is_dir: bool) -> Option<&IgnorePattern> {
6990        self.match_index_for(path, is_dir, None)
6991            .and_then(|index| self.patterns.get(index))
6992    }
6993
6994    fn is_ignored_profiled(
6995        &self,
6996        path: &[u8],
6997        is_dir: bool,
6998        mut profile: Option<&mut StatusProfileCounters>,
6999    ) -> bool {
7000        if let Some(profile) = profile.as_deref_mut() {
7001            profile.ignore_checks += 1;
7002        }
7003        self.match_index_for(path, is_dir, profile)
7004            .is_some_and(|index| !self.patterns[index].negated)
7005    }
7006
7007    fn match_index_for(
7008        &self,
7009        path: &[u8],
7010        is_dir: bool,
7011        mut profile: Option<&mut StatusProfileCounters>,
7012    ) -> Option<usize> {
7013        let basename = path_basename(path);
7014        let mut best = None;
7015        if let Some(indices) = self.buckets.literal_basename.get(basename) {
7016            self.match_bucket_candidates(indices, path, basename, is_dir, &mut best, &mut profile);
7017        }
7018        if let Some(indices) = self.buckets.literal_path_basename.get(basename) {
7019            self.match_bucket_candidates(indices, path, basename, is_dir, &mut best, &mut profile);
7020        }
7021        if let Some(indices) = self.buckets.path_suffix_basename.get(basename) {
7022            self.match_bucket_candidates(indices, path, basename, is_dir, &mut best, &mut profile);
7023        }
7024        if let Some(indices) = self.buckets.glob_path_literal_basename.get(basename) {
7025            self.match_bucket_candidates(indices, path, basename, is_dir, &mut best, &mut profile);
7026        }
7027        self.match_final_component_candidates(
7028            &self.buckets.glob_path_suffix_basename,
7029            MatchKind::Suffix,
7030            basename,
7031            path,
7032            basename,
7033            is_dir,
7034            &mut best,
7035            &mut profile,
7036        );
7037        self.match_final_component_candidates(
7038            &self.buckets.glob_path_prefix_basename,
7039            MatchKind::Prefix,
7040            basename,
7041            path,
7042            basename,
7043            is_dir,
7044            &mut best,
7045            &mut profile,
7046        );
7047        visit_directory_match_components(path, is_dir, |component| {
7048            if let Some(indices) = self.buckets.directory_literal_basename.get(component) {
7049                self.match_bucket_candidates(
7050                    indices,
7051                    path,
7052                    basename,
7053                    is_dir,
7054                    &mut best,
7055                    &mut profile,
7056                );
7057            }
7058            if let Some(indices) = self
7059                .buckets
7060                .directory_literal_path_basename
7061                .get(component)
7062            {
7063                self.match_bucket_candidates(
7064                    indices,
7065                    path,
7066                    basename,
7067                    is_dir,
7068                    &mut best,
7069                    &mut profile,
7070                );
7071            }
7072            if let Some(indices) = self.buckets.directory_path_suffix_basename.get(component) {
7073                self.match_bucket_candidates(
7074                    indices,
7075                    path,
7076                    basename,
7077                    is_dir,
7078                    &mut best,
7079                    &mut profile,
7080                );
7081            }
7082            if let Some(indices) = self.buckets.glob_directory_literal_basename.get(component) {
7083                self.match_bucket_candidates(
7084                    indices,
7085                    path,
7086                    basename,
7087                    is_dir,
7088                    &mut best,
7089                    &mut profile,
7090                );
7091            }
7092            self.match_final_component_candidates(
7093                &self.buckets.glob_directory_suffix_basename,
7094                MatchKind::Suffix,
7095                component,
7096                path,
7097                basename,
7098                is_dir,
7099                &mut best,
7100                &mut profile,
7101            );
7102            self.match_final_component_candidates(
7103                &self.buckets.glob_directory_prefix_basename,
7104                MatchKind::Prefix,
7105                component,
7106                path,
7107                basename,
7108                is_dir,
7109                &mut best,
7110                &mut profile,
7111            );
7112        });
7113        if let Some(last) = basename.last()
7114            && let Some(indices) = self.buckets.suffix_basename.get(last)
7115        {
7116            self.match_bucket_candidates(indices, path, basename, is_dir, &mut best, &mut profile);
7117        }
7118        if let Some(first) = basename.first()
7119            && let Some(indices) = self.buckets.prefix_basename.get(first)
7120        {
7121            self.match_bucket_candidates(indices, path, basename, is_dir, &mut best, &mut profile);
7122        }
7123        self.match_bucket_candidates(
7124            &self.buckets.other,
7125            path,
7126            basename,
7127            is_dir,
7128            &mut best,
7129            &mut profile,
7130        );
7131        best
7132    }
7133
7134    fn match_bucket_candidates(
7135        &self,
7136        indices: &[usize],
7137        path: &[u8],
7138        basename: &[u8],
7139        is_dir: bool,
7140        best: &mut Option<usize>,
7141        profile: &mut Option<&mut StatusProfileCounters>,
7142    ) {
7143        for &index in indices.iter().rev() {
7144            if best.is_some_and(|best| index <= best) {
7145                break;
7146            }
7147            let pattern = &self.patterns[index];
7148            if !pattern.base_matches(path) {
7149                continue;
7150            }
7151            if !pattern.glob_literal_prefix_matches(path, basename, is_dir) {
7152                continue;
7153            }
7154            if let Some(profile) = profile.as_deref_mut() {
7155                profile.ignore_pattern_tests += 1;
7156                if pattern.match_kind == MatchKind::Glob {
7157                    profile.ignore_glob_fallback_tests += 1;
7158                }
7159            }
7160            if pattern.matches_with_basename(path, basename, is_dir) {
7161                *best = Some(index);
7162                break;
7163            }
7164        }
7165    }
7166
7167    fn match_final_component_candidates(
7168        &self,
7169        indices: &[usize],
7170        kind: MatchKind,
7171        component: &[u8],
7172        path: &[u8],
7173        basename: &[u8],
7174        is_dir: bool,
7175        best: &mut Option<usize>,
7176        profile: &mut Option<&mut StatusProfileCounters>,
7177    ) {
7178        for &index in indices.iter().rev() {
7179            if best.is_some_and(|best| index <= best) {
7180                break;
7181            }
7182            let pattern = &self.patterns[index];
7183            if !pattern.base_matches(path) {
7184                continue;
7185            }
7186            let final_component = path_basename(&pattern.pattern);
7187            let candidate = match kind {
7188                MatchKind::Suffix => component.ends_with(&final_component[1..]),
7189                MatchKind::Prefix => {
7190                    component.starts_with(&final_component[..final_component.len() - 1])
7191                }
7192                _ => false,
7193            };
7194            if !candidate {
7195                continue;
7196            }
7197            if !pattern.glob_literal_prefix_matches(path, basename, is_dir) {
7198                continue;
7199            }
7200            if let Some(profile) = profile.as_deref_mut() {
7201                profile.ignore_pattern_tests += 1;
7202                if pattern.match_kind == MatchKind::Glob {
7203                    profile.ignore_glob_fallback_tests += 1;
7204                }
7205            }
7206            if pattern.matches_with_basename(path, basename, is_dir) {
7207                *best = Some(index);
7208                break;
7209            }
7210        }
7211    }
7212
7213    fn push_pattern(&mut self, pattern: IgnorePattern) {
7214        let index = self.patterns.len();
7215        self.buckets.push(index, &pattern);
7216        self.patterns.push(pattern);
7217    }
7218
7219    fn push_raw_pattern(&mut self, raw: &[u8], base: &[u8], source: &[u8], line_number: usize) {
7220        if let Some(pattern) = parse_ignore_pattern(raw, base, source, line_number) {
7221            self.push_pattern(pattern);
7222        }
7223    }
7224
7225    fn truncate(&mut self, len: usize) {
7226        if self.patterns.len() == len {
7227            return;
7228        }
7229        self.patterns.truncate(len);
7230        self.buckets.truncate(len);
7231    }
7232
7233    fn rebuild_buckets(&mut self) {
7234        let mut buckets = IgnorePatternBuckets::default();
7235        for (index, pattern) in self.patterns.iter().enumerate() {
7236            buckets.push(index, pattern);
7237        }
7238        self.buckets = buckets;
7239    }
7240}
7241
/// Decides whether a worktree path is included by a [`SparseCheckout`].
///
/// In [`SparseCheckoutMode::Full`] the sparse patterns are compiled with the
/// same `.gitignore` grammar used elsewhere in this crate ([`IgnorePattern`]);
/// a path is *in cone* when the last matching pattern is positive. In
/// [`SparseCheckoutMode::Cone`] the patterns are reduced to a set of recursive
/// directory prefixes plus a flag for whether top-level files are kept, and
/// inclusion is decided by literal prefix containment.
#[derive(Debug)]
enum SparseMatcher {
    /// Full pattern matching; `patterns` are kept in file order so the last
    /// matching one decides.
    Full { patterns: Vec<IgnorePattern> },
    /// Cone matching by literal directory containment.
    Cone(ConeMatcher),
}
7255
/// Compiled form of a cone-mode sparse-checkout specification.
///
/// Inclusion is decided purely by comparing path prefixes against the stored
/// directory names; no glob matching is involved.
#[derive(Debug, Default)]
struct ConeMatcher {
    /// `true` when files directly at the repository root are in cone (`/*`).
    root_files: bool,
    /// Directory prefixes (without leading or trailing `/`) whose entire
    /// subtree is in cone, e.g. `dir1/dir2`.
    recursive_dirs: Vec<Vec<u8>>,
    /// Parent directories that are in cone only for their direct files, for
    /// example as declared by a `/dir/*` line. Stored without leading or
    /// trailing `/`.
    parent_dirs: Vec<Vec<u8>>,
}
7268
7269impl SparseMatcher {
7270    fn new(sparse: &SparseCheckout, mode: SparseCheckoutMode) -> Self {
7271        let resolved = match mode {
7272            SparseCheckoutMode::Auto => {
7273                if patterns_are_cone(&sparse.patterns) {
7274                    SparseCheckoutMode::Cone
7275                } else {
7276                    SparseCheckoutMode::Full
7277                }
7278            }
7279            other => other,
7280        };
7281        match resolved {
7282            SparseCheckoutMode::Cone => SparseMatcher::Cone(ConeMatcher::compile(&sparse.patterns)),
7283            // `Auto` has been resolved above; everything else is full matching.
7284            _ => {
7285                let mut patterns = Vec::new();
7286                for pattern in &sparse.patterns {
7287                    push_ignore_pattern(&mut patterns, pattern, &[], b"sparse-checkout", 0);
7288                }
7289                SparseMatcher::Full { patterns }
7290            }
7291        }
7292    }
7293
7294    /// Returns `true` when the given file path should be present in the
7295    /// worktree under this sparse specification.
7296    fn includes_file(&self, path: &[u8]) -> bool {
7297        match self {
7298            SparseMatcher::Full { patterns } => {
7299                let mut included = false;
7300                for pattern in patterns {
7301                    if pattern.matches(path, false) {
7302                        included = !pattern.negated;
7303                    }
7304                }
7305                included
7306            }
7307            SparseMatcher::Cone(cone) => cone.includes_file(path),
7308        }
7309    }
7310}
7311
7312impl ConeMatcher {
7313    fn compile(patterns: &[Vec<u8>]) -> Self {
7314        let mut matcher = ConeMatcher::default();
7315        for raw in patterns {
7316            let line = sparse_clean_line(raw);
7317            if line.is_empty() || line.starts_with(b"#") {
7318                continue;
7319            }
7320            // Negated guards such as `!/*/` and `!/dir/*/` only exist to stop a
7321            // recursive match from pulling in nested directories; the positive
7322            // patterns already capture the cone, so we ignore the negations.
7323            if line.starts_with(b"!") {
7324                continue;
7325            }
7326            if line == b"/*" {
7327                matcher.root_files = true;
7328                continue;
7329            }
7330            // `/dir/` -> recursive subtree.
7331            if let Some(rest) = line.strip_prefix(b"/")
7332                && let Some(dir) = rest.strip_suffix(b"/")
7333                && !dir.is_empty()
7334            {
7335                matcher.recursive_dirs.push(dir.to_vec());
7336                continue;
7337            }
7338            // `/dir/*` -> direct files of `dir` only (parent guard).
7339            if let Some(rest) = line.strip_prefix(b"/")
7340                && let Some(dir) = rest.strip_suffix(b"/*")
7341                && !dir.is_empty()
7342            {
7343                matcher.parent_dirs.push(dir.to_vec());
7344                continue;
7345            }
7346        }
7347        matcher
7348    }
7349
7350    fn includes_file(&self, path: &[u8]) -> bool {
7351        let parent = match path.iter().rposition(|byte| *byte == b'/') {
7352            Some(index) => &path[..index],
7353            None => {
7354                // A path with no slash is a top-level file.
7355                return self.root_files;
7356            }
7357        };
7358        if self
7359            .recursive_dirs
7360            .iter()
7361            .any(|dir| path_is_under_dir(path, dir))
7362        {
7363            return true;
7364        }
7365        self.parent_dirs.iter().any(|dir| dir.as_slice() == parent)
7366    }
7367}
7368
7369/// Strips a CR, leading/trailing whitespace, and an optional trailing slash is
7370/// preserved (cone patterns are slash sensitive) from a raw sparse line.
7371fn sparse_clean_line(raw: &[u8]) -> &[u8] {
7372    let line = raw.strip_suffix(b"\r").unwrap_or(raw);
7373    trim_ascii_whitespace(line)
7374}
7375
/// Returns `true` when `path` lies strictly beneath the directory `dir`, i.e.
/// `path` starts with `dir` immediately followed by `/`.
///
/// A `path` equal to `dir` is *not* considered beneath it. An empty `dir`
/// stands for the repository root and contains every path.
fn path_is_under_dir(path: &[u8], dir: &[u8]) -> bool {
    dir.is_empty() || matches!(path.strip_prefix(dir), Some([b'/', ..]))
}
7385
7386/// Heuristic used by [`SparseCheckoutMode::Auto`]: the pattern set is cone
7387/// shaped when every (non-comment, non-blank) line is one of the restricted
7388/// cone forms Git emits.
7389fn patterns_are_cone(patterns: &[Vec<u8>]) -> bool {
7390    let mut saw_pattern = false;
7391    for raw in patterns {
7392        let line = sparse_clean_line(raw);
7393        if line.is_empty() || line.starts_with(b"#") {
7394            continue;
7395        }
7396        saw_pattern = true;
7397        let body = line.strip_prefix(b"!").unwrap_or(line);
7398        let is_cone_shaped = body == b"/*"
7399            || body == b"/*/"
7400            || (body.starts_with(b"/")
7401                && (body.ends_with(b"/") || body.ends_with(b"/*"))
7402                && !sparse_has_glob_meta(body));
7403        if !is_cone_shaped {
7404            return false;
7405        }
7406    }
7407    saw_pattern
7408}
7409
/// Reports whether `body` contains a glob metacharacter (`*`, `?`, `[`, `]`
/// or `\`) that rules it out as a cone pattern.
///
/// One trailing `/*` is removed before scanning, because the caller accepts
/// that suffix as a legitimate cone form.
fn sparse_has_glob_meta(body: &[u8]) -> bool {
    const GLOB_META: &[u8] = b"*?[]\\";
    let stem = match body.strip_suffix(b"/*") {
        Some(without_tail) => without_tail,
        None => body,
    };
    stem.iter().any(|byte| GLOB_META.contains(byte))
}
7418
7419fn read_core_excludes_file(root: &Path, patterns: &mut Vec<IgnorePattern>) -> bool {
7420    let Ok(config) = sley_config::read_repo_config(&root.join(".git"), None) else {
7421        return false;
7422    };
7423    let Some(value) = config.get("core", None, "excludesFile") else {
7424        return false;
7425    };
7426    let path = expand_core_excludes_file(root, value);
7427    read_ignore_patterns(path, patterns, &[], value.as_bytes());
7428    true
7429}
7430
/// Resolves a configured file path (`core.excludesFile`, `core.attributesFile`)
/// to a concrete location.
///
/// * Absolute paths are returned unchanged.
/// * A `~/` prefix is replaced by `$HOME` when that variable is set.
/// * Everything else (including `~/…` without `HOME`) is joined onto `root`.
fn expand_core_excludes_file(root: &Path, value: &str) -> PathBuf {
    let configured = Path::new(value);
    if configured.is_absolute() {
        return configured.to_path_buf();
    }
    let home_relative = value.strip_prefix("~/").and_then(|rest| {
        std::env::var_os("HOME").map(|home| PathBuf::from(home).join(rest))
    });
    match home_relative {
        Some(expanded) => expanded,
        None => root.join(configured),
    }
}
7443
7444fn read_default_global_excludes_file(patterns: &mut Vec<IgnorePattern>) {
7445    if let Some(config_home) = std::env::var_os("XDG_CONFIG_HOME")
7446        && !config_home.is_empty()
7447    {
7448        let path = PathBuf::from(config_home).join("git").join("ignore");
7449        let source = path.to_string_lossy().into_owned();
7450        read_ignore_patterns(path, patterns, &[], source.as_bytes());
7451        return;
7452    }
7453    if let Some(home) = std::env::var_os("HOME") {
7454        let path = PathBuf::from(home)
7455            .join(".config")
7456            .join("git")
7457            .join("ignore");
7458        let source = path.to_string_lossy().into_owned();
7459        read_ignore_patterns(path, patterns, &[], source.as_bytes());
7460    }
7461}
7462
7463fn collect_per_directory_patterns(
7464    root: &Path,
7465    dir: &Path,
7466    names: &[String],
7467    patterns: &mut Vec<IgnorePattern>,
7468) -> Result<()> {
7469    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
7470    entries.sort_by_key(|entry| entry.file_name());
7471    for entry in entries {
7472        let path = entry.path();
7473        if path.file_name().and_then(|name| name.to_str()) == Some(".git") {
7474            continue;
7475        }
7476        let metadata = entry.metadata()?;
7477        if metadata.is_dir() {
7478            collect_per_directory_patterns(root, &path, names, patterns)?;
7479            continue;
7480        }
7481        if !metadata.is_file() {
7482            continue;
7483        }
7484        let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
7485            continue;
7486        };
7487        if !names.iter().any(|name| name == file_name) {
7488            continue;
7489        }
7490        let parent = path.parent().unwrap_or(root);
7491        let relative = parent.strip_prefix(root).map_err(|_| {
7492            GitError::InvalidPath(format!("path {} is outside worktree", parent.display()))
7493        })?;
7494        let base = git_path_bytes(relative)?;
7495        let mut source = base.clone();
7496        if !source.is_empty() {
7497            source.push(b'/');
7498        }
7499        source.extend_from_slice(file_name.as_bytes());
7500        read_ignore_patterns(&path, patterns, &base, &source);
7501    }
7502    Ok(())
7503}
7504
7505fn read_ignore_patterns(
7506    path: impl AsRef<Path>,
7507    patterns: &mut Vec<IgnorePattern>,
7508    base: &[u8],
7509    source: &[u8],
7510) {
7511    let Ok(contents) = fs::read(path) else {
7512        return;
7513    };
7514    for (line, raw) in contents.split(|byte| *byte == b'\n').enumerate() {
7515        push_ignore_pattern(patterns, raw, base, source, line + 1);
7516    }
7517}
7518
7519fn read_ignore_patterns_into_matcher(
7520    path: impl AsRef<Path>,
7521    matcher: &mut IgnoreMatcher,
7522    base: &[u8],
7523    source: &[u8],
7524) {
7525    let Ok(contents) = fs::read(path) else {
7526        return;
7527    };
7528    for (line, raw) in contents.split(|byte| *byte == b'\n').enumerate() {
7529        matcher.push_raw_pattern(raw, base, source, line + 1);
7530    }
7531}
7532
7533fn push_ignore_pattern(
7534    patterns: &mut Vec<IgnorePattern>,
7535    raw: &[u8],
7536    base: &[u8],
7537    source: &[u8],
7538    line_number: usize,
7539) {
7540    if let Some(pattern) = parse_ignore_pattern(raw, base, source, line_number) {
7541        patterns.push(pattern);
7542    }
7543}
7544
7545fn parse_ignore_pattern(
7546    raw: &[u8],
7547    base: &[u8],
7548    source: &[u8],
7549    line_number: usize,
7550) -> Option<IgnorePattern> {
7551    let mut line = raw.strip_suffix(b"\r").unwrap_or(raw).to_vec();
7552    normalize_ignore_trailing_spaces(&mut line);
7553    let original = line.clone();
7554    let mut line = line.as_slice();
7555    if line.is_empty() || line.starts_with(b"#") {
7556        return None;
7557    }
7558    let negated = if line.starts_with(b"\\#") || line.starts_with(b"\\!") {
7559        line = &line[1..];
7560        false
7561    } else if let Some(pattern) = line.strip_prefix(b"!") {
7562        line = pattern;
7563        true
7564    } else {
7565        false
7566    };
7567    let directory_only = line.ends_with(b"/");
7568    let pattern = if directory_only {
7569        line.strip_suffix(b"/").unwrap_or(line)
7570    } else {
7571        line
7572    };
7573    let (anchored, pattern) = if let Some(pattern) = pattern.strip_prefix(b"/") {
7574        (true, pattern)
7575    } else {
7576        (false, pattern)
7577    };
7578    // A leading `**/` followed by a slash-free segment is, per gitignore,
7579    // identical to the bare segment ("match in all directories"): `**/Pods` ≡
7580    // `Pods`, `**/*.jks` ≡ `*.jks`. Collapse it so the pattern matches the
7581    // basename directly (a literal/suffix compare) instead of paying for the
7582    // `**` wildcard engine on the full path — verified against `git check-ignore`.
7583    let pattern = match pattern.strip_prefix(b"**/") {
7584        Some(rest) if !rest.is_empty() && !rest.contains(&b'/') => rest,
7585        _ => pattern,
7586    };
7587    if pattern.is_empty() {
7588        return None;
7589    }
7590    let match_kind = classify_ignore_pattern(pattern);
7591    let glob_literal_prefix_len = if match_kind == MatchKind::Glob {
7592        pattern
7593            .iter()
7594            .position(|byte| matches!(byte, b'*' | b'?' | b'[' | b'\\'))
7595            .unwrap_or(pattern.len())
7596    } else {
7597        0
7598    };
7599    Some(IgnorePattern {
7600        base: base.to_vec(),
7601        pattern: pattern.to_vec(),
7602        original,
7603        source: source.to_vec(),
7604        line_number,
7605        negated,
7606        directory_only,
7607        anchored,
7608        has_slash: pattern.contains(&b'/'),
7609        match_kind,
7610        glob_literal_prefix_len,
7611    })
7612}
7613
/// Removes unescaped trailing spaces from an ignore line, in place.
///
/// Spaces are popped from the end until the line no longer ends in a space or
/// the final space is escaped, i.e. preceded by an odd number of backslashes.
/// In the escaped case the escaping backslash is removed (turning `\ ` into a
/// literal space) and trimming stops.
fn normalize_ignore_trailing_spaces(line: &mut Vec<u8>) {
    loop {
        // Count the backslashes sitting directly before a trailing space.
        let escaping_backslashes = match line.split_last() {
            Some((&b' ', head)) => head
                .iter()
                .rev()
                .take_while(|&&byte| byte == b'\\')
                .count(),
            _ => return,
        };
        if escaping_backslashes % 2 == 1 {
            // Odd count means at least one backslash, so `len() >= 2` here.
            let backslash_at = line.len() - 2;
            line.remove(backslash_at);
            return;
        }
        line.pop();
    }
}
7629
7630impl IgnorePattern {
7631    fn bucket_kind(&self) -> IgnoreBucketKind {
7632        if self.match_kind == MatchKind::PathSuffix {
7633            return if self.directory_only {
7634                IgnoreBucketKind::DirectoryPathSuffixBasename
7635            } else {
7636                IgnoreBucketKind::PathSuffixBasename
7637            };
7638        }
7639        if (self.anchored || self.has_slash) && self.match_kind == MatchKind::Literal {
7640            return if self.directory_only {
7641                IgnoreBucketKind::DirectoryLiteralPathBasename
7642            } else {
7643                IgnoreBucketKind::LiteralPathBasename
7644            };
7645        }
7646        if self.has_slash
7647            && self.match_kind == MatchKind::Glob
7648            && !self.directory_only
7649            && !path_component_has_glob_meta(path_basename(&self.pattern))
7650        {
7651            return IgnoreBucketKind::GlobPathLiteralBasename;
7652        }
7653        if self.has_slash
7654            && self.match_kind == MatchKind::Glob
7655            && self.directory_only
7656            && !path_component_has_glob_meta(path_basename(&self.pattern))
7657        {
7658            return IgnoreBucketKind::GlobDirectoryLiteralBasename;
7659        }
7660        if self.has_slash && self.match_kind == MatchKind::Glob {
7661            return match (
7662                self.directory_only,
7663                final_component_match_kind(&self.pattern),
7664            ) {
7665                (false, MatchKind::Suffix) => IgnoreBucketKind::GlobPathSuffixBasename,
7666                (false, MatchKind::Prefix) => IgnoreBucketKind::GlobPathPrefixBasename,
7667                (true, MatchKind::Suffix) => IgnoreBucketKind::GlobDirectorySuffixBasename,
7668                (true, MatchKind::Prefix) => IgnoreBucketKind::GlobDirectoryPrefixBasename,
7669                _ => IgnoreBucketKind::Other,
7670            };
7671        }
7672        if self.anchored || self.has_slash {
7673            return IgnoreBucketKind::Other;
7674        }
7675        match (self.directory_only, self.match_kind) {
7676            (false, MatchKind::Literal) => IgnoreBucketKind::LiteralBasename,
7677            (true, MatchKind::Literal) => IgnoreBucketKind::DirectoryLiteralBasename,
7678            (false, MatchKind::Suffix) => IgnoreBucketKind::SuffixBasename,
7679            (false, MatchKind::Prefix) => IgnoreBucketKind::PrefixBasename,
7680            _ => IgnoreBucketKind::Other,
7681        }
7682    }
7683
7684    fn base_matches(&self, path: &[u8]) -> bool {
7685        if self.base.is_empty() {
7686            return true;
7687        }
7688        path.strip_prefix(self.base.as_slice())
7689            .is_some_and(|rest| rest.starts_with(b"/"))
7690    }
7691
7692    fn to_match(&self) -> IgnoreMatch {
7693        IgnoreMatch {
7694            source: self.source.clone(),
7695            line_number: self.line_number,
7696            pattern: self.original.clone(),
7697            ignored: !self.negated,
7698        }
7699    }
7700
7701    fn matches(&self, path: &[u8], is_dir: bool) -> bool {
7702        let basename = path_basename(path);
7703        self.matches_with_basename(path, basename, is_dir)
7704    }
7705
7706    fn glob_literal_prefix_matches(&self, path: &[u8], basename: &[u8], is_dir: bool) -> bool {
7707        if self.match_kind != MatchKind::Glob {
7708            return true;
7709        }
7710        if self.glob_literal_prefix_len == 0 {
7711            return true;
7712        }
7713        let prefix = &self.pattern[..self.glob_literal_prefix_len];
7714        let scoped_path = if self.base.is_empty() {
7715            path
7716        } else {
7717            let Some(rest) = path
7718                .strip_prefix(self.base.as_slice())
7719                .and_then(|rest| rest.strip_prefix(b"/"))
7720            else {
7721                return false;
7722            };
7723            rest
7724        };
7725        if self.anchored || self.has_slash {
7726            return scoped_path.starts_with(prefix);
7727        }
7728        if self.directory_only && !is_dir {
7729            return true;
7730        }
7731        basename.starts_with(prefix)
7732    }
7733
7734    fn matches_with_basename(&self, path: &[u8], basename: &[u8], is_dir: bool) -> bool {
7735        let path = if self.base.is_empty() {
7736            path
7737        } else {
7738            let Some(rest) = path
7739                .strip_prefix(self.base.as_slice())
7740                .and_then(|rest| rest.strip_prefix(b"/"))
7741            else {
7742                return false;
7743            };
7744            rest
7745        };
7746        if self.directory_only {
7747            return self.matches_directory(path, is_dir);
7748        }
7749        if self.anchored || self.has_slash {
7750            return self.match_segment(path);
7751        }
7752        self.match_segment(basename)
7753    }
7754
7755    fn matches_directory(&self, path: &[u8], is_dir: bool) -> bool {
7756        if self.anchored || self.has_slash {
7757            if is_dir && self.match_path(path) {
7758                return true;
7759            }
7760            // For a *file* path, a directory-only pattern can only apply
7761            // through an *ancestor* directory of the file: the leaf is matched
7762            // only because it lives inside a directory the pattern excludes
7763            // (e.g. `/tmp-*/` excludes `tmp-info-only`, so `tmp-info-only/x`
7764            // is excluded too). Upstream git models this through directory
7765            // traversal — `last_matching_pattern` skips a MUSTBEDIR pattern for
7766            // a non-directory leaf (`dtype != DT_DIR`), and a file is excluded
7767            // only when one of its parent directories is excluded.
7768            //
7769            // A *negated* directory-only pattern (`!data/**/`) re-includes a
7770            // directory but, per git, does NOT re-include the files inside it
7771            // (git's docs: "it is not possible to re-include a file if a parent
7772            // directory of that file is excluded" — re-including the dir with
7773            // `!dir/` still requires an explicit `!dir/*` to reach its files).
7774            // So a negated directory-only pattern must never match a file via
7775            // its ancestor, otherwise it wrongly wins the leaf scan and
7776            // un-ignores a file that an earlier positive pattern ignored
7777            // (t0008-ignores "directories and ** matches": `data/**` +
7778            // `!data/**/` must leave `data/data1/file1` ignored).
7779            if self.negated {
7780                return false;
7781            }
7782            return path
7783                .iter()
7784                .enumerate()
7785                .any(|(idx, byte)| *byte == b'/' && self.match_path(&path[..idx]));
7786        }
7787        let mut components = path.split(|byte| *byte == b'/').peekable();
7788        while let Some(component) = components.next() {
7789            if self.match_segment(component) && (is_dir || components.peek().is_some()) {
7790                return true;
7791            }
7792        }
7793        false
7794    }
7795
7796    fn match_path(&self, value: &[u8]) -> bool {
7797        match self.match_kind {
7798            MatchKind::Literal => self.pattern == value,
7799            MatchKind::Suffix => !value.contains(&b'/') && value.ends_with(&self.pattern[1..]),
7800            MatchKind::Prefix => {
7801                !value.contains(&b'/') && value.starts_with(&self.pattern[..self.pattern.len() - 1])
7802            }
7803            MatchKind::PathSuffix => {
7804                let suffix = &self.pattern[3..];
7805                value
7806                    .strip_suffix(suffix)
7807                    .is_some_and(|prefix| prefix.is_empty() || prefix.ends_with(b"/"))
7808            }
7809            MatchKind::Glob => wildcard_path_matches(&self.pattern, value),
7810        }
7811    }
7812
7813    /// Match a slash-free `value` (a basename or path component) against this
7814    /// pattern. Literal and simple `*X`/`X*` patterns resolve with a direct
7815    /// comparison; only complex globs pay for the allocating wildcard engine.
7816    fn match_segment(&self, value: &[u8]) -> bool {
7817        self.match_path(value)
7818    }
7819}
7820
thread_local! {
    /// Per-thread scratch buffer for [`wildcard_path_matches`]: a flat
    /// `(pattern.len()+1) * (value.len()+1)` grid of memoised sub-results
    /// (`None` = not computed yet). The buffer is kept across calls so its
    /// allocation is reused by the hot ignore/attribute matching loop; it only
    /// grows when a call needs a larger grid than any previous one.
    static WILDCARD_MEMO: RefCell<Vec<Option<bool>>> = const { RefCell::new(Vec::new()) };
}
7827
7828fn wildcard_path_matches(pattern: &[u8], value: &[u8]) -> bool {
7829    let stride = value.len() + 1;
7830    let cells = (pattern.len() + 1) * stride;
7831    WILDCARD_MEMO.with_borrow_mut(|memo| {
7832        // One reused allocation; clearing then resizing fills the grid with `None`.
7833        memo.clear();
7834        memo.resize(cells, None);
7835        wildcard_path_matches_from(pattern, value, 0, 0, memo, stride)
7836    })
7837}
7838
7839fn wildcard_path_matches_from(
7840    pattern: &[u8],
7841    value: &[u8],
7842    pattern_index: usize,
7843    value_index: usize,
7844    memo: &mut [Option<bool>],
7845    stride: usize,
7846) -> bool {
7847    let cell = pattern_index * stride + value_index;
7848    if let Some(cached) = memo[cell] {
7849        return cached;
7850    }
7851    let matched = if pattern_index == pattern.len() {
7852        value_index == value.len()
7853    } else {
7854        match pattern[pattern_index] {
7855            b'*' if pattern.get(pattern_index + 1) == Some(&b'*') => wildcard_double_star_matches(
7856                pattern,
7857                value,
7858                pattern_index,
7859                value_index,
7860                memo,
7861                stride,
7862            ),
7863            b'*' => {
7864                if wildcard_path_matches_from(
7865                    pattern,
7866                    value,
7867                    pattern_index + 1,
7868                    value_index,
7869                    memo,
7870                    stride,
7871                ) {
7872                    true
7873                } else {
7874                    let mut next = value_index;
7875                    while next < value.len() && value[next] != b'/' {
7876                        next += 1;
7877                        if wildcard_path_matches_from(
7878                            pattern,
7879                            value,
7880                            pattern_index + 1,
7881                            next,
7882                            memo,
7883                            stride,
7884                        ) {
7885                            return true;
7886                        }
7887                    }
7888                    false
7889                }
7890            }
7891            b'?' => {
7892                value_index < value.len()
7893                    && value[value_index] != b'/'
7894                    && wildcard_path_matches_from(
7895                        pattern,
7896                        value,
7897                        pattern_index + 1,
7898                        value_index + 1,
7899                        memo,
7900                        stride,
7901                    )
7902            }
7903            b'[' => {
7904                if value_index < value.len() && value[value_index] != b'/' {
7905                    if let Some((class_matches, next_pattern_index)) =
7906                        wildcard_class_matches(pattern, pattern_index, value[value_index])
7907                    {
7908                        class_matches
7909                            && wildcard_path_matches_from(
7910                                pattern,
7911                                value,
7912                                next_pattern_index,
7913                                value_index + 1,
7914                                memo,
7915                                stride,
7916                            )
7917                    } else {
7918                        value[value_index] == b'['
7919                            && wildcard_path_matches_from(
7920                                pattern,
7921                                value,
7922                                pattern_index + 1,
7923                                value_index + 1,
7924                                memo,
7925                                stride,
7926                            )
7927                    }
7928                } else {
7929                    false
7930                }
7931            }
7932            b'\\' if pattern_index + 1 < pattern.len() => {
7933                value_index < value.len()
7934                    && pattern[pattern_index + 1] == value[value_index]
7935                    && wildcard_path_matches_from(
7936                        pattern,
7937                        value,
7938                        pattern_index + 2,
7939                        value_index + 1,
7940                        memo,
7941                        stride,
7942                    )
7943            }
7944            literal => {
7945                value_index < value.len()
7946                    && literal == value[value_index]
7947                    && wildcard_path_matches_from(
7948                        pattern,
7949                        value,
7950                        pattern_index + 1,
7951                        value_index + 1,
7952                        memo,
7953                        stride,
7954                    )
7955            }
7956        }
7957    };
7958    memo[cell] = Some(matched);
7959    matched
7960}
7961
7962fn wildcard_double_star_matches(
7963    pattern: &[u8],
7964    value: &[u8],
7965    pattern_index: usize,
7966    value_index: usize,
7967    memo: &mut [Option<bool>],
7968    stride: usize,
7969) -> bool {
7970    let after_stars = pattern_index + 2;
7971    if pattern.get(after_stars) == Some(&b'/') {
7972        if wildcard_path_matches_from(pattern, value, after_stars + 1, value_index, memo, stride) {
7973            return true;
7974        }
7975        for next in value_index..value.len() {
7976            if value[next] == b'/'
7977                && wildcard_path_matches_from(
7978                    pattern,
7979                    value,
7980                    after_stars + 1,
7981                    next + 1,
7982                    memo,
7983                    stride,
7984                )
7985            {
7986                return true;
7987            }
7988        }
7989        return false;
7990    }
7991    for next in value_index..=value.len() {
7992        if wildcard_path_matches_from(pattern, value, after_stars, next, memo, stride) {
7993            return true;
7994        }
7995    }
7996    false
7997}
7998
/// Evaluates the bracket expression that opens at `pattern[start]` (which must
/// be `[`) against the single byte `value`.
///
/// Returns `Some((matches, next))`, where `matches` tells whether `value`
/// belongs to the class (already inverted for `[!...]` / `[^...]`) and `next`
/// is the pattern index just past the closing `]`. Returns `None` when the
/// expression is unterminated, in which case the caller treats `[` literally.
///
/// Members are single bytes or `a-z` style ranges; a reversed range is
/// accepted and treated like its ordered form. As in POSIX bracket
/// expressions (and git's wildmatch), a `]` that appears first in the class,
/// directly after `[` or after the negation marker, is an ordinary member
/// rather than the terminator, so `[]a]` matches `]` or `a`.
fn wildcard_class_matches(pattern: &[u8], start: usize, value: u8) -> Option<(bool, usize)> {
    let mut index = start + 1;
    let negated = matches!(pattern.get(index), Some(b'!' | b'^'));
    if negated {
        index += 1;
    }
    let class_start = index;
    // A leading `]` is a literal member, so the terminator must come after it.
    let search_from = if pattern.get(class_start) == Some(&b']') {
        class_start + 1
    } else {
        class_start
    };
    let end = pattern
        .get(search_from..)?
        .iter()
        .position(|byte| *byte == b']')
        .map(|offset| search_from + offset)?;
    let mut matched = false;
    while index < end {
        // `x-y` is a range only when `y` is still inside the class; a `-`
        // right before the closing `]` is a literal.
        if index + 2 < end && pattern[index + 1] == b'-' {
            let lower = pattern[index].min(pattern[index + 2]);
            let upper = pattern[index].max(pattern[index + 2]);
            matched |= lower <= value && value <= upper;
            index += 3;
        } else {
            matched |= pattern[index] == value;
            index += 1;
        }
    }
    Some((matched != negated, end + 1))
}
8027
/// Accumulated attribute rules used to answer attribute lookups for paths.
#[derive(Debug, Default)]
struct AttributeMatcher {
    /// Attribute patterns in the order they were added. Lookups walk this list
    /// front to back, so an assignment from a later matching pattern replaces
    /// an earlier one for the same attribute.
    patterns: Vec<AttributePattern>,
    /// Rank of each attribute name, assigned in first-seen order by
    /// `push_attribute_order`; used to order the output of "all attributes"
    /// queries.
    attribute_order: BTreeMap<Vec<u8>, usize>,
    /// Assignments keyed by name.
    /// NOTE(review): presumably attribute macro definitions (`[attr]name ...`)
    /// mapped to the assignments they expand to; populated outside this section.
    macros: BTreeMap<Vec<u8>, Vec<AttributeAssignment>>,
}
8034
/// One attribute rule: a path pattern plus the assignments applied to paths it
/// matches.
#[derive(Debug)]
struct AttributePattern {
    /// Directory the rule is scoped to.
    /// NOTE(review): presumably worktree-relative and empty for
    /// repository-wide sources, as with ignore patterns; confirm in `matches`.
    base: Vec<u8>,
    /// The pattern text used for matching.
    pattern: Vec<u8>,
    /// NOTE(review): presumably set when the pattern had a leading `/`; confirm
    /// against the parser.
    anchored: bool,
    /// NOTE(review): presumably set when `pattern` contains a `/`; confirm
    /// against the parser.
    has_slash: bool,
    /// Attribute assignments applied, in order, when the pattern matches.
    assignments: Vec<AttributeAssignment>,
}
8043
/// A single attribute assignment attached to a pattern.
#[derive(Debug, Clone, PartialEq, Eq)]
struct AttributeAssignment {
    /// Name of the attribute being assigned.
    attribute: Vec<u8>,
    /// Assigned state. A `None` state still overrides earlier assignments of
    /// the same attribute, but is omitted from "all attributes" results and
    /// reported as `None` for explicitly requested attributes.
    /// NOTE(review): presumably `None` encodes the `!attr` (unspecified) form.
    state: Option<AttributeState>,
}
8049
8050impl AttributeMatcher {
8051    fn from_worktree_root(root: &Path) -> Result<Self> {
8052        let mut matcher = Self::default();
8053        if !matcher.read_configured_attributes(root) {
8054            matcher.read_default_global_attributes();
8055        }
8056        collect_attribute_patterns(root, root, &mut matcher)?;
8057        read_attribute_patterns(
8058            root.join(".git").join("info").join("attributes"),
8059            &mut matcher,
8060            &[],
8061            b".git/info/attributes",
8062        );
8063        Ok(matcher)
8064    }
8065
8066    /// Builds only the repository-wide attribute sources — `core.attributesFile`
8067    /// (or the default global) and `$GIT_DIR/info/attributes` — *without* walking
8068    /// the worktree for `.gitattributes`. The caller is expected to fold each
8069    /// directory's `.gitattributes` into the matcher as it descends (see
8070    /// [`read_dir_attribute_patterns`]), so status/diff read the tree exactly once
8071    /// instead of doing a separate full-tree attribute pass. Lower-priority sources
8072    /// are added first, so in-tree patterns added during the walk take precedence —
8073    /// matching git's lookup order.
8074    fn from_worktree_base(root: &Path) -> Self {
8075        let mut matcher = Self::default();
8076        if !matcher.read_configured_attributes(root) {
8077            matcher.read_default_global_attributes();
8078        }
8079        read_attribute_patterns(
8080            root.join(".git").join("info").join("attributes"),
8081            &mut matcher,
8082            &[],
8083            b".git/info/attributes",
8084        );
8085        matcher
8086    }
8087
8088    fn attributes_for_path(
8089        &self,
8090        path: &[u8],
8091        requested: &[Vec<u8>],
8092        all: bool,
8093    ) -> Vec<AttributeCheck> {
8094        let mut states = BTreeMap::<Vec<u8>, Option<AttributeState>>::new();
8095        for pattern in &self.patterns {
8096            if !pattern.matches(path) {
8097                continue;
8098            }
8099            for assignment in &pattern.assignments {
8100                states.insert(assignment.attribute.clone(), assignment.state.clone());
8101            }
8102        }
8103        if all {
8104            let mut checks = states
8105                .into_iter()
8106                .filter_map(|(attribute, state)| {
8107                    state.map(|state| AttributeCheck {
8108                        attribute,
8109                        state: Some(state),
8110                    })
8111                })
8112                .collect::<Vec<_>>();
8113            checks.sort_by(|left, right| {
8114                attribute_all_rank(&left.attribute, &self.attribute_order)
8115                    .cmp(&attribute_all_rank(&right.attribute, &self.attribute_order))
8116                    .then_with(|| left.attribute.cmp(&right.attribute))
8117            });
8118            return checks;
8119        }
8120        requested
8121            .iter()
8122            .map(|attribute| AttributeCheck {
8123                attribute: attribute.clone(),
8124                state: states.get(attribute).cloned().flatten(),
8125            })
8126            .collect()
8127    }
8128
8129    fn push_attribute_order(&mut self, attribute: &[u8]) {
8130        let next = self.attribute_order.len();
8131        self.attribute_order
8132            .entry(attribute.to_vec())
8133            .or_insert(next);
8134    }
8135
8136    fn read_configured_attributes(&mut self, root: &Path) -> bool {
8137        let Ok(config) = sley_config::read_repo_config(&root.join(".git"), None) else {
8138            return false;
8139        };
8140        let Some(value) = config.get("core", None, "attributesFile") else {
8141            return false;
8142        };
8143        let path = expand_core_excludes_file(root, value);
8144        read_attribute_patterns(path, self, &[], value.as_bytes());
8145        true
8146    }
8147
8148    fn read_default_global_attributes(&mut self) {
8149        if let Some(config_home) = std::env::var_os("XDG_CONFIG_HOME")
8150            && !config_home.is_empty()
8151        {
8152            let path = PathBuf::from(config_home).join("git").join("attributes");
8153            let source = path.to_string_lossy().into_owned();
8154            read_attribute_patterns(path, self, &[], source.as_bytes());
8155            return;
8156        }
8157        if let Some(home) = std::env::var_os("HOME") {
8158            let path = PathBuf::from(home)
8159                .join(".config")
8160                .join("git")
8161                .join("attributes");
8162            let source = path.to_string_lossy().into_owned();
8163            read_attribute_patterns(path, self, &[], source.as_bytes());
8164        }
8165    }
8166}
8167
8168fn read_dir_ignore_patterns_for_base(
8169    dir: &Path,
8170    base: &[u8],
8171    matcher: &mut IgnoreMatcher,
8172) -> Result<()> {
8173    let mut source = base.to_vec();
8174    if !source.is_empty() {
8175        source.push(b'/');
8176    }
8177    source.extend_from_slice(b".gitignore");
8178    read_ignore_patterns_into_matcher(dir.join(".gitignore"), matcher, base, &source);
8179    Ok(())
8180}
8181
8182/// Fold `dir`'s `.gitattributes` (if any) into `matcher`, scoped to `dir`'s path
8183/// within `root`. Used both by the eager full-tree pass and by the status/diff
8184/// worktree walk as it descends, so the tree is read for attributes exactly once.
8185fn read_dir_attribute_patterns(
8186    root: &Path,
8187    dir: &Path,
8188    matcher: &mut AttributeMatcher,
8189) -> Result<()> {
8190    let relative = dir.strip_prefix(root).map_err(|_| {
8191        GitError::InvalidPath(format!("path {} is outside worktree", dir.display()))
8192    })?;
8193    let base = git_path_bytes(relative)?;
8194    read_dir_attribute_patterns_for_base(dir, &base, matcher)
8195}
8196
8197fn read_dir_attribute_patterns_for_base(
8198    dir: &Path,
8199    base: &[u8],
8200    matcher: &mut AttributeMatcher,
8201) -> Result<()> {
8202    let mut source = base.to_vec();
8203    if !source.is_empty() {
8204        source.push(b'/');
8205    }
8206    source.extend_from_slice(b".gitattributes");
8207    read_attribute_patterns(dir.join(".gitattributes"), matcher, base, &source);
8208    Ok(())
8209}
8210
8211fn collect_attribute_patterns(
8212    root: &Path,
8213    dir: &Path,
8214    matcher: &mut AttributeMatcher,
8215) -> Result<()> {
8216    read_dir_attribute_patterns(root, dir, matcher)?;
8217
8218    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
8219    entries.sort_by_key(|entry| entry.file_name());
8220    for entry in entries {
8221        let path = entry.path();
8222        if path.file_name().and_then(|name| name.to_str()) == Some(".git") {
8223            continue;
8224        }
8225        if entry.metadata()?.is_dir() {
8226            collect_attribute_patterns(root, &path, matcher)?;
8227        }
8228    }
8229    Ok(())
8230}
8231
8232fn read_attribute_patterns(
8233    path: impl AsRef<Path>,
8234    matcher: &mut AttributeMatcher,
8235    base: &[u8],
8236    _source: &[u8],
8237) {
8238    let Ok(contents) = fs::read(path) else {
8239        return;
8240    };
8241    read_attribute_patterns_from_bytes(&contents, matcher, base);
8242}
8243
8244fn read_attribute_patterns_from_bytes(
8245    contents: &[u8],
8246    matcher: &mut AttributeMatcher,
8247    base: &[u8],
8248) {
8249    for raw in contents.split(|byte| *byte == b'\n') {
8250        push_attribute_pattern(matcher, raw, base);
8251    }
8252}
8253
8254fn collect_attribute_patterns_from_tree(
8255    db: &FileObjectDatabase,
8256    format: ObjectFormat,
8257    tree_oid: &ObjectId,
8258    base: Vec<u8>,
8259    matcher: &mut AttributeMatcher,
8260) -> Result<()> {
8261    let object = read_expected_object(db, tree_oid, ObjectType::Tree)?;
8262    let mut entries = Tree::parse(format, &object.body)?.entries;
8263    entries.sort_by(|left, right| left.name.cmp(&right.name));
8264    for entry in &entries {
8265        if entry.name == b".gitattributes" && tree_entry_object_type(entry.mode) == ObjectType::Blob
8266        {
8267            let object = db.read_object(&entry.oid).map_err(|err| {
8268                expect_missing_object_kind(err, entry.oid, MissingObjectKind::Blob)
8269            })?;
8270            if object.object_type == ObjectType::Blob {
8271                read_attribute_patterns_from_bytes(&object.body, matcher, &base);
8272            }
8273        }
8274    }
8275    for entry in entries {
8276        if tree_entry_object_type(entry.mode) != ObjectType::Tree {
8277            continue;
8278        }
8279        let mut child_base = base.clone();
8280        if !child_base.is_empty() {
8281            child_base.push(b'/');
8282        }
8283        child_base.extend_from_slice(entry.name.as_bytes());
8284        collect_attribute_patterns_from_tree(db, format, &entry.oid, child_base, matcher)?;
8285    }
8286    Ok(())
8287}
8288
8289fn collect_attribute_patterns_from_index(
8290    git_dir: &Path,
8291    format: ObjectFormat,
8292    db: &FileObjectDatabase,
8293    matcher: &mut AttributeMatcher,
8294) -> Result<()> {
8295    let index_path = repository_index_path(git_dir);
8296    if !index_path.exists() {
8297        return Ok(());
8298    }
8299    let mut entries = Index::parse(&fs::read(index_path)?, format)?.entries;
8300    entries.sort_by(|left, right| left.path.cmp(&right.path));
8301    for entry in entries {
8302        let is_attributes_file =
8303            entry.path == b".gitattributes" || entry.path.as_bytes().ends_with(b"/.gitattributes");
8304        if index_entry_stage(&entry) != 0
8305            || tree_entry_object_type(entry.mode) != ObjectType::Blob
8306            || !is_attributes_file
8307        {
8308            continue;
8309        }
8310        let base = match entry.path.as_bytes().strip_suffix(b".gitattributes") {
8311            Some(b"") => Vec::new(),
8312            Some(parent) => parent.strip_suffix(b"/").unwrap_or(parent).to_vec(),
8313            None => continue,
8314        };
8315        let object = db
8316            .read_object(&entry.oid)
8317            .map_err(|err| expect_missing_object_kind(err, entry.oid, MissingObjectKind::Blob))?;
8318        if object.object_type == ObjectType::Blob {
8319            read_attribute_patterns_from_bytes(&object.body, matcher, &base);
8320        }
8321    }
8322    Ok(())
8323}
8324
8325fn push_attribute_pattern(matcher: &mut AttributeMatcher, raw: &[u8], base: &[u8]) {
8326    let line = raw.strip_suffix(b"\r").unwrap_or(raw);
8327    let line = trim_ascii_whitespace(line);
8328    if line.is_empty() || line.starts_with(b"#") {
8329        return;
8330    }
8331    let mut fields = line
8332        .split(|byte| byte.is_ascii_whitespace())
8333        .filter(|field| !field.is_empty());
8334    let Some(raw_pattern) = fields.next() else {
8335        return;
8336    };
8337    if let Some(macro_name) = raw_pattern.strip_prefix(b"[attr]") {
8338        if macro_name.is_empty() {
8339            return;
8340        }
8341        let mut assignments = vec![AttributeAssignment {
8342            attribute: macro_name.to_vec(),
8343            state: Some(AttributeState::Set),
8344        }];
8345        for field in fields {
8346            push_attribute_assignments(&mut assignments, field, &matcher.macros);
8347        }
8348        for assignment in &assignments {
8349            matcher.push_attribute_order(&assignment.attribute);
8350        }
8351        matcher.macros.insert(macro_name.to_vec(), assignments);
8352        return;
8353    }
8354    let mut assignments = Vec::new();
8355    for field in fields {
8356        push_attribute_assignments(&mut assignments, field, &matcher.macros);
8357    }
8358    if assignments.is_empty() {
8359        return;
8360    }
8361    for assignment in &assignments {
8362        matcher.push_attribute_order(&assignment.attribute);
8363    }
8364    let (anchored, pattern) = if let Some(pattern) = raw_pattern.strip_prefix(b"/") {
8365        (true, pattern)
8366    } else {
8367        (false, raw_pattern)
8368    };
8369    if pattern.is_empty() {
8370        return;
8371    }
8372    matcher.patterns.push(AttributePattern {
8373        base: base.to_vec(),
8374        pattern: pattern.to_vec(),
8375        anchored,
8376        has_slash: pattern.contains(&b'/'),
8377        assignments,
8378    });
8379}
8380
8381fn push_attribute_assignments(
8382    assignments: &mut Vec<AttributeAssignment>,
8383    field: &[u8],
8384    macros: &BTreeMap<Vec<u8>, Vec<AttributeAssignment>>,
8385) {
8386    if let Some(macro_assignments) = macros.get(field) {
8387        assignments.extend(macro_assignments.iter().cloned());
8388        return;
8389    }
8390    if field == b"binary" {
8391        assignments.push(AttributeAssignment {
8392            attribute: b"binary".to_vec(),
8393            state: Some(AttributeState::Set),
8394        });
8395        assignments.push(AttributeAssignment {
8396            attribute: b"diff".to_vec(),
8397            state: Some(AttributeState::Unset),
8398        });
8399        assignments.push(AttributeAssignment {
8400            attribute: b"merge".to_vec(),
8401            state: Some(AttributeState::Unset),
8402        });
8403        assignments.push(AttributeAssignment {
8404            attribute: b"text".to_vec(),
8405            state: Some(AttributeState::Unset),
8406        });
8407        return;
8408    }
8409    if let Some(attribute) = field.strip_prefix(b"-") {
8410        if !attribute.is_empty() {
8411            assignments.push(AttributeAssignment {
8412                attribute: attribute.to_vec(),
8413                state: Some(AttributeState::Unset),
8414            });
8415        }
8416        return;
8417    }
8418    if let Some(attribute) = field.strip_prefix(b"!") {
8419        if !attribute.is_empty() {
8420            assignments.push(AttributeAssignment {
8421                attribute: attribute.to_vec(),
8422                state: None,
8423            });
8424        }
8425        return;
8426    }
8427    if let Some(equal) = field.iter().position(|byte| *byte == b'=') {
8428        let attribute = &field[..equal];
8429        let value = &field[equal + 1..];
8430        if !attribute.is_empty() {
8431            assignments.push(AttributeAssignment {
8432                attribute: attribute.to_vec(),
8433                state: Some(AttributeState::Value(value.to_vec())),
8434            });
8435        }
8436        return;
8437    }
8438    assignments.push(AttributeAssignment {
8439        attribute: field.to_vec(),
8440        state: Some(AttributeState::Set),
8441    });
8442}
8443
/// Build the sort key used to order `attribute` when listing all attributes.
///
/// The key is `(rank, first-seen order, name)`:
/// * `rank` pins the built-ins: `binary` (0), `diff` (1), `merge` (2),
///   `text` (3), every other attribute (4), and `eol` last (5);
/// * the second component is the attribute's entry in `order`, or
///   `usize::MAX` when it has none;
/// * the name itself is the final tie-breaker.
fn attribute_all_rank(
    attribute: &[u8],
    order: &BTreeMap<Vec<u8>, usize>,
) -> (usize, usize, Vec<u8>) {
    let pinned: [(&[u8], usize); 5] = [
        (&b"binary"[..], 0),
        (&b"diff"[..], 1),
        (&b"merge"[..], 2),
        (&b"text"[..], 3),
        (&b"eol"[..], 5),
    ];
    let rank = pinned
        .iter()
        .find(|(name, _)| *name == attribute)
        .map_or(4, |(_, rank)| *rank);
    let seen_at = match order.get(attribute) {
        Some(position) => *position,
        None => usize::MAX,
    };
    (rank, seen_at, attribute.to_vec())
}
8459
/// Return `value` with leading and trailing ASCII whitespace removed.
///
/// "Whitespace" follows `u8::is_ascii_whitespace` (space, tab, LF, FF, CR).
/// The result borrows from the input; no bytes are copied.
fn trim_ascii_whitespace(mut value: &[u8]) -> &[u8] {
    while let [first, rest @ ..] = value {
        if !first.is_ascii_whitespace() {
            break;
        }
        value = rest;
    }
    while let [rest @ .., last] = value {
        if !last.is_ascii_whitespace() {
            break;
        }
        value = rest;
    }
    value
}
8469
8470impl AttributePattern {
8471    fn matches(&self, path: &[u8]) -> bool {
8472        let path = if self.base.is_empty() {
8473            path
8474        } else {
8475            let Some(rest) = path
8476                .strip_prefix(self.base.as_slice())
8477                .and_then(|rest| rest.strip_prefix(b"/"))
8478            else {
8479                return false;
8480            };
8481            rest
8482        };
8483        if self.anchored || self.has_slash {
8484            return wildcard_path_matches(&self.pattern, path);
8485        }
8486        path.rsplit(|byte| *byte == b'/')
8487            .next()
8488            .is_some_and(|basename| wildcard_path_matches(&self.pattern, basename))
8489    }
8490}
8491
8492// ---------------------------------------------------------------------------
8493// Content filtering on the blob <-> worktree boundary
8494//
8495// Git runs two kinds of conversion when content crosses between the worktree
8496// and the object database:
8497//
8498//   * the line-ending / `core.autocrlf` conversion (driven by the `text`,
8499//     `eol` attributes and the `core.autocrlf` / `core.eol` config), and
//   * the external `filter.<name>.clean` / `.smudge` driver filters, run as
//     one command per conversion (selected by the `filter=<name>` attribute
//     and configured commands).
8502//
8503// "clean" runs on the way *into* the object store (worktree -> blob), e.g. on
8504// `git add` / `git hash-object -w`. "smudge" runs on the way *out* (blob ->
8505// worktree), e.g. on checkout / restore. The driver filter, when present,
8506// wraps the EOL conversion: on clean git first runs the configured `clean`
8507// command and then applies CRLF->LF normalization; on smudge git first applies
8508// LF->CRLF and then runs the `smudge` command.
8509// ---------------------------------------------------------------------------
8510
/// The line-ending conversion that applies to a path, derived from its
/// attributes and the repository config.
///
/// This only describes the *direction* of conversion; whether it runs at all
/// for a given path is decided separately by the path's [`TextDecision`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EolConversion {
    /// No conversion: binary content, or text with `core.autocrlf=false` and no
    /// `eol`/`text=auto` request to add carriage returns.
    None,
    /// Normalize to LF on clean; no carriage returns on smudge (`eol=lf`, or
    /// `core.autocrlf=input`).
    Lf,
    /// Normalize to LF on clean; emit CRLF on smudge (`eol=crlf`, or
    /// `core.autocrlf=true`).
    Crlf,
}
8525
/// How git should decide whether a path is text for the purpose of EOL
/// conversion.
///
/// Produced from the `text` attribute (and its legacy `crlf` alias), with
/// `core.autocrlf` as the fallback when the attributes express no opinion.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TextDecision {
    /// `-text` / `binary`: never convert.
    Binary,
    /// `text` is set explicitly: always treat as text.
    Text,
    /// `text=auto` (or implied by `core.autocrlf`): treat as text unless the
    /// content looks binary.
    Auto,
    /// No opinion from attributes or config: leave content untouched.
    Unspecified,
}
8540
/// The fully resolved set of conversions that apply to a single path.
///
/// Built by `ContentFilterPlan::resolve` from the path's attribute checks and
/// the repository config.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ContentFilterPlan {
    /// Whether (and how confidently) the path is treated as text for EOL
    /// conversion.
    text: TextDecision,
    /// The conversion to apply when `text` resolves to "this is text".
    eol: EolConversion,
    /// `filter.<name>` driver, if assigned via attributes and configured.
    driver: Option<FilterDriver>,
}
8550
/// A `filter=<name>` driver resolved against the `filter.<name>.*` config.
#[derive(Debug, Clone, PartialEq, Eq)]
struct FilterDriver {
    /// The driver name: the raw value of the `filter` attribute.
    name: Vec<u8>,
    /// `filter.<name>.clean` command; `None` when unset or empty.
    clean: Option<String>,
    /// `filter.<name>.smudge` command; `None` when unset or empty.
    smudge: Option<String>,
    /// `filter.<name>.required`; defaults to `false` when not configured.
    required: bool,
}
8558
8559/// Decode one crlf-family attribute (`text` or its legacy alias `crlf`) into a
8560/// text decision, plus whether the value form forced an EOL direction.
8561///
8562/// Mirrors git's `git_path_check_crlf` (convert.c): a *set* attribute is text,
8563/// an *unset* one is binary, `=auto` is auto, `=input` forces LF while still
8564/// counting as text, and any other value is "undefined" — i.e. no opinion, so
8565/// the caller falls through to the next source (the `crlf` alias, then config).
8566fn decode_crlf_family_attribute(state: Option<&AttributeState>) -> (TextDecision, EolConversion) {
8567    match state {
8568        Some(AttributeState::Set) => (TextDecision::Text, EolConversion::None),
8569        Some(AttributeState::Unset) => (TextDecision::Binary, EolConversion::None),
8570        Some(AttributeState::Value(value)) if value == b"auto" => {
8571            (TextDecision::Auto, EolConversion::None)
8572        }
8573        // `crlf=input` / `text=input`: text content normalized to LF (no CR on
8574        // smudge), exactly like `core.autocrlf=input`.
8575        Some(AttributeState::Value(value)) if value == b"input" => {
8576            (TextDecision::Text, EolConversion::Lf)
8577        }
8578        // `=<other>` is CRLF_UNDEFINED in git for the `crlf` alias: no opinion.
8579        _ => (TextDecision::Unspecified, EolConversion::None),
8580    }
8581}
8582
8583impl ContentFilterPlan {
8584    /// Build the plan for `path` from the parsed attributes and repo config.
8585    fn resolve(config: &GitConfig, checks: &[AttributeCheck]) -> Self {
8586        let text_attr = checks.iter().find(|check| check.attribute == b"text");
8587        let crlf_attr = checks.iter().find(|check| check.attribute == b"crlf");
8588        let eol_attr = checks.iter().find(|check| check.attribute == b"eol");
8589        let filter_attr = checks.iter().find(|check| check.attribute == b"filter");
8590
8591        // Resolve the eol attribute first; `eol=crlf|lf` also forces text.
8592        let eol_value = eol_attr.and_then(|check| match &check.state {
8593            Some(AttributeState::Value(value)) => Some(value.clone()),
8594            _ => None,
8595        });
8596
8597        // The `text` attribute decides first; only when it is unspecified does
8598        // git consult the legacy `crlf` alias (convert.c `convert_attrs`).
8599        let mut forced_eol = EolConversion::None;
8600        let mut text = match text_attr.map(|check| &check.state) {
8601            Some(Some(AttributeState::Set)) => TextDecision::Text,
8602            Some(Some(AttributeState::Unset)) => TextDecision::Binary,
8603            Some(Some(AttributeState::Value(value))) if value == b"auto" => TextDecision::Auto,
8604            Some(Some(AttributeState::Value(value))) if value == b"input" => {
8605                forced_eol = EolConversion::Lf;
8606                TextDecision::Text
8607            }
8608            // `text=<other>` is treated by git as a set text attribute.
8609            Some(Some(AttributeState::Value(_))) => TextDecision::Text,
8610            // `!text` (unspecified) or no text attribute: fall through to `crlf`.
8611            _ => {
8612                let (decision, eol) =
8613                    decode_crlf_family_attribute(crlf_attr.and_then(|check| check.state.as_ref()));
8614                forced_eol = eol;
8615                decision
8616            }
8617        };
8618
8619        // A concrete `eol` attribute implies the path is text even when `text`
8620        // was left unspecified (git: `eol` without `text` is treated as
8621        // `text=auto`-ish; upstream forces conversion). We honour eol only when
8622        // text is not explicitly binary.
8623        let eol = match (&text, eol_value.as_deref()) {
8624            (TextDecision::Binary, _) => EolConversion::None,
8625            (_, Some(b"crlf")) => {
8626                if text == TextDecision::Unspecified {
8627                    text = TextDecision::Text;
8628                }
8629                EolConversion::Crlf
8630            }
8631            (_, Some(b"lf")) => {
8632                if text == TextDecision::Unspecified {
8633                    text = TextDecision::Text;
8634                }
8635                EolConversion::Lf
8636            }
8637            // No explicit `eol` attribute, but `text=input`/`crlf=input` already
8638            // forced the LF direction (git's CRLF_TEXT_INPUT). Honour it over the
8639            // config-derived default.
8640            _ if forced_eol == EolConversion::Lf => EolConversion::Lf,
8641            // No eol attribute: derive direction from config.
8642            _ => eol_from_config(config),
8643        };
8644
8645        // When the path is text but neither `eol` nor `core.autocrlf`/`core.eol`
8646        // asked for carriage returns, we still normalize to LF on clean. That is
8647        // modelled by `EolConversion::Lf` (clean strips CR, smudge adds none).
8648        let eol = match (&text, eol) {
8649            (TextDecision::Text | TextDecision::Auto, EolConversion::None) => EolConversion::Lf,
8650            (_, eol) => eol,
8651        };
8652
8653        // If config does not enable autocrlf and there is no eol/text opinion,
8654        // there is genuinely nothing to do.
8655        let text = match (text, eol_attr.is_some()) {
8656            (TextDecision::Unspecified, _) => {
8657                // Without any text/eol attribute, only `core.autocrlf` can make a
8658                // path eligible, and then it behaves like `text=auto`.
8659                if autocrlf_enabled(config) {
8660                    TextDecision::Auto
8661                } else {
8662                    TextDecision::Unspecified
8663                }
8664            }
8665            (text, _) => text,
8666        };
8667
8668        let driver = resolve_filter_driver(config, filter_attr);
8669
8670        ContentFilterPlan { text, eol, driver }
8671    }
8672
8673    /// Whether EOL conversion should run for the given content.
8674    fn convert_eol(&self, content: &[u8]) -> bool {
8675        match self.text {
8676            TextDecision::Binary | TextDecision::Unspecified => false,
8677            TextDecision::Text => self.eol != EolConversion::None,
8678            // `text=auto`: only when the blob does not look binary.
8679            TextDecision::Auto => self.eol != EolConversion::None && !looks_binary(content),
8680        }
8681    }
8682
8683    /// The smudge-side LF->CRLF safety check, mirroring convert.c
8684    /// `will_convert_lf_to_crlf`. Returns false (no conversion) when:
8685    ///   * there is no naked LF to convert, or
8686    ///   * the action is `text=auto`-derived (the "new safer autocrlf") AND the
8687    ///     content already contains a lone CR or a CRLF pair, or looks binary.
8688    ///
8689    /// An explicit `text`/`eol=crlf` (non-auto) path always converts naked LFs.
8690    fn will_convert_lf_to_crlf(&self, content: &[u8]) -> bool {
8691        self.will_convert_lf_to_crlf_stats(&gather_convert_stats(content))
8692    }
8693
8694    /// Stats-based variant of [`will_convert_lf_to_crlf`], mirroring convert.c
8695    /// `will_convert_lf_to_crlf(struct text_stat *, ...)`. Used by the safecrlf
8696    /// round-trip simulation, which mutates a copy of the stats rather than
8697    /// re-scanning the buffer.
8698    fn will_convert_lf_to_crlf_stats(&self, stats: &ConvertStats) -> bool {
8699        // `output_eol(crlf_action) != EOL_CRLF` short-circuits in git.
8700        if self.eol != EolConversion::Crlf {
8701            return false;
8702        }
8703        // No naked LF? Nothing to convert.
8704        if stats.lonelf == 0 {
8705            return false;
8706        }
8707        if self.text == TextDecision::Auto {
8708            // Any CR or CRLF already present: leave it untouched (irreversible).
8709            if stats.lonecr > 0 || stats.crlf > 0 {
8710                return false;
8711            }
8712            if convert_is_binary(stats) {
8713                return false;
8714            }
8715        }
8716        true
8717    }
8718
8719    /// Whether this path is a candidate for the `core.safecrlf` round-trip check
8720    /// at all: git only warns for non-`CRLF_BINARY` actions. `Binary` and
8721    /// `Unspecified` (with autocrlf off) correspond to git's `CRLF_BINARY`.
8722    fn safecrlf_applies(&self) -> bool {
8723        matches!(self.text, TextDecision::Text | TextDecision::Auto)
8724    }
8725
8726    /// Emit git's `core.safecrlf` round-trip warning for `path`, mirroring the
8727    /// stderr side-effect of convert.c `crlf_to_git` (the `CONV_EOL_RNDTRP_*`
8728    /// branch). `old_stats` are the stats of the *pre-conversion* worktree
8729    /// content (already gathered by the caller so the buffer is scanned once);
8730    /// `index_has_crlf` is whether the path's current index blob already has a
8731    /// CRLF (git's `has_crlf_in_index`, used only for the auto-crlf decision).
8732    ///
8733    /// This never inspects or alters the bytes written to the object store; it is
8734    /// purely the additive warning git prints alongside `git add`/`commit`.
8735    /// Returns `Err` only under `core.safecrlf=true` when the round-trip is
8736    /// irreversible (git `die`s).
8737    fn check_safe_crlf_stats(
8738        &self,
8739        old_stats: &ConvertStats,
8740        index_has_crlf: bool,
8741        flags: ConvFlags,
8742        path: &[u8],
8743    ) -> Result<()> {
8744        if flags == ConvFlags::Off || !self.safecrlf_applies() {
8745            return Ok(());
8746        }
8747
8748        // Replicate `crlf_to_git`'s `convert_crlf_into_lf` decision (the clean
8749        // direction). It starts as "there is a CRLF to collapse"; auto paths
8750        // suppress conversion for binary content or content whose index blob
8751        // already carries a CRLF (the "new safer autocrlf").
8752        let mut convert_crlf_into_lf = old_stats.crlf > 0;
8753        if self.text == TextDecision::Auto {
8754            if convert_is_binary(old_stats) {
8755                // git returns 0 here: no conversion *and* no warning.
8756                return Ok(());
8757            }
8758            if index_has_crlf {
8759                convert_crlf_into_lf = false;
8760            }
8761        }
8762
8763        // Simulate the round-trip on a copy of the stats.
8764        let mut new_stats = old_stats.clone();
8765        // Simulate "git add" (clean: CRLF -> LF).
8766        if convert_crlf_into_lf {
8767            new_stats.lonelf += new_stats.crlf;
8768            new_stats.crlf = 0;
8769        }
8770        // Simulate "git checkout" (smudge: LF -> CRLF).
8771        if self.will_convert_lf_to_crlf_stats(&new_stats) {
8772            new_stats.crlf += new_stats.lonelf;
8773            new_stats.lonelf = 0;
8774        }
8775        check_safe_crlf(old_stats, &new_stats, flags, path)
8776    }
8777}
8778
8779/// Derive the smudge-direction line ending from `core.autocrlf` / `core.eol`.
8780fn eol_from_config(config: &GitConfig) -> EolConversion {
8781    if let Some(value) = config.get("core", None, "autocrlf") {
8782        match value.to_ascii_lowercase().as_str() {
8783            "input" => return EolConversion::Lf,
8784            "true" | "yes" | "on" | "1" => return EolConversion::Crlf,
8785            _ => {}
8786        }
8787    }
8788    if config.get_bool("core", None, "autocrlf") == Some(true) {
8789        return EolConversion::Crlf;
8790    }
8791    match config
8792        .get("core", None, "eol")
8793        .map(|v| v.to_ascii_lowercase())
8794    {
8795        Some(ref v) if v == "crlf" => EolConversion::Crlf,
8796        Some(ref v) if v == "lf" => EolConversion::Lf,
8797        _ => EolConversion::None,
8798    }
8799}
8800
8801/// Whether `core.autocrlf` is set to anything that enables conversion
8802/// (`true` or `input`).
8803fn autocrlf_enabled(config: &GitConfig) -> bool {
8804    if let Some(value) = config.get("core", None, "autocrlf")
8805        && value.eq_ignore_ascii_case("input")
8806    {
8807        return true;
8808    }
8809    config.get_bool("core", None, "autocrlf") == Some(true)
8810}
8811
8812/// Resolve the `filter=<name>` attribute against `filter.<name>.*` config.
8813fn resolve_filter_driver(
8814    config: &GitConfig,
8815    filter_attr: Option<&AttributeCheck>,
8816) -> Option<FilterDriver> {
8817    let name = match filter_attr.map(|check| &check.state) {
8818        Some(Some(AttributeState::Value(value))) => value.clone(),
8819        // `filter` set/unset without a value selects no driver.
8820        _ => return None,
8821    };
8822    let subsection = String::from_utf8_lossy(&name).into_owned();
8823    let clean = config
8824        .get("filter", Some(&subsection), "clean")
8825        .filter(|cmd| !cmd.is_empty())
8826        .map(str::to_owned);
8827    let smudge = config
8828        .get("filter", Some(&subsection), "smudge")
8829        .filter(|cmd| !cmd.is_empty())
8830        .map(str::to_owned);
8831    let required = config
8832        .get_bool("filter", Some(&subsection), "required")
8833        .unwrap_or(false);
8834    // A filter with neither command and not required is a no-op.
8835    if clean.is_none() && smudge.is_none() && !required {
8836        return None;
8837    }
8838    Some(FilterDriver {
8839        name,
8840        clean,
8841        smudge,
8842        required,
8843    })
8844}
8845
/// Binary-content heuristic in the spirit of git's `buffer_is_binary`:
/// `content` counts as binary when any of its first 8000 bytes is NUL.
/// Bytes beyond that window are never inspected.
fn looks_binary(content: &[u8]) -> bool {
    const FIRST_FEW_BYTES: usize = 8000;
    content
        .iter()
        .take(FIRST_FEW_BYTES)
        .any(|&byte| byte == 0)
}
8853
/// Collapse every CRLF pair to a single LF.
///
/// Only a CR that is immediately followed by an LF is dropped; a lone CR
/// (old-Mac line ending) is preserved, matching git. When the input contains
/// no CRLF pair it is returned as-is, without allocating.
fn convert_crlf_to_lf_cow(content: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
    let Some(first_pair) = content.windows(2).position(|pair| pair == b"\r\n") else {
        return content;
    };

    let mut out = Vec::with_capacity(content.len());
    // Everything before the first pair is copied verbatim.
    out.extend_from_slice(&content[..first_pair]);
    let mut rest = content[first_pair..].iter().copied().peekable();
    while let Some(byte) = rest.next() {
        let cr_before_lf = byte == b'\r' && rest.peek() == Some(&b'\n');
        if !cr_before_lf {
            out.push(byte);
        }
    }
    Cow::Owned(out)
}
8875
/// Expand every lone LF into CRLF.
///
/// An LF that already follows a CR is left alone so existing CRLF pairs are
/// not double-converted, matching git. All other bytes pass through unchanged.
fn convert_lf_to_crlf(content: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(content.len() + content.len() / 16);
    // Each chunk ends at (and includes) an LF, except possibly the last one.
    for line in content.split_inclusive(|&byte| byte == b'\n') {
        match line {
            [body @ .., b'\n'] if body.last() != Some(&b'\r') => {
                out.extend_from_slice(body);
                out.extend_from_slice(b"\r\n");
            }
            _ => out.extend_from_slice(line),
        }
    }
    out
}
8890
8891/// Run a configured `clean`/`smudge` command as a subprocess, feeding `content`
8892/// on stdin and returning its stdout. Errors carry enough context for the
8893/// caller to decide whether the failure is fatal (required filter) or should be
8894/// silently ignored (optional filter passthrough).
8895fn run_filter_command(command: &str, path: &[u8], content: &[u8]) -> Result<Vec<u8>> {
8896    // Git expands `%f` in the filter command to the path of the file being
8897    // filtered (quoted). We perform the same substitution.
8898    let display_path = String::from_utf8_lossy(path);
8899    let expanded = command.replace("%f", &shell_quote(&display_path));
8900    // Run through the platform shell so pipelines / arguments in the configured
8901    // command behave the same way git's `run_command`-with-shell does.
8902    let (shell, flag) = if cfg!(windows) {
8903        ("cmd", "/C")
8904    } else {
8905        ("/bin/sh", "-c")
8906    };
8907    let mut child = Command::new(shell)
8908        .arg(flag)
8909        .arg(&expanded)
8910        .stdin(Stdio::piped())
8911        .stdout(Stdio::piped())
8912        .stderr(Stdio::piped())
8913        .spawn()
8914        .map_err(|err| GitError::Command(format!("failed to spawn filter `{command}`: {err}")))?;
8915    // Write the content to the child's stdin on a separate thread so we never
8916    // deadlock against a filter that streams output before consuming all input.
8917    let mut stdin = child
8918        .stdin
8919        .take()
8920        .ok_or_else(|| GitError::Command(format!("filter `{command}` stdin unavailable")))?;
8921    let payload = content.to_vec();
8922    let writer = std::thread::spawn(move || {
8923        let _ = stdin.write_all(&payload);
8924        // Dropping `stdin` here closes the pipe so the child sees EOF.
8925    });
8926    let output = child
8927        .wait_with_output()
8928        .map_err(|err| GitError::Command(format!("filter `{command}` failed: {err}")))?;
8929    // Join the writer; its own errors (e.g. broken pipe) are non-fatal because
8930    // the child's exit status is the authoritative signal.
8931    let _ = writer.join();
8932    if !output.status.success() {
8933        let stderr = String::from_utf8_lossy(&output.stderr);
8934        return Err(GitError::Command(format!(
8935            "filter `{command}` exited with {}: {}",
8936            output.status,
8937            stderr.trim()
8938        )));
8939    }
8940    Ok(output.stdout)
8941}
8942
8943/// Minimal POSIX single-quote escaping for substituting `%f` into a shell
8944/// command (used only for the path passed to driver filters).
8945fn shell_quote(value: &str) -> String {
8946    let mut out = String::with_capacity(value.len() + 2);
8947    out.push('\'');
8948    for ch in value.chars() {
8949        if ch == '\'' {
8950            out.push_str("'\\''");
8951        } else {
8952            out.push(ch);
8953        }
8954    }
8955    out.push('\'');
8956    out
8957}
8958
8959/// Apply the *clean* conversion to `content` for `path` (worktree -> blob):
8960/// first the configured `filter.<name>.clean` driver (if any), then CRLF->LF
8961/// normalization when EOL conversion applies.
8962///
8963/// `config` is the repository config (`GitConfig`) and `path` is the
8964/// repository-relative path of the file (forward-slash separated, e.g.
8965/// `src/main.rs`). When no filter or EOL conversion applies the input is
8966/// returned unchanged.
8967///
8968/// A *required* driver (`filter.<name>.required=true`) whose `clean` command is
8969/// missing or fails produces a [`GitError::Command`]; a non-required driver
8970/// failure (or absence of a `clean` command) passes the content through
8971/// unfiltered, matching git.
8972pub fn apply_clean_filter(
8973    worktree_root: impl AsRef<Path>,
8974    git_dir: impl AsRef<Path>,
8975    config: &GitConfig,
8976    path: &[u8],
8977    content: &[u8],
8978) -> Result<Vec<u8>> {
8979    // On clean the worktree file exists, so the live `.gitattributes` chain is
8980    // authoritative. `git_dir` is accepted for symmetry with the smudge entry
8981    // point (which falls back to the index) and for future use.
8982    let _ = git_dir.as_ref();
8983    let checks = filter_attribute_checks(worktree_root.as_ref(), path)?;
8984    apply_clean_filter_with_attributes(config, &checks, path, content)
8985}
8986
8987/// A reusable handle that captures the worktree's `.gitattributes` chain once so
8988/// repeated clean-filter calls (e.g. `hash-object --stdin-paths` hashing many
8989/// paths in one process) don't re-walk the worktree and re-read every
8990/// `.gitattributes`/global config per path.
8991///
8992/// Build it once with [`WorktreeAttributes::from_worktree_root`], then call
8993/// [`WorktreeAttributes::apply_clean_filter`] per path. This mirrors
8994/// [`apply_clean_filter`] exactly except the expensive attribute-source scan is
8995/// amortized across calls.
8996pub struct WorktreeAttributes {
8997    matcher: AttributeMatcher,
8998}
8999
9000impl WorktreeAttributes {
9001    /// Read the worktree's attribute sources once (global/`core.attributesFile`,
9002    /// every in-tree `.gitattributes`, and `$GIT_DIR/info/attributes`).
9003    pub fn from_worktree_root(worktree_root: impl AsRef<Path>) -> Result<Self> {
9004        Ok(Self {
9005            matcher: AttributeMatcher::from_worktree_root(worktree_root.as_ref())?,
9006        })
9007    }
9008
9009    /// Apply the clean conversion to `content` for `path`, reusing the cached
9010    /// attribute chain. Behaviourally identical to [`apply_clean_filter`].
9011    pub fn apply_clean_filter(
9012        &self,
9013        config: &GitConfig,
9014        path: &[u8],
9015        content: &[u8],
9016    ) -> Result<Vec<u8>> {
9017        let checks = self
9018            .matcher
9019            .attributes_for_path(path, &filter_attribute_names(), false);
9020        apply_clean_filter_with_attributes(config, &checks, path, content)
9021    }
9022}
9023
9024/// A reusable handle that captures a *tree's* `.gitattributes` chain once so
9025/// repeated smudge-filter calls (e.g. `git archive` streaming every blob in a
9026/// tree) resolve attributes from the tree being processed rather than the live
9027/// worktree.
9028///
9029/// This is the attribute direction `git archive` uses: upstream unpacks the
9030/// archived tree into a scratch index and sets `GIT_ATTR_INDEX`, so the
9031/// `.gitattributes` that govern conversion come from the *archived tree* (plus
9032/// the global/`core.attributesFile` chain and `$GIT_DIR/info/attributes`), not
9033/// from whatever happens to be checked out. `--worktree-attributes` callers
9034/// should use [`WorktreeAttributes`] instead.
9035///
9036/// Build it once with [`TreeAttributes::from_tree`], then call
9037/// [`TreeAttributes::apply_smudge_filter`] per blob. Behaviourally this mirrors
9038/// [`apply_smudge_filter`] except the attribute source is the supplied tree and
9039/// the expensive source scan is amortized across calls.
9040pub struct TreeAttributes {
9041    matcher: AttributeMatcher,
9042}
9043
9044impl TreeAttributes {
9045    /// Read the attribute sources for `tree_oid` once: the global /
9046    /// `core.attributesFile` chain, every `.gitattributes` blob found while
9047    /// walking `tree_oid`, and `$GIT_DIR/info/attributes`.
9048    ///
9049    /// `attr_root` locates the global config (`read_configured_attributes`);
9050    /// pass the worktree root for a non-bare repo, or the git dir for a bare
9051    /// one. `git_dir` locates `info/attributes` directly (so this works for bare
9052    /// repos, where there is no nested `.git`). No worktree `.gitattributes`
9053    /// files are read — use [`WorktreeAttributes`] for the
9054    /// `--worktree-attributes` direction.
9055    pub fn from_tree(
9056        attr_root: impl AsRef<Path>,
9057        git_dir: impl AsRef<Path>,
9058        db: &FileObjectDatabase,
9059        format: ObjectFormat,
9060        tree_oid: &ObjectId,
9061    ) -> Result<Self> {
9062        let attr_root = attr_root.as_ref();
9063        let mut matcher = AttributeMatcher::default();
9064        if !matcher.read_configured_attributes(attr_root) {
9065            matcher.read_default_global_attributes();
9066        }
9067        collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
9068        read_attribute_patterns(
9069            git_dir.as_ref().join("info").join("attributes"),
9070            &mut matcher,
9071            &[],
9072            b"info/attributes",
9073        );
9074        Ok(Self { matcher })
9075    }
9076
9077    /// Apply the smudge conversion (blob -> worktree: EOL `LF`->`CRLF` plus any
9078    /// configured `filter.<name>.smudge` driver) to `content` for `path`,
9079    /// reusing the cached attribute chain. Behaviourally identical to
9080    /// [`apply_smudge_filter`] except attributes come from the tree this handle
9081    /// was built from.
9082    pub fn apply_smudge_filter(
9083        &self,
9084        config: &GitConfig,
9085        path: &[u8],
9086        content: &[u8],
9087    ) -> Result<Vec<u8>> {
9088        let checks = self
9089            .matcher
9090            .attributes_for_path(path, &filter_attribute_names(), false);
9091        apply_smudge_filter_with_attributes(config, &checks, path, content)
9092    }
9093
9094    /// True when `path` has the `export-subst` attribute set (git's
9095    /// `check_attr_export_subst`), meaning `git archive` should run
9096    /// `$Format:…$` keyword substitution on its content.
9097    pub fn export_subst_for_path(&self, path: &[u8]) -> bool {
9098        self.attribute_is_set(path, b"export-subst")
9099    }
9100
9101    /// True when `path` has the `export-ignore` attribute set (git's
9102    /// `check_attr_export_ignore`), meaning `git archive` should omit the path
9103    /// (and, for a directory, its whole subtree) from the archive.
9104    pub fn export_ignore_for_path(&self, path: &[u8]) -> bool {
9105        self.attribute_is_set(path, b"export-ignore")
9106    }
9107
9108    fn attribute_is_set(&self, path: &[u8], attribute: &[u8]) -> bool {
9109        let requested = [attribute.to_vec()];
9110        let checks = self.matcher.attributes_for_path(path, &requested, false);
9111        matches!(
9112            checks.first().and_then(|check| check.state.as_ref()),
9113            Some(AttributeState::Set)
9114        )
9115    }
9116
9117    /// The `diff` attribute state for `path` (`Set` for `diff`, `Unset` for
9118    /// `-diff`, `Value(name)` for `diff=<name>`, `None` when unspecified). Used
9119    /// by `git archive`'s zip backend to classify text vs. binary via the
9120    /// path's userdiff driver.
9121    pub fn diff_attribute_for_path(&self, path: &[u8]) -> Option<AttributeState> {
9122        let requested = [b"diff".to_vec()];
9123        let checks = self.matcher.attributes_for_path(path, &requested, false);
9124        checks.into_iter().next().and_then(|check| check.state)
9125    }
9126}
9127
9128/// Like [`apply_clean_filter`] but takes already-resolved attribute checks,
9129/// letting callers that have computed attributes once reuse them.
9130pub fn apply_clean_filter_with_attributes(
9131    config: &GitConfig,
9132    attributes: &[AttributeCheck],
9133    path: &[u8],
9134    content: &[u8],
9135) -> Result<Vec<u8>> {
9136    Ok(apply_clean_filter_with_attributes_cow(config, attributes, path, content)?.into_owned())
9137}
9138
9139/// Borrow-first variant of [`apply_clean_filter_with_attributes`].
9140///
9141/// When no filter or EOL conversion changes the content, the returned value
9142/// borrows `content`; callers that can consume a [`Cow`] avoid allocating for
9143/// the common pass-through case.
9144pub fn apply_clean_filter_with_attributes_cow<'a>(
9145    config: &GitConfig,
9146    attributes: &[AttributeCheck],
9147    path: &[u8],
9148    content: &'a [u8],
9149) -> Result<Cow<'a, [u8]>> {
9150    apply_clean_filter_with_attributes_cow_safecrlf(
9151        config,
9152        attributes,
9153        path,
9154        content,
9155        ConvFlags::Off,
9156        SafeCrlfIndexBlob::None,
9157    )
9158}
9159
9160/// How the safecrlf check should learn whether this path's *current index blob*
9161/// already contains a CRLF (git's `has_crlf_in_index`). Only consulted on the
9162/// `text=auto` / `core.autocrlf` path.
9163pub enum SafeCrlfIndexBlob<'a> {
9164    /// No index blob is available (the staging caller has none, or safecrlf is
9165    /// off) — treated as "no CRLF in index".
9166    None,
9167    /// The path's current index blob, read on demand from this object database
9168    /// only when the auto-crlf decision actually needs it.
9169    Lookup {
9170        odb: &'a FileObjectDatabase,
9171        oid: ObjectId,
9172    },
9173}
9174
9175impl SafeCrlfIndexBlob<'_> {
9176    fn has_crlf(&self) -> bool {
9177        match self {
9178            SafeCrlfIndexBlob::None => false,
9179            SafeCrlfIndexBlob::Lookup { odb, oid } => has_crlf_in_index(odb, oid),
9180        }
9181    }
9182}
9183
9184/// [`apply_clean_filter_with_attributes_cow`] plus git's additive `core.safecrlf`
9185/// round-trip warning (convert.c `crlf_to_git`).
9186///
9187/// The conversion result is byte-for-byte identical to the plain variant;
9188/// `flags`/`index_blob` only drive the stderr warning git prints when a
9189/// CRLF<->LF round-trip would not be reversible. The warning is computed on the
9190/// *post-driver, pre-EOL-conversion* content, matching git's ordering in
9191/// `convert_to_git` (apply_filter -> crlf_to_git).
9192pub fn apply_clean_filter_with_attributes_cow_safecrlf<'a>(
9193    config: &GitConfig,
9194    attributes: &[AttributeCheck],
9195    path: &[u8],
9196    content: &'a [u8],
9197    flags: ConvFlags,
9198    index_blob: SafeCrlfIndexBlob<'_>,
9199) -> Result<Cow<'a, [u8]>> {
9200    let plan = ContentFilterPlan::resolve(config, attributes);
9201    let mut data = Cow::Borrowed(content);
9202    if let Some(driver) = &plan.driver {
9203        data = run_driver(driver, driver.clean.as_deref(), path, data)?;
9204    }
9205    // The safecrlf check scans the (post-driver) buffer once for line-ending
9206    // stats. Gate it tightly so the extra scan never runs on the dominant
9207    // pass-through paths: only when safecrlf is enabled, the path is a real
9208    // conversion candidate (not `CRLF_BINARY`), and the buffer is non-empty.
9209    if flags != ConvFlags::Off && !data.is_empty() && plan.safecrlf_applies() {
9210        let old_stats = gather_convert_stats(&data);
9211        plan.check_safe_crlf_stats(&old_stats, index_blob.has_crlf(), flags, path)?;
9212    }
9213    if plan.convert_eol(&data) {
9214        data = convert_crlf_to_lf_cow(data);
9215    }
9216    Ok(data)
9217}
9218
9219/// Apply the *smudge* conversion to `content` for `path` (blob -> worktree):
9220/// first LF->CRLF when EOL conversion applies, then the configured
9221/// `filter.<name>.smudge` driver (if any).
9222///
9223/// Semantics mirror [`apply_clean_filter`]: a required driver with a missing or
9224/// failing `smudge` command errors, while a non-required one passes the content
9225/// through.
9226pub fn apply_smudge_filter(
9227    worktree_root: impl AsRef<Path>,
9228    git_dir: impl AsRef<Path>,
9229    format: ObjectFormat,
9230    config: &GitConfig,
9231    path: &[u8],
9232    content: &[u8],
9233) -> Result<Vec<u8>> {
9234    // On smudge (checkout) the worktree file may not exist yet, so resolve the
9235    // attributes from the `.gitattributes` recorded in the index.
9236    let checks =
9237        smudge_attribute_checks_from_index(worktree_root.as_ref(), git_dir.as_ref(), format, path)?;
9238    apply_smudge_filter_with_attributes(config, &checks, path, content)
9239}
9240
9241/// Like [`apply_smudge_filter`] but takes already-resolved attribute checks.
9242pub fn apply_smudge_filter_with_attributes(
9243    config: &GitConfig,
9244    attributes: &[AttributeCheck],
9245    path: &[u8],
9246    content: &[u8],
9247) -> Result<Vec<u8>> {
9248    Ok(apply_smudge_filter_with_attributes_cow(config, attributes, path, content)?.into_owned())
9249}
9250
9251/// Borrow-first variant of [`apply_smudge_filter_with_attributes`].
9252///
9253/// When no filter or EOL conversion changes the content, the returned value
9254/// borrows `content`; callers that can consume a [`Cow`] avoid allocating for
9255/// the common pass-through case.
9256pub fn apply_smudge_filter_with_attributes_cow<'a>(
9257    config: &GitConfig,
9258    attributes: &[AttributeCheck],
9259    path: &[u8],
9260    content: &'a [u8],
9261) -> Result<Cow<'a, [u8]>> {
9262    let plan = ContentFilterPlan::resolve(config, attributes);
9263    let mut data = Cow::Borrowed(content);
9264    if plan.eol == EolConversion::Crlf
9265        && plan.convert_eol(&data)
9266        && plan.will_convert_lf_to_crlf(&data)
9267    {
9268        data = Cow::Owned(convert_lf_to_crlf(&data));
9269    }
9270    if let Some(driver) = &plan.driver {
9271        data = run_driver(driver, driver.smudge.as_deref(), path, data)?;
9272    }
9273    Ok(data)
9274}
9275
9276/// Execute one direction of a driver filter, honouring the `required` flag.
9277fn run_driver<'a>(
9278    driver: &FilterDriver,
9279    command: Option<&str>,
9280    path: &[u8],
9281    content: Cow<'a, [u8]>,
9282) -> Result<Cow<'a, [u8]>> {
9283    let Some(command) = command else {
9284        // No command in this direction. Required filters must error; optional
9285        // ones pass content through unchanged.
9286        if driver.required {
9287            return Err(GitError::Command(format!(
9288                "required filter `{}` has no configured command for this direction",
9289                String::from_utf8_lossy(&driver.name)
9290            )));
9291        }
9292        return Ok(content);
9293    };
9294    match run_filter_command(command, path, &content) {
9295        Ok(output) => Ok(Cow::Owned(output)),
9296        Err(err) => {
9297            if driver.required {
9298                Err(err)
9299            } else {
9300                // Non-required filter failure: fall back to the unfiltered
9301                // content, matching git's behaviour.
9302                Ok(content)
9303            }
9304        }
9305    }
9306}
9307
9308/// Compute the attributes relevant to content filtering (`text`, `eol`,
9309/// `filter`) for `path` from the worktree `.gitattributes` chain.
9310fn filter_attribute_checks(worktree_root: &Path, path: &[u8]) -> Result<Vec<AttributeCheck>> {
9311    let requested = filter_attribute_names();
9312    let mut matcher = AttributeMatcher::default();
9313    if !matcher.read_configured_attributes(worktree_root) {
9314        matcher.read_default_global_attributes();
9315    }
9316    read_dir_attribute_patterns_for_base(worktree_root, &[], &mut matcher)?;
9317    let mut prefix = Vec::new();
9318    let mut parts = path.split(|byte| *byte == b'/').peekable();
9319    while let Some(part) = parts.next() {
9320        if parts.peek().is_none() {
9321            break;
9322        }
9323        if !prefix.is_empty() {
9324            prefix.push(b'/');
9325        }
9326        prefix.extend_from_slice(part);
9327        let dir = worktree_root.join(repo_path_to_os_path(&prefix)?);
9328        read_dir_attribute_patterns_for_base(&dir, &prefix, &mut matcher)?;
9329    }
9330    read_attribute_patterns(
9331        worktree_root.join(".git").join("info").join("attributes"),
9332        &mut matcher,
9333        &[],
9334        b".git/info/attributes",
9335    );
9336    Ok(matcher.attributes_for_path(path, &requested, false))
9337}
9338
9339/// Compute filtering attributes for a checkout (blob -> worktree).
9340///
9341/// `git checkout -- <pathspec>` / `git restore` materialize through git's
9342/// **default** attr direction, which is `GIT_ATTR_CHECKIN` (attr.c: the static
9343/// `direction` is zero-initialized and `builtin/checkout.c` never overrides it
9344/// for the pathspec path). Under that direction `read_attr` reads each
9345/// `.gitattributes` frame from the **worktree file first**, falling back to the
9346/// staged blob only when no worktree file exists at that directory level
9347/// (sparse-checkout). This is the precedence the smudge filter must use:
9348/// t0027 commits an *empty* root `.gitattributes`, then overwrites the worktree
9349/// copy with `*.txt text eol=crlf` *without re-staging* — and git's checkout
9350/// still honours the worktree copy. Reading the index alone (or index-first)
9351/// made checkout under-convert line endings, because the staged blob was empty.
9352fn smudge_attribute_checks_from_index(
9353    worktree_root: &Path,
9354    git_dir: &Path,
9355    format: ObjectFormat,
9356    path: &[u8],
9357) -> Result<Vec<AttributeCheck>> {
9358    let requested = filter_attribute_names();
9359    let mut matcher = AttributeMatcher::default();
9360    if !matcher.read_configured_attributes(worktree_root) {
9361        matcher.read_default_global_attributes();
9362    }
9363
9364    // Build the set of `.gitattributes` blobs the index carries, keyed by the
9365    // directory they govern, so each ancestry frame can prefer the staged copy.
9366    let index_attributes = index_gitattributes_by_base(git_dir, format)?;
9367
9368    // Walk root -> ... -> the file's parent directory, folding each frame's
9369    // `.gitattributes` in shallow-to-deep order so deeper directories win.
9370    fold_checkout_attribute_frame(worktree_root, &[], &index_attributes, &mut matcher)?;
9371    let mut prefix = Vec::new();
9372    let mut parts = path.split(|byte| *byte == b'/').peekable();
9373    while let Some(part) = parts.next() {
9374        if parts.peek().is_none() {
9375            break;
9376        }
9377        if !prefix.is_empty() {
9378            prefix.push(b'/');
9379        }
9380        prefix.extend_from_slice(part);
9381        let dir = worktree_root.join(repo_path_to_os_path(&prefix)?);
9382        fold_checkout_attribute_frame(&dir, &prefix, &index_attributes, &mut matcher)?;
9383    }
9384
9385    read_attribute_patterns(
9386        worktree_root.join(".git").join("info").join("attributes"),
9387        &mut matcher,
9388        &[],
9389        b".git/info/attributes",
9390    );
9391    Ok(matcher.attributes_for_path(path, &requested, false))
9392}
9393
9394/// Fold the `.gitattributes` governing directory `base` (whose on-disk location
9395/// is `dir`) into `matcher`, preferring the worktree file and falling back to
9396/// the staged blob. Mirrors one attr-stack frame under `GIT_ATTR_CHECKIN`
9397/// (git's default direction, used by `checkout -- <pathspec>` / `restore`).
9398fn fold_checkout_attribute_frame(
9399    dir: &Path,
9400    base: &[u8],
9401    index_attributes: &BTreeMap<Vec<u8>, Vec<u8>>,
9402    matcher: &mut AttributeMatcher,
9403) -> Result<()> {
9404    let worktree_file = dir.join(".gitattributes");
9405    if let Ok(contents) = fs::read(&worktree_file) {
9406        // A worktree `.gitattributes` exists at this level: it wins outright
9407        // (git only consults the index when the worktree file is absent).
9408        read_attribute_patterns_from_bytes(&contents, matcher, base);
9409    } else if let Some(contents) = index_attributes.get(base) {
9410        read_attribute_patterns_from_bytes(contents, matcher, base);
9411    }
9412    Ok(())
9413}
9414
9415/// Read every staged `.gitattributes` blob, keyed by the repo-relative directory
9416/// it governs (`""` for the worktree root). Stage-0 blob entries only.
9417fn index_gitattributes_by_base(
9418    git_dir: &Path,
9419    format: ObjectFormat,
9420) -> Result<BTreeMap<Vec<u8>, Vec<u8>>> {
9421    let mut map = BTreeMap::new();
9422    let index_path = repository_index_path(git_dir);
9423    if !index_path.exists() {
9424        return Ok(map);
9425    }
9426    let db = FileObjectDatabase::from_git_dir(git_dir, format);
9427    let entries = Index::parse(&fs::read(index_path)?, format)?.entries;
9428    for entry in entries {
9429        let is_attributes_file =
9430            entry.path == b".gitattributes" || entry.path.as_bytes().ends_with(b"/.gitattributes");
9431        if index_entry_stage(&entry) != 0
9432            || tree_entry_object_type(entry.mode) != ObjectType::Blob
9433            || !is_attributes_file
9434        {
9435            continue;
9436        }
9437        let base = match entry.path.as_bytes().strip_suffix(b".gitattributes") {
9438            Some(b"") => Vec::new(),
9439            Some(parent) => parent.strip_suffix(b"/").unwrap_or(parent).to_vec(),
9440            None => continue,
9441        };
9442        let object = db
9443            .read_object(&entry.oid)
9444            .map_err(|err| expect_missing_object_kind(err, entry.oid, MissingObjectKind::Blob))?;
9445        if object.object_type == ObjectType::Blob {
9446            map.insert(base, object.body.clone());
9447        }
9448    }
9449    Ok(map)
9450}
9451
/// The attribute names consulted when resolving content filtering, in the
/// order they are requested: `text`, `crlf`, `eol`, `filter`.
fn filter_attribute_names() -> Vec<Vec<u8>> {
    // `crlf` is git's legacy alias for `text` (convert.c registers both); it is
    // the fallback when `text` is unspecified, so it has to be resolved too.
    const NAMES: [&[u8]; 4] = [b"text", b"crlf", b"eol", b"filter"];
    NAMES.iter().map(|name| name.to_vec()).collect()
}
9462
9463// ---------------------------------------------------------------------------
9464// `ls-files --eol` line-ending information
9465//
9466// Git's `git ls-files --eol` prints, for each path, three fields:
9467//   i/<stat>  — line-ending statistics of the *index* blob content
9468//   w/<stat>  — line-ending statistics of the *worktree* file content
9469//   attr/<a>  — the resolved crlf/eol attribute action (attributes only, no
9470//               config) — `get_convert_attr_ascii` in convert.c
9471// The two stat fields mirror `gather_convert_stats_ascii`; the attr field
9472// mirrors `convert_attrs` up to `ca->attr_action` (i.e. *before* the config
9473// derived `text` -> input/crlf substitution and the `core.autocrlf` fallback).
9474// ---------------------------------------------------------------------------
9475
/// Line-ending and printability counters for a byte buffer, mirroring the
/// statistics convert.c `gather_stats` collects.
#[derive(Clone)]
struct ConvertStats {
    /// NUL bytes (each is also counted in `nonprintable`).
    nul: u32,
    /// CR bytes not immediately followed by LF.
    lonecr: u32,
    /// LF bytes not preceded by CR.
    lonelf: u32,
    /// CR LF pairs.
    crlf: u32,
    /// Bytes treated as printable.
    printable: u32,
    /// Bytes treated as non-printable.
    nonprintable: u32,
}

/// Scan `buf` once and tally its line endings and printable/non-printable
/// bytes.
///
/// Classification of each byte:
/// * `\r\n` counts as one CRLF; any other `\r` is a lone CR; `\n` is a lone LF;
/// * NUL counts as both `nul` and non-printable;
/// * BS, HT, FF and ESC are printable; every other control byte below 32 and
///   DEL (127) are non-printable;
/// * everything else, including bytes >= 128, is printable.
///
/// A buffer whose last byte is `^Z` (0x1a) has that trailing byte excluded
/// from the non-printable count.
fn gather_convert_stats(buf: &[u8]) -> ConvertStats {
    let mut stats = ConvertStats {
        nul: 0,
        lonecr: 0,
        lonelf: 0,
        crlf: 0,
        printable: 0,
        nonprintable: 0,
    };

    let mut bytes = buf.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        match byte {
            b'\r' => {
                // Consume the LF of a CR LF pair so it is not counted again.
                if bytes.next_if_eq(&b'\n').is_some() {
                    stats.crlf += 1;
                } else {
                    stats.lonecr += 1;
                }
            }
            b'\n' => stats.lonelf += 1,
            0 => {
                stats.nul += 1;
                stats.nonprintable += 1;
            }
            // BS, HT, FF and ESC are printable.
            0x08 | 0x09 | 0x0c | 0x1b => stats.printable += 1,
            // Remaining control bytes, and DEL.
            0x01..=0x1f | 0x7f => stats.nonprintable += 1,
            _ => stats.printable += 1,
        }
    }

    // A trailing EOF marker (^Z) is not held against the content.
    if let Some(&0x1a) = buf.last() {
        stats.nonprintable = stats.nonprintable.saturating_sub(1);
    }
    stats
}
9538
9539/// Mirror of convert.c `has_crlf_in_index`: whether the blob currently recorded
9540/// in the index for this path is non-binary text containing a CRLF. Used only by
9541/// the auto-crlf safecrlf decision to keep an already-CRLF index blob from being
9542/// silently collapsed. A missing/unreadable blob (or a non-blob entry) counts as
9543/// "no CRLF", matching git's `read_blob_data_from_index` returning NULL.
9544fn has_crlf_in_index(odb: &FileObjectDatabase, oid: &ObjectId) -> bool {
9545    let Ok(object) = odb.read_object(oid) else {
9546        return false;
9547    };
9548    if object.object_type != ObjectType::Blob {
9549        return false;
9550    }
9551    let data = &object.body;
9552    // git short-circuits on the first '\r' via memchr before gathering stats.
9553    if !data.contains(&b'\r') {
9554        return false;
9555    }
9556    let stats = gather_convert_stats(data);
9557    !convert_is_binary(&stats) && stats.crlf > 0
9558}
9559
9560/// Mirror of convert.c `convert_is_binary`: a lone CR or NUL, or a high
9561/// non-printable ratio, marks the content as binary.
9562fn convert_is_binary(stats: &ConvertStats) -> bool {
9563    if stats.lonecr > 0 {
9564        return true;
9565    }
9566    if stats.nul > 0 {
9567        return true;
9568    }
9569    (stats.printable >> 7) < stats.nonprintable
9570}
9571
9572/// The `core.safecrlf` round-trip-warning mode, mirroring git's
9573/// `global_conv_flags_eol` (environment.c). git's *default* — when
9574/// `core.safecrlf` is unset — is [`ConvFlags::Warn`], so the warning fires even
9575/// without any explicit config.
9576#[derive(Debug, Clone, Copy, PartialEq, Eq)]
9577pub enum ConvFlags {
9578    /// `core.safecrlf=false`: never warn.
9579    Off,
9580    /// `core.safecrlf=warn` (and the unset default): emit a warning when a
9581    /// CRLF<->LF round-trip would not be reversible.
9582    Warn,
9583    /// `core.safecrlf=true`: die instead of warn.
9584    Die,
9585}
9586
9587impl ConvFlags {
9588    /// Resolve `core.safecrlf` from config, mirroring environment.c
9589    /// `git_default_core_config`: `warn` -> [`ConvFlags::Warn`], a boolean-true
9590    /// value -> [`ConvFlags::Die`], a boolean-false value -> [`ConvFlags::Off`].
9591    /// When the key is absent git leaves `global_conv_flags_eol` at its initial
9592    /// [`ConvFlags::Warn`], so unset also resolves to [`ConvFlags::Warn`].
9593    pub fn from_config(config: &GitConfig) -> Self {
9594        match config.get("core", None, "safecrlf") {
9595            Some(value) if value.eq_ignore_ascii_case("warn") => ConvFlags::Warn,
9596            Some(_) => {
9597                if config.get_bool("core", None, "safecrlf") == Some(true) {
9598                    ConvFlags::Die
9599                } else {
9600                    ConvFlags::Off
9601                }
9602            }
9603            None => ConvFlags::Warn,
9604        }
9605    }
9606}
9607
9608/// Mirror of convert.c `check_global_conv_flags_eol`: compare the pre-conversion
9609/// `old_stats` against the simulated round-trip `new_stats` and, when the
9610/// CRLF/LF content would not survive a clean+smudge cycle, warn (or die under
9611/// `core.safecrlf=true`).
9612///
9613/// Returns `Err(GitError::Exit(128))` when `flags` is [`ConvFlags::Die`] and the
9614/// round-trip is irreversible (git `die`s with exit 128 here); otherwise prints
9615/// the warning to stderr and returns `Ok(())`. This is a pure stderr-side
9616/// effect: it never changes the bytes written to the object store.
9617fn check_safe_crlf(
9618    old_stats: &ConvertStats,
9619    new_stats: &ConvertStats,
9620    flags: ConvFlags,
9621    path: &[u8],
9622) -> Result<()> {
9623    if flags == ConvFlags::Off {
9624        return Ok(());
9625    }
9626    let display = String::from_utf8_lossy(path);
9627    if old_stats.crlf > 0 && new_stats.crlf == 0 {
9628        // CRLFs would not be restored by checkout.
9629        match flags {
9630            ConvFlags::Die => {
9631                eprintln!("fatal: CRLF would be replaced by LF in {display}");
9632                return Err(GitError::Exit(128));
9633            }
9634            ConvFlags::Warn => {
9635                eprintln!(
9636                    "warning: in the working copy of '{display}', CRLF will be replaced by LF the next time Git touches it"
9637                );
9638            }
9639            ConvFlags::Off => unreachable!("handled above"),
9640        }
9641    } else if old_stats.lonelf > 0 && new_stats.lonelf == 0 {
9642        // CRLFs would be added by checkout.
9643        match flags {
9644            ConvFlags::Die => {
9645                eprintln!("fatal: LF would be replaced by CRLF in {display}");
9646                return Err(GitError::Exit(128));
9647            }
9648            ConvFlags::Warn => {
9649                eprintln!(
9650                    "warning: in the working copy of '{display}', LF will be replaced by CRLF the next time Git touches it"
9651                );
9652            }
9653            ConvFlags::Off => unreachable!("handled above"),
9654        }
9655    }
9656    Ok(())
9657}
9658
9659/// Compute the `i/` or `w/` stat string for `content`, mirroring
9660/// convert.c `gather_convert_stats_ascii`.
9661fn convert_stats_ascii(content: &[u8]) -> &'static str {
9662    if content.is_empty() {
9663        return "none";
9664    }
9665    let stats = gather_convert_stats(content);
9666    if convert_is_binary(&stats) {
9667        return "-text";
9668    }
9669    match (stats.lonelf > 0, stats.crlf > 0) {
9670        (true, false) => "lf",
9671        (false, true) => "crlf",
9672        (true, true) => "mixed",
9673        (false, false) => "none",
9674    }
9675}
9676
9677/// The resolved crlf/eol attribute action for a path, mirroring convert.c
9678/// `convert_attrs` up to `ca->attr_action` (attributes only, no config), and
9679/// `get_convert_attr_ascii` for the ascii spelling.
9680fn convert_attr_ascii(checks: &[AttributeCheck]) -> &'static str {
9681    fn state_of<'a>(checks: &'a [AttributeCheck], name: &[u8]) -> Option<&'a AttributeState> {
9682        checks
9683            .iter()
9684            .find(|check| check.attribute == name)
9685            .and_then(|check| check.state.as_ref())
9686    }
9687
9688    // git_path_check_crlf: ATTR_TRUE -> TEXT, ATTR_FALSE -> BINARY,
9689    // ATTR_UNSET -> (fall through), "input" -> TEXT_INPUT, "auto" -> AUTO,
9690    // anything else -> UNDEFINED.
9691    #[derive(Clone, Copy, PartialEq)]
9692    enum Action {
9693        Undefined,
9694        Binary,
9695        Text,
9696        TextInput,
9697        TextCrlf,
9698        Auto,
9699        AutoCrlf,
9700        AutoInput,
9701    }
9702    fn check_crlf(state: Option<&AttributeState>) -> Action {
9703        match state {
9704            Some(AttributeState::Set) => Action::Text,
9705            Some(AttributeState::Unset) => Action::Binary,
9706            Some(AttributeState::Value(value)) if value == b"input" => Action::TextInput,
9707            Some(AttributeState::Value(value)) if value == b"auto" => Action::Auto,
9708            // ATTR_UNSET / any other value -> CRLF_UNDEFINED.
9709            _ => Action::Undefined,
9710        }
9711    }
9712
9713    // Resolve from the `text` attribute, then fall back to the legacy `crlf`
9714    // alias only when `text` left the action undefined.
9715    let mut action = check_crlf(state_of(checks, b"text"));
9716    if action == Action::Undefined {
9717        action = check_crlf(state_of(checks, b"crlf"));
9718    }
9719
9720    if action != Action::Binary {
9721        // git_path_check_eol: only "lf"/"crlf" values matter.
9722        let eol = match state_of(checks, b"eol") {
9723            Some(AttributeState::Value(value)) if value == b"lf" => Some(false),
9724            Some(AttributeState::Value(value)) if value == b"crlf" => Some(true),
9725            _ => None,
9726        };
9727        action = match (action, eol) {
9728            (Action::Auto, Some(false)) => Action::AutoInput,
9729            (Action::Auto, Some(true)) => Action::AutoCrlf,
9730            (_, Some(false)) if action != Action::Auto => Action::TextInput,
9731            (_, Some(true)) if action != Action::Auto => Action::TextCrlf,
9732            _ => action,
9733        };
9734    }
9735
9736    match action {
9737        Action::Undefined => "",
9738        Action::Binary => "-text",
9739        Action::Text => "text",
9740        Action::TextInput => "text eol=lf",
9741        Action::TextCrlf => "text eol=crlf",
9742        Action::Auto => "text=auto",
9743        Action::AutoCrlf => "text=auto eol=crlf",
9744        Action::AutoInput => "text=auto eol=lf",
9745    }
9746}
9747
/// The three `ls-files --eol` fields for a single path.
///
/// Every field is a `'static` label, so the value is cheap to clone and
/// compare.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EolInfo {
    /// Stat of the index blob (`i/...`); empty when there is no index blob.
    pub index: &'static str,
    /// Stat of the worktree file (`w/...`); empty when the file is absent.
    pub worktree: &'static str,
    /// Resolved crlf/eol attribute action (`attr/...`).
    pub attr: &'static str,
}

impl EolInfo {
    /// Format as git's `ls-files --eol` prefix: `i/%-5s w/%-5s attr/%-17s\t`.
    ///
    /// Each field is left-aligned and space-padded to its minimum width (5, 5
    /// and 17 characters); longer values are emitted in full, never truncated.
    /// The prefix always ends with a tab so the path can be appended directly.
    pub fn format_prefix(&self) -> String {
        format!(
            "i/{:<5} w/{:<5} attr/{:<17}\t",
            self.index, self.worktree, self.attr
        )
    }
}
9767
9768/// Compute the `ls-files --eol` info for `path`.
9769///
9770/// `index_content` is the raw index blob bytes (None when the path has no
9771/// index entry or is not a regular file). The worktree file is read from
9772/// `worktree_root/path`; if it is absent or not a regular file the `w/` field
9773/// is empty. Attributes are resolved from the worktree `.gitattributes` chain
9774/// via `attr_checks`.
9775pub fn eol_info_for_path(
9776    worktree_root: impl AsRef<Path>,
9777    path: &[u8],
9778    index_content: Option<&[u8]>,
9779    attr_checks: &[AttributeCheck],
9780) -> EolInfo {
9781    let index = index_content.map(convert_stats_ascii).unwrap_or("");
9782
9783    let worktree_root = worktree_root.as_ref();
9784    let worktree = match repo_path_to_os_path(path) {
9785        Ok(rel) => {
9786            let absolute = worktree_root.join(rel);
9787            match fs::symlink_metadata(&absolute) {
9788                // git: only regular files get a `w/` stat (lstat + S_ISREG).
9789                Ok(meta) if meta.file_type().is_file() => match fs::read(&absolute) {
9790                    Ok(content) => convert_stats_ascii_owned(&content),
9791                    Err(_) => "",
9792                },
9793                _ => "",
9794            }
9795        }
9796        Err(_) => "",
9797    };
9798
9799    let attr = convert_attr_ascii(attr_checks);
9800
9801    EolInfo {
9802        index,
9803        worktree,
9804        attr,
9805    }
9806}
9807
9808/// `convert_stats_ascii` over an owned buffer; the result is a `'static` str so
9809/// the buffer can be dropped.
9810fn convert_stats_ascii_owned(content: &[u8]) -> &'static str {
9811    convert_stats_ascii(content)
9812}
9813
9814/// Resolve the crlf/eol/text/filter attributes for `path` from the worktree
9815/// `.gitattributes` chain (the set `ls-files --eol` needs for its `attr/`
9816/// field).
9817pub fn eol_attribute_checks(
9818    worktree_root: impl AsRef<Path>,
9819    path: &[u8],
9820) -> Result<Vec<AttributeCheck>> {
9821    filter_attribute_checks(worktree_root.as_ref(), path)
9822}
9823
9824pub fn deleted_index_entries(
9825    worktree_root: impl AsRef<Path>,
9826    git_dir: impl AsRef<Path>,
9827    format: ObjectFormat,
9828) -> Result<Vec<IndexEntry>> {
9829    let worktree_root = worktree_root.as_ref();
9830    let git_dir = git_dir.as_ref();
9831    let index_path = repository_index_path(git_dir);
9832    if !index_path.exists() {
9833        return Ok(Vec::new());
9834    }
9835    let index = Index::parse(&fs::read(index_path)?, format)?;
9836    let mut deleted = Vec::new();
9837    for entry in index.entries {
9838        if !worktree_path(worktree_root, entry.path.as_bytes())?.exists() {
9839            deleted.push(entry);
9840        }
9841    }
9842    Ok(deleted)
9843}
9844
9845pub fn modified_index_entries(
9846    worktree_root: impl AsRef<Path>,
9847    git_dir: impl AsRef<Path>,
9848    format: ObjectFormat,
9849) -> Result<Vec<IndexEntry>> {
9850    let worktree_root = worktree_root.as_ref();
9851    let git_dir = git_dir.as_ref();
9852    let index_path = repository_index_path(git_dir);
9853    if !index_path.exists() {
9854        return Ok(Vec::new());
9855    }
9856    let index = Index::parse(&fs::read(&index_path)?, format)?;
9857    // Reuse the same racy-git stat shortcut here: build the cache from the index
9858    // we just parsed (no second parse) so the worktree walk can skip re-hashing
9859    // unchanged files. A cached oid is only trusted on a non-racy stat match, so
9860    // genuinely modified files still fall through to a hash and are reported.
9861    let stat_cache = IndexStatCache::from_index(&index, &index_path);
9862    let worktree = worktree_entries_with_stat_cache(
9863        worktree_root,
9864        git_dir,
9865        format,
9866        Some(&stat_cache),
9867        None,
9868        None,
9869    )?;
9870    let mut modified = Vec::new();
9871    for entry in index.entries {
9872        let Some(worktree_entry) = worktree.get(entry.path.as_bytes()) else {
9873            modified.push(entry);
9874            continue;
9875        };
9876        if worktree_entry.mode != entry.mode || worktree_entry.oid != entry.oid {
9877            modified.push(entry);
9878        }
9879    }
9880    Ok(modified)
9881}
9882
9883pub fn checkout_branch(
9884    worktree_root: impl AsRef<Path>,
9885    git_dir: impl AsRef<Path>,
9886    format: ObjectFormat,
9887    branch: &str,
9888    committer: Vec<u8>,
9889) -> Result<CheckoutResult> {
9890    let worktree_root = worktree_root.as_ref();
9891    let git_dir = git_dir.as_ref();
9892    let branch_ref = branch_ref_name(branch)?;
9893    let refs = FileRefStore::new(git_dir, format);
9894    let target = match sley_refs::resolve_ref_peeled(&refs, &branch_ref)? {
9895        Some(oid) => oid,
9896        None => {
9897            checkout_switch_head_symbolic(&refs, branch_ref, committer, branch, None, None)?;
9898            return Ok(CheckoutResult {
9899                branch: branch.into(),
9900                oid: ObjectId::null(format),
9901                files: 0,
9902            });
9903        }
9904    };
9905    let current_head = resolve_head_commit_oid(git_dir, format)?;
9906    let files = if current_head == Some(target) {
9907        0
9908    } else {
9909        checkout_commit_to_index_and_worktree(worktree_root, git_dir, format, &target)?
9910    };
9911    checkout_switch_head_symbolic(
9912        &refs,
9913        branch_ref,
9914        committer,
9915        branch,
9916        Some(target),
9917        Some(target),
9918    )?;
9919    Ok(CheckoutResult {
9920        branch: branch.into(),
9921        oid: target,
9922        files,
9923    })
9924}
9925
9926pub fn checkout_detached(
9927    worktree_root: impl AsRef<Path>,
9928    git_dir: impl AsRef<Path>,
9929    format: ObjectFormat,
9930    target: &ObjectId,
9931    committer: Vec<u8>,
9932    message: Vec<u8>,
9933) -> Result<CheckoutResult> {
9934    let worktree_root = worktree_root.as_ref();
9935    let git_dir = git_dir.as_ref();
9936    let files = checkout_commit_to_index_and_worktree(worktree_root, git_dir, format, target)?;
9937    let refs = FileRefStore::new(git_dir, format);
9938    let zero = ObjectId::null(format);
9939    let mut tx = refs.transaction();
9940    tx.update(RefUpdate {
9941        name: "HEAD".into(),
9942        expected: None,
9943        new: RefTarget::Direct(*target),
9944        reflog: Some(ReflogEntry {
9945            old_oid: zero,
9946            new_oid: *target,
9947            committer,
9948            message,
9949        }),
9950    });
9951    tx.commit()?;
9952    Ok(CheckoutResult {
9953        branch: target.to_string(),
9954        oid: *target,
9955        files,
9956    })
9957}
9958
9959/// Like [`checkout_branch`], but runs the smudge-side content filters
9960/// (`core.autocrlf`/`text`/`eol` EOL conversion and `filter.<name>.smudge`
9961/// drivers) on each blob as it is written to the worktree. `config` is the
9962/// repository config used to resolve the filters.
9963pub fn checkout_branch_filtered(
9964    worktree_root: impl AsRef<Path>,
9965    git_dir: impl AsRef<Path>,
9966    format: ObjectFormat,
9967    branch: &str,
9968    committer: Vec<u8>,
9969    config: &GitConfig,
9970) -> Result<CheckoutResult> {
9971    let worktree_root = worktree_root.as_ref();
9972    let git_dir = git_dir.as_ref();
9973    let branch_ref = branch_ref_name(branch)?;
9974    let refs = FileRefStore::new(git_dir, format);
9975    let target = match sley_refs::resolve_ref_peeled(&refs, &branch_ref)? {
9976        Some(oid) => oid,
9977        None => {
9978            checkout_switch_head_symbolic(&refs, branch_ref, committer, branch, None, None)?;
9979            return Ok(CheckoutResult {
9980                branch: branch.into(),
9981                oid: ObjectId::null(format),
9982                files: 0,
9983            });
9984        }
9985    };
9986    let current_head = resolve_head_commit_oid(git_dir, format)?;
9987    let files = if current_head == Some(target) {
9988        0
9989    } else {
9990        checkout_commit_to_index_and_worktree_filtered(
9991            worktree_root,
9992            git_dir,
9993            format,
9994            &target,
9995            Some(config),
9996        )?
9997    };
9998    checkout_switch_head_symbolic(
9999        &refs,
10000        branch_ref,
10001        committer,
10002        branch,
10003        Some(target),
10004        Some(target),
10005    )?;
10006    Ok(CheckoutResult {
10007        branch: branch.into(),
10008        oid: target,
10009        files,
10010    })
10011}
10012
10013/// Like [`checkout_detached`], but runs the smudge-side content filters (see
10014/// [`checkout_branch_filtered`]).
10015pub fn checkout_detached_filtered(
10016    worktree_root: impl AsRef<Path>,
10017    git_dir: impl AsRef<Path>,
10018    format: ObjectFormat,
10019    target: &ObjectId,
10020    committer: Vec<u8>,
10021    message: Vec<u8>,
10022    config: &GitConfig,
10023) -> Result<CheckoutResult> {
10024    let worktree_root = worktree_root.as_ref();
10025    let git_dir = git_dir.as_ref();
10026    let files = checkout_commit_to_index_and_worktree_filtered(
10027        worktree_root,
10028        git_dir,
10029        format,
10030        target,
10031        Some(config),
10032    )?;
10033    let refs = FileRefStore::new(git_dir, format);
10034    let zero = ObjectId::null(format);
10035    let mut tx = refs.transaction();
10036    tx.update(RefUpdate {
10037        name: "HEAD".into(),
10038        expected: None,
10039        new: RefTarget::Direct(*target),
10040        reflog: Some(ReflogEntry {
10041            old_oid: zero,
10042            new_oid: *target,
10043            committer,
10044            message,
10045        }),
10046    });
10047    tx.commit()?;
10048    Ok(CheckoutResult {
10049        branch: target.to_string(),
10050        oid: *target,
10051        files,
10052    })
10053}
10054
10055fn checkout_commit_to_index_and_worktree(
10056    worktree_root: &Path,
10057    git_dir: &Path,
10058    format: ObjectFormat,
10059    target: &ObjectId,
10060) -> Result<usize> {
10061    checkout_commit_to_index_and_worktree_filtered(worktree_root, git_dir, format, target, None)
10062}
10063
10064/// Like [`checkout_commit_to_index_and_worktree`] but optionally runs the
10065/// smudge-side content filters (see [`apply_smudge_filter`]) on each blob before
10066/// it is written to the worktree. Attribute lookups use the `.gitattributes`
10067/// recorded in the *target tree* so the rules of the checked-out commit apply.
10068fn checkout_commit_to_index_and_worktree_filtered(
10069    worktree_root: &Path,
10070    git_dir: &Path,
10071    format: ObjectFormat,
10072    target: &ObjectId,
10073    smudge_config: Option<&GitConfig>,
10074) -> Result<usize> {
10075    let mut dirty = false;
10076    stream_short_status(worktree_root, git_dir, format, |entry| {
10077        if !status_row_is_untracked_or_ignored(entry) {
10078            dirty = true;
10079            return Ok(StreamControl::Stop);
10080        }
10081        Ok(StreamControl::Continue)
10082    })?;
10083    if dirty {
10084        return Err(GitError::Transaction(
10085            "checkout requires a clean working tree".into(),
10086        ));
10087    }
10088    let db = FileObjectDatabase::from_git_dir(git_dir, format);
10089    let commit = read_commit(&db, format, target)?;
10090    let mut target_entries = BTreeMap::new();
10091    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
10092
10093    let attributes = smudge_config
10094        .map(|_| build_tree_attribute_matcher(worktree_root, &db, format, &commit.tree))
10095        .transpose()?;
10096
10097    for path in read_index_entries(git_dir, format)?.keys() {
10098        if !target_entries.contains_key(path) {
10099            remove_worktree_file(worktree_root, path)?;
10100        }
10101    }
10102
10103    let mut index_entries = Vec::new();
10104    for (path, entry) in &target_entries {
10105        // Gitlinks go through the shared materialization step (mkdir + zeroed
10106        // stat); smudge filters never apply to a submodule directory.
10107        if sley_index::is_gitlink(entry.mode) {
10108            index_entries.push(materialize_tree_entry(&db, worktree_root, path, entry)?);
10109            continue;
10110        }
10111        let object = read_expected_object(&db, &entry.oid, ObjectType::Blob)?;
10112        let body: Cow<'_, [u8]> = match (smudge_config, &attributes) {
10113            (Some(config), Some(matcher)) => {
10114                let checks = matcher.attributes_for_path(path, &filter_attribute_names(), false);
10115                apply_smudge_filter_with_attributes_cow(config, &checks, path, &object.body)?
10116            }
10117            _ => Cow::Borrowed(&object.body),
10118        };
10119        let file_path = worktree_path(worktree_root, path)?;
10120        prepare_blob_parent_dirs(worktree_root, &file_path)?;
10121        remove_existing_worktree_path(&file_path)?;
10122        fs::write(&file_path, &body)?;
10123        set_worktree_file_mode(&file_path, entry.mode)?;
10124        let metadata = fs::metadata(&file_path)?;
10125        let mut index_entry = index_entry_from_metadata(path.clone(), entry.oid, &metadata);
10126        index_entry.mode = entry.mode;
10127        index_entries.push(index_entry);
10128    }
10129    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
10130    fs::write(
10131        repository_index_path(git_dir),
10132        Index {
10133            version: 2,
10134            entries: index_entries,
10135            extensions: Vec::new(),
10136            checksum: None,
10137        }
10138        .write(format)?,
10139    )?;
10140    Ok(target_entries.len())
10141}
10142
10143/// Build an [`AttributeMatcher`] from the `.gitattributes` files contained in a
10144/// tree, plus the repo-level (`core.attributesFile`, `.git/info/attributes`)
10145/// sources, mirroring [`standard_attributes_for_path_from_tree`].
10146fn build_tree_attribute_matcher(
10147    worktree_root: &Path,
10148    db: &FileObjectDatabase,
10149    format: ObjectFormat,
10150    tree_oid: &ObjectId,
10151) -> Result<AttributeMatcher> {
10152    let mut matcher = AttributeMatcher::default();
10153    if !matcher.read_configured_attributes(worktree_root) {
10154        matcher.read_default_global_attributes();
10155    }
10156    collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
10157    read_attribute_patterns(
10158        worktree_root.join(".git").join("info").join("attributes"),
10159        &mut matcher,
10160        &[],
10161        b".git/info/attributes",
10162    );
10163    Ok(matcher)
10164}
10165
10166/// Sparse- and skip-worktree-aware variant of
10167/// [`checkout_commit_to_index_and_worktree`].
10168///
10169/// When `sparse` is `None` this behaves like the plain checkout except that it
10170/// preserves any pre-existing skip-worktree bits (so an already-sparse worktree
10171/// is not silently re-expanded). When `sparse` is `Some`, every target path is
10172/// additionally classified against the patterns: in-cone paths are written and
10173/// have their skip-worktree bit cleared, while out-of-cone paths are left out
10174/// of the worktree, get their skip-worktree bit set, and have any stale file
10175/// removed.
10176fn checkout_commit_to_index_and_worktree_sparse(
10177    worktree_root: &Path,
10178    git_dir: &Path,
10179    format: ObjectFormat,
10180    target: &ObjectId,
10181    sparse: Option<(&SparseCheckout, SparseCheckoutMode)>,
10182) -> Result<usize> {
10183    let previously_skipped = skip_worktree_paths(git_dir, format)?;
10184    let db = FileObjectDatabase::from_git_dir(git_dir, format);
10185    let commit = read_commit(&db, format, target)?;
10186    let mut target_entries = BTreeMap::new();
10187    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
10188
10189    // Honor skip-worktree: a path whose worktree file is intentionally absent
10190    // must not be treated as a dirty (deleted) change blocking the checkout.
10191    let mut dirty = false;
10192    stream_short_status(worktree_root, git_dir, format, |entry| {
10193        if previously_skipped.contains(entry.path) {
10194            return Ok(StreamControl::Continue);
10195        }
10196        // Submodule state never blocks a checkout: upstream unpack-trees
10197        // treats gitlinks as always up-to-date (ie_match_stat refuses to pay
10198        // for a submodule dirtiness probe), so new commits / dirty content in
10199        // a submodule must not fail the branch switch.
10200        if entry.index_mode.is_some_and(sley_index::is_gitlink)
10201            || entry.worktree_mode.is_some_and(sley_index::is_gitlink)
10202        {
10203            return Ok(StreamControl::Continue);
10204        }
10205        // An untracked embedded repository where the target tree records a
10206        // gitlink is reused as-is (upstream entry.c write_entry: mkdir with
10207        // EEXIST is success), so it does not block the checkout either.
10208        if entry.index == b'?' && entry.worktree == b'?' {
10209            let path = entry
10210                .path
10211                .strip_suffix(b"/")
10212                .unwrap_or(entry.path);
10213            if target_entries
10214                .get(path)
10215                .is_some_and(|target| sley_index::is_gitlink(target.mode))
10216            {
10217                return Ok(StreamControl::Continue);
10218            }
10219        }
10220        dirty = true;
10221        Ok(StreamControl::Stop)
10222    })?;
10223    if dirty {
10224        return Err(GitError::Transaction(
10225            "checkout requires a clean working tree".into(),
10226        ));
10227    }
10228
10229    let matcher = sparse.map(|(spec, mode)| SparseMatcher::new(spec, mode));
10230
10231    for path in read_index_entries(git_dir, format)?.keys() {
10232        if target_entries.contains_key(path) {
10233            continue;
10234        }
10235        // Do not disturb the worktree state of an intentionally skipped path.
10236        if previously_skipped.contains(path) {
10237            continue;
10238        }
10239        remove_worktree_file(worktree_root, path)?;
10240    }
10241
10242    let mut index_entries = Vec::new();
10243    for (path, entry) in &target_entries {
10244        let in_cone = matcher.as_ref().is_none_or(|matcher| {
10245            // A path already marked skip-worktree stays out unless it now
10246            // matches the sparse cone, mirroring upstream "honor skip-worktree".
10247            matcher.includes_file(path)
10248        });
10249        let index_entry = if in_cone {
10250            // `materialize_tree_entry` leaves flags_extended at 0, so the
10251            // skip-worktree bit is already clear for in-cone paths.
10252            materialize_tree_entry(&db, worktree_root, path, entry)?
10253        } else {
10254            // Out of cone: ensure no stale worktree file remains and synthesize
10255            // an index entry straight from the tree (no worktree metadata),
10256            // then mark it skip-worktree.
10257            remove_worktree_file(worktree_root, path)?;
10258            let mut index_entry = restored_head_index_entry(worktree_root, &db, path, entry)?;
10259            set_skip_worktree(&mut index_entry);
10260            index_entry
10261        };
10262        index_entries.push(index_entry);
10263    }
10264    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
10265    let mut index = Index {
10266        version: 2,
10267        entries: index_entries,
10268        extensions: Vec::new(),
10269        checksum: None,
10270    };
10271    normalize_index_version_for_extended_flags(&mut index);
10272    fs::write(repository_index_path(git_dir), index.write(format)?)?;
10273    Ok(target_entries.len())
10274}
10275
10276fn skip_worktree_paths(git_dir: &Path, format: ObjectFormat) -> Result<BTreeSet<Vec<u8>>> {
10277    let index_path = repository_index_path(git_dir);
10278    if !index_path.exists() {
10279        return Ok(BTreeSet::new());
10280    }
10281    let index = Index::parse(&fs::read(index_path)?, format)?;
10282    Ok(index
10283        .entries
10284        .into_iter()
10285        .filter(index_entry_skip_worktree)
10286        .map(|entry| entry.path.into_bytes())
10287        .collect())
10288}
10289
10290pub fn restore_worktree_paths(
10291    worktree_root: impl AsRef<Path>,
10292    git_dir: impl AsRef<Path>,
10293    format: ObjectFormat,
10294    paths: &[PathBuf],
10295) -> Result<RestoreResult> {
10296    restore_worktree_paths_inner(
10297        worktree_root.as_ref(),
10298        git_dir.as_ref(),
10299        format,
10300        paths,
10301        None,
10302    )
10303}
10304
10305/// Like [`restore_worktree_paths`], applying the smudge-side content filters
10306/// (CRLF / ident / filter drivers) the way a checkout writes blobs.
10307pub fn restore_worktree_paths_filtered(
10308    worktree_root: impl AsRef<Path>,
10309    git_dir: impl AsRef<Path>,
10310    format: ObjectFormat,
10311    paths: &[PathBuf],
10312    config: &GitConfig,
10313) -> Result<RestoreResult> {
10314    restore_worktree_paths_inner(
10315        worktree_root.as_ref(),
10316        git_dir.as_ref(),
10317        format,
10318        paths,
10319        Some(config),
10320    )
10321}
10322
10323fn restore_worktree_paths_inner(
10324    worktree_root: &Path,
10325    git_dir: &Path,
10326    format: ObjectFormat,
10327    paths: &[PathBuf],
10328    smudge_config: Option<&GitConfig>,
10329) -> Result<RestoreResult> {
10330    let index_path = repository_index_path(git_dir);
10331    if !index_path.exists() {
10332        return Err(GitError::Exit(1));
10333    }
10334    let mut index = Index::parse(&fs::read(&index_path)?, format)?;
10335    let db = FileObjectDatabase::from_git_dir(git_dir, format);
10336    let mut restored = BTreeSet::new();
10337    for path in paths {
10338        let absolute = if path.is_absolute() {
10339            path.clone()
10340        } else {
10341            worktree_root.join(path)
10342        };
10343        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
10344            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
10345        })?;
10346        let git_path = git_path_bytes(relative)?;
10347        let recursive = path == Path::new(".")
10348            || path.to_string_lossy().ends_with('/')
10349            || absolute.is_dir()
10350            || index_has_entry_under(&index.entries, &git_path);
10351        let mut matched = false;
10352        let matched_positions = index
10353            .entries
10354            .iter()
10355            .enumerate()
10356            .filter_map(|(position, entry)| {
10357                (entry.path.as_bytes() == git_path.as_slice()
10358                    || (recursive && index_entry_is_under_path(entry.path.as_bytes(), &git_path)))
10359                .then_some(position)
10360            })
10361            .collect::<Vec<_>>();
10362        for position in matched_positions {
10363            let refreshed = restore_index_entry(
10364                worktree_root,
10365                git_dir,
10366                format,
10367                &db,
10368                &index.entries[position],
10369                smudge_config,
10370            )?;
10371            restored.insert(index.entries[position].path.clone());
10372            matched = true;
10373            if let Some(refreshed) = refreshed {
10374                index.entries[position] = refreshed;
10375            }
10376        }
10377        if !matched {
10378            eprintln!(
10379                "error: pathspec '{}' did not match any file(s) known to git",
10380                path.display()
10381            );
10382            return Err(GitError::Exit(1));
10383        }
10384    }
10385    fs::write(&index_path, index.write(format)?)?;
10386    Ok(RestoreResult {
10387        restored: restored.len(),
10388    })
10389}
10390
10391pub fn restore_index_paths_from_head(
10392    worktree_root: impl AsRef<Path>,
10393    git_dir: impl AsRef<Path>,
10394    format: ObjectFormat,
10395    paths: &[PathBuf],
10396) -> Result<RestoreResult> {
10397    let worktree_root = worktree_root.as_ref();
10398    let git_dir = git_dir.as_ref();
10399    let index_path = repository_index_path(git_dir);
10400    let index = if index_path.exists() {
10401        Index::parse(&fs::read(&index_path)?, format)?
10402    } else {
10403        Index {
10404            version: 2,
10405            entries: Vec::new(),
10406            extensions: Vec::new(),
10407            checksum: None,
10408        }
10409    };
10410    let db = FileObjectDatabase::from_git_dir(git_dir, format);
10411    let head_entries = head_tree_entries(git_dir, format, &db)?;
10412    restore_index_paths_from_entries(
10413        worktree_root,
10414        git_dir,
10415        format,
10416        &db,
10417        index,
10418        &head_entries,
10419        paths,
10420    )
10421}
10422
10423pub fn restore_index_paths_from_tree(
10424    worktree_root: impl AsRef<Path>,
10425    git_dir: impl AsRef<Path>,
10426    format: ObjectFormat,
10427    tree_oid: &ObjectId,
10428    paths: &[PathBuf],
10429) -> Result<RestoreResult> {
10430    let worktree_root = worktree_root.as_ref();
10431    let git_dir = git_dir.as_ref();
10432    let index_path = repository_index_path(git_dir);
10433    let index = if index_path.exists() {
10434        Index::parse(&fs::read(&index_path)?, format)?
10435    } else {
10436        Index {
10437            version: 2,
10438            entries: Vec::new(),
10439            extensions: Vec::new(),
10440            checksum: None,
10441        }
10442    };
10443    let db = FileObjectDatabase::from_git_dir(git_dir, format);
10444    let source_entries = tree_entries(&db, format, tree_oid)?;
10445    restore_index_paths_from_entries(
10446        worktree_root,
10447        git_dir,
10448        format,
10449        &db,
10450        index,
10451        &source_entries,
10452        paths,
10453    )
10454}
10455
10456fn restore_index_paths_from_entries(
10457    worktree_root: &Path,
10458    git_dir: &Path,
10459    format: ObjectFormat,
10460    db: &FileObjectDatabase,
10461    index: Index,
10462    source_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
10463    paths: &[PathBuf],
10464) -> Result<RestoreResult> {
10465    let mut index_entries = index
10466        .entries
10467        .into_iter()
10468        .map(|entry| (entry.path.as_bytes().to_vec(), entry))
10469        .collect::<BTreeMap<_, _>>();
10470    let mut restored = BTreeSet::new();
10471    for path in paths {
10472        let absolute = if path.is_absolute() {
10473            path.clone()
10474        } else {
10475            worktree_root.join(path)
10476        };
10477        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
10478            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
10479        })?;
10480        let git_path = git_path_bytes(relative)?;
10481        let recursive = path == Path::new(".")
10482            || path.to_string_lossy().ends_with('/')
10483            || absolute.is_dir()
10484            || index_entries
10485                .keys()
10486                .any(|entry| index_entry_is_under_path(entry, &git_path))
10487            || source_entries
10488                .keys()
10489                .any(|entry| index_entry_is_under_path(entry, &git_path));
10490        let mut matched_paths = BTreeSet::new();
10491        for path in index_entries.keys().chain(source_entries.keys()) {
10492            if path.as_slice() == git_path.as_slice()
10493                || (recursive && index_entry_is_under_path(path, &git_path))
10494            {
10495                matched_paths.insert(path.clone());
10496            }
10497        }
10498        if matched_paths.is_empty() {
10499            eprintln!(
10500                "error: pathspec '{}' did not match any file(s) known to git",
10501                path.display()
10502            );
10503            return Err(GitError::Exit(1));
10504        }
10505        for path in matched_paths {
10506            if let Some(entry) = source_entries.get(&path) {
10507                // git's pathspec reset (`reset_index` → diff against the source
10508                // tree) only rewrites entries that actually CHANGE: an entry whose
10509                // oid and mode already equal the source is left untouched, so its
10510                // cached stat is preserved and `git diff-files` stays clean (t7102
10511                // "resetting an unmodified path is a no-op"). Only when the entry
10512                // genuinely changes does git write a fresh, stat-zeroed entry.
10513                let unchanged = index_entries.get(&path).is_some_and(|existing| {
10514                    existing.oid == entry.oid && existing.mode == entry.mode
10515                });
10516                if !unchanged {
10517                    index_entries.insert(
10518                        path.clone(),
10519                        restored_head_index_entry(worktree_root, db, &path, entry)?,
10520                    );
10521                }
10522            } else {
10523                index_entries.remove(&path);
10524            }
10525            restored.insert(path);
10526        }
10527    }
10528    let mut entries = index_entries.into_values().collect::<Vec<_>>();
10529    entries.sort_by(|left, right| left.path.cmp(&right.path));
10530    fs::write(
10531        repository_index_path(git_dir),
10532        Index {
10533            version: 2,
10534            entries,
10535            extensions: Vec::new(),
10536            checksum: None,
10537        }
10538        .write(format)?,
10539    )?;
10540    Ok(RestoreResult {
10541        restored: restored.len(),
10542    })
10543}
10544
10545pub fn restore_index_and_worktree_paths_from_head(
10546    worktree_root: impl AsRef<Path>,
10547    git_dir: impl AsRef<Path>,
10548    format: ObjectFormat,
10549    paths: &[PathBuf],
10550) -> Result<RestoreResult> {
10551    let worktree_root = worktree_root.as_ref();
10552    let git_dir = git_dir.as_ref();
10553    let index_path = repository_index_path(git_dir);
10554    let index = if index_path.exists() {
10555        Index::parse(&fs::read(&index_path)?, format)?
10556    } else {
10557        Index {
10558            version: 2,
10559            entries: Vec::new(),
10560            extensions: Vec::new(),
10561            checksum: None,
10562        }
10563    };
10564    let db = FileObjectDatabase::from_git_dir(git_dir, format);
10565    let head_entries = head_tree_entries(git_dir, format, &db)?;
10566    restore_index_and_worktree_paths_from_entries(
10567        worktree_root,
10568        git_dir,
10569        format,
10570        &db,
10571        index,
10572        &head_entries,
10573        paths,
10574    )
10575}
10576
10577pub fn restore_index_and_worktree_paths_from_tree(
10578    worktree_root: impl AsRef<Path>,
10579    git_dir: impl AsRef<Path>,
10580    format: ObjectFormat,
10581    tree_oid: &ObjectId,
10582    paths: &[PathBuf],
10583) -> Result<RestoreResult> {
10584    let worktree_root = worktree_root.as_ref();
10585    let git_dir = git_dir.as_ref();
10586    let index_path = repository_index_path(git_dir);
10587    let index = if index_path.exists() {
10588        Index::parse(&fs::read(&index_path)?, format)?
10589    } else {
10590        Index {
10591            version: 2,
10592            entries: Vec::new(),
10593            extensions: Vec::new(),
10594            checksum: None,
10595        }
10596    };
10597    let db = FileObjectDatabase::from_git_dir(git_dir, format);
10598    let source_entries = tree_entries(&db, format, tree_oid)?;
10599    restore_index_and_worktree_paths_from_entries(
10600        worktree_root,
10601        git_dir,
10602        format,
10603        &db,
10604        index,
10605        &source_entries,
10606        paths,
10607    )
10608}
10609
10610fn restore_index_and_worktree_paths_from_entries(
10611    worktree_root: &Path,
10612    git_dir: &Path,
10613    format: ObjectFormat,
10614    db: &FileObjectDatabase,
10615    index: Index,
10616    source_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
10617    paths: &[PathBuf],
10618) -> Result<RestoreResult> {
10619    let mut index_entries = index
10620        .entries
10621        .into_iter()
10622        .map(|entry| (entry.path.as_bytes().to_vec(), entry))
10623        .collect::<BTreeMap<_, _>>();
10624    let mut restored = BTreeSet::new();
10625    for path in paths {
10626        let absolute = if path.is_absolute() {
10627            path.clone()
10628        } else {
10629            worktree_root.join(path)
10630        };
10631        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
10632            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
10633        })?;
10634        let git_path = git_path_bytes(relative)?;
10635        let recursive = path == Path::new(".")
10636            || path.to_string_lossy().ends_with('/')
10637            || absolute.is_dir()
10638            || index_entries
10639                .keys()
10640                .any(|entry| index_entry_is_under_path(entry, &git_path))
10641            || source_entries
10642                .keys()
10643                .any(|entry| index_entry_is_under_path(entry, &git_path));
10644        let mut matched_paths = BTreeSet::new();
10645        for path in index_entries.keys().chain(source_entries.keys()) {
10646            if path.as_slice() == git_path.as_slice()
10647                || (recursive && index_entry_is_under_path(path, &git_path))
10648            {
10649                matched_paths.insert(path.clone());
10650            }
10651        }
10652        if matched_paths.is_empty() {
10653            eprintln!(
10654                "error: pathspec '{}' did not match any file(s) known to git",
10655                path.display()
10656            );
10657            return Err(GitError::Exit(1));
10658        }
10659        for path in matched_paths {
10660            if let Some(entry) = source_entries.get(&path) {
10661                index_entries.insert(
10662                    path.clone(),
10663                    restore_head_entry_to_worktree_and_index(worktree_root, db, &path, entry)?,
10664                );
10665            } else {
10666                index_entries.remove(&path);
10667                remove_worktree_file(worktree_root, &path)?;
10668            }
10669            restored.insert(path);
10670        }
10671    }
10672    let mut entries = index_entries.into_values().collect::<Vec<_>>();
10673    entries.sort_by(|left, right| left.path.cmp(&right.path));
10674    fs::write(
10675        repository_index_path(git_dir),
10676        Index {
10677            version: 2,
10678            entries,
10679            extensions: Vec::new(),
10680            checksum: None,
10681        }
10682        .write(format)?,
10683    )?;
10684    Ok(RestoreResult {
10685        restored: restored.len(),
10686    })
10687}
10688
10689pub fn reset_index_and_worktree_to_commit(
10690    worktree_root: impl AsRef<Path>,
10691    git_dir: impl AsRef<Path>,
10692    format: ObjectFormat,
10693    commit_oid: &ObjectId,
10694) -> Result<RestoreResult> {
10695    let worktree_root = worktree_root.as_ref();
10696    let git_dir = git_dir.as_ref();
10697    let db = FileObjectDatabase::from_git_dir(git_dir, format);
10698    let commit = read_commit(&db, format, commit_oid)?;
10699    let mut target_entries = BTreeMap::new();
10700    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
10701    let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
10702    let attributes = build_tree_attribute_matcher(worktree_root, &db, format, &commit.tree)?;
10703
10704    // git's `reset --hard` runs a one-way merge through unpack-trees: EVERY path
10705    // present in the current index (at ANY stage) that the target tree does not
10706    // track is removed from the worktree. A conflicted D/F merge can leave a
10707    // path like `dir~HEAD` at stage 2 only — those entries are dropped by the
10708    // stage-0-only `read_index_entries`, so iterate the RAW index paths here
10709    // (deduped across stages) to match git and delete the moved-aside file.
10710    for path in current_index_paths(git_dir, format, &db)? {
10711        if !target_entries.contains_key(&path) {
10712            remove_worktree_file(worktree_root, &path)?;
10713        }
10714    }
10715
10716    let mut index_entries = Vec::new();
10717    for (path, entry) in &target_entries {
10718        index_entries.push(materialize_tree_entry_filtered(
10719            &db,
10720            worktree_root,
10721            path,
10722            entry,
10723            &config,
10724            &attributes,
10725        )?);
10726    }
10727    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
10728    fs::write(
10729        repository_index_path(git_dir),
10730        Index {
10731            version: 2,
10732            entries: index_entries,
10733            extensions: Vec::new(),
10734            checksum: None,
10735        }
10736        .write(format)?,
10737    )?;
10738    Ok(RestoreResult {
10739        restored: target_entries.len(),
10740    })
10741}
10742
10743/// All paths the current index references, deduped across stages (a conflicted
10744/// path appears at stages 1–3; we want it listed once). Unlike
10745/// `read_index_entries`, which filters to stage 0, this keeps conflicted paths
10746/// so a `reset --hard` worktree sweep removes moved-aside files (`dir~HEAD`) the
10747/// target tree doesn't track — matching git's one-way unpack-trees behavior.
10748fn current_index_paths(
10749    git_dir: &Path,
10750    format: ObjectFormat,
10751    db: &FileObjectDatabase,
10752) -> Result<BTreeSet<Vec<u8>>> {
10753    let (index, _stat_cache, _head_matches) = read_index_with_stat_cache(git_dir, format, db)?;
10754    Ok(index
10755        .entries
10756        .into_iter()
10757        .map(|entry| entry.path.into_bytes())
10758        .collect())
10759}
10760
10761/// Write one target tree entry into the worktree and return its index entry —
10762/// the shared materialization step for every checkout/reset worktree rebuild.
10763///
10764/// Gitlinks (mode 160000) never touch the object database: their oid names a
10765/// commit in the *submodule's* repository, not an object here. Upstream
10766/// (entry.c `write_entry` S_IFGITLINK) just mkdirs the path — an
10767/// already-populated submodule is left untouched (EEXIST is success) — and
10768/// records the oid in the index with a zeroed stat so status re-evaluates the
10769/// gitlink against the embedded repository's HEAD.
10770fn materialize_tree_entry(
10771    db: &FileObjectDatabase,
10772    worktree_root: &Path,
10773    path: &[u8],
10774    entry: &TrackedEntry,
10775) -> Result<IndexEntry> {
10776    if sley_index::is_gitlink(entry.mode) {
10777        let dir_path = worktree_path(worktree_root, path)?;
10778        fs::create_dir_all(&dir_path)?;
10779        return Ok(IndexEntry {
10780            ctime_seconds: 0,
10781            ctime_nanoseconds: 0,
10782            mtime_seconds: 0,
10783            mtime_nanoseconds: 0,
10784            dev: 0,
10785            ino: 0,
10786            mode: entry.mode,
10787            uid: 0,
10788            gid: 0,
10789            size: 0,
10790            oid: entry.oid,
10791            flags: path.len().min(0x0fff) as u16,
10792            flags_extended: 0,
10793            path: BString::from(path),
10794        });
10795    }
10796    let file_path = write_worktree_blob_entry(db, worktree_root, path, entry)?;
10797    let metadata = fs::symlink_metadata(&file_path)?;
10798    let mut index_entry = index_entry_from_metadata(path.to_vec(), entry.oid, &metadata);
10799    index_entry.mode = entry.mode;
10800    Ok(index_entry)
10801}
10802
10803fn materialize_tree_entry_filtered(
10804    db: &FileObjectDatabase,
10805    worktree_root: &Path,
10806    path: &[u8],
10807    entry: &TrackedEntry,
10808    config: &GitConfig,
10809    attributes: &AttributeMatcher,
10810) -> Result<IndexEntry> {
10811    if sley_index::is_gitlink(entry.mode) || (entry.mode & 0o170000) == 0o120000 {
10812        return materialize_tree_entry(db, worktree_root, path, entry);
10813    }
10814    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
10815    let checks = attributes.attributes_for_path(path, &filter_attribute_names(), false);
10816    let body = apply_smudge_filter_with_attributes_cow(config, &checks, path, &object.body)?;
10817    let file_path = worktree_path(worktree_root, path)?;
10818    prepare_blob_parent_dirs(worktree_root, &file_path)?;
10819    remove_existing_worktree_path(&file_path)?;
10820    fs::write(&file_path, &body)?;
10821    set_worktree_file_mode(&file_path, entry.mode)?;
10822    let metadata = fs::symlink_metadata(&file_path)?;
10823    let mut index_entry = index_entry_from_metadata(path.to_vec(), entry.oid, &metadata);
10824    index_entry.mode = entry.mode;
10825    Ok(index_entry)
10826}
10827
10828/// Materialize a blob (or symlink) tree entry into the worktree at `path`,
10829/// returning the absolute path written. Shared by every checkout/reset worktree
10830/// rebuild so the type-change handling is identical everywhere.
10831///
10832/// Mirrors git's entry.c `write_entry`: it unlinks whatever currently occupies
10833/// the path before creating the new object, so a type transition (regular file ⇄
10834/// symlink, or a stale symlink/directory in the way) is overwritten rather than
10835/// left in place or failing with EEXIST. A plain `fs::write` follows an existing
10836/// symlink and would write *through* it (leaving the link), so the unlink is
10837/// load-bearing for the symlink-stash / reset-hard type-change cases.
10838fn write_worktree_blob_entry(
10839    db: &FileObjectDatabase,
10840    worktree_root: &Path,
10841    path: &[u8],
10842    entry: &TrackedEntry,
10843) -> Result<PathBuf> {
10844    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
10845    let file_path = worktree_path(worktree_root, path)?;
10846    // Clear any non-directory blocking an ancestor component (prior tree had
10847    // `dir` as a FILE, target wants `dir/<child>`), creating the parent dirs.
10848    prepare_blob_parent_dirs(worktree_root, &file_path)?;
10849    // Clear whatever sits at the leaf — including a directory where the target
10850    // wants a plain file (reverse D/F) — before writing.
10851    remove_existing_worktree_path(&file_path)?;
10852    if (entry.mode & 0o170000) == 0o120000 {
10853        // Symlink entry (mode 120000): the blob body is the link target.
10854        #[cfg(unix)]
10855        {
10856            use std::os::unix::ffi::OsStringExt;
10857            let target =
10858                std::path::PathBuf::from(std::ffi::OsString::from_vec(object.body.clone()));
10859            std::os::unix::fs::symlink(&target, &file_path)?;
10860        }
10861        #[cfg(not(unix))]
10862        fs::write(&file_path, &object.body)?;
10863    } else {
10864        fs::write(&file_path, &object.body)?;
10865        set_worktree_file_mode(&file_path, entry.mode)?;
10866    }
10867    Ok(file_path)
10868}
10869
/// Create the ancestor directories of a worktree blob path, removing any
/// regular file or symlink that occupies an ancestor *component* first.
///
/// Mirrors git's `entry.c` `create_directories`: it walks each path component
/// between `worktree_root` and the leaf and, for each, if a non-directory (a
/// regular file or symlink left by a prior tree where `dir` was a FILE) blocks
/// it, unlinks the blocker before `mkdir`. A plain `fs::create_dir_all` fails
/// with `ENOTDIR`/`EEXIST` on such a D/F transition; this is the directory-side
/// of git's force-checkout D/F clearing.
///
/// `worktree_root` itself is never touched. Only components strictly between the
/// root and the leaf are cleared, matching `create_directories`' `base_dir_len`
/// boundary.
///
/// # Errors
///
/// Returns any I/O error from inspecting, unlinking, or creating a component.
fn prepare_blob_parent_dirs(worktree_root: &Path, file_path: &Path) -> Result<()> {
    let Some(parent) = file_path.parent() else {
        return Ok(());
    };
    // Fast path: parent already a real directory (the overwhelmingly common
    // case). This must be an lstat: `Path::is_dir` follows symlinks, so a
    // symlink-to-directory sitting on the parent component would be accepted
    // and the blob written *through* it (possibly outside the worktree)
    // instead of the link being replaced by a directory as documented above.
    //
    // NOTE(review): a symlink on a *higher* ancestor still passes this check,
    // because lstat only refuses to follow the final component. Closing that
    // needs a per-checkout lstat cache like git's — confirm whether required.
    if matches!(fs::symlink_metadata(parent), Ok(metadata) if metadata.is_dir()) {
        return Ok(());
    }
    // Ancestor chain from `parent` (inclusive) up to `worktree_root`
    // (exclusive). We can't `create_dir_all` blindly because a non-directory
    // may sit on one of these components.
    let components: Vec<&Path> = parent
        .ancestors()
        .take_while(|dir| *dir != worktree_root)
        .collect();
    // Walk root → leaf so each parent exists before its child.
    for dir in components.iter().rev() {
        match fs::symlink_metadata(dir) {
            Ok(metadata) if metadata.is_dir() => {}
            Ok(_) => {
                // A regular file or symlink occupies this component (the prior
                // tree had `dir` as a FILE). Unlink it, then create the dir.
                fs::remove_file(dir)?;
                fs::create_dir(dir)?;
            }
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
                fs::create_dir(dir)?;
            }
            Err(err) => return Err(err.into()),
        }
    }
    Ok(())
}
10925
/// Clears a worktree path so a new object can be written there.
///
/// Handles a symlink (even a dangling one, which `Path::exists` would miss), a
/// regular file, or a whole directory subtree. The path is inspected with
/// `symlink_metadata` (lstat), so a symlink is removed as the link itself and
/// never followed. A path that does not exist is not an error.
fn remove_existing_worktree_path(file_path: &Path) -> Result<()> {
    let occupied_by_directory = match fs::symlink_metadata(file_path) {
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
        Err(err) => return Err(err.into()),
        Ok(metadata) => metadata.is_dir(),
    };
    if !occupied_by_directory {
        fs::remove_file(file_path)?;
        return Ok(());
    }
    // A directory in the way of a file (D/F transition) or a populated gitlink:
    // drop the subtree. It vanishing underneath us is fine.
    match fs::remove_dir_all(file_path) {
        Err(err) if err.kind() != std::io::ErrorKind::NotFound => Err(err.into()),
        _ => Ok(()),
    }
}
10949
/// Sets the permission bits of a freshly written worktree blob from its
/// tree/index entry mode.
///
/// `fs::write` truncates an existing file *in place* and keeps its previous
/// permission bits. For a mode-only change (same oid, 100644 vs 100755) that
/// would leave the wrong exec bit on disk — the `reset --hard` / checkout bug
/// this guards against. git's checkout path unlinks and recreates the file to
/// get the right permissions (entry.c `write_entry`); here the just-written
/// file is chmod'd instead.
///
/// The result matches git's `create_file` (entry.c) under the standard umask
/// (0022): 0755 for an executable entry (100755) and 0644 for a plain one
/// (100644). Any other mode — gitlinks, symlinks — is left untouched because it
/// has no meaningful exec bit. Using explicit bits rather than the process
/// umask keeps the worktree aligned with what the parity suite asserts.
#[cfg(unix)]
fn set_worktree_file_mode(file_path: &Path, entry_mode: u32) -> Result<()> {
    use std::os::unix::fs::PermissionsExt;
    let wanted_bits = match entry_mode {
        0o100644 => Some(0o644),
        0o100755 => Some(0o755),
        _ => None,
    };
    if let Some(bits) = wanted_bits {
        fs::set_permissions(file_path, fs::Permissions::from_mode(bits))?;
    }
    Ok(())
}

/// Non-unix platforms have no exec bit to maintain, so this is a no-op.
#[cfg(not(unix))]
fn set_worktree_file_mode(_file_path: &Path, _entry_mode: u32) -> Result<()> {
    Ok(())
}
10985
10986/// Materialize a tree object into the index and worktree.
10987pub fn checkout_tree_to_index_and_worktree(
10988    worktree_root: impl AsRef<Path>,
10989    git_dir: impl AsRef<Path>,
10990    format: ObjectFormat,
10991    tree_oid: &ObjectId,
10992) -> Result<RestoreResult> {
10993    let worktree_root = worktree_root.as_ref();
10994    let git_dir = git_dir.as_ref();
10995    let db = FileObjectDatabase::from_git_dir(git_dir, format);
10996    let mut target_entries = BTreeMap::new();
10997    collect_tree_entries(&db, format, tree_oid, &mut target_entries)?;
10998
10999    for path in read_index_entries(git_dir, format)?.keys() {
11000        if !target_entries.contains_key(path) {
11001            remove_worktree_file(worktree_root, path)?;
11002        }
11003    }
11004
11005    let mut index_entries = Vec::new();
11006    for (path, entry) in &target_entries {
11007        index_entries.push(materialize_tree_entry(&db, worktree_root, path, entry)?);
11008    }
11009    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
11010    fs::write(
11011        repository_index_path(git_dir),
11012        Index {
11013            version: 2,
11014            entries: index_entries,
11015            extensions: Vec::new(),
11016            checksum: None,
11017        }
11018        .write(format)?,
11019    )?;
11020    Ok(RestoreResult {
11021        restored: target_entries.len(),
11022    })
11023}
11024
11025pub fn reset_index_to_commit(
11026    worktree_root: impl AsRef<Path>,
11027    git_dir: impl AsRef<Path>,
11028    format: ObjectFormat,
11029    commit_oid: &ObjectId,
11030) -> Result<RestoreResult> {
11031    let worktree_root = worktree_root.as_ref();
11032    let git_dir = git_dir.as_ref();
11033    let db = FileObjectDatabase::from_git_dir(git_dir, format);
11034    let commit = read_commit(&db, format, commit_oid)?;
11035    let mut target_entries = BTreeMap::new();
11036    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
11037    // git's `reset --mixed` preserves the skip-worktree bit on entries that survive
11038    // the reset (t7102 "--mixed preserves skip-worktree"): carry it forward from the
11039    // pre-reset index keyed by path, so reconstructed entries keep CE_SKIP_WORKTREE.
11040    let index_path = repository_index_path(git_dir);
11041    let prior_skip_worktree: BTreeSet<Vec<u8>> = match fs::read(&index_path) {
11042        Ok(bytes) => Index::parse(&bytes, format)?
11043            .entries
11044            .iter()
11045            .filter(|entry| entry.is_skip_worktree())
11046            .map(|entry| entry.path.as_bytes().to_vec())
11047            .collect(),
11048        Err(err) if err.kind() == std::io::ErrorKind::NotFound => BTreeSet::new(),
11049        Err(err) => return Err(err.into()),
11050    };
11051    let mut index_entries = Vec::new();
11052    for (path, entry) in &target_entries {
11053        let mut restored = restored_head_index_entry(worktree_root, &db, path, entry)?;
11054        if prior_skip_worktree.contains(path) {
11055            restored.set_skip_worktree(true);
11056        }
11057        index_entries.push(restored);
11058    }
11059    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
11060    let mut index = Index {
11061        version: 2,
11062        entries: index_entries,
11063        extensions: Vec::new(),
11064        checksum: None,
11065    };
11066    index.upgrade_version_for_flags();
11067    fs::write(&index_path, index.write(format)?)?;
11068    Ok(RestoreResult {
11069        restored: target_entries.len(),
11070    })
11071}
11072
11073/// Build a fresh in-memory index that mirrors the tree `tree_oid`, the way
11074/// `git read-tree <tree>` does: every blob, symlink, and gitlink leaf (found by
11075/// recursing subtrees) becomes a stage-0 entry carrying the tree mode and oid,
11076/// with a fully zeroed stat (so nothing is treated as stat-clean) and size 0.
11077/// Entries are sorted by path; the index is version 2 with no extensions.
11078///
11079/// This does not touch the worktree or write anything to disk — serialize the
11080/// result with [`Index::write`] (and persist it) when you want to replace
11081/// `.git/index`.
11082pub fn index_from_tree(
11083    db: &FileObjectDatabase,
11084    format: ObjectFormat,
11085    tree_oid: &ObjectId,
11086) -> Result<Index> {
11087    let mut entries: Vec<IndexEntry> = Vec::new();
11088    if *tree_oid != ObjectId::empty_tree(format) {
11089        let mut tree_entries = BTreeMap::new();
11090        collect_tree_entries(db, format, tree_oid, &mut tree_entries)?;
11091        entries.reserve(tree_entries.len());
11092        for (path, entry) in tree_entries {
11093            let name_len = (path.len().min(0x0fff)) as u16;
11094            entries.push(IndexEntry {
11095                ctime_seconds: 0,
11096                ctime_nanoseconds: 0,
11097                mtime_seconds: 0,
11098                mtime_nanoseconds: 0,
11099                dev: 0,
11100                ino: 0,
11101                mode: entry.mode,
11102                uid: 0,
11103                gid: 0,
11104                size: 0,
11105                oid: entry.oid,
11106                flags: name_len,
11107                flags_extended: 0,
11108                path: path.into(),
11109            });
11110        }
11111    }
11112    // git orders index entries by path bytes; BTreeMap already yields that, but
11113    // sort explicitly so the contract holds regardless of how entries arrive.
11114    entries.sort_by(|left, right| left.path.cmp(&right.path));
11115    Ok(Index {
11116        version: 2,
11117        entries,
11118        extensions: Vec::new(),
11119        checksum: None,
11120    })
11121}
11122
11123/// Enforces a [`SparseCheckout`] against the current index and worktree.
11124///
11125/// Every stage-0 index entry is classified with the sparse patterns (see
11126/// [`SparseCheckoutMode`] for the matching semantics):
11127///
11128/// * **In cone**: the skip-worktree bit is cleared and, if the worktree file is
11129///   missing, it is re-materialized from the entry's blob in the object
11130///   database. Existing worktree files are left untouched so local content is
11131///   preserved.
11132/// * **Out of cone**: the skip-worktree bit is set and any existing worktree
11133///   file is removed (empty parent directories are pruned).
11134///
11135/// Returns `true` when `path` is inside the sparse-checkout described by
11136/// `sparse` under the given matching `mode`. This is the engine behind
11137/// `git sparse-checkout check-rules`: a path is "in" the sparse-checkout when
11138/// the compiled matcher would keep its worktree file. Cone and full (gitignore)
11139/// grammars are both handled, exactly as the apply engine interprets them, so
11140/// `check-rules` and `set`/`reapply` agree by construction.
11141pub fn path_in_sparse_checkout(path: &[u8], sparse: &SparseCheckout, mode: SparseCheckoutMode) -> bool {
11142    SparseMatcher::new(sparse, mode).includes_file(path)
11143}
11144
11145/// Conflicted entries (stage != 0) are never given the skip-worktree bit and
11146/// are left alone, matching upstream Git. The index is rewritten in place.
11147pub fn apply_sparse_checkout(
11148    worktree_root: impl AsRef<Path>,
11149    git_dir: impl AsRef<Path>,
11150    format: ObjectFormat,
11151    sparse: &SparseCheckout,
11152) -> Result<ApplySparseResult> {
11153    apply_sparse_checkout_with_mode(
11154        worktree_root,
11155        git_dir,
11156        format,
11157        sparse,
11158        SparseCheckoutMode::Auto,
11159    )
11160}
11161
11162/// Like [`apply_sparse_checkout`] but lets the caller force the pattern
11163/// interpretation instead of auto-detecting it.
11164pub fn apply_sparse_checkout_with_mode(
11165    worktree_root: impl AsRef<Path>,
11166    git_dir: impl AsRef<Path>,
11167    format: ObjectFormat,
11168    sparse: &SparseCheckout,
11169    mode: SparseCheckoutMode,
11170) -> Result<ApplySparseResult> {
11171    let worktree_root = worktree_root.as_ref();
11172    let git_dir = git_dir.as_ref();
11173    let index_path = repository_index_path(git_dir);
11174    let mut index = if index_path.exists() {
11175        Index::parse(&fs::read(&index_path)?, format)?
11176    } else {
11177        return Ok(ApplySparseResult {
11178            materialized: Vec::new(),
11179            skipped: Vec::new(),
11180            not_up_to_date: Vec::new(),
11181        });
11182    };
11183    let matcher = SparseMatcher::new(sparse, mode);
11184    let db = FileObjectDatabase::from_git_dir(git_dir, format);
11185    // Expand any collapsed sparse-directory entries to a full index before we
11186    // reconcile per-path: the apply loop reasons about individual blob paths, so
11187    // it must never see a sparse-dir entry. (Re-collapse happens at the end when
11188    // a sparse index is requested.)
11189    if index.entries.iter().any(IndexEntry::is_sparse_dir) {
11190        expand_sparse_index(&mut index, &db, format)?;
11191    }
11192    let mut materialized = Vec::new();
11193    let mut skipped = Vec::new();
11194    let mut not_up_to_date = Vec::new();
11195    for entry in &mut index.entries {
11196        // Never touch conflicted entries.
11197        if index_entry_stage(entry) != 0 {
11198            continue;
11199        }
11200        if matcher.includes_file(entry.path.as_bytes()) {
11201            clear_skip_worktree(entry);
11202            let file_path = worktree_path(worktree_root, entry.path.as_bytes())?;
11203            if !file_path.exists() {
11204                materialize_index_entry_file(&db, worktree_root, &file_path, entry)?;
11205            }
11206            materialized.push(entry.path.as_bytes().to_vec());
11207        } else {
11208            // The path is out of cone, so its worktree file should be removed and
11209            // the entry marked skip-worktree. But git refuses to delete a file
11210            // that is *not up to date* with the index (e.g. one that reappeared in
11211            // the worktree after the path was already sparse): it leaves the file,
11212            // leaves the skip-worktree bit clear, and reports the path in its "not
11213            // up to date" warning. Mirror that to avoid silent data loss.
11214            let file_path = worktree_path(worktree_root, entry.path.as_bytes())?;
11215            match fs::symlink_metadata(&file_path) {
11216                Ok(metadata) if !worktree_entry_is_uptodate(entry, &metadata) => {
11217                    clear_skip_worktree(entry);
11218                    not_up_to_date.push(entry.path.as_bytes().to_vec());
11219                }
11220                _ => {
11221                    set_skip_worktree(entry);
11222                    remove_worktree_file(worktree_root, entry.path.as_bytes())?;
11223                    skipped.push(entry.path.as_bytes().to_vec());
11224                }
11225            }
11226        }
11227    }
11228    not_up_to_date.sort();
11229    normalize_index_version_for_extended_flags(&mut index);
11230    // When a sparse index was requested (cone mode + index.sparse), collapse the
11231    // fully-out-of-cone directories into single sparse-directory entries and
11232    // mark the index with the `sdir` extension. Otherwise ensure the index is
11233    // written full (and any prior `sdir` marker is cleared).
11234    if sparse.sparse_index {
11235        collapse_to_sparse_index(&mut index, &matcher, &db, format)?;
11236    } else {
11237        index.clear_sparse_extension()?;
11238    }
11239    fs::write(index_path, index.write(format)?)?;
11240    Ok(ApplySparseResult {
11241        materialized,
11242        skipped,
11243        not_up_to_date,
11244    })
11245}
11246
11247/// Expands every sparse-directory entry in `index` back into the full set of
11248/// blob (and nested-directory) entries it collapses, reading each directory's
11249/// tree from `db`. After this the index contains no sparse-directory entries and
11250/// carries no `sdir` marker — it is a full index that any per-path command can
11251/// operate on without sparse-index awareness.
11252///
11253/// This is the **close-the-class** primitive: a command never needs to special-
11254/// case a sparse index, because the moment it loads the index it expands to the
11255/// full form. The collapsed shape is purely an on-disk storage optimization.
11256pub fn expand_sparse_index(
11257    index: &mut Index,
11258    db: &FileObjectDatabase,
11259    format: ObjectFormat,
11260) -> Result<bool> {
11261    if !index.entries.iter().any(IndexEntry::is_sparse_dir) {
11262        // Still strip a stray `sdir` marker so the written index is recorded full.
11263        let had_marker = index.is_sparse();
11264        index.clear_sparse_extension()?;
11265        return Ok(had_marker);
11266    }
11267    let mut expanded: Vec<IndexEntry> = Vec::with_capacity(index.entries.len());
11268    for entry in std::mem::take(&mut index.entries) {
11269        if !entry.is_sparse_dir() {
11270            expanded.push(entry);
11271            continue;
11272        }
11273        // The sparse-dir path ends in `/`; its OID is the directory's tree.
11274        let dir = entry.path.as_bytes();
11275        let dir_prefix = dir; // includes the trailing slash
11276        for (rel, (mode, oid)) in sley_diff_merge::flatten_tree(db, format, &entry.oid)? {
11277            let mut full_path = dir_prefix.to_vec();
11278            full_path.extend_from_slice(&rel);
11279            let mut blob = blank_sparse_blob_entry(format, &full_path, mode, oid);
11280            // Re-collapsed entries are skip-worktree (they live outside the cone).
11281            blob.set_skip_worktree(true);
11282            expanded.push(blob);
11283        }
11284    }
11285    expanded.sort_by(|a, b| a.path.as_bytes().cmp(b.path.as_bytes()));
11286    index.entries = expanded;
11287    index.clear_sparse_extension()?;
11288    normalize_index_version_for_extended_flags(index);
11289    Ok(true)
11290}
11291
11292/// Builds a minimal index entry for an expanded sparse blob: zeroed stat fields
11293/// (the file is not in the worktree), the given mode/oid, and a fresh name
11294/// length. Stat fields are zero because a skip-worktree file has no on-disk
11295/// presence to record.
11296fn blank_sparse_blob_entry(
11297    format: ObjectFormat,
11298    path: &[u8],
11299    mode: u32,
11300    oid: ObjectId,
11301) -> IndexEntry {
11302    let _ = format;
11303    let mut entry = IndexEntry {
11304        ctime_seconds: 0,
11305        ctime_nanoseconds: 0,
11306        mtime_seconds: 0,
11307        mtime_nanoseconds: 0,
11308        dev: 0,
11309        ino: 0,
11310        mode,
11311        uid: 0,
11312        gid: 0,
11313        size: 0,
11314        oid,
11315        flags: 0,
11316        flags_extended: 0,
11317        path: path.into(),
11318    };
11319    entry.refresh_name_length();
11320    entry
11321}
11322
11323/// Collapses fully-out-of-cone directories in `index` into single sparse-
11324/// directory entries (mode `040000`, skip-worktree, the directory tree's OID),
11325/// then marks the index with the `sdir` extension. A directory is collapsible
11326/// when *every* entry under it is skip-worktree and stage 0 — i.e. nothing in it
11327/// is in the cone or conflicted. The shallowest such directory subsumes deeper
11328/// ones, matching git's `convert_to_sparse` cache-tree walk.
11329fn collapse_to_sparse_index(
11330    index: &mut Index,
11331    matcher: &SparseMatcher,
11332    db: &FileObjectDatabase,
11333    format: ObjectFormat,
11334) -> Result<()> {
11335    // First expand any pre-existing sparse-dir entries so the collapse decision
11336    // sees a uniform full index (idempotent re-collapse).
11337    if index.entries.iter().any(IndexEntry::is_sparse_dir) {
11338        expand_sparse_index(index, db, format)?;
11339    }
11340
11341    // Any unmerged (stage != 0) entry forbids a sparse index entirely (the cache
11342    // tree cannot be built), so stay full — matching git's bail.
11343    if index.entries.iter().any(|e| index_entry_stage(e) != 0) {
11344        index.clear_sparse_extension()?;
11345        return Ok(());
11346    }
11347
11348    index
11349        .entries
11350        .sort_by(|a, b| a.path.as_bytes().cmp(b.path.as_bytes()));
11351
11352    // Determine, for every directory prefix, whether it contains any in-cone
11353    // path. A directory with no in-cone descendant is collapsible.
11354    use std::collections::BTreeMap;
11355    let mut dir_has_in_cone: BTreeMap<Vec<u8>, bool> = BTreeMap::new();
11356    for entry in &index.entries {
11357        let path = entry.path.as_bytes();
11358        let in_cone = matcher.includes_file(path);
11359        let mut start = 0usize;
11360        while let Some(rel) = path.get(start..).and_then(|s| s.iter().position(|b| *b == b'/')) {
11361            let end = start + rel;
11362            let dir = path[..end].to_vec();
11363            let flag = dir_has_in_cone.entry(dir).or_insert(false);
11364            *flag = *flag || in_cone;
11365            start = end + 1;
11366        }
11367    }
11368
11369    // The collapsible directories are those with no in-cone descendant; keep only
11370    // the shallowest (a directory whose ancestor is also collapsible is subsumed).
11371    let collapsible: Vec<Vec<u8>> = {
11372        let all: Vec<Vec<u8>> = dir_has_in_cone
11373            .iter()
11374            .filter(|(_, has)| !**has)
11375            .map(|(dir, _)| dir.clone())
11376            .collect();
11377        all.iter()
11378            .filter(|dir| {
11379                !all.iter().any(|other| {
11380                    other != *dir
11381                        && dir
11382                            .strip_prefix(other.as_slice())
11383                            .is_some_and(|rest| rest.first() == Some(&b'/'))
11384                })
11385            })
11386            .cloned()
11387            .collect()
11388    };
11389    if collapsible.is_empty() {
11390        index.clear_sparse_extension()?;
11391        return Ok(());
11392    }
11393
11394    let mut checker = db.presence_checker();
11395    let mut new_entries: Vec<IndexEntry> = Vec::with_capacity(index.entries.len());
11396    let mut consumed: std::collections::HashSet<Vec<u8>> = std::collections::HashSet::new();
11397    for dir in &collapsible {
11398        // Gather the entries that live strictly under this directory.
11399        let mut subtree: Vec<&IndexEntry> = index
11400            .entries
11401            .iter()
11402            .filter(|e| {
11403                e.path
11404                    .as_bytes()
11405                    .strip_prefix(dir.as_slice())
11406                    .is_some_and(|rest| rest.first() == Some(&b'/'))
11407            })
11408            .collect();
11409        if subtree.is_empty() {
11410            continue;
11411        }
11412        subtree.sort_by(|a, b| a.path.as_bytes().cmp(b.path.as_bytes()));
11413        // Build the subtree object and capture its OID.
11414        let mut prefix = dir.clone();
11415        prefix.push(b'/');
11416        let tree_entries: Vec<WriteTreeEntry<'_>> = subtree
11417            .iter()
11418            .map(|e| WriteTreeEntry {
11419                path: e.path.as_bytes(),
11420                mode: e.mode,
11421                oid: e.oid.clone(),
11422            })
11423            .collect();
11424        let tree_oid =
11425            write_tree_entries_stream(&tree_entries, &prefix, None, db, &mut checker, false)?;
11426        // Mark every consumed path so the second pass drops them.
11427        for e in &subtree {
11428            consumed.insert(e.path.as_bytes().to_vec());
11429        }
11430        // The sparse-dir entry's name is the directory path WITH a trailing slash.
11431        let mut sparse_path = dir.clone();
11432        sparse_path.push(b'/');
11433        let mut sparse_entry =
11434            blank_sparse_blob_entry(format, &sparse_path, SPARSE_DIR_MODE, tree_oid);
11435        sparse_entry.set_skip_worktree(true);
11436        new_entries.push(sparse_entry);
11437    }
11438    // Carry forward every entry that was not collapsed.
11439    for entry in &index.entries {
11440        if consumed.contains(entry.path.as_bytes()) {
11441            continue;
11442        }
11443        new_entries.push(entry.clone());
11444    }
11445    new_entries.sort_by(|a, b| a.path.as_bytes().cmp(b.path.as_bytes()));
11446    index.entries = new_entries;
11447    index.set_sparse_extension();
11448    normalize_index_version_for_extended_flags(index);
11449    Ok(())
11450}
11451
11452/// Whether the worktree file described by `metadata` is up to date with `entry`'s
11453/// cached index stat, using the size + mtime heuristic at the core of git's
11454/// `ie_match_stat`. A freshly-checked-out (clean) file matches; a file that was
11455/// deleted and later recreated — as happens when an out-of-cone path reappears in
11456/// the worktree — gets a fresh mtime and so reads as modified, which is exactly
11457/// the state git declines to overwrite during a sparse update.
11458fn worktree_entry_is_uptodate(entry: &IndexEntry, metadata: &fs::Metadata) -> bool {
11459    if u64::from(entry.size) != metadata.len() {
11460        return false;
11461    }
11462    let Some((mtime_seconds, mtime_nanoseconds)) = file_mtime_parts(metadata) else {
11463        // Without a usable mtime we cannot prove the file is clean; treat it as
11464        // not up to date so a present file is never silently discarded.
11465        return false;
11466    };
11467    u64::from(entry.mtime_seconds) == mtime_seconds
11468        && u64::from(entry.mtime_nanoseconds) == mtime_nanoseconds
11469}
11470
11471fn worktree_entry_ref_is_uptodate(entry: &IndexEntryRef<'_>, metadata: &fs::Metadata) -> bool {
11472    if u64::from(entry.size) != metadata.len() {
11473        return false;
11474    }
11475    let Some((mtime_seconds, mtime_nanoseconds)) = file_mtime_parts(metadata) else {
11476        return false;
11477    };
11478    u64::from(entry.mtime_seconds) == mtime_seconds
11479        && u64::from(entry.mtime_nanoseconds) == mtime_nanoseconds
11480}
11481
/// Splits the file's modification time into whole seconds since the Unix epoch
/// and the sub-second nanosecond remainder, the two-field shape in which git
/// stores `mtime` in the index.
///
/// Returns `None` when the platform cannot report a modification time or when
/// that time lies before the Unix epoch.
fn file_mtime_parts(metadata: &fs::Metadata) -> Option<(u64, u64)> {
    metadata
        .modified()
        .ok()
        .and_then(|mtime| mtime.duration_since(UNIX_EPOCH).ok())
        .map(|since_epoch| {
            (
                since_epoch.as_secs(),
                u64::from(since_epoch.subsec_nanos()),
            )
        })
}
11489
11490/// Write a git metadata file through a sibling `.lock` file and atomic rename.
11491///
11492/// This helper is intended for small repository/worktree metadata files such as
11493/// `HEAD`, `config.worktree`, or state files under `.git/`. It deliberately does
11494/// not try to replace object or pack writers, which have their own durability
11495/// and naming rules.
11496pub fn write_metadata_file_atomic(
11497    path: impl AsRef<Path>,
11498    bytes: &[u8],
11499    options: AtomicMetadataWriteOptions,
11500) -> Result<AtomicMetadataWriteResult> {
11501    let path = path.as_ref();
11502    let parent = path.parent().ok_or_else(|| {
11503        GitError::InvalidPath(format!("metadata path has no parent: {}", path.display()))
11504    })?;
11505    if !parent.as_os_str().is_empty() {
11506        fs::create_dir_all(parent)?;
11507    }
11508    let lock_path = metadata_lock_path(path)?;
11509    let mut lock = match fs::OpenOptions::new()
11510        .write(true)
11511        .create_new(true)
11512        .open(&lock_path)
11513    {
11514        Ok(lock) => lock,
11515        Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {
11516            return Err(GitError::Transaction(format!(
11517                "metadata lock already exists: {}",
11518                lock_path.display()
11519            )));
11520        }
11521        Err(err) => return Err(err.into()),
11522    };
11523    if let Err(err) = lock.write_all(bytes) {
11524        let _ = fs::remove_file(&lock_path);
11525        return Err(err.into());
11526    }
11527    if options.fsync_file
11528        && let Err(err) = lock.sync_all()
11529    {
11530        let _ = fs::remove_file(&lock_path);
11531        return Err(err.into());
11532    }
11533    drop(lock);
11534    if let Err(err) = fs::rename(&lock_path, path) {
11535        let _ = fs::remove_file(&lock_path);
11536        return Err(err.into());
11537    }
11538    if options.fsync_dir
11539        && let Ok(dir) = fs::File::open(parent)
11540    {
11541        dir.sync_all()?;
11542    }
11543    let metadata = fs::metadata(path)?;
11544    Ok(AtomicMetadataWriteResult {
11545        path: path.to_path_buf(),
11546        len: metadata.len(),
11547        mtime: file_mtime_parts(&metadata),
11548    })
11549}
11550
11551fn metadata_lock_path(path: &Path) -> Result<PathBuf> {
11552    let file_name = path.file_name().ok_or_else(|| {
11553        GitError::InvalidPath(format!("metadata path has no filename: {}", path.display()))
11554    })?;
11555    let mut lock_name = file_name.to_os_string();
11556    lock_name.push(".lock");
11557    Ok(path.with_file_name(lock_name))
11558}
11559
11560/// Checks out `target` like [`checkout_detached`], but materializes the
11561/// worktree through the supplied [`SparseCheckout`]: out-of-cone paths are not
11562/// written, get their skip-worktree bit set, and have any stale worktree file
11563/// removed. Existing public checkout entry points are unchanged; this is an
11564/// additive sparse-aware variant.
11565///
11566/// The pattern interpretation is auto-detected ([`SparseCheckoutMode::Auto`]);
11567/// to reconcile an existing checkout under an explicit mode use
11568/// [`apply_sparse_checkout_with_mode`].
11569pub fn checkout_detached_sparse(
11570    worktree_root: impl AsRef<Path>,
11571    git_dir: impl AsRef<Path>,
11572    format: ObjectFormat,
11573    target: &ObjectId,
11574    committer: Vec<u8>,
11575    message: Vec<u8>,
11576    sparse: &SparseCheckout,
11577) -> Result<CheckoutResult> {
11578    let worktree_root = worktree_root.as_ref();
11579    let git_dir = git_dir.as_ref();
11580    let files = checkout_commit_to_index_and_worktree_sparse(
11581        worktree_root,
11582        git_dir,
11583        format,
11584        target,
11585        Some((sparse, SparseCheckoutMode::Auto)),
11586    )?;
11587    let refs = FileRefStore::new(git_dir, format);
11588    let zero = ObjectId::null(format);
11589    let mut tx = refs.transaction();
11590    tx.update(RefUpdate {
11591        name: "HEAD".into(),
11592        expected: None,
11593        new: RefTarget::Direct(*target),
11594        reflog: Some(ReflogEntry {
11595            old_oid: zero,
11596            new_oid: *target,
11597            committer,
11598            message,
11599        }),
11600    });
11601    tx.commit()?;
11602    Ok(CheckoutResult {
11603        branch: target.to_string(),
11604        oid: *target,
11605        files,
11606    })
11607}
11608
11609fn materialize_index_entry_file(
11610    db: &FileObjectDatabase,
11611    worktree_root: &Path,
11612    file_path: &Path,
11613    entry: &IndexEntry,
11614) -> Result<()> {
11615    // A gitlink (mode 160000) has no blob in this object store and materializes
11616    // as a directory (git's `write_entry` S_IFGITLINK arm: mkdir, never read an
11617    // object). Single gitlink rule via `sley_index::is_gitlink`; without it a
11618    // sparse re-materialization of a submodule path would fail with "not found:
11619    // blob object <commit-oid>".
11620    if sley_index::is_gitlink(entry.mode) {
11621        prepare_blob_parent_dirs(worktree_root, file_path)?;
11622        fs::create_dir_all(file_path)?;
11623        return Ok(());
11624    }
11625    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
11626    prepare_blob_parent_dirs(worktree_root, file_path)?;
11627    remove_existing_worktree_path(file_path)?;
11628    fs::write(file_path, &object.body)?;
11629    set_worktree_file_mode(file_path, entry.mode)?;
11630    Ok(())
11631}
11632
11633fn set_skip_worktree(entry: &mut IndexEntry) {
11634    entry.flags |= INDEX_FLAG_EXTENDED;
11635    entry.flags_extended |= INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
11636}
11637
11638fn clear_skip_worktree(entry: &mut IndexEntry) {
11639    entry.flags_extended &= !INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
11640    if entry.flags_extended == 0 {
11641        entry.flags &= !INDEX_FLAG_EXTENDED;
11642    }
11643}
11644
11645pub fn restore_worktree_paths_from_head(
11646    worktree_root: impl AsRef<Path>,
11647    git_dir: impl AsRef<Path>,
11648    format: ObjectFormat,
11649    paths: &[PathBuf],
11650) -> Result<RestoreResult> {
11651    let worktree_root = worktree_root.as_ref();
11652    let git_dir = git_dir.as_ref();
11653    let index_path = repository_index_path(git_dir);
11654    let index = if index_path.exists() {
11655        Index::parse(&fs::read(&index_path)?, format)?
11656    } else {
11657        Index {
11658            version: 2,
11659            entries: Vec::new(),
11660            extensions: Vec::new(),
11661            checksum: None,
11662        }
11663    };
11664    let db = FileObjectDatabase::from_git_dir(git_dir, format);
11665    let head_entries = head_tree_entries(git_dir, format, &db)?;
11666    restore_worktree_paths_from_entries(worktree_root, &db, index, &head_entries, paths)
11667}
11668
11669pub fn restore_worktree_paths_from_tree(
11670    worktree_root: impl AsRef<Path>,
11671    git_dir: impl AsRef<Path>,
11672    format: ObjectFormat,
11673    tree_oid: &ObjectId,
11674    paths: &[PathBuf],
11675) -> Result<RestoreResult> {
11676    let worktree_root = worktree_root.as_ref();
11677    let git_dir = git_dir.as_ref();
11678    let index_path = repository_index_path(git_dir);
11679    let index = if index_path.exists() {
11680        Index::parse(&fs::read(&index_path)?, format)?
11681    } else {
11682        Index {
11683            version: 2,
11684            entries: Vec::new(),
11685            extensions: Vec::new(),
11686            checksum: None,
11687        }
11688    };
11689    let db = FileObjectDatabase::from_git_dir(git_dir, format);
11690    let source_entries = tree_entries(&db, format, tree_oid)?;
11691    restore_worktree_paths_from_entries(worktree_root, &db, index, &source_entries, paths)
11692}
11693
11694fn restore_worktree_paths_from_entries(
11695    worktree_root: &Path,
11696    db: &FileObjectDatabase,
11697    index: Index,
11698    source_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
11699    paths: &[PathBuf],
11700) -> Result<RestoreResult> {
11701    let index_entries = index
11702        .entries
11703        .into_iter()
11704        .map(|entry| entry.path.into_bytes())
11705        .collect::<BTreeSet<_>>();
11706    let mut restored = BTreeSet::new();
11707    for path in paths {
11708        let absolute = if path.is_absolute() {
11709            path.clone()
11710        } else {
11711            worktree_root.join(path)
11712        };
11713        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
11714            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
11715        })?;
11716        let git_path = git_path_bytes(relative)?;
11717        let recursive = path == Path::new(".")
11718            || path.to_string_lossy().ends_with('/')
11719            || absolute.is_dir()
11720            || index_entries
11721                .iter()
11722                .any(|entry| index_entry_is_under_path(entry, &git_path))
11723            || source_entries
11724                .keys()
11725                .any(|entry| index_entry_is_under_path(entry, &git_path));
11726        let mut matched_paths = BTreeSet::new();
11727        for path in index_entries.iter().chain(source_entries.keys()) {
11728            if path.as_slice() == git_path.as_slice()
11729                || (recursive && index_entry_is_under_path(path, &git_path))
11730            {
11731                matched_paths.insert(path.clone());
11732            }
11733        }
11734        if matched_paths.is_empty() {
11735            eprintln!(
11736                "error: pathspec '{}' did not match any file(s) known to git",
11737                path.display()
11738            );
11739            return Err(GitError::Exit(1));
11740        }
11741        for path in matched_paths {
11742            if let Some(entry) = source_entries.get(&path) {
11743                restore_head_entry_to_worktree(worktree_root, db, &path, entry)?;
11744            } else {
11745                remove_worktree_file(worktree_root, &path)?;
11746            }
11747            restored.insert(path);
11748        }
11749    }
11750    Ok(RestoreResult {
11751        restored: restored.len(),
11752    })
11753}
11754
11755pub fn remove_index_and_worktree_paths(
11756    worktree_root: impl AsRef<Path>,
11757    git_dir: impl AsRef<Path>,
11758    format: ObjectFormat,
11759    paths: &[PathBuf],
11760    options: RemoveOptions,
11761    config_parameters_env: Option<&str>,
11762) -> Result<RemoveResult> {
11763    let worktree_root = worktree_root.as_ref();
11764    let git_dir = git_dir.as_ref();
11765    let index_path = repository_index_path(git_dir);
11766    let index = if index_path.exists() {
11767        Index::parse(&fs::read(&index_path)?, format)?
11768    } else {
11769        Index {
11770            version: 2,
11771            entries: Vec::new(),
11772            extensions: Vec::new(),
11773            checksum: None,
11774        }
11775    };
11776    let db = FileObjectDatabase::from_git_dir(git_dir, format);
11777    let head_entries = head_tree_entries(git_dir, format, &db)?;
11778    // Stat cache for the local-modification check (git's `ie_match_stat`):
11779    // proves a path unchanged from the cached stat without reading its blob, so
11780    // a `git rm --cached` of an untouched path whose blob was removed still
11781    // succeeds (cf. t1450-fsck cell 90). (`sley_index::IndexStatCache` is a
11782    // distinct type from this crate's same-named probe helper above.)
11783    let rm_stat_cache = sley_index::IndexStatCache::from_index(&index, &index_path);
11784    let Index {
11785        version: index_version,
11786        entries: index_entry_list,
11787        extensions: index_extensions,
11788        ..
11789    } = index;
11790    // The set of distinct index paths (any stage) — used for membership tests.
11791    let index_paths: BTreeSet<Vec<u8>> = index_entry_list
11792        .iter()
11793        .map(|entry| entry.path.as_bytes().to_vec())
11794        .collect();
11795    // Paths tracked as a gitlink (mode 160000) at stage 0. Removing one of these
11796    // from the worktree is a *submodule* removal: git's builtin/rm.c flags the
11797    // entry `is_submodule = S_ISGITLINK(ce->ce_mode)` and removes the populated
11798    // submodule *directory* via `remove_dir_recursively` rather than `unlink`,
11799    // which would fail with EISDIR ("Is a directory") on the submodule checkout.
11800    // That EISDIR is exactly the gate that blocked the t1013/t7112/t6438/t2013
11801    // submodule setups. Use the single `sley_index::is_gitlink` rule — no new
11802    // predicate. (Unmerged gitlinks have no stage-0 entry and are not submodule
11803    // removals here, matching git, which keys `is_submodule` off the matched ce.)
11804    let gitlink_paths: BTreeSet<Vec<u8>> = index_entry_list
11805        .iter()
11806        .filter(|entry| entry.stage() == Stage::Normal && sley_index::is_gitlink(entry.mode))
11807        .map(|entry| entry.path.as_bytes().to_vec())
11808        .collect();
11809    // Paths selected for removal. A single selected path removes ALL of its
11810    // stage entries (so resolving an unmerged path by removal drops stages
11811    // 1/2/3 together), matching git's name-keyed removal.
11812    let mut selected = BTreeSet::new();
11813    for path in paths {
11814        let absolute = if path.is_absolute() {
11815            path.clone()
11816        } else {
11817            worktree_root.join(path)
11818        };
11819        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
11820            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
11821        })?;
11822        // A pathspec with a trailing slash (e.g. `git rm dir/`) only matches a
11823        // directory: it must never match a same-named tracked file. `Path`'s
11824        // component iterator drops the slash, so capture it before it is lost.
11825        let has_trailing_slash = path_has_trailing_separator(&absolute);
11826        let git_path = git_path_bytes(relative)?;
11827        if !has_trailing_slash && index_paths.contains(&git_path) {
11828            selected.insert(git_path);
11829            continue;
11830        }
11831        // A wildcard pathspec (e.g. `git rm "*"` or `git rm "dir/*.c"`) matches
11832        // index entries by git's pathspec matcher rather than by literal path or
11833        // directory prefix. Try the glob match first when the spec contains
11834        // wildcard metacharacters; a glob match removes the entries directly
11835        // (no `-r` needed — the pathspec already names the files).
11836        if pathspec_is_glob(&git_path) {
11837            let glob_matched = index_paths
11838                .iter()
11839                .filter(|entry| {
11840                    pathspec_item_matches(&git_path, entry, PathspecMatchMagic::default())
11841                })
11842                .cloned()
11843                .collect::<Vec<_>>();
11844            if !glob_matched.is_empty() {
11845                selected.extend(glob_matched);
11846                continue;
11847            }
11848            if options.ignore_unmatch {
11849                continue;
11850            }
11851            eprintln!(
11852                "fatal: pathspec '{}' did not match any files",
11853                String::from_utf8_lossy(&git_path)
11854            );
11855            return Err(GitError::Exit(128));
11856        }
11857        let matched = index_paths
11858            .iter()
11859            .filter(|entry| index_entry_is_under_path(entry, &git_path))
11860            .cloned()
11861            .collect::<Vec<_>>();
11862        if matched.is_empty() {
11863            if options.ignore_unmatch {
11864                continue;
11865            }
11866            eprintln!(
11867                "fatal: pathspec '{}' did not match any files",
11868                String::from_utf8_lossy(&git_path)
11869            );
11870            return Err(GitError::Exit(128));
11871        }
11872        if !options.recursive {
11873            eprintln!(
11874                "fatal: not removing '{}' recursively without -r",
11875                String::from_utf8_lossy(&git_path)
11876            );
11877            return Err(GitError::Exit(128));
11878        }
11879        selected.extend(matched);
11880    }
11881
11882    // `git rm` runs the local-modification safety check unless `-f` is given —
11883    // even for `--cached`. The check (a faithful port of builtin/rm.c's
11884    // `check_local_mod`) buckets each selected path into one of three error
11885    // classes and prints all of them at once (collected, not fail-fast), so a
11886    // single `git rm a b c` reports every offending path. See the message
11887    // assertions in t3600-rm.sh.
11888    if !options.force {
11889        let config =
11890            sley_config::read_repo_config(git_dir, config_parameters_env).unwrap_or_default();
11891        // advice.rmhints (default true) gates the parenthetical "(use ...)" hints.
11892        let show_hints = config.get_bool("advice", None, "rmhints").unwrap_or(true);
11893        // Map each selected path to its stage-0 index entry for the check; an
11894        // unmerged path (no stage 0) is skipped, exactly like git's loop
11895        // (index_name_pos fails, and a non-gitlink ours entry `continue`s).
11896        let stage0: BTreeMap<&[u8], &IndexEntry> = index_entry_list
11897            .iter()
11898            .filter(|entry| entry.stage() == Stage::Normal)
11899            .map(|entry| (entry.path.as_bytes(), entry))
11900            .collect();
11901        let mut files_staged: Vec<&[u8]> = Vec::new();
11902        let mut files_cached: Vec<&[u8]> = Vec::new();
11903        let mut files_local: Vec<&[u8]> = Vec::new();
11904        for path in &selected {
11905            let Some(index_entry) = stage0.get(path.as_slice()) else {
11906                // Unmerged path with no stage-0 entry: resolving by removal is
11907                // safe and not warning-worthy.
11908                continue;
11909            };
11910            let worktree_file = worktree_path(worktree_root, path)?;
11911            // Is the worktree path different from the index?
11912            //
11913            // Mirror builtin/rm.c's `check_local_mod`: when `lstat` fails with a
11914            // "missing file" error (ENOENT *or* ENOTDIR — the path vanished, or a
11915            // leading component became a file) the file has already gone from the
11916            // working tree, so git `continue`s and never buckets the path. Same
11917            // for a tracked plain path that is now a directory on disk: git
11918            // treats that as ENOENT and skips it (the later worktree-removal step
11919            // is what fails on a non-empty directory).
11920            let local_changes = match fs::symlink_metadata(&worktree_file) {
11921                Err(err)
11922                    if matches!(
11923                        err.kind(),
11924                        std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
11925                    ) || err.raw_os_error() == Some(20) =>
11926                {
11927                    // ENOENT/ENOTDIR: already gone — not warning-worthy.
11928                    continue;
11929                }
11930                Err(err) => return Err(err.into()),
11931                Ok(meta) if meta.is_dir() => continue,
11932                Ok(meta) => {
11933                    // git refreshes the index before `check_local_mod`, so a path
11934                    // whose stat changed but whose content is unchanged is up to
11935                    // date. We mirror that: a clean cached stat short-circuits to
11936                    // "unchanged"; otherwise re-hash the (clean-filtered) worktree
11937                    // content and compare to the index entry's *cached oid* (git's
11938                    // refresh `hash_object`), NOT the stored blob. Comparing to the
11939                    // oid — not the blob bytes — means a removed object does not
11940                    // abort the check (the worktree may still hash to the cached
11941                    // oid), so `git rm --cached` of a path whose blob was deleted
11942                    // still succeeds.
11943                    match rm_stat_cache.index_entry_worktree_stat_verdict(index_entry, &meta) {
11944                        sley_index::StatVerdict::Clean => false,
11945                        sley_index::StatVerdict::Dirty
11946                        | sley_index::StatVerdict::RacyNeedsContentCheck => {
11947                            let worktree_bytes = apply_clean_filter(
11948                                worktree_root,
11949                                git_dir,
11950                                &config,
11951                                path,
11952                                &fs::read(&worktree_file)?,
11953                            )?;
11954                            let worktree_oid = EncodedObject::new(ObjectType::Blob, worktree_bytes)
11955                                .object_id(format)?;
11956                            worktree_oid != index_entry.oid
11957                        }
11958                    }
11959                }
11960            };
11961            // Is the index different from the HEAD commit? (Before the first
11962            // commit, anything staged is treated as changed from HEAD.)
11963            let staged_changes = match head_entries.get(path) {
11964                Some(head_entry) => {
11965                    head_entry.oid != index_entry.oid || head_entry.mode != index_entry.mode
11966                }
11967                None => true,
11968            };
11969            if local_changes && staged_changes {
11970                // `git rm --cached` of an intent-to-add entry is safe.
11971                if !options.cached || !index_entry.is_intent_to_add() {
11972                    files_staged.push(path);
11973                }
11974            } else if !options.cached {
11975                if staged_changes {
11976                    files_cached.push(path);
11977                }
11978                if local_changes {
11979                    files_local.push(path);
11980                }
11981            }
11982        }
11983        let mut errs = false;
11984        print_rm_error_files(
11985            &files_staged,
11986            "the following file has staged content different from both the\nfile and the HEAD:",
11987            "the following files have staged content different from both the\nfile and the HEAD:",
11988            "\n(use -f to force removal)",
11989            show_hints,
11990            &mut errs,
11991        );
11992        print_rm_error_files(
11993            &files_cached,
11994            "the following file has changes staged in the index:",
11995            "the following files have changes staged in the index:",
11996            "\n(use --cached to keep the file, or -f to force removal)",
11997            show_hints,
11998            &mut errs,
11999        );
12000        print_rm_error_files(
12001            &files_local,
12002            "the following file has local modifications:",
12003            "the following files have local modifications:",
12004            "\n(use --cached to keep the file, or -f to force removal)",
12005            show_hints,
12006            &mut errs,
12007        );
12008        if errs {
12009            return Err(GitError::Exit(1));
12010        }
12011    }
12012
12013    if options.dry_run {
12014        return Ok(RemoveResult {
12015            removed: selected.into_iter().collect(),
12016        });
12017    }
12018    // Mirror builtin/rm.c's ordering: remove the worktree files BEFORE writing
12019    // the new index. If the very first removal fails (and nothing has been
12020    // removed yet), abort without committing the index, so a `git rm d` where
12021    // `d` is now a non-empty directory fails AND leaves the index untouched.
12022    // Once any file has been removed we commit to finishing (git does the same).
12023    if !options.cached {
12024        let mut removed_any = false;
12025        for path in &selected {
12026            let is_gitlink = gitlink_paths.contains(path);
12027            match remove_tracked_worktree_path(worktree_root, path, is_gitlink)? {
12028                true => removed_any = true,
12029                false if !removed_any => {
12030                    eprintln!(
12031                        "fatal: git rm: '{}': Is a directory",
12032                        String::from_utf8_lossy(path)
12033                    );
12034                    return Err(GitError::Exit(128));
12035                }
12036                false => {}
12037            }
12038        }
12039    }
12040    // Keep every entry whose path was not selected, preserving original order
12041    // and all stages of unmerged paths that were not removed.
12042    let entries = index_entry_list
12043        .into_iter()
12044        .filter(|entry| !selected.contains(entry.path.as_bytes()))
12045        .collect::<Vec<_>>();
12046    // Removing entries invalidates the cache-tree (`TREE` extension): a stale
12047    // cached subtree id makes `git diff --cached`/`git status` short-circuit the
12048    // comparison of an affected directory against HEAD and miss the deletion
12049    // (observed: `git rm dir/nested.txt` left a valid `dir/` cache-tree, so the
12050    // deletion never showed in the cached diff). Git invalidates the cache-tree
12051    // on any index mutation; drop it so it is rebuilt on the next write, exactly
12052    // like the `add` path does above.
12053    let extensions = index_extensions_without_cache_tree(&index_extensions);
12054    fs::write(
12055        index_path,
12056        Index {
12057            version: index_version,
12058            entries,
12059            extensions,
12060            checksum: None,
12061        }
12062        .write(format)?,
12063    )?;
12064    Ok(RemoveResult {
12065        removed: selected.into_iter().collect(),
12066    })
12067}
12068
12069/// Remove a tracked path from the working tree, mirroring builtin/rm.c's
12070/// removal loop. For a plain path this is `remove_path`: unlink the file and
12071/// prune now-empty parent directories. For a gitlink (`is_gitlink`, mode
12072/// 160000) it is the submodule branch — git removes the populated submodule
12073/// *directory* with `remove_dir_recursively` (NOT `unlink`, which fails EISDIR),
12074/// descending into and deleting the nested `.git` because the `git rm` call site
12075/// passes `flag` *without* `REMOVE_DIR_KEEP_NESTED_GIT`; it `die`s only if that
12076/// recursive removal genuinely fails.
12077///
12078/// Returns `Ok(true)` when the path was removed, `Ok(false)` when a *plain* path
12079/// could not be unlinked because it is a directory (the caller decides whether
12080/// that aborts the run). A path that has already vanished is a no-op success.
12081fn remove_tracked_worktree_path(root: &Path, path: &[u8], is_gitlink: bool) -> Result<bool> {
12082    let file = worktree_path(root, path)?;
12083    match fs::symlink_metadata(&file) {
12084        Err(err)
12085            if matches!(
12086                err.kind(),
12087                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
12088            ) =>
12089        {
12090            return Ok(true);
12091        }
12092        Err(err) if err.raw_os_error() == Some(20) => return Ok(true), // ENOTDIR
12093        Err(err) => return Err(err.into()),
12094        Ok(meta) if meta.is_dir() => {
12095            if is_gitlink {
12096                // Submodule removal. Mirror builtin/rm.c's `is_submodule` branch:
12097                // `remove_dir_recursively(&buf, force ? REMOVE_DIR_PURGE_ORIGINAL_CWD : 0)`.
12098                // No `REMOVE_DIR_KEEP_NESTED_GIT` flag, so the whole subtree —
12099                // including the nested `.git` of the populated submodule — is
12100                // removed. git `die`s ("could not remove '<path>'") if the
12101                // recursive removal fails; propagate the IO error to match.
12102                fs::remove_dir_all(&file)?;
12103                prune_empty_parents(root, file.parent())?;
12104                return Ok(true);
12105            }
12106            // A directory in the worktree where a plain file is tracked cannot
12107            // be unlinked (git's remove_path fails on EISDIR). Report it so the
12108            // caller can abort the removal without committing the index.
12109            return Ok(false);
12110        }
12111        Ok(_) => {}
12112    }
12113    fs::remove_file(&file)?;
12114    prune_empty_parents(root, file.parent())?;
12115    Ok(true)
12116}
12117
/// Emit one batched `git rm` safety error on stderr, shaped like
/// builtin/rm.c's `print_error_files`.
///
/// The message is the `singular` or `plural` heading (chosen by the number of
/// `files`), followed by each offending path on its own four-space-indented
/// line, followed by `hint` when `show_hints` is set. Nothing is printed and
/// `*errs` is left untouched when `files` is empty; otherwise `*errs` becomes
/// `true` so the caller can fail after every class has been reported.
fn print_rm_error_files(
    files: &[&[u8]],
    singular: &str,
    plural: &str,
    hint: &str,
    show_hints: bool,
    errs: &mut bool,
) {
    let heading = match files {
        [] => return,
        [_] => singular,
        _ => plural,
    };
    let listing: String = files
        .iter()
        .map(|path| format!("\n    {}", String::from_utf8_lossy(path)))
        .collect();
    let trailer = if show_hints { hint } else { "" };
    eprintln!("error: {heading}{listing}{trailer}");
    *errs = true;
}
12144
12145pub fn move_index_and_worktree_path(
12146    worktree_root: impl AsRef<Path>,
12147    git_dir: impl AsRef<Path>,
12148    format: ObjectFormat,
12149    source: &Path,
12150    destination: &Path,
12151    options: MoveOptions,
12152) -> Result<MoveResult> {
12153    let worktree_root = worktree_root.as_ref();
12154    let git_dir = git_dir.as_ref();
12155    let index_path = repository_index_path(git_dir);
12156    let mut index = if index_path.exists() {
12157        Index::parse(&fs::read(&index_path)?, format)?
12158    } else {
12159        Index {
12160            version: 2,
12161            entries: Vec::new(),
12162            extensions: Vec::new(),
12163            checksum: None,
12164        }
12165    };
12166    let source_absolute = if source.is_absolute() {
12167        source.to_path_buf()
12168    } else {
12169        worktree_root.join(source)
12170    };
12171    let destination_absolute = if destination.is_absolute() {
12172        destination.to_path_buf()
12173    } else {
12174        worktree_root.join(destination)
12175    };
12176    let destination_absolute = if destination_absolute.is_dir() {
12177        let Some(file_name) = source_absolute.file_name() else {
12178            return Err(GitError::InvalidPath(format!(
12179                "invalid source path {}",
12180                source.display()
12181            )));
12182        };
12183        destination_absolute.join(file_name)
12184    } else {
12185        destination_absolute
12186    };
12187    let source_relative = source_absolute.strip_prefix(worktree_root).map_err(|_| {
12188        GitError::InvalidPath(format!("path {} is outside worktree", source.display()))
12189    })?;
12190    let destination_relative = destination_absolute
12191        .strip_prefix(worktree_root)
12192        .map_err(|_| {
12193            GitError::InvalidPath(format!(
12194                "path {} is outside worktree",
12195                destination.display()
12196            ))
12197        })?;
12198    let source_path = git_path_bytes(source_relative)?;
12199    let destination_path = git_path_bytes(destination_relative)?;
12200    let destination_has_trailing_separator = path_has_trailing_separator(&destination_absolute);
12201    if destination_has_trailing_separator && !destination_absolute.is_dir() {
12202        if options.skip_errors {
12203            return Ok(MoveResult {
12204                source: source_path,
12205                destination: destination_path,
12206                skipped: true,
12207                fatal: None,
12208                details: Vec::new(),
12209            });
12210        }
12211        let mut destination = String::from_utf8_lossy(&destination_path).into_owned();
12212        destination.push('/');
12213        if options.dry_run {
12214            let fatal = format!(
12215                "fatal: destination directory does not exist, source={}, destination={destination}",
12216                String::from_utf8_lossy(&source_path),
12217            );
12218            return Ok(MoveResult {
12219                source: source_path,
12220                destination: destination.clone().into_bytes(),
12221                skipped: false,
12222                fatal: Some(fatal),
12223                details: Vec::new(),
12224            });
12225        }
12226        eprintln!(
12227            "fatal: destination directory does not exist, source={}, destination={destination}",
12228            String::from_utf8_lossy(&source_path),
12229        );
12230        return Err(GitError::Exit(128));
12231    }
12232    if destination_absolute.exists() {
12233        if !options.force {
12234            if options.skip_errors {
12235                return Ok(MoveResult {
12236                    source: source_path,
12237                    destination: destination_path,
12238                    skipped: true,
12239                    fatal: None,
12240                    details: Vec::new(),
12241                });
12242            }
12243            if options.dry_run {
12244                let fatal = format!(
12245                    "fatal: destination exists, source={}, destination={}",
12246                    String::from_utf8_lossy(&source_path),
12247                    String::from_utf8_lossy(&destination_path)
12248                );
12249                return Ok(MoveResult {
12250                    source: source_path,
12251                    destination: destination_path,
12252                    skipped: false,
12253                    fatal: Some(fatal),
12254                    details: Vec::new(),
12255                });
12256            }
12257            eprintln!(
12258                "fatal: destination exists, source={}, destination={}",
12259                String::from_utf8_lossy(&source_path),
12260                String::from_utf8_lossy(&destination_path)
12261            );
12262            return Err(GitError::Exit(128));
12263        }
12264        if !options.dry_run && destination_absolute.is_dir() {
12265            fs::remove_dir_all(&destination_absolute)?;
12266        } else if !options.dry_run {
12267            fs::remove_file(&destination_absolute)?;
12268        }
12269    }
12270    let directory_prefix = {
12271        let mut prefix = source_path.clone();
12272        prefix.push(b'/');
12273        prefix
12274    };
12275    let directory_entries: Vec<_> = index
12276        .entries
12277        .iter()
12278        .filter(|entry| entry.path.as_bytes().starts_with(&directory_prefix))
12279        .cloned()
12280        .collect();
12281    if !directory_entries.is_empty() {
12282        let details: Vec<_> = directory_entries
12283            .iter()
12284            .map(|entry| {
12285                let suffix = &entry.path.as_bytes()[source_path.len()..];
12286                let mut destination = destination_path.clone();
12287                destination.extend_from_slice(suffix);
12288                MoveDetail {
12289                    source: entry.path.as_bytes().to_vec(),
12290                    destination,
12291                    skipped: false,
12292                }
12293            })
12294            .collect();
12295        if options.dry_run {
12296            return Ok(MoveResult {
12297                source: source_path,
12298                destination: destination_path,
12299                skipped: false,
12300                fatal: None,
12301                details,
12302            });
12303        }
12304        fs::rename(&source_absolute, &destination_absolute)?;
12305        let moved_paths: Vec<_> = details
12306            .iter()
12307            .map(|detail| detail.destination.clone())
12308            .collect();
12309        index.entries.retain(|entry| {
12310            !entry.path.as_bytes().starts_with(&directory_prefix)
12311                && !moved_paths
12312                    .iter()
12313                    .any(|m| m.as_slice() == entry.path.as_bytes())
12314        });
12315        for (source_entry, detail) in directory_entries.into_iter().zip(details.iter()) {
12316            let relative_path = git_path_to_relative_path(&detail.destination)?;
12317            let metadata = fs::metadata(worktree_root.join(relative_path))?;
12318            let mut destination_entry =
12319                index_entry_from_metadata(detail.destination.clone(), source_entry.oid, &metadata);
12320            destination_entry.mode = source_entry.mode;
12321            index.entries.push(destination_entry);
12322        }
12323        index
12324            .entries
12325            .sort_by(|left, right| left.path.cmp(&right.path));
12326        index.extensions.clear();
12327        fs::write(index_path, index.write(format)?)?;
12328        return Ok(MoveResult {
12329            source: source_path,
12330            destination: destination_path,
12331            skipped: false,
12332            fatal: None,
12333            details,
12334        });
12335    }
12336
12337    let Some(position) = index
12338        .entries
12339        .iter()
12340        .position(|entry| entry.path == source_path)
12341    else {
12342        if options.skip_errors {
12343            return Ok(MoveResult {
12344                source: source_path,
12345                destination: destination_path,
12346                skipped: true,
12347                fatal: None,
12348                details: Vec::new(),
12349            });
12350        }
12351        let source_kind = if source_absolute.exists() {
12352            "not under version control"
12353        } else {
12354            "bad source"
12355        };
12356        if options.dry_run {
12357            let fatal = format!(
12358                "fatal: {source_kind}, source={}, destination={}",
12359                String::from_utf8_lossy(&source_path),
12360                String::from_utf8_lossy(&destination_path)
12361            );
12362            return Ok(MoveResult {
12363                source: source_path,
12364                destination: destination_path,
12365                skipped: false,
12366                fatal: Some(fatal),
12367                details: Vec::new(),
12368            });
12369        }
12370        eprintln!(
12371            "fatal: {source_kind}, source={}, destination={}",
12372            String::from_utf8_lossy(&source_path),
12373            String::from_utf8_lossy(&destination_path)
12374        );
12375        return Err(GitError::Exit(128));
12376    };
12377    if options.dry_run {
12378        return Ok(MoveResult {
12379            source: source_path,
12380            destination: destination_path,
12381            skipped: false,
12382            fatal: None,
12383            details: Vec::new(),
12384        });
12385    }
12386    if let Some(parent) = destination_absolute.parent()
12387        && !parent.exists()
12388    {
12389        if options.skip_errors {
12390            return Ok(MoveResult {
12391                source: source_path,
12392                destination: destination_path,
12393                skipped: true,
12394                fatal: None,
12395                details: Vec::new(),
12396            });
12397        }
12398        eprintln!(
12399            "fatal: renaming '{}' failed: No such file or directory",
12400            String::from_utf8_lossy(&source_path)
12401        );
12402        return Err(GitError::Exit(128));
12403    }
12404    fs::rename(&source_absolute, &destination_absolute)?;
12405    let metadata = fs::metadata(&destination_absolute)?;
12406    let source_entry = index.entries.remove(position);
12407    let mut destination_entry =
12408        index_entry_from_metadata(destination_path.clone(), source_entry.oid, &metadata);
12409    destination_entry.mode = source_entry.mode;
12410    index.entries.retain(|entry| entry.path != destination_path);
12411    index.entries.push(destination_entry);
12412    index
12413        .entries
12414        .sort_by(|left, right| left.path.cmp(&right.path));
12415    index.extensions.clear();
12416    fs::write(index_path, index.write(format)?)?;
12417    Ok(MoveResult {
12418        source: source_path,
12419        destination: destination_path,
12420        skipped: false,
12421        fatal: None,
12422        details: Vec::new(),
12423    })
12424}
12425
12426fn restore_index_entry(
12427    worktree_root: &Path,
12428    git_dir: &Path,
12429    format: ObjectFormat,
12430    db: &FileObjectDatabase,
12431    entry: &IndexEntry,
12432    smudge_config: Option<&GitConfig>,
12433) -> Result<Option<IndexEntry>> {
12434    // A gitlink (mode 160000) names a commit in the submodule's repository, not
12435    // a blob here — reading it as a blob fails ("not found: blob object"). git's
12436    // `checkout_entry` S_IFGITLINK arm just ensures the submodule directory
12437    // exists and never touches an object; the submodule's content is `submodule
12438    // update` territory. Single gitlink rule via `sley_index::is_gitlink`.
12439    if sley_index::is_gitlink(entry.mode) {
12440        let dir_path = worktree_path(worktree_root, entry.path.as_bytes())?;
12441        fs::create_dir_all(&dir_path)?;
12442        return Ok(None);
12443    }
12444    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
12445    let body: Cow<'_, [u8]> = match smudge_config {
12446        Some(config) => {
12447            let checks = smudge_attribute_checks_from_index(
12448                worktree_root,
12449                git_dir,
12450                format,
12451                entry.path.as_bytes(),
12452            )?;
12453            apply_smudge_filter_with_attributes_cow(
12454                config,
12455                &checks,
12456                entry.path.as_bytes(),
12457                &object.body,
12458            )?
12459        }
12460        None => Cow::Borrowed(&object.body),
12461    };
12462    let file_path = worktree_path(worktree_root, entry.path.as_bytes())?;
12463    prepare_blob_parent_dirs(worktree_root, &file_path)?;
12464    remove_existing_worktree_path(&file_path)?;
12465    fs::write(&file_path, &body)?;
12466    set_worktree_file_mode(&file_path, entry.mode)?;
12467    let metadata = fs::symlink_metadata(&file_path)?;
12468    Ok(Some(index_entry_with_refreshed_stat(entry, &metadata)))
12469}
12470
12471fn index_entry_with_refreshed_stat(entry: &IndexEntry, metadata: &fs::Metadata) -> IndexEntry {
12472    let mut refreshed = index_entry_from_metadata(entry.path.clone(), entry.oid, metadata);
12473    refreshed.mode = entry.mode;
12474    refreshed.flags = entry.flags;
12475    refreshed.flags_extended = entry.flags_extended;
12476    refreshed
12477}
12478
12479fn restored_head_index_entry(
12480    _worktree_root: &Path,
12481    _db: &FileObjectDatabase,
12482    path: &[u8],
12483    entry: &TrackedEntry,
12484) -> Result<IndexEntry> {
12485    // This restores the index from a tree (reset --mixed / stash / sparse) WITHOUT
12486    // rewriting the worktree file, so the file on disk may hold different content
12487    // than `entry.oid`. Crucially we must NOT copy the worktree file's stat onto
12488    // this entry: that would make the cached stat match a file whose real content
12489    // hashes to a DIFFERENT oid, breaking git's "stat-match implies oid-match"
12490    // invariant that the status stat-cache relies on. Leave the whole stat tuple
12491    // zeroed, including size, so `reset --mixed --no-refresh` remains stat-dirty
12492    // until an explicit/default refresh validates it (t7102 cell 28).
12493    Ok(IndexEntry {
12494        ctime_seconds: 0,
12495        ctime_nanoseconds: 0,
12496        mtime_seconds: 0,
12497        mtime_nanoseconds: 0,
12498        dev: 0,
12499        ino: 0,
12500        mode: entry.mode,
12501        uid: 0,
12502        gid: 0,
12503        size: 0,
12504        oid: entry.oid,
12505        flags: path.len().min(0x0fff) as u16,
12506        flags_extended: 0,
12507        path: BString::from(path),
12508    })
12509}
12510
12511fn restore_head_entry_to_worktree(
12512    worktree_root: &Path,
12513    db: &FileObjectDatabase,
12514    path: &[u8],
12515    entry: &TrackedEntry,
12516) -> Result<()> {
12517    // Route through the single gitlink-aware materializer: a gitlink has no blob
12518    // here, so `write_worktree_blob_entry` would fail reading the commit-oid as
12519    // a blob. `materialize_tree_entry` owns the gitlink-vs-blob decision (mkdir
12520    // the submodule dir) in ONE place. The returned index entry is unused on
12521    // this worktree-only restore path.
12522    materialize_tree_entry(db, worktree_root, path, entry)?;
12523    Ok(())
12524}
12525
12526fn restore_head_entry_to_worktree_and_index(
12527    worktree_root: &Path,
12528    db: &FileObjectDatabase,
12529    path: &[u8],
12530    entry: &TrackedEntry,
12531) -> Result<IndexEntry> {
12532    // Route through the single gitlink-aware materializer rather than calling
12533    // `write_worktree_blob_entry` directly: a gitlink (mode 160000) has no blob
12534    // in this object store, so the blob read would fail with "not found: blob
12535    // object <commit-oid>". `materialize_tree_entry` owns the
12536    // gitlink-vs-blob/symlink decision (mkdir the submodule dir, never read an
12537    // object) in ONE place, so `checkout <tree> -- <gitlink-path>` /
12538    // `restore --source` inherit the same gitlink correctness as `reset --hard`.
12539    materialize_tree_entry(db, worktree_root, path, entry)
12540}
12541
12542fn index_has_entry_under(entries: &[IndexEntry], directory: &[u8]) -> bool {
12543    entries
12544        .iter()
12545        .any(|entry| index_entry_is_under_path(entry.path.as_bytes(), directory))
12546}
12547
/// Whether `entry_path` lies below `directory`, i.e. it is `directory`
/// followed by a `/` and possibly more bytes.
///
/// An empty `directory` stands for the repository root and contains every
/// path. The directory itself (`entry_path == directory`) and siblings that
/// merely share a byte prefix (`dirx/file` vs `dir`) are not "under" it.
fn index_entry_is_under_path(entry_path: &[u8], directory: &[u8]) -> bool {
    if directory.is_empty() {
        return true;
    }
    // The byte right after the directory prefix must be the separator.
    match entry_path.get(directory.len()) {
        Some(&b'/') => entry_path.starts_with(directory),
        _ => false,
    }
}
12557
12558fn index_entry_from_metadata(
12559    path: impl Into<BString>,
12560    oid: ObjectId,
12561    metadata: &fs::Metadata,
12562) -> IndexEntry {
12563    let modified = metadata.modified().ok();
12564    let duration = modified
12565        .and_then(|time| time.duration_since(UNIX_EPOCH).ok())
12566        .unwrap_or_default();
12567    let mode = file_mode(metadata);
12568    let path = path.into();
12569    let flags = path.len().min(0x0fff) as u16;
12570    let mut entry = IndexEntry {
12571        ctime_seconds: duration.as_secs().min(u32::MAX as u64) as u32,
12572        ctime_nanoseconds: duration.subsec_nanos(),
12573        mtime_seconds: duration.as_secs().min(u32::MAX as u64) as u32,
12574        mtime_nanoseconds: duration.subsec_nanos(),
12575        dev: 0,
12576        ino: 0,
12577        mode,
12578        uid: 0,
12579        gid: 0,
12580        size: index_size_from_metadata(metadata),
12581        oid,
12582        flags,
12583        flags_extended: 0,
12584        path,
12585    };
12586    apply_unix_metadata_to_index_entry(&mut entry, metadata);
12587    entry
12588}
12589
12590#[cfg(unix)]
12591fn apply_unix_metadata_to_index_entry(entry: &mut IndexEntry, metadata: &fs::Metadata) {
12592    use std::os::unix::fs::MetadataExt;
12593
12594    entry.ctime_seconds = metadata.ctime().min(u32::MAX as i64).max(0) as u32;
12595    entry.ctime_nanoseconds = metadata.ctime_nsec().min(u32::MAX as i64).max(0) as u32;
12596    entry.dev = metadata.dev() as u32;
12597    entry.ino = metadata.ino() as u32;
12598    entry.uid = metadata.uid();
12599    entry.gid = metadata.gid();
12600}
12601
12602#[cfg(not(unix))]
12603fn apply_unix_metadata_to_index_entry(_entry: &mut IndexEntry, _metadata: &fs::Metadata) {}
12604
/// File length from `metadata` as the 32-bit size stored in an index entry,
/// saturating at `u32::MAX` for files that do not fit.
fn index_size_from_metadata(metadata: &fs::Metadata) -> u32 {
    u32::try_from(metadata.len()).unwrap_or(u32::MAX)
}
12608
12609fn read_expected_object(
12610    db: &FileObjectDatabase,
12611    oid: &ObjectId,
12612    expected: ObjectType,
12613) -> Result<std::sync::Arc<EncodedObject>> {
12614    let object = db
12615        .read_object(oid)
12616        .map_err(|err| expect_missing_object_kind(err, *oid, missing_kind_for_type(expected)))?;
12617    if object.object_type != expected {
12618        return Err(GitError::InvalidObject(format!(
12619            "expected {} {}, found {}",
12620            expected.as_str(),
12621            oid,
12622            object.object_type.as_str()
12623        )));
12624    }
12625    Ok(object)
12626}
12627
12628fn expect_missing_object_kind(
12629    err: GitError,
12630    oid: ObjectId,
12631    expected: MissingObjectKind,
12632) -> GitError {
12633    match err.not_found_kind() {
12634        Some(sley_core::NotFoundKind::Object { .. }) => GitError::object_kind_not_found_in(
12635            oid,
12636            expected,
12637            MissingObjectContext::WorktreeMaterialize,
12638        ),
12639        _ => err,
12640    }
12641}
12642
12643fn missing_kind_for_type(object_type: ObjectType) -> MissingObjectKind {
12644    match object_type {
12645        ObjectType::Blob => MissingObjectKind::Blob,
12646        ObjectType::Tree => MissingObjectKind::Tree,
12647        ObjectType::Commit => MissingObjectKind::Commit,
12648        ObjectType::Tag => MissingObjectKind::Tag,
12649    }
12650}
12651
12652fn read_commit(db: &FileObjectDatabase, format: ObjectFormat, oid: &ObjectId) -> Result<Commit> {
12653    let object = read_expected_object(db, oid, ObjectType::Commit)?;
12654    Commit::parse(format, &object.body)
12655}
12656
/// The mode and object id recorded for one tracked path, without any stat
/// data or the path itself (callers pass the path alongside).
#[derive(Debug, Clone, PartialEq, Eq)]
struct TrackedEntry {
    /// Git file mode of the entry (e.g. 160000 marks a gitlink).
    mode: u32,
    /// Object id the entry points at.
    oid: ObjectId,
}
12662
/// git's racy-git stat cache: the stage-0 index entries keyed by path (so the
/// worktree walk can reuse a cached oid when a file's stat shows it is unchanged
/// since it was staged) plus the index *file's* own mtime, which git uses as the
/// racy-clean reference timestamp.
///
/// SAFETY INVARIANT: trusting a cached oid by stat alone is only sound because
/// every code path that stamps a worktree stat onto an index entry also hashed
/// that exact file content (see `index_entry_from_metadata`), while tree-sourced
/// restores (reset --mixed / stash / sparse) leave the stat zeroed
/// (`restored_head_index_entry`). So a non-zero, non-racy stat match implies the
/// cached oid is the file's true content. When that does not hold we fall through
/// to a full read+filter+hash, so a modified file is never reported clean.
#[derive(Debug, Clone, Default)]
struct IndexStatCache {
    /// Stage-0 index entries keyed by their path bytes. May be empty when the
    /// cache was built from the index mtime alone.
    entries: HashMap<Vec<u8>, IndexEntry>,
    /// The index file's modification time as `(seconds, nanoseconds)`, or `None`
    /// when it could not be determined. Used as git's racy-clean reference.
    index_mtime: Option<(u64, u64)>,
}
12682
12683impl IndexStatCache {
12684    /// Builds the cache from an already-parsed index plus the path of the index
12685    /// file on disk (whose mtime becomes the racy-clean reference). Only stage-0
12686    /// entries are retained; higher merge stages never describe a worktree file.
12687    fn from_index(index: &Index, index_path: &Path) -> Self {
12688        let index_mtime = fs::metadata(index_path)
12689            .ok()
12690            .and_then(|metadata| file_mtime_parts(&metadata));
12691        Self::from_index_mtime(index, index_mtime)
12692    }
12693
12694    fn from_index_mtime(index: &Index, index_mtime: Option<(u64, u64)>) -> Self {
12695        IndexStatCache {
12696            entries: stage0_index_entries(index),
12697            index_mtime,
12698        }
12699    }
12700
12701    fn from_index_mtime_only(index_mtime: Option<(u64, u64)>) -> Self {
12702        IndexStatCache {
12703            entries: HashMap::new(),
12704            index_mtime,
12705        }
12706    }
12707
12708    /// Whether `entry` is "racily clean" in git's sense: its cached mtime is not
12709    /// strictly older than the index file's mtime, so a same-timestamp write
12710    /// could have changed the content without moving the stat. Such entries must
12711    /// always be re-hashed.
12712    ///
12713    /// Conservative by construction: if the index mtime is unknown, or either
12714    /// side's mtime is zero (e.g. a tree-sourced entry whose stat was left
12715    /// zeroed), this returns `true` so the caller re-hashes rather than trusting
12716    /// a stat we cannot prove safe.
12717    fn is_racily_clean(&self, entry: &IndexEntry) -> bool {
12718        let Some(index_mtime) = self.index_mtime else {
12719            return true;
12720        };
12721        if index_mtime == (0, 0) {
12722            return true;
12723        }
12724        let entry_mtime = (
12725            u64::from(entry.mtime_seconds),
12726            u64::from(entry.mtime_nanoseconds),
12727        );
12728        if entry_mtime == (0, 0) {
12729            return true;
12730        }
12731        // Racy unless the index was written strictly after the entry's mtime.
12732        index_mtime <= entry_mtime
12733    }
12734
12735    fn is_racily_clean_ref(&self, entry: &IndexEntryRef<'_>) -> bool {
12736        let Some(index_mtime) = self.index_mtime else {
12737            return true;
12738        };
12739        if index_mtime == (0, 0) {
12740            return true;
12741        }
12742        let entry_mtime = (
12743            u64::from(entry.mtime_seconds),
12744            u64::from(entry.mtime_nanoseconds),
12745        );
12746        if entry_mtime == (0, 0) {
12747            return true;
12748        }
12749        index_mtime <= entry_mtime
12750    }
12751
12752    /// Whether the index has a stage-0 entry for `git_path` (i.e. the path is
12753    /// tracked). Used to skip hashing untracked worktree files.
12754    fn contains(&self, git_path: &[u8]) -> bool {
12755        self.entries.contains_key(git_path)
12756    }
12757
12758    fn tracked_entry(&self, git_path: &[u8]) -> Option<TrackedEntry> {
12759        self.entries.get(git_path).map(|entry| TrackedEntry {
12760            mode: entry.mode,
12761            oid: entry.oid,
12762        })
12763    }
12764
12765    fn index_entry(&self, git_path: &[u8]) -> Option<&IndexEntry> {
12766        self.entries.get(git_path)
12767    }
12768
12769    /// Returns the cached [`TrackedEntry`] for `git_path` (reusing its stored
12770    /// oid, so the caller can SKIP reading, filtering, and hashing the file) only
12771    /// when the worktree file is provably unchanged since it was staged: a
12772    /// stage-0 entry exists, its recorded mode matches the file's current mode
12773    /// (catching pure `chmod`s that do not move mtime), the size+mtime stat
12774    /// check passes, and the entry is not racily clean. Otherwise returns `None`
12775    /// and the caller hashes the file as usual.
12776    fn reuse_tracked_entry(
12777        &self,
12778        git_path: &[u8],
12779        worktree_metadata: &fs::Metadata,
12780    ) -> Option<TrackedEntry> {
12781        let entry = self.entries.get(git_path)?;
12782        self.reuse_index_entry(entry, worktree_metadata)
12783    }
12784
12785    fn reuse_index_entry(
12786        &self,
12787        entry: &IndexEntry,
12788        worktree_metadata: &fs::Metadata,
12789    ) -> Option<TrackedEntry> {
12790        // Gitlink: reusable as-is whenever the worktree path is a directory (a
12791        // submodule is never re-hashed; its cached stat is ignored). Routes
12792        // through the single `sley_index::gitlink_stat_verdict` rule so the
12793        // gitlink-vs-040000 mode mismatch never spuriously rejects it.
12794        if sley_index::is_gitlink(entry.mode) {
12795            return match sley_index::gitlink_stat_verdict(worktree_metadata) {
12796                sley_index::GitlinkStatVerdict::Populated => Some(TrackedEntry {
12797                    mode: entry.mode,
12798                    oid: entry.oid,
12799                }),
12800                sley_index::GitlinkStatVerdict::TypeChanged => None,
12801            };
12802        }
12803        if entry.mode != worktree_entry_mode(worktree_metadata) {
12804            return None;
12805        }
12806        if !worktree_entry_is_uptodate(entry, worktree_metadata) {
12807            return None;
12808        }
12809        if self.is_racily_clean(entry) {
12810            return None;
12811        }
12812        Some(TrackedEntry {
12813            mode: entry.mode,
12814            oid: entry.oid,
12815        })
12816    }
12817
12818    fn reuse_index_entry_ref(
12819        &self,
12820        entry: &IndexEntryRef<'_>,
12821        worktree_metadata: &fs::Metadata,
12822    ) -> Option<TrackedEntry> {
12823        if sley_index::is_gitlink(entry.mode) {
12824            return match sley_index::gitlink_stat_verdict(worktree_metadata) {
12825                sley_index::GitlinkStatVerdict::Populated => Some(TrackedEntry {
12826                    mode: entry.mode,
12827                    oid: entry.oid,
12828                }),
12829                sley_index::GitlinkStatVerdict::TypeChanged => None,
12830            };
12831        }
12832        if entry.mode != worktree_entry_mode(worktree_metadata) {
12833            return None;
12834        }
12835        if !worktree_entry_ref_is_uptodate(entry, worktree_metadata) {
12836            return None;
12837        }
12838        if self.is_racily_clean_ref(entry) {
12839            return None;
12840        }
12841        Some(TrackedEntry {
12842            mode: entry.mode,
12843            oid: entry.oid,
12844        })
12845    }
12846
12847    /// The stage-0 gitlink (mode 160000) index entry at `git_path`, if any.
12848    fn gitlink_entry(&self, git_path: &[u8]) -> Option<&IndexEntry> {
12849        self.entries
12850            .get(git_path)
12851            .filter(|entry| sley_index::is_gitlink(entry.mode))
12852    }
12853}
12854
12855fn read_index_entries(
12856    git_dir: &Path,
12857    format: ObjectFormat,
12858) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
12859    let db = FileObjectDatabase::from_git_dir(git_dir, format);
12860    Ok(read_index_entries_with_stat_cache(git_dir, format, &db)?.0)
12861}
12862
12863fn resolve_head_tree_oid(
12864    git_dir: &Path,
12865    format: ObjectFormat,
12866    db: &FileObjectDatabase,
12867) -> Result<Option<ObjectId>> {
12868    let Some(commit_oid) = resolve_head_commit_oid(git_dir, format)? else {
12869        return Ok(None);
12870    };
12871    if let Some(tree_oid) = sley_rev::commit_graph_tree_oid(git_dir, format, &commit_oid)? {
12872        return Ok(Some(tree_oid));
12873    }
12874    let object = read_expected_object(db, &commit_oid, ObjectType::Commit)?;
12875    let commit = Commit::parse_ref(format, &object.body)?;
12876    Ok(Some(commit.tree))
12877}
12878
12879fn resolve_head_commit_oid(git_dir: &Path, format: ObjectFormat) -> Result<Option<ObjectId>> {
12880    let refs = FileRefStore::new(git_dir, format);
12881    sley_refs::resolve_ref_peeled(&refs, "HEAD")
12882}
12883
12884fn status_row_is_untracked_or_ignored(entry: ShortStatusRow<'_>) -> bool {
12885    matches!((entry.index, entry.worktree), (b'?', b'?') | (b'!', b'!'))
12886}
12887
12888fn checkout_switch_head_symbolic(
12889    refs: &FileRefStore,
12890    branch_ref: String,
12891    committer: Vec<u8>,
12892    branch: &str,
12893    old_oid: Option<ObjectId>,
12894    new_oid: Option<ObjectId>,
12895) -> Result<()> {
12896    // Reflog "from" side: the previous branch's short name, or the commit id
12897    // when HEAD was detached (git's `checkout: moving from X to Y` shape,
12898    // which `@{-N}` resolution parses).
12899    let from = match refs.read_ref("HEAD") {
12900        Ok(Some(RefTarget::Symbolic(name))) => name
12901            .strip_prefix("refs/heads/")
12902            .unwrap_or(&name)
12903            .to_string(),
12904        Ok(Some(RefTarget::Direct(oid))) => oid.to_hex(),
12905        _ => "HEAD".to_string(),
12906    };
12907    let mut tx = refs.transaction();
12908    let reflog = match (old_oid, new_oid) {
12909        (Some(old_oid), Some(new_oid)) => Some(ReflogEntry {
12910            old_oid,
12911            new_oid,
12912            committer,
12913            message: format!("checkout: moving from {from} to {branch}").into_bytes(),
12914        }),
12915        _ => None,
12916    };
12917    tx.update(RefUpdate {
12918        name: "HEAD".into(),
12919        expected: None,
12920        new: RefTarget::Symbolic(branch_ref),
12921        reflog,
12922    });
12923    tx.commit()
12924}
12925
12926fn cache_tree_is_valid(tree: &CacheTree) -> bool {
12927    if tree.entry_count < 0 || tree.oid.is_none() {
12928        return false;
12929    }
12930    tree.subtrees
12931        .iter()
12932        .all(|child| cache_tree_is_valid(&child.tree))
12933}
12934
12935fn head_matches_index_from_cache_tree(
12936    index: &Index,
12937    format: ObjectFormat,
12938    head_tree_oid: &ObjectId,
12939    stage0_entry_count: usize,
12940) -> Result<bool> {
12941    let cache_tree = match index.cache_tree(format) {
12942        Ok(Some(cache_tree)) => cache_tree,
12943        Ok(None) | Err(_) => return Ok(false),
12944    };
12945    if !cache_tree_is_valid(&cache_tree) {
12946        return Ok(false);
12947    }
12948    let Some(root_oid) = cache_tree.oid.as_ref() else {
12949        return Ok(false);
12950    };
12951    if root_oid != head_tree_oid {
12952        return Ok(false);
12953    }
12954    Ok(cache_tree.entry_count as usize == stage0_entry_count)
12955}
12956
12957fn head_matches_borrowed_index_from_cache_tree(
12958    index: &BorrowedIndex<'_>,
12959    format: ObjectFormat,
12960    head_tree_oid: &ObjectId,
12961    stage0_entry_count: usize,
12962) -> Result<bool> {
12963    let cache_tree = match index.cache_tree(format) {
12964        Ok(Some(cache_tree)) => cache_tree,
12965        Ok(None) | Err(_) => return Ok(false),
12966    };
12967    if !cache_tree_is_valid(&cache_tree) {
12968        return Ok(false);
12969    }
12970    let Some(root_oid) = cache_tree.oid.as_ref() else {
12971        return Ok(false);
12972    };
12973    if root_oid != head_tree_oid {
12974        return Ok(false);
12975    }
12976    Ok(cache_tree.entry_count as usize == stage0_entry_count)
12977}
12978
12979/// Parses the index a single time and returns both the path -> [`TrackedEntry`]
12980/// map used for status comparisons AND the [`IndexStatCache`] used to short-cut
12981/// the worktree walk, avoiding a second parse of the same file.
12982fn read_index_entries_with_stat_cache(
12983    git_dir: &Path,
12984    format: ObjectFormat,
12985    db: &FileObjectDatabase,
12986) -> Result<(BTreeMap<Vec<u8>, TrackedEntry>, IndexStatCache, bool)> {
12987    let (index, stat_cache, head_matches_index) = read_index_with_stat_cache(git_dir, format, db)?;
12988    let tracked = index_entries_from_index(index);
12989    Ok((tracked, stat_cache, head_matches_index))
12990}
12991
12992fn index_entries_from_index(index: Index) -> BTreeMap<Vec<u8>, TrackedEntry> {
12993    index
12994        .entries
12995        .into_iter()
12996        .filter(|entry| entry.stage() == Stage::Normal)
12997        .map(|entry| {
12998            (
12999                entry.path.into_bytes(),
13000                TrackedEntry {
13001                    mode: entry.mode,
13002                    oid: entry.oid,
13003                },
13004            )
13005        })
13006        .collect()
13007}
13008
13009fn read_index_with_stat_cache(
13010    git_dir: &Path,
13011    format: ObjectFormat,
13012    db: &FileObjectDatabase,
13013) -> Result<(Index, IndexStatCache, bool)> {
13014    read_index_with_stat_cache_entries(git_dir, format, db, true)
13015}
13016
13017fn read_index_with_stat_cache_entries(
13018    git_dir: &Path,
13019    format: ObjectFormat,
13020    db: &FileObjectDatabase,
13021    include_entries: bool,
13022) -> Result<(Index, IndexStatCache, bool)> {
13023    let index_path = repository_index_path(git_dir);
13024    let index_metadata = match fs::metadata(&index_path) {
13025        Ok(metadata) => metadata,
13026        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
13027            return Ok((
13028                Index {
13029                    version: 2,
13030                    entries: Vec::new(),
13031                    extensions: Vec::new(),
13032                    checksum: None,
13033                },
13034                IndexStatCache::default(),
13035                false,
13036            ));
13037        }
13038        Err(err) => return Err(err.into()),
13039    };
13040    let index = Index::parse(&fs::read(&index_path)?, format)?;
13041    let index_mtime = file_mtime_parts(&index_metadata);
13042    let stage0_entry_count = index
13043        .entries
13044        .iter()
13045        .filter(|entry| index_entry_stage(entry) == 0)
13046        .count();
13047    let stat_cache = if include_entries {
13048        IndexStatCache::from_index_mtime(&index, index_mtime)
13049    } else {
13050        IndexStatCache::from_index_mtime_only(index_mtime)
13051    };
13052    let head_matches_index = match resolve_head_tree_oid(git_dir, format, db)? {
13053        Some(head_tree_oid) => {
13054            head_matches_index_from_cache_tree(&index, format, &head_tree_oid, stage0_entry_count)?
13055        }
13056        None => false,
13057    };
13058    Ok((index, stat_cache, head_matches_index))
13059}
13060
13061fn head_tree_entries(
13062    git_dir: &Path,
13063    format: ObjectFormat,
13064    db: &FileObjectDatabase,
13065) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
13066    let refs = FileRefStore::new(git_dir, format);
13067    let Some(head) = refs.read_ref("HEAD")? else {
13068        return Ok(BTreeMap::new());
13069    };
13070    let commit_oid = match head {
13071        RefTarget::Direct(oid) => Some(oid),
13072        RefTarget::Symbolic(name) => match refs.read_ref(&name)? {
13073            Some(RefTarget::Direct(oid)) => Some(oid),
13074            _ => None,
13075        },
13076    };
13077    let Some(commit_oid) = commit_oid else {
13078        return Ok(BTreeMap::new());
13079    };
13080    let object = read_expected_object(db, &commit_oid, ObjectType::Commit)?;
13081    let commit = Commit::parse_ref(format, &object.body)?;
13082    let mut entries = BTreeMap::new();
13083    collect_tree_entries(db, format, &commit.tree, &mut entries)?;
13084    Ok(entries)
13085}
13086
13087fn tree_entries(
13088    db: &FileObjectDatabase,
13089    format: ObjectFormat,
13090    tree_oid: &ObjectId,
13091) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
13092    let mut entries = BTreeMap::new();
13093    collect_tree_entries(db, format, tree_oid, &mut entries)?;
13094    Ok(entries)
13095}
13096
13097/// Flatten a tree's blob leaves into `entries`, keyed by full path.
13098///
13099/// Delegates to the canonical [`sley_diff_merge::flatten_tree`] (the local
13100/// recursive flattener was a byte-identical copy) and adapts its
13101/// `(mode, oid)` tuples into this module's [`TrackedEntry`]. Entries already
13102/// present in `entries` are overwritten, matching the previous insert-based
13103/// behaviour.
13104fn collect_tree_entries(
13105    db: &FileObjectDatabase,
13106    format: ObjectFormat,
13107    tree_oid: &ObjectId,
13108    entries: &mut BTreeMap<Vec<u8>, TrackedEntry>,
13109) -> Result<()> {
13110    for (path, (mode, oid)) in sley_diff_merge::flatten_tree(db, format, tree_oid)? {
13111        entries.insert(path, TrackedEntry { mode, oid });
13112    }
13113    Ok(())
13114}
13115
13116/// Like a full worktree walk, but accepts the index's [`IndexStatCache`] so the
13117/// walk can reuse a cached oid for files that are provably unchanged since they
13118/// were staged, skipping the read+filter+hash for those paths. Passing `None`
13119/// hashes every file when no stat cache is supplied.
13120fn worktree_entries_with_stat_cache(
13121    worktree_root: &Path,
13122    git_dir: &Path,
13123    format: ObjectFormat,
13124    stat_cache: Option<&IndexStatCache>,
13125    tracked_paths: Option<&BTreeSet<Vec<u8>>>,
13126    ignores: Option<&mut IgnoreMatcher>,
13127) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
13128    Ok(worktree_entries_with_submodule_dirt(
13129        worktree_root,
13130        git_dir,
13131        format,
13132        stat_cache,
13133        tracked_paths,
13134        ignores,
13135    )?
13136    .0)
13137}
13138
/// Tracked worktree entries keyed by repo path, plus the dirt mask
/// ([`DIRTY_SUBMODULE_MODIFIED`] / [`DIRTY_SUBMODULE_UNTRACKED`]) for every
/// tracked gitlink path whose submodule working tree is dirty.
///
/// Tuple layout: `.0` is the entry map, `.1` is the per-path dirt-mask map.
type WorktreeEntriesWithDirt = (BTreeMap<Vec<u8>, TrackedEntry>, BTreeMap<Vec<u8>, u8>);
13143
/// Status worktree snapshot: tracked/untracked entries, gitlink dirt masks, and
/// tracked paths observed in the worktree.
///
/// Tuple layout, in order: the entry map keyed by repo path, the per-path
/// dirt-mask map, and the set of tracked paths seen during the walk.
type StatusWorktreeSnapshot = (
    BTreeMap<Vec<u8>, TrackedEntry>,
    BTreeMap<Vec<u8>, u8>,
    HashSet<Vec<u8>>,
);
13151
13152/// Like [`worktree_entries_with_stat_cache`], but also reports, for every
13153/// tracked gitlink path whose submodule working tree is dirty, the dirt mask
13154/// ([`DIRTY_SUBMODULE_MODIFIED`] / [`DIRTY_SUBMODULE_UNTRACKED`]).
13155fn worktree_entries_with_submodule_dirt(
13156    worktree_root: &Path,
13157    git_dir: &Path,
13158    format: ObjectFormat,
13159    stat_cache: Option<&IndexStatCache>,
13160    tracked_paths: Option<&BTreeSet<Vec<u8>>>,
13161    ignores: Option<&mut IgnoreMatcher>,
13162) -> Result<WorktreeEntriesWithDirt> {
13163    let mut entries = BTreeMap::new();
13164    let mut submodule_dirt_map = BTreeMap::new();
13165    let mut tracked_presence = HashSet::new();
13166    // Worktree blobs are compared to the index by OID, so they must be passed
13167    // through the clean filter (core.autocrlf / .gitattributes) first -- exactly
13168    // as `git add` would store them. With no filter configured this is an exact
13169    // passthrough, so unfiltered repositories see identical OIDs.
13170    let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
13171    // Seed the matcher with the repo-wide sources only; each directory's
13172    // `.gitattributes` is folded in by `collect_worktree_entries` as it descends,
13173    // so the worktree is read exactly once (a separate full-tree attribute pass was
13174    // a second traversal of every directory).
13175    let mut attr_matcher = AttributeMatcher::from_worktree_base(worktree_root);
13176    let attr_requested = filter_attribute_names();
13177    let mut context = WorktreeEntriesWalk {
13178        git_dir,
13179        format,
13180        config: &config,
13181        matcher: &mut attr_matcher,
13182        requested: &attr_requested,
13183        stat_cache,
13184        tracked_paths,
13185        ignores,
13186        entries: &mut entries,
13187        submodule_dirt: &mut submodule_dirt_map,
13188        tracked_presence: &mut tracked_presence,
13189        record_clean_tracked: true,
13190    };
13191    collect_worktree_entries(&mut context, worktree_root, &[])?;
13192    Ok((entries, submodule_dirt_map))
13193}
13194
13195fn status_worktree_entries_with_submodule_dirt(
13196    worktree_root: &Path,
13197    git_dir: &Path,
13198    format: ObjectFormat,
13199    stat_cache: &IndexStatCache,
13200    tracked_paths: Option<&BTreeSet<Vec<u8>>>,
13201    ignores: Option<&mut IgnoreMatcher>,
13202) -> Result<StatusWorktreeSnapshot> {
13203    let mut entries = BTreeMap::new();
13204    let mut submodule_dirt_map = BTreeMap::new();
13205    let mut tracked_presence = HashSet::new();
13206    let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
13207    let mut attr_matcher = AttributeMatcher::from_worktree_base(worktree_root);
13208    let attr_requested = filter_attribute_names();
13209    let mut context = WorktreeEntriesWalk {
13210        git_dir,
13211        format,
13212        config: &config,
13213        matcher: &mut attr_matcher,
13214        requested: &attr_requested,
13215        stat_cache: Some(stat_cache),
13216        tracked_paths,
13217        ignores,
13218        entries: &mut entries,
13219        submodule_dirt: &mut submodule_dirt_map,
13220        tracked_presence: &mut tracked_presence,
13221        record_clean_tracked: false,
13222    };
13223    collect_worktree_entries(&mut context, worktree_root, &[])?;
13224    Ok((entries, submodule_dirt_map, tracked_presence))
13225}
13226
13227fn worktree_entry_for_git_path(
13228    worktree_root: &Path,
13229    git_dir: &Path,
13230    format: ObjectFormat,
13231    git_path: &[u8],
13232    expected_oid: &ObjectId,
13233    expected_mode: u32,
13234    stat_cache: Option<&IndexStatCache>,
13235) -> Result<Option<TrackedEntry>> {
13236    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
13237    let metadata = match fs::symlink_metadata(&absolute) {
13238        Ok(metadata) => metadata,
13239        Err(err)
13240            if matches!(
13241                err.kind(),
13242                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
13243            ) =>
13244        {
13245            return Ok(None);
13246        }
13247        Err(err) => return Err(err.into()),
13248    };
13249
13250    if sley_index::is_gitlink(expected_mode) {
13251        if !metadata.is_dir() {
13252            return Ok(Some(TrackedEntry {
13253                mode: worktree_entry_mode(&metadata),
13254                oid: ObjectId::null(format),
13255            }));
13256        }
13257        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(*expected_oid);
13258        return Ok(Some(TrackedEntry {
13259            mode: sley_index::GITLINK_MODE,
13260            oid,
13261        }));
13262    }
13263
13264    if metadata.is_dir() {
13265        return Ok(Some(TrackedEntry {
13266            mode: worktree_entry_mode(&metadata),
13267            oid: ObjectId::null(format),
13268        }));
13269    }
13270
13271    if !(metadata.is_file() || metadata.file_type().is_symlink()) {
13272        return Ok(Some(TrackedEntry {
13273            mode: worktree_entry_mode(&metadata),
13274            oid: ObjectId::null(format),
13275        }));
13276    }
13277
13278    if let Some(tracked) =
13279        stat_cache.and_then(|cache| cache.reuse_tracked_entry(git_path, &metadata))
13280    {
13281        return Ok(Some(tracked));
13282    }
13283
13284    let mode = worktree_entry_mode(&metadata);
13285    let body = if metadata.file_type().is_symlink() {
13286        symlink_target_bytes(&absolute)?
13287    } else {
13288        let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
13289        let body = fs::read(&absolute)?;
13290        let clean = apply_clean_filter(worktree_root, git_dir, &config, git_path, &body)?;
13291        let oid = match stat_cache.and_then(|cache| cache.index_entry(git_path)) {
13292            Some(index_entry) => clean_filtered_oid_for_status(
13293                format,
13294                &body,
13295                clean,
13296                index_entry.oid,
13297                index_entry.size,
13298                &metadata,
13299            )?,
13300            None => EncodedObject::new(ObjectType::Blob, clean).object_id(format)?,
13301        };
13302        return Ok(Some(TrackedEntry { mode, oid }));
13303    };
13304    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
13305    Ok(Some(TrackedEntry { mode, oid }))
13306}
13307
13308fn worktree_entry_for_index_entry_with_attributes(
13309    worktree_root: &Path,
13310    git_dir: &Path,
13311    format: ObjectFormat,
13312    index_entry: &IndexEntry,
13313    stat_cache: &IndexStatCache,
13314    clean_filter: &mut Option<TrackedOnlyCleanFilter>,
13315) -> Result<Option<TrackedEntry>> {
13316    let git_path = index_entry.path.as_bytes();
13317    let expected_mode = index_entry.mode;
13318    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
13319    let metadata = match fs::symlink_metadata(&absolute) {
13320        Ok(metadata) => metadata,
13321        Err(err)
13322            if matches!(
13323                err.kind(),
13324                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
13325            ) =>
13326        {
13327            return Ok(None);
13328        }
13329        Err(err) => return Err(err.into()),
13330    };
13331    let file_type = metadata.file_type();
13332
13333    if sley_index::is_gitlink(expected_mode) {
13334        if !file_type.is_dir() {
13335            return Ok(Some(TrackedEntry {
13336                mode: worktree_entry_mode(&metadata),
13337                oid: ObjectId::null(format),
13338            }));
13339        }
13340        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(index_entry.oid);
13341        return Ok(Some(TrackedEntry {
13342            mode: sley_index::GITLINK_MODE,
13343            oid,
13344        }));
13345    }
13346
13347    if file_type.is_dir() {
13348        if expected_mode != 0o040000 {
13349            return Ok(None);
13350        }
13351        return Ok(Some(TrackedEntry {
13352            mode: worktree_entry_mode(&metadata),
13353            oid: ObjectId::null(format),
13354        }));
13355    }
13356
13357    if !(file_type.is_file() || file_type.is_symlink()) {
13358        return Ok(Some(TrackedEntry {
13359            mode: worktree_entry_mode(&metadata),
13360            oid: ObjectId::null(format),
13361        }));
13362    }
13363
13364    if let Some(tracked) = stat_cache.reuse_index_entry(index_entry, &metadata) {
13365        return Ok(Some(tracked));
13366    }
13367
13368    let mode = worktree_entry_mode(&metadata);
13369    let body = if file_type.is_symlink() {
13370        symlink_target_bytes(&absolute)?
13371    } else {
13372        let body = fs::read(&absolute)?;
13373        let clean_filter = tracked_only_clean_filter(clean_filter, worktree_root, git_dir);
13374        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
13375        let checks =
13376            clean_filter
13377                .matcher
13378                .attributes_for_path(git_path, &clean_filter.requested, false);
13379        let clean =
13380            apply_clean_filter_with_attributes(&clean_filter.config, &checks, git_path, &body)?;
13381        let oid = clean_filtered_oid_for_status(
13382            format,
13383            &body,
13384            clean,
13385            index_entry.oid,
13386            index_entry.size,
13387            &metadata,
13388        )?;
13389        return Ok(Some(TrackedEntry { mode, oid }));
13390    };
13391    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
13392    Ok(Some(TrackedEntry { mode, oid }))
13393}
13394
13395fn worktree_entry_for_index_entry_ref_with_attributes(
13396    worktree_root: &Path,
13397    git_dir: &Path,
13398    format: ObjectFormat,
13399    index_entry: &IndexEntryRef<'_>,
13400    stat_cache: &IndexStatCache,
13401    clean_filter: &mut Option<TrackedOnlyCleanFilter>,
13402) -> Result<Option<TrackedEntry>> {
13403    let git_path = index_entry.path;
13404    let expected_mode = index_entry.mode;
13405    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
13406    let metadata = match fs::symlink_metadata(&absolute) {
13407        Ok(metadata) => metadata,
13408        Err(err)
13409            if matches!(
13410                err.kind(),
13411                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
13412            ) =>
13413        {
13414            return Ok(None);
13415        }
13416        Err(err) => return Err(err.into()),
13417    };
13418    let file_type = metadata.file_type();
13419
13420    if sley_index::is_gitlink(expected_mode) {
13421        if !file_type.is_dir() {
13422            return Ok(Some(TrackedEntry {
13423                mode: worktree_entry_mode(&metadata),
13424                oid: ObjectId::null(format),
13425            }));
13426        }
13427        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(index_entry.oid);
13428        return Ok(Some(TrackedEntry {
13429            mode: sley_index::GITLINK_MODE,
13430            oid,
13431        }));
13432    }
13433
13434    if file_type.is_dir() {
13435        if expected_mode != 0o040000 {
13436            return Ok(None);
13437        }
13438        return Ok(Some(TrackedEntry {
13439            mode: worktree_entry_mode(&metadata),
13440            oid: ObjectId::null(format),
13441        }));
13442    }
13443
13444    if !(file_type.is_file() || file_type.is_symlink()) {
13445        return Ok(Some(TrackedEntry {
13446            mode: worktree_entry_mode(&metadata),
13447            oid: ObjectId::null(format),
13448        }));
13449    }
13450
13451    if let Some(tracked) = stat_cache.reuse_index_entry_ref(index_entry, &metadata) {
13452        return Ok(Some(tracked));
13453    }
13454
13455    let mode = worktree_entry_mode(&metadata);
13456    let body = if file_type.is_symlink() {
13457        symlink_target_bytes(&absolute)?
13458    } else {
13459        let body = fs::read(&absolute)?;
13460        let clean_filter = tracked_only_clean_filter(clean_filter, worktree_root, git_dir);
13461        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
13462        let checks =
13463            clean_filter
13464                .matcher
13465                .attributes_for_path(git_path, &clean_filter.requested, false);
13466        let clean =
13467            apply_clean_filter_with_attributes(&clean_filter.config, &checks, git_path, &body)?;
13468        let oid = clean_filtered_oid_for_status(
13469            format,
13470            &body,
13471            clean,
13472            index_entry.oid,
13473            index_entry.size,
13474            &metadata,
13475        )?;
13476        return Ok(Some(TrackedEntry { mode, oid }));
13477    };
13478    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
13479    Ok(Some(TrackedEntry { mode, oid }))
13480}
13481
13482fn clean_filtered_oid_for_status(
13483    format: ObjectFormat,
13484    raw_body: &[u8],
13485    clean_body: Vec<u8>,
13486    index_oid: ObjectId,
13487    index_size: u32,
13488    metadata: &fs::Metadata,
13489) -> Result<ObjectId> {
13490    let clean_oid = EncodedObject::new(ObjectType::Blob, clean_body).object_id(format)?;
13491    if clean_oid == index_oid && index_size != index_size_from_metadata(metadata) {
13492        return EncodedObject::new(ObjectType::Blob, raw_body.to_vec()).object_id(format);
13493    }
13494    Ok(clean_oid)
13495}
13496
/// Lazily built state for clean-filtering individual tracked files: the
/// config and attribute matcher the filter consults, plus a record of which
/// directories have already had their attribute patterns loaded.
struct TrackedOnlyCleanFilter {
    /// Repository configuration handed to the clean filter.
    config: GitConfig,
    /// Attribute matcher seeded from the worktree base; per-directory patterns
    /// are added to it by `read_attribute_dir`.
    matcher: AttributeMatcher,
    /// Attribute names requested on every lookup (from `filter_attribute_names`).
    requested: Vec<Vec<u8>>,
    /// Directory git paths already passed to `read_attribute_dir`, so each
    /// directory is read at most once (the empty path stands for the root).
    attribute_dirs: BTreeSet<Vec<u8>>,
}
13503
13504impl TrackedOnlyCleanFilter {
13505    fn read_attributes_for_path(&mut self, worktree_root: &Path, git_path: &[u8]) -> Result<()> {
13506        self.read_attribute_dir(worktree_root, &[])?;
13507        let mut prefix = Vec::new();
13508        let mut parts = git_path.split(|byte| *byte == b'/').peekable();
13509        while let Some(part) = parts.next() {
13510            if parts.peek().is_none() {
13511                break;
13512            }
13513            if !prefix.is_empty() {
13514                prefix.push(b'/');
13515            }
13516            prefix.extend_from_slice(part);
13517            self.read_attribute_dir(worktree_root, &prefix)?;
13518        }
13519        Ok(())
13520    }
13521
13522    fn read_attribute_dir(&mut self, worktree_root: &Path, git_path: &[u8]) -> Result<()> {
13523        if !self.attribute_dirs.insert(git_path.to_vec()) {
13524            return Ok(());
13525        }
13526        let dir = if git_path.is_empty() {
13527            worktree_root.to_path_buf()
13528        } else {
13529            worktree_root.join(repo_path_to_os_path(git_path)?)
13530        };
13531        read_dir_attribute_patterns(worktree_root, &dir, &mut self.matcher)
13532    }
13533}
13534
13535fn tracked_only_clean_filter<'a>(
13536    clean_filter: &'a mut Option<TrackedOnlyCleanFilter>,
13537    worktree_root: &Path,
13538    git_dir: &Path,
13539) -> &'a mut TrackedOnlyCleanFilter {
13540    if clean_filter.is_none() {
13541        *clean_filter = Some(TrackedOnlyCleanFilter {
13542            config: sley_config::read_repo_config(git_dir, None).unwrap_or_default(),
13543            matcher: AttributeMatcher::from_worktree_base(worktree_root),
13544            requested: filter_attribute_names(),
13545            attribute_dirs: BTreeSet::new(),
13546        });
13547    }
13548    clean_filter
13549        .as_mut()
13550        .expect("tracked-only clean filter initialized")
13551}
13552
13553fn tracked_only_clean_filter_with_config<'a>(
13554    clean_filter: &'a mut Option<TrackedOnlyCleanFilter>,
13555    worktree_root: &Path,
13556    config: &GitConfig,
13557) -> &'a mut TrackedOnlyCleanFilter {
13558    if clean_filter.is_none() {
13559        *clean_filter = Some(TrackedOnlyCleanFilter {
13560            config: config.clone(),
13561            matcher: AttributeMatcher::from_worktree_base(worktree_root),
13562            requested: filter_attribute_names(),
13563            attribute_dirs: BTreeSet::new(),
13564        });
13565    }
13566    clean_filter
13567        .as_mut()
13568        .expect("tracked-only clean filter initialized")
13569}
13570
13571struct WorktreeEntriesWalk<'a> {
13572    git_dir: &'a Path,
13573    format: ObjectFormat,
13574    config: &'a GitConfig,
13575    matcher: &'a mut AttributeMatcher,
13576    requested: &'a [Vec<u8>],
13577    stat_cache: Option<&'a IndexStatCache>,
13578    tracked_paths: Option<&'a BTreeSet<Vec<u8>>>,
13579    ignores: Option<&'a mut IgnoreMatcher>,
13580    entries: &'a mut BTreeMap<Vec<u8>, TrackedEntry>,
13581    /// Dirt masks for tracked gitlink paths whose submodule worktree is dirty.
13582    submodule_dirt: &'a mut BTreeMap<Vec<u8>, u8>,
13583    tracked_presence: &'a mut HashSet<Vec<u8>>,
13584    record_clean_tracked: bool,
13585}
13586
13587impl WorktreeEntriesWalk<'_> {
13588    fn mark_tracked_present(&mut self, git_path: &[u8]) {
13589        self.tracked_presence.insert(git_path.to_vec());
13590    }
13591
13592    fn tracked_entry_for(&self, git_path: &[u8]) -> Option<TrackedEntry> {
13593        self.stat_cache
13594            .and_then(|cache| cache.tracked_entry(git_path))
13595    }
13596
13597    fn should_record_tracked_entry(&self, git_path: &[u8], entry: &TrackedEntry) -> bool {
13598        self.record_clean_tracked
13599            || self
13600                .tracked_entry_for(git_path)
13601                .is_none_or(|tracked| tracked != *entry)
13602    }
13603}
13604
13605fn git_path_append_component(parent: &[u8], component: &std::ffi::OsStr) -> Vec<u8> {
13606    let component = os_str_component_bytes(component);
13607    let separator = usize::from(!parent.is_empty());
13608    let mut path = Vec::with_capacity(parent.len() + separator + component.len());
13609    if !parent.is_empty() {
13610        path.extend_from_slice(parent);
13611        path.push(b'/');
13612    }
13613    path.extend_from_slice(component.as_ref());
13614    path
13615}
13616
13617fn git_path_push_component(path: &mut Vec<u8>, component: &std::ffi::OsStr) -> usize {
13618    let original_len = path.len();
13619    let component = os_str_component_bytes(component);
13620    if !path.is_empty() {
13621        path.push(b'/');
13622    }
13623    path.extend_from_slice(component.as_ref());
13624    original_len
13625}
13626
/// Bytes of a single path component. On Unix the `OsStr` is borrowed as-is,
/// so no allocation or re-encoding takes place.
#[cfg(unix)]
fn os_str_component_bytes(component: &std::ffi::OsStr) -> Cow<'_, [u8]> {
    let raw: &[u8] = std::os::unix::ffi::OsStrExt::as_bytes(component);
    Cow::Borrowed(raw)
}
13633
/// Bytes of a single path component. Off Unix the component is converted
/// lossily to UTF-8 first, so the result is always owned.
#[cfg(not(unix))]
fn os_str_component_bytes(component: &std::ffi::OsStr) -> Cow<'_, [u8]> {
    let lossy: String = component.to_string_lossy().into_owned();
    Cow::Owned(lossy.into_bytes())
}
13638
13639fn collect_worktree_entries(
13640    context: &mut WorktreeEntriesWalk<'_>,
13641    dir: &Path,
13642    dir_git_path: &[u8],
13643) -> Result<()> {
13644    if is_same_path(dir, context.git_dir) {
13645        return Ok(());
13646    }
13647    // Fold this directory's `.gitattributes` into the matcher before processing its
13648    // files, so lookups for files here (and below) see it. This is what lets the
13649    // walk read the tree once instead of doing a separate full-tree attribute pass.
13650    read_dir_attribute_patterns_for_base(dir, dir_git_path, context.matcher)?;
13651    if let Some(ignores) = context.ignores.as_deref_mut() {
13652        read_dir_ignore_patterns_for_base(dir, dir_git_path, ignores)?;
13653    }
13654    let mut dir_entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
13655    dir_entries.sort_by_key(|entry| entry.file_name());
13656    for entry in dir_entries {
13657        let file_name = entry.file_name();
13658        let path = entry.path();
13659        if is_dot_git_entry(&path) {
13660            continue;
13661        }
13662        if is_same_path(&path, context.git_dir) {
13663            continue;
13664        }
13665        let metadata = entry.metadata()?;
13666        let git_path = git_path_append_component(dir_git_path, &file_name);
13667        if context
13668            .ignores
13669            .as_ref()
13670            .is_some_and(|ignores| ignores.is_ignored(&git_path, metadata.is_dir()))
13671        {
13672            if metadata.is_dir()
13673                && context.tracked_paths.is_some_and(|tracked_paths| {
13674                    tracked_paths_may_contain(tracked_paths, &git_path)
13675                })
13676            {
13677                collect_worktree_entries(context, &path, &git_path)?;
13678            }
13679            continue;
13680        }
13681        if metadata.is_dir() {
13682            // A directory staged as a gitlink (mode 160000) is opaque: the walk
13683            // never descends into it. Its worktree "content" is the commit the
13684            // embedded repository has checked out (upstream ce_compare_gitlink):
13685            // a populated submodule reports its HEAD (plus a dirt mask when its
13686            // own tree has modified/untracked content); an unpopulated
13687            // directory — no repository, or no commit checked out — always
13688            // matches the staged oid.
13689            if let Some(index_entry) = context
13690                .stat_cache
13691                .and_then(|cache| cache.gitlink_entry(&git_path))
13692            {
13693                context.mark_tracked_present(&git_path);
13694                let oid = sley_diff_merge::gitlink_head_oid(&path, context.format)
13695                    .unwrap_or(index_entry.oid);
13696                let dirt = submodule_dirt(&path);
13697                if dirt != 0 {
13698                    context.submodule_dirt.insert(git_path.clone(), dirt);
13699                }
13700                let tracked = TrackedEntry {
13701                    mode: sley_index::GITLINK_MODE,
13702                    oid,
13703                };
13704                if dirt != 0 || context.should_record_tracked_entry(&git_path, &tracked) {
13705                    context.entries.insert(git_path, tracked);
13706                }
13707                continue;
13708            }
13709            if is_nested_repository_boundary(&path) {
13710                if let Some(tracked_paths) = context.tracked_paths
13711                    && !tracked_paths_may_contain(tracked_paths, &git_path)
13712                {
13713                    continue;
13714                }
13715                context.entries.insert(
13716                    git_path,
13717                    TrackedEntry {
13718                        mode: 0o040000,
13719                        oid: ObjectId::null(context.format),
13720                    },
13721                );
13722                continue;
13723            }
13724            if let Some(tracked_paths) = context.tracked_paths
13725                && !tracked_paths_may_contain(tracked_paths, &git_path)
13726            {
13727                continue;
13728            }
13729            collect_worktree_entries(context, &path, &git_path)?;
13730        } else if metadata.is_file() || metadata.file_type().is_symlink() {
13731            if let Some(tracked_paths) = context.tracked_paths
13732                && !tracked_paths.contains(&git_path)
13733            {
13734                continue;
13735            }
13736            let entry_mode = worktree_entry_mode(&metadata);
13737            // git's racy-git stat shortcut: when the index's cached stat proves
13738            // this file is unchanged since it was staged, reuse the staged oid
13739            // and skip the read+filter+hash entirely. `reuse_tracked_entry`
13740            // returns `Some` ONLY for a non-racy size+mtime+mode match, so a
13741            // modified file always falls through to the full hash below and is
13742            // never silently reported clean.
13743            if let Some(tracked) = context
13744                .stat_cache
13745                .and_then(|cache| cache.reuse_tracked_entry(&git_path, &metadata))
13746            {
13747                context.mark_tracked_present(&git_path);
13748                if context.record_clean_tracked {
13749                    context.entries.insert(git_path, tracked);
13750                }
13751                continue;
13752            }
13753            // A file absent from the index is untracked: status and the
13754            // index-vs-worktree diff report it by *presence* (`??` / nothing), never
13755            // by content, so computing its oid is wasted work — git never hashes
13756            // untracked files. Record presence with a null oid and skip the
13757            // read+filter+hash. Without a stat cache we cannot tell tracked from
13758            // untracked, so fall through and hash as before.
13759            if context
13760                .stat_cache
13761                .is_some_and(|cache| !cache.contains(&git_path))
13762            {
13763                context.entries.insert(
13764                    git_path,
13765                    TrackedEntry {
13766                        mode: entry_mode,
13767                        oid: ObjectId::null(context.format),
13768                    },
13769                );
13770                continue;
13771            }
13772            let body = if metadata.file_type().is_symlink() {
13773                // The blob for a symlink is the raw link target; clean filters
13774                // never apply because git treats symlink content as opaque.
13775                symlink_target_bytes(&path)?
13776            } else {
13777                let body = fs::read(&path)?;
13778                // Resolve this path's attributes against the prebuilt matcher (a cheap
13779                // pattern match) and apply the clean filter -- no per-file matcher
13780                // rebuild. With no attributes/autocrlf configured this is an exact
13781                // passthrough, so the stored OID is unchanged.
13782                let checks =
13783                    context
13784                        .matcher
13785                        .attributes_for_path(&git_path, context.requested, false);
13786                let clean =
13787                    apply_clean_filter_with_attributes(context.config, &checks, &git_path, &body)?;
13788                let oid = match context
13789                    .stat_cache
13790                    .and_then(|cache| cache.index_entry(&git_path))
13791                {
13792                    Some(index_entry) => clean_filtered_oid_for_status(
13793                        context.format,
13794                        &body,
13795                        clean,
13796                        index_entry.oid,
13797                        index_entry.size,
13798                        &metadata,
13799                    )?,
13800                    None => EncodedObject::new(ObjectType::Blob, clean).object_id(context.format)?,
13801                };
13802                let tracked = TrackedEntry {
13803                    mode: entry_mode,
13804                    oid,
13805                };
13806                if context
13807                    .stat_cache
13808                    .is_some_and(|cache| cache.contains(&git_path))
13809                {
13810                    context.mark_tracked_present(&git_path);
13811                    if context.should_record_tracked_entry(&git_path, &tracked) {
13812                        context.entries.insert(git_path, tracked);
13813                    }
13814                } else {
13815                    context.entries.insert(git_path, tracked);
13816                }
13817                continue;
13818            };
13819            let oid = EncodedObject::new(ObjectType::Blob, body).object_id(context.format)?;
13820            let tracked = TrackedEntry {
13821                mode: entry_mode,
13822                oid,
13823            };
13824            if context
13825                .stat_cache
13826                .is_some_and(|cache| cache.contains(&git_path))
13827            {
13828                context.mark_tracked_present(&git_path);
13829                if context.should_record_tracked_entry(&git_path, &tracked) {
13830                    context.entries.insert(git_path, tracked);
13831                }
13832            } else {
13833                context.entries.insert(git_path, tracked);
13834            }
13835        }
13836    }
13837    Ok(())
13838}
13839
/// Whether `directory` is itself one of `tracked_paths` or at least one
/// tracked path lies beneath it (that is, starts with `directory` + `/`).
fn tracked_paths_may_contain(tracked_paths: &BTreeSet<Vec<u8>>, directory: &[u8]) -> bool {
    use std::ops::Bound;

    if tracked_paths.contains(directory) {
        return true;
    }
    let descendant_prefix = [directory, b"/".as_slice()].concat();
    // The set is ordered, so the first path at or after the prefix is the
    // only candidate that needs checking.
    let lower = Bound::Included(descendant_prefix.as_slice());
    match tracked_paths
        .range::<[u8], _>((lower, Bound::Unbounded))
        .next()
    {
        Some(first) => first.starts_with(&descendant_prefix),
        None => false,
    }
}
13855
/// Whether two paths are equal component by component. The comparison is
/// purely lexical: nothing is canonicalized and the filesystem is not touched.
fn is_same_path(left: &Path, right: &Path) -> bool {
    left.components().eq(right.components())
}
13859
/// Whether the last component of `path` is exactly `.git`. Git never lists a
/// `.git` entry at any depth (a repository's own `.git`, a submodule gitlink
/// file, or an embedded repository's `.git` directory) as untracked content.
fn is_dot_git_entry(path: &Path) -> bool {
    path.file_name().is_some_and(|name| name == ".git")
}
13866
13867/// Whether `path` is a directory containing an embedded repository's `.git`
13868/// *directory*, or a `.git` file whose `gitdir:` pointer resolves to an
13869/// existing directory (a submodule worktree). Git treats both as a repository
13870/// boundary (listing the directory as `dir/`); an *invalid* `.git` file (no
13871/// resolvable `gitdir:` target) is not a boundary — Git descends into the
13872/// directory and lists its other untracked contents normally.
13873fn is_nested_repository_boundary(path: &Path) -> bool {
13874    if path.join(".git").is_dir() {
13875        return true;
13876    }
13877    sley_diff_merge::gitlink_git_dir(path).is_some()
13878}
13879
/// Whether `path` is an embedded repository's `.git` directory or lies inside
/// one. Only `.git` directories strictly below `root` count; `root`'s own
/// `.git` does not, and neither does any path outside `root`.
fn is_embedded_git_internals(root: &Path, path: &Path) -> bool {
    let relative = match path.strip_prefix(root) {
        Ok(relative) => relative,
        Err(_) => return false,
    };
    // `parent` is the directory that contains the component under inspection.
    let mut parent = root.to_path_buf();
    for component in relative.components() {
        let names_dot_git = match component {
            std::path::Component::Normal(name) => name == ".git",
            _ => false,
        };
        if names_dot_git && parent != root && parent.join(".git").is_dir() {
            return true;
        }
        parent.push(component);
    }
    false
}
13897
13898fn worktree_entry_mode(metadata: &fs::Metadata) -> u32 {
13899    if metadata.file_type().is_symlink() {
13900        0o120000
13901    } else if metadata.is_dir() {
13902        0o040000
13903    } else {
13904        file_mode(metadata)
13905    }
13906}
13907
13908fn worktree_path(root: &Path, path: &[u8]) -> Result<PathBuf> {
13909    let text = std::str::from_utf8(path).map_err(|err| GitError::InvalidPath(err.to_string()))?;
13910    let relative = PathBuf::from(text);
13911    if relative.is_absolute()
13912        || relative.components().any(|component| {
13913            matches!(
13914                component,
13915                std::path::Component::ParentDir | std::path::Component::Prefix(_)
13916            )
13917        })
13918    {
13919        return Err(GitError::InvalidPath(format!(
13920            "invalid worktree path {text}"
13921        )));
13922    }
13923    Ok(root.join(relative))
13924}
13925
13926fn remove_worktree_file(root: &Path, path: &[u8]) -> Result<()> {
13927    let file = worktree_path(root, path)?;
13928    if !file.exists() {
13929        return Ok(());
13930    }
13931    if file.is_dir() {
13932        // A tracked path that is a directory on disk is a gitlink: upstream
13933        // checkout/reset never recurses into a submodule's working tree. It
13934        // rmdirs the path when empty (remove_scheduled_dirs) and leaves a
13935        // populated submodule in place.
13936        match fs::remove_dir(&file) {
13937            Ok(()) => prune_empty_parents(root, file.parent())?,
13938            Err(err) if err.kind() == std::io::ErrorKind::DirectoryNotEmpty => {}
13939            Err(err) => return Err(err.into()),
13940        }
13941        return Ok(());
13942    }
13943    fs::remove_file(&file)?;
13944    prune_empty_parents(root, file.parent())?;
13945    Ok(())
13946}
13947
13948fn prune_empty_parents(root: &Path, mut dir: Option<&Path>) -> Result<()> {
13949    while let Some(path) = dir {
13950        if path == root {
13951            break;
13952        }
13953        match fs::remove_dir(path) {
13954            Ok(()) => dir = path.parent(),
13955            Err(err) if err.kind() == std::io::ErrorKind::NotFound => dir = path.parent(),
13956            Err(err) if err.kind() == std::io::ErrorKind::DirectoryNotEmpty => break,
13957            Err(err) => return Err(err.into()),
13958        }
13959    }
13960    Ok(())
13961}
13962
13963fn git_tree_entry_cmp(
13964    left_name: &[u8],
13965    left_mode: u32,
13966    right_name: &[u8],
13967    right_mode: u32,
13968) -> Ordering {
13969    let shared = left_name.len().min(right_name.len());
13970    let name_order = left_name[..shared].cmp(&right_name[..shared]);
13971    if name_order != Ordering::Equal {
13972        return name_order;
13973    }
13974    let left_end = left_name.len() == shared;
13975    let right_end = right_name.len() == shared;
13976    match (left_end, right_end) {
13977        (true, true) => Ordering::Equal,
13978        (true, false) => tree_name_terminator(left_mode).cmp(&right_name[shared]),
13979        (false, true) => left_name[shared].cmp(&tree_name_terminator(right_mode)),
13980        (false, false) => Ordering::Equal,
13981    }
13982}
13983
/// Byte that notionally follows an entry name when sorting: `/` for a
/// directory (mode `040000`), NUL for every other mode.
fn tree_name_terminator(mode: u32) -> u8 {
    match mode {
        0o040000 => b'/',
        _ => 0,
    }
}
13987
/// Git mode for a non-directory, non-symlink entry on Unix: `100755` when any
/// execute bit (owner, group, or other) is set, `100644` otherwise.
#[cfg(unix)]
fn file_mode(metadata: &fs::Metadata) -> u32 {
    use std::os::unix::fs::PermissionsExt;

    const ANY_EXECUTE_BIT: u32 = 0o111;
    let executable = metadata.permissions().mode() & ANY_EXECUTE_BIT != 0;
    if executable { 0o100755 } else { 0o100644 }
}
13997
/// Git mode for a non-directory, non-symlink entry off Unix: always `100644`,
/// because the metadata is not consulted for an execute bit here.
#[cfg(not(unix))]
fn file_mode(_: &fs::Metadata) -> u32 {
    0o100644
}
14002
14003/// The blob content git stores for a symlink: the raw bytes of the link target
14004/// exactly as `readlink(2)` returns them. On Unix the target is an opaque byte
14005/// string, so we take the `OsStr` bytes verbatim (no UTF-8 round-trip, no path
14006/// re-componentization that could rewrite separators).
14007#[cfg(unix)]
14008fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
14009    use std::os::unix::ffi::OsStrExt;
14010    let target = fs::read_link(path)?;
14011    Ok(target.as_os_str().as_bytes().to_vec())
14012}
14013
/// The blob content git stores for a symlink on platforms other than Unix:
/// the link target converted lossily to UTF-8, with every `\` rewritten to
/// `/`.
///
/// # Errors
///
/// Propagates the I/O error from reading the link.
#[cfg(not(unix))]
fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
    let target = fs::read_link(path)?;
    // git normalizes symlink targets to forward slashes on platforms whose
    // native separator is `\`.
    let normalized: String = target
        .to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect();
    Ok(normalized.into_bytes())
}
14021
14022fn git_path_bytes(path: &Path) -> Result<Vec<u8>> {
14023    if path.components().any(|component| {
14024        matches!(
14025            component,
14026            std::path::Component::ParentDir | std::path::Component::Prefix(_)
14027        )
14028    }) {
14029        return Err(GitError::InvalidPath(format!(
14030            "invalid index path {}",
14031            path.display()
14032        )));
14033    }
14034    Ok(path
14035        .components()
14036        .filter_map(|component| match component {
14037            std::path::Component::Normal(value) => Some(value.to_string_lossy().into_owned()),
14038            _ => None,
14039        })
14040        .collect::<Vec<_>>()
14041        .join("/")
14042        .into_bytes())
14043}
14044
14045fn repo_path_to_os_path(path: &[u8]) -> Result<PathBuf> {
14046    #[cfg(unix)]
14047    {
14048        use std::os::unix::ffi::OsStrExt;
14049
14050        Ok(PathBuf::from(std::ffi::OsStr::from_bytes(path)))
14051    }
14052
14053    #[cfg(not(unix))]
14054    {
14055        let path = std::str::from_utf8(path)
14056            .map_err(|_| GitError::InvalidPath("index path is not utf8".into()))?;
14057        Ok(path.split('/').collect())
14058    }
14059}
14060
14061fn git_path_to_relative_path(path: &[u8]) -> Result<PathBuf> {
14062    let path = std::str::from_utf8(path)
14063        .map_err(|err| GitError::InvalidPath(format!("invalid utf-8 index path: {err}")))?;
14064    Ok(path.split('/').collect())
14065}
14066
/// Whether `path`, as written, ends with a path separator.
///
/// Every separator the platform accepts counts: `/` on Unix, and both `\` and
/// `/` on Windows. An empty path has no trailing separator.
fn path_has_trailing_separator(path: &Path) -> bool {
    // Separators are ASCII, so looking at the final encoded byte is enough and
    // avoids converting the whole path to a string.
    path.as_os_str()
        .as_encoded_bytes()
        .last()
        .is_some_and(|&byte| std::path::is_separator(char::from(byte)))
}
14072
14073#[cfg(test)]
14074mod tests {
14075    use super::*;
14076    use sley_odb::ObjectReader;
14077    use std::sync::atomic::{AtomicU64, Ordering};
14078
14079    static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
14080
14081    fn short_status(
14082        worktree_root: impl AsRef<Path>,
14083        git_dir: impl AsRef<Path>,
14084        format: ObjectFormat,
14085    ) -> Result<Vec<ShortStatusEntry>> {
14086        let mut entries = Vec::new();
14087        stream_short_status(worktree_root, git_dir, format, |entry| {
14088            entries.push(entry.to_owned_entry());
14089            Ok(StreamControl::Continue)
14090        })?;
14091        Ok(entries)
14092    }
14093
14094    #[test]
14095    fn atomic_metadata_writer_writes_and_reports_stat() {
14096        let root = temp_root();
14097        let path = root.join(".git").join("HEAD");
14098
14099        let result = write_metadata_file_atomic(
14100            &path,
14101            b"ref: refs/heads/main\n",
14102            AtomicMetadataWriteOptions::default(),
14103        )
14104        .expect("write metadata");
14105
14106        assert_eq!(
14107            fs::read(&path).expect("read metadata"),
14108            b"ref: refs/heads/main\n"
14109        );
14110        assert_eq!(result.path, path);
14111        assert_eq!(result.len, b"ref: refs/heads/main\n".len() as u64);
14112        assert!(result.mtime.is_some());
14113        assert!(!path.with_file_name("HEAD.lock").exists());
14114        fs::remove_dir_all(root).expect("test operation should succeed");
14115    }
14116
14117    #[test]
14118    fn atomic_metadata_writer_existing_lock_preserves_original() {
14119        let root = temp_root();
14120        let git_dir = root.join(".git");
14121        fs::create_dir_all(&git_dir).expect("create git dir");
14122        let path = git_dir.join("HEAD");
14123        let lock = git_dir.join("HEAD.lock");
14124        fs::write(&path, b"ref: refs/heads/main\n").expect("write original");
14125        fs::write(&lock, b"held\n").expect("write lock");
14126
14127        let err = write_metadata_file_atomic(
14128            &path,
14129            b"ref: refs/heads/other\n",
14130            AtomicMetadataWriteOptions::default(),
14131        )
14132        .expect_err("held lock must fail");
14133
14134        assert!(matches!(err, GitError::Transaction(_)));
14135        assert_eq!(
14136            fs::read(&path).expect("read original"),
14137            b"ref: refs/heads/main\n"
14138        );
14139        assert_eq!(fs::read(&lock).expect("read lock"), b"held\n");
14140        fs::remove_dir_all(root).expect("test operation should succeed");
14141    }
14142
14143    // --- `ls-files --eol` stat/attr helpers (mirror convert.c) ---------------
14144
14145    #[test]
14146    fn convert_stats_ascii_classifies_eol_content() {
14147        assert_eq!(convert_stats_ascii(b""), "none");
14148        assert_eq!(convert_stats_ascii(b"abc"), "none");
14149        assert_eq!(convert_stats_ascii(b"a\nb\n"), "lf");
14150        assert_eq!(convert_stats_ascii(b"a\r\nb\r\n"), "crlf");
14151        assert_eq!(convert_stats_ascii(b"a\r\nb\n"), "mixed");
14152        // A lone CR makes the content binary (-text), matching git.
14153        assert_eq!(convert_stats_ascii(b"a\rb"), "-text");
14154        // A NUL byte is binary.
14155        assert_eq!(convert_stats_ascii(b"a\0b\n"), "-text");
14156        // A trailing ^Z (EOF) is not counted as non-printable.
14157        assert_eq!(convert_stats_ascii(b"abc\n\x1a"), "lf");
14158    }
14159
14160    fn attr_check(name: &[u8], state: Option<AttributeState>) -> AttributeCheck {
14161        AttributeCheck {
14162            attribute: name.to_vec(),
14163            state,
14164        }
14165    }
14166
14167    #[test]
14168    fn convert_attr_ascii_matches_git_attr_action() {
14169        // No attributes at all: empty attr field.
14170        assert_eq!(convert_attr_ascii(&[]), "");
14171        // text (set) -> "text"; -text (unset) -> "-text".
14172        assert_eq!(
14173            convert_attr_ascii(&[attr_check(b"text", Some(AttributeState::Set))]),
14174            "text"
14175        );
14176        assert_eq!(
14177            convert_attr_ascii(&[attr_check(b"text", Some(AttributeState::Unset))]),
14178            "-text"
14179        );
14180        // text=auto -> "text=auto"; with eol=crlf/lf the AUTO variants.
14181        assert_eq!(
14182            convert_attr_ascii(&[attr_check(
14183                b"text",
14184                Some(AttributeState::Value(b"auto".to_vec()))
14185            )]),
14186            "text=auto"
14187        );
14188        assert_eq!(
14189            convert_attr_ascii(&[
14190                attr_check(b"text", Some(AttributeState::Value(b"auto".to_vec()))),
14191                attr_check(b"eol", Some(AttributeState::Value(b"crlf".to_vec()))),
14192            ]),
14193            "text=auto eol=crlf"
14194        );
14195        assert_eq!(
14196            convert_attr_ascii(&[
14197                attr_check(b"text", Some(AttributeState::Value(b"auto".to_vec()))),
14198                attr_check(b"eol", Some(AttributeState::Value(b"lf".to_vec()))),
14199            ]),
14200            "text=auto eol=lf"
14201        );
14202        // eol=crlf/lf alone (no text) forces text + the eol direction.
14203        assert_eq!(
14204            convert_attr_ascii(&[attr_check(
14205                b"eol",
14206                Some(AttributeState::Value(b"crlf".to_vec()))
14207            )]),
14208            "text eol=crlf"
14209        );
14210        assert_eq!(
14211            convert_attr_ascii(&[attr_check(
14212                b"eol",
14213                Some(AttributeState::Value(b"lf".to_vec()))
14214            )]),
14215            "text eol=lf"
14216        );
14217        // -text overrides any eol attribute (binary wins).
14218        assert_eq!(
14219            convert_attr_ascii(&[
14220                attr_check(b"text", Some(AttributeState::Unset)),
14221                attr_check(b"eol", Some(AttributeState::Value(b"crlf".to_vec()))),
14222            ]),
14223            "-text"
14224        );
14225    }
14226
14227    #[test]
14228    fn smudge_safety_guard_skips_irreversible_autocrlf() {
14229        // text=auto eol=crlf (AUTO_CRLF): convert pure-LF, but leave content
14230        // alone when it already has a CR or CRLF, or is binary.
14231        let auto = ContentFilterPlan {
14232            text: TextDecision::Auto,
14233            eol: EolConversion::Crlf,
14234            driver: None,
14235        };
14236        assert!(auto.will_convert_lf_to_crlf(b"a\nb\n"));
14237        assert!(!auto.will_convert_lf_to_crlf(b"a\r\nb\n")); // has CRLF
14238        assert!(!auto.will_convert_lf_to_crlf(b"a\nb\rc")); // lone CR (binary)
14239        assert!(!auto.will_convert_lf_to_crlf(b"abc")); // no naked LF
14240
14241        // text eol=crlf (TEXT_CRLF): no safety guard — always convert naked LF
14242        // even when a CR/CRLF is already present.
14243        let text = ContentFilterPlan {
14244            text: TextDecision::Text,
14245            eol: EolConversion::Crlf,
14246            driver: None,
14247        };
14248        assert!(text.will_convert_lf_to_crlf(b"a\r\nb\nc\n"));
14249        assert!(!text.will_convert_lf_to_crlf(b"a\r\nb\r\n")); // no naked LF
14250    }
14251
14252    /// Build an in-memory ignore matcher from raw `.gitignore` lines (no disk).
14253    fn ignore_matcher(patterns: &[&[u8]]) -> IgnoreMatcher {
14254        let mut matcher = IgnoreMatcher::default();
14255        let owned: Vec<Vec<u8>> = patterns.iter().map(|p| p.to_vec()).collect();
14256        matcher.extend_patterns(&owned);
14257        matcher
14258    }
14259
14260    #[test]
14261    fn ignore_match_kind_fast_paths_match_the_wildcard_engine() {
14262        // Literal: exact basename anywhere; not a superstring.
14263        let matcher = ignore_matcher(&[b"Pods"]);
14264        assert!(matcher.is_ignored(b"a/b/Pods", true));
14265        assert!(matcher.is_ignored(b"Pods", false));
14266        assert!(!matcher.is_ignored(b"Pods_not", false));
14267        assert!(matches!(
14268            classify_ignore_pattern(b"Pods"),
14269            MatchKind::Literal
14270        ));
14271
14272        // Suffix `*.log`: basename ending in `.log` at any depth.
14273        let matcher = ignore_matcher(&[b"*.log"]);
14274        assert!(matcher.is_ignored(b"x.log", false));
14275        assert!(matcher.is_ignored(b"a/b/x.log", false));
14276        assert!(matcher.is_ignored(b".log", false));
14277        assert!(!matcher.is_ignored(b"x.logx", false));
14278        assert!(matches!(
14279            classify_ignore_pattern(b"*.log"),
14280            MatchKind::Suffix
14281        ));
14282
14283        // Prefix `build*`: basename starting with `build`.
14284        let matcher = ignore_matcher(&[b"build*"]);
14285        assert!(matcher.is_ignored(b"buildfoo", false));
14286        assert!(matcher.is_ignored(b"a/build", false));
14287        assert!(!matcher.is_ignored(b"xbuild", false));
14288        assert!(matches!(
14289            classify_ignore_pattern(b"build*"),
14290            MatchKind::Prefix
14291        ));
14292    }
14293
#[test]
fn ignore_anchored_suffix_does_not_cross_slash() {
    // A leading `/` anchors `*.log` to the matcher base: a `.log` file at
    // the root is ignored, the same file one directory down is not.
    let anchored_glob = ignore_matcher(&[b"/*.log"]);
    let glob_at_root = anchored_glob.is_ignored(b"x.log", false);
    let glob_nested = anchored_glob.is_ignored(b"sub/x.log", false);
    assert!(glob_at_root);
    assert!(!glob_nested);

    // The same holds for an anchored literal name.
    let anchored_literal = ignore_matcher(&[b"/foo"]);
    let literal_at_root = anchored_literal.is_ignored(b"foo", false);
    let literal_nested = anchored_literal.is_ignored(b"a/foo", false);
    assert!(literal_at_root);
    assert!(!literal_nested);
}
14307
#[test]
fn ignore_anchored_directory_glob_matches_root_directory() {
    let matcher = ignore_matcher(&[b"/tmp-*/"]);
    // Rows are (path, is_dir, expected verdict): the root-level directory
    // and a file beneath it are ignored; a nested directory of the same
    // name, or a non-directory with that name, is not.
    let rows: [(&[u8], bool, bool); 4] = [
        (b"tmp-info-only", true, true),
        (b"tmp-info-only/file.txt", false, true),
        (b"nested/tmp-info-only", true, false),
        (b"tmp-info-only", false, false),
    ];
    for (path, is_dir, expected) in rows {
        assert_eq!(matcher.is_ignored(path, is_dir), expected);
    }
}
14316
#[test]
fn ignore_negated_directory_glob_does_not_reinclude_files() {
    // Mirrors t0008-ignores "directories and ** matches". A negated
    // directory-only pattern brings *directories* back but never the
    // *files* inside them (git: after `!dir/` an explicit `!dir/*` is
    // still needed to reach the files). Verified against git 2.54
    // check-ignore:
    //   data/file              -> data/**           (ignored)
    //   data/data1/file1       -> data/**           (ignored, NOT !data/**/)
    //   data/data1/file1.txt   -> !data/**/*.txt    (re-included)
    //   data/data1   (dir)     -> !data/**/         (re-included)
    let matcher = ignore_matcher(&[b"data/**", b"!data/**/", b"!data/**/*.txt"]);

    // Plain files remain ignored: `!data/**/` must not win the file leaf scan.
    let still_ignored: [&[u8]; 3] = [b"data/file", b"data/data1/file1", b"data/data2/file2"];
    for file in still_ignored {
        assert!(matcher.is_ignored(file, false));
    }

    // `.txt` files come back through the explicit non-directory negation.
    let reincluded_files: [&[u8]; 2] = [b"data/data1/file1.txt", b"data/data2/file2.txt"];
    for file in reincluded_files {
        assert!(!matcher.is_ignored(file, false));
    }

    // Directories ARE re-included by `!data/**/` (the directory-glob gain
    // from `fix: match git status ignored directory globs`).
    let reincluded_dirs: [&[u8]; 2] = [b"data/data1", b"data/data2"];
    for dir in reincluded_dirs {
        assert!(!matcher.is_ignored(dir, true));
    }
}
14340
#[test]
fn ignore_double_star_prefix_collapses_to_basename() {
    // Table rows are (path, is_dir, expected verdict).

    // For a slash-free X, `**/X` behaves like plain `X` (verified against
    // `git check-ignore`).
    let pods = ignore_matcher(&[b"**/Pods"]);
    let pods_rows: [(&[u8], bool, bool); 3] = [
        (b"a/b/Pods", true, true),
        (b"Pods", true, true),
        (b"Pods_not", false, false),
    ];
    for (path, is_dir, expected) in pods_rows {
        assert_eq!(pods.is_ignored(path, is_dir), expected);
    }

    let keystores = ignore_matcher(&[b"**/*.jks"]);
    let keystore_rows: [(&[u8], bool, bool); 3] = [
        (b"x.jks", false, true),
        (b"a/deep/y.jks", false, true),
        (b"x.jksx", false, false),
    ];
    for (path, is_dir, expected) in keystore_rows {
        assert_eq!(keystores.is_ignored(path, is_dir), expected);
    }

    // `**/A/B` still has a slash in its tail, so it remains a real glob and
    // has to match that trailing path at any depth.
    let nested = ignore_matcher(&[b"**/Flutter/ephemeral"]);
    let nested_rows: [(&[u8], bool, bool); 3] = [
        (b"Flutter/ephemeral", true, true),
        (b"a/Flutter/ephemeral", true, true),
        (b"Flutter/other", true, false),
    ];
    for (path, is_dir, expected) in nested_rows {
        assert_eq!(nested.is_ignored(path, is_dir), expected);
    }
    let nested_kind = classify_ignore_pattern(b"**/Flutter/ephemeral");
    assert!(matches!(nested_kind, MatchKind::PathSuffix));
}
14365
#[test]
fn ignore_slash_glob_literal_basename_bucket_preserves_matches() {
    // File glob with a literal final component: it must be filed under that
    // basename in the path bucket and still match as before.
    let java = ignore_matcher(&[b"**/android/**/GeneratedPluginRegistrant.java"]);
    let java_key: &[u8] = b"GeneratedPluginRegistrant.java";
    let java_bucketed = java.buckets.glob_path_literal_basename.contains_key(java_key);
    assert!(java_bucketed);
    let java_hits: [&[u8]; 2] = [
        b"packages/app/android/src/GeneratedPluginRegistrant.java",
        b"android/app/src/main/java/io/flutter/GeneratedPluginRegistrant.java",
    ];
    for path in java_hits {
        assert!(java.is_ignored(path, false));
    }
    assert!(!java.is_ignored(b"android/app/src/main/java/io/flutter/Other.java", false));

    // Directory-only glob with a literal final component: bucketed under the
    // directory basename.
    let pods = ignore_matcher(&[b"**/ios/**/Pods/"]);
    let pods_key: &[u8] = b"Pods";
    let pods_bucketed = pods.buckets.glob_directory_literal_basename.contains_key(pods_key);
    assert!(pods_bucketed);
    assert!(pods.is_ignored(b"ios/Runner/Pods", true));
    assert!(pods.is_ignored(b"dev/app/ios/Runner/Pods/Manifest.lock", false));
    assert!(!pods.is_ignored(b"dev/app/ios/Runner/Podfile", false));

    // Final component is a `*suffix` glob.
    let mode = ignore_matcher(&[b"**/ios/**/*.mode1v3"]);
    let suffix_bucket_empty = mode.buckets.glob_path_suffix_basename.is_empty();
    assert!(
        !suffix_bucket_empty,
        "suffix-final slash glob should be prefiltered by basename suffix"
    );
    assert!(mode.is_ignored(b"apps/ios/Runner/default.mode1v3", false));
    assert!(!mode.is_ignored(b"apps/ios/Runner/default.mode2v3", false));

    // Final component is a `prefix*` glob.
    let registrant = ignore_matcher(&[b"**/ios/Runner/GeneratedPluginRegistrant.*"]);
    let prefix_bucket_empty = registrant.buckets.glob_path_prefix_basename.is_empty();
    assert!(
        !prefix_bucket_empty,
        "prefix-final slash glob should be prefiltered by basename prefix"
    );
    assert!(registrant.is_ignored(b"apps/ios/Runner/GeneratedPluginRegistrant.swift", false));
    assert!(!registrant.is_ignored(
        b"apps/ios/Runner/OtherGeneratedPluginRegistrant.swift",
        false
    ));

    // Directory-only glob whose final component is a `*suffix` glob.
    let framework = ignore_matcher(&[b"ios/Scenarios/*.framework/"]);
    let dir_suffix_bucket_empty = framework.buckets.glob_directory_suffix_basename.is_empty();
    assert!(
        !dir_suffix_bucket_empty,
        "directory suffix-final slash glob should be prefiltered by directory component"
    );
    assert!(framework.is_ignored(b"ios/Scenarios/App.framework", true));
    assert!(framework.is_ignored(b"ios/Scenarios/App.framework/Info.plist", false));
    assert!(!framework.is_ignored(b"ios/Scenarios/App.xcframework/Info.plist", false));
}
14424
#[test]
fn ignore_complex_globs_still_use_the_engine() {
    // Table rows are (path, is_dir, expected verdict).

    // A character class is classified as a general glob.
    let cache = ignore_matcher(&[b"*.[Cc]ache"]);
    let cache_rows: [(&[u8], bool, bool); 3] = [
        (b"x.cache", false, true),
        (b"x.Cache", false, true),
        (b"x.xache", false, false),
    ];
    for (path, is_dir, expected) in cache_rows {
        assert_eq!(cache.is_ignored(path, is_dir), expected);
    }
    let cache_kind = classify_ignore_pattern(b"*.[Cc]ache");
    assert!(matches!(cache_kind, MatchKind::Glob));

    // `?` stands for exactly one character: not zero, not two.
    let icon = ignore_matcher(&[b"Icon?"]);
    let icon_rows: [(&[u8], bool, bool); 3] = [
        (b"IconA", false, true),
        (b"Icon", false, false),
        (b"IconAB", false, false),
    ];
    for (path, is_dir, expected) in icon_rows {
        assert_eq!(icon.is_ignored(path, is_dir), expected);
    }

    // More than one `*` is neither a simple prefix nor a simple suffix.
    let multi_star: [&[u8]; 2] = [b"app.*.symbols", b"a*b*c"];
    for pattern in multi_star {
        assert!(matches!(classify_ignore_pattern(pattern), MatchKind::Glob));
    }

    // Patterns containing a slash match relative to the matcher base only.
    let slashed = ignore_matcher(&[b".vscode/*", b"dev/devicelab/ABresults*.json"]);
    let slashed_rows: [(&[u8], bool, bool); 4] = [
        (b".vscode/settings.json", false, true),
        (b"pkg/.vscode/settings.json", false, false),
        (b"dev/devicelab/ABresults-1.json", false, true),
        (b"dev/devicelab/results-1.json", false, false),
    ];
    for (path, is_dir, expected) in slashed_rows {
        assert_eq!(slashed.is_ignored(path, is_dir), expected);
    }
}
14454
#[test]
fn ignore_negation_still_applies_after_fast_paths() {
    // The last matching pattern decides: the later `!keep.log` literal
    // un-ignores a file that the earlier `*.log` suffix pattern matched.
    let matcher = ignore_matcher(&[b"*.log", b"!keep.log"]);
    let suffix_matched = matcher.is_ignored(b"a/x.log", false);
    let negated = matcher.is_ignored(b"a/keep.log", false);
    assert!(suffix_matched);
    assert!(!negated);
}
14462
#[test]
fn read_expected_object_missing_blob_exposes_oid_and_kind() {
    // An object database with an `objects` directory but nothing in it.
    let root = temp_root();
    let git_dir = root.join(".git");
    let objects_dir = git_dir.join("objects");
    fs::create_dir_all(objects_dir).expect("test operation should succeed");
    let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    let absent = ObjectId::empty_blob(ObjectFormat::Sha1);

    // The lookup must fail with a typed not-found error carrying the id,
    // the object kind, and the context it was requested in.
    let failure = read_expected_object(&odb, &absent, ObjectType::Blob)
        .expect_err("missing blob should error");
    let not_found = failure.not_found_kind().expect("typed not found");
    assert_eq!(not_found.object_id(), Some(absent));
    assert_eq!(
        not_found.missing_object_kind(),
        Some(MissingObjectKind::Blob)
    );
    assert_eq!(
        not_found.missing_object_context(),
        Some(MissingObjectContext::WorktreeMaterialize)
    );

    fs::remove_dir_all(root).expect("test operation should succeed");
}
14482
/// Staging a single worktree file through `add_paths_to_index` must report
/// one entry and leave an on-disk index whose first entry is that path.
#[test]
fn update_index_adds_file_entry_and_blob() {
    let root = temp_root();
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");

    let result = add_paths_to_index(
        &root,
        &git_dir,
        ObjectFormat::Sha1,
        &[PathBuf::from("hello.txt")],
    )
    .expect("test operation should succeed");
    assert_eq!(result.entries, 1);

    // Borrow `git_dir` like the sibling tests do rather than moving the
    // `PathBuf` into the helper; it stays usable for any later assertion.
    let index_bytes =
        fs::read(repository_index_path(&git_dir)).expect("test operation should succeed");
    let index = Index::parse_v2_sha1(&index_bytes).expect("test operation should succeed");
    assert_eq!(index.entries[0].path, b"hello.txt");

    fs::remove_dir_all(root).expect("test operation should succeed");
}
14504
#[test]
fn update_index_and_write_tree_support_sha256() {
    let root = temp_root();
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");

    // Stage the single file with the SHA-256 object format.
    let staged = [PathBuf::from("hello.txt")];
    let outcome = add_paths_to_index(&root, &git_dir, ObjectFormat::Sha256, &staged)
        .expect("test operation should succeed");
    assert_eq!(outcome.entries, 1);

    // The written index parses as SHA-256 and its entry carries a SHA-256 id.
    let index_bytes =
        fs::read(repository_index_path(&git_dir)).expect("test operation should succeed");
    let index = Index::parse(&index_bytes, ObjectFormat::Sha256)
        .expect("test operation should succeed");
    let first = &index.entries[0];
    assert_eq!(first.path, b"hello.txt");
    assert_eq!(first.oid.format(), ObjectFormat::Sha256);

    // Writing the tree yields a SHA-256 id that reads back as a tree object.
    let tree_oid = write_tree_from_index(&git_dir, ObjectFormat::Sha256)
        .expect("test operation should succeed");
    assert_eq!(tree_oid.format(), ObjectFormat::Sha256);
    let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
    let stored = odb
        .read_object(&tree_oid)
        .expect("test operation should succeed");
    assert_eq!(stored.object_type, ObjectType::Tree);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
14538
#[test]
fn write_tree_from_index_writes_nested_tree_objects() {
    let root = temp_root();
    let git_dir = root.join(".git");
    let src_dir = root.join("src");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(&src_dir).expect("test operation should succeed");
    fs::write(root.join("README.md"), b"readme\n").expect("test operation should succeed");
    fs::write(src_dir.join("lib.rs"), b"pub fn demo() {}\n")
        .expect("test operation should succeed");

    // Stage one top-level file and one file inside a subdirectory.
    let staged = [PathBuf::from("README.md"), PathBuf::from("src/lib.rs")];
    let outcome = add_paths_to_index(&root, &git_dir, ObjectFormat::Sha1, &staged)
        .expect("test operation should succeed");
    assert_eq!(outcome.entries, 2);

    // The id returned by write-tree must read back as a tree object.
    let tree_oid = write_tree_from_index(&git_dir, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
    let stored = odb
        .read_object(&tree_oid)
        .expect("test operation should succeed");
    assert_eq!(stored.object_type, ObjectType::Tree);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
14565
#[test]
fn short_status_reports_added_and_untracked_paths() {
    let root = temp_root();
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
    fs::write(root.join("extra.txt"), b"extra\n").expect("test operation should succeed");

    // Only `hello.txt` is staged; `extra.txt` stays untracked.
    let staged = [PathBuf::from("hello.txt")];
    add_paths_to_index(&root, &git_dir, ObjectFormat::Sha1, &staged)
        .expect("test operation should succeed");

    let entries = short_status(&root, &git_dir, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let rendered = entries
        .iter()
        .map(ShortStatusEntry::line)
        .collect::<Vec<_>>();
    assert_eq!(rendered, vec!["A  hello.txt", "?? extra.txt"]);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
14591
#[test]
fn worktree_root_is_none_for_bare_repository() {
    // Even when the git dir is literally named `.git`, `core.bare = true`
    // has to produce `Ok(None)` instead of the "parent of .git" answer.
    let root = temp_root();
    let git_dir = root.join(".git");
    fs::create_dir_all(&git_dir).expect("create bare git dir");

    // Self-contained minimal config so the host gitconfig plays no part.
    let config_path = git_dir.join("config");
    fs::write(config_path, b"[core]\n\tbare = true\n").expect("write bare config");

    let resolved = worktree_root_for_git_dir(&git_dir).expect("resolve bare worktree root");
    assert_eq!(resolved, None, "a bare repository has no working tree");

    fs::remove_dir_all(root).expect("test operation should succeed");
}
14610
/// A non-bare `.git` directory (here with an explicit `core.bare = false`)
/// must resolve to its parent directory — the ordinary non-bare layout.
#[test]
fn worktree_root_is_parent_for_non_bare_dot_git() {
    let root = temp_root();
    let work = root.join("work");
    let git_dir = work.join(".git");
    fs::create_dir_all(&git_dir).expect("create non-bare git dir");
    fs::write(git_dir.join("config"), b"[core]\n\tbare = false\n")
        .expect("write non-bare config");

    // `work` is not needed after this comparison, so it is moved into the
    // expected value instead of being cloned.
    assert_eq!(
        worktree_root_for_git_dir(&git_dir).expect("resolve non-bare worktree root"),
        Some(work),
        "a non-bare .git dir resolves to its parent"
    );

    fs::remove_dir_all(root).expect("test operation should succeed");
}
14630
14631    fn temp_root() -> PathBuf {
14632        let path = std::env::temp_dir().join(format!(
14633            "sley-worktree-{}-{}",
14634            std::process::id(),
14635            TEMP_COUNTER.fetch_add(1, Ordering::Relaxed)
14636        ));
14637        fs::create_dir_all(&path).expect("test operation should succeed");
14638        path
14639    }
14640
14641    fn index_entry_for<'a>(index: &'a Index, path: &[u8]) -> &'a IndexEntry {
14642        index
14643            .entries
14644            .iter()
14645            .find(|entry| entry.path == path)
14646            .unwrap_or_else(|| panic!("missing index entry for {}", String::from_utf8_lossy(path)))
14647    }
14648
14649    fn read_index(git_dir: &Path) -> Index {
14650        Index::parse(
14651            &fs::read(repository_index_path(git_dir)).expect("test operation should succeed"),
14652            ObjectFormat::Sha1,
14653        )
14654        .expect("test operation should succeed")
14655    }
14656
14657    /// Stages `paths` from the worktree, writes their tree, wraps it in a commit
14658    /// object, and points `refs/heads/main` + `HEAD` at it. Returns the commit
14659    /// id. After this call the index reflects the committed tree.
14660    fn build_commit(root: &Path, git_dir: &Path, paths: &[&str]) -> ObjectId {
14661        let path_bufs = paths.iter().map(PathBuf::from).collect::<Vec<_>>();
14662        add_paths_to_index(root, git_dir, ObjectFormat::Sha1, &path_bufs)
14663            .expect("test operation should succeed");
14664        let tree = write_tree_from_index(git_dir, ObjectFormat::Sha1)
14665            .expect("test operation should succeed");
14666        let mut body = Vec::new();
14667        body.extend_from_slice(format!("tree {tree}\n").as_bytes());
14668        body.extend_from_slice(b"author Test <test@example.com> 0 +0000\n");
14669        body.extend_from_slice(b"committer Test <test@example.com> 0 +0000\n");
14670        body.extend_from_slice(b"\n");
14671        body.extend_from_slice(b"sparse fixture\n");
14672        let odb = FileObjectDatabase::from_git_dir(git_dir, ObjectFormat::Sha1);
14673        let commit = odb
14674            .write_object(EncodedObject::new(ObjectType::Commit, body))
14675            .expect("test operation should succeed");
14676        let refs = FileRefStore::new(git_dir, ObjectFormat::Sha1);
14677        let mut tx = refs.transaction();
14678        tx.update(RefUpdate {
14679            name: "refs/heads/main".into(),
14680            expected: None,
14681            new: RefTarget::Direct(commit),
14682            reflog: None,
14683        });
14684        tx.update(RefUpdate {
14685            name: "HEAD".into(),
14686            expected: None,
14687            new: RefTarget::Symbolic("refs/heads/main".into()),
14688            reflog: None,
14689        });
14690        tx.commit().expect("test operation should succeed");
14691        commit
14692    }
14693
14694    fn full_sparse(patterns: &[&[u8]]) -> SparseCheckout {
14695        SparseCheckout {
14696            patterns: patterns.iter().map(|pattern| pattern.to_vec()).collect(),
14697            sparse_index: false,
14698        }
14699    }
14700
#[test]
fn apply_sparse_checkout_full_mode_skips_out_of_cone_paths() {
    let root = temp_root();
    let git_dir = root.join(".git");
    let kept_file = root.join("in").join("keep.txt");
    let dropped_file = root.join("out").join("drop.txt");
    let top_file = root.join("top.txt");

    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(root.join("in")).expect("test operation should succeed");
    fs::create_dir_all(root.join("out")).expect("test operation should succeed");
    fs::write(&kept_file, b"keep\n").expect("test operation should succeed");
    fs::write(&dropped_file, b"drop\n").expect("test operation should succeed");
    fs::write(&top_file, b"top\n").expect("test operation should succeed");
    build_commit(&root, &git_dir, &["in/keep.txt", "out/drop.txt", "top.txt"]);

    // Non-cone ("full") pattern that retains nothing but the `in/` subtree.
    let sparse = full_sparse(&[b"/in/"]);
    let outcome = apply_sparse_checkout_with_mode(
        &root,
        &git_dir,
        ObjectFormat::Sha1,
        &sparse,
        SparseCheckoutMode::Full,
    )
    .expect("test operation should succeed");

    // Worktree: only the `in/` file is left on disk.
    assert!(kept_file.exists());
    assert!(!dropped_file.exists());
    assert!(!top_file.exists());

    // Reported result lists the kept path as materialized, the rest as skipped.
    assert!(outcome.materialized.contains(&b"in/keep.txt".to_vec()));
    for skipped in [b"out/drop.txt".to_vec(), b"top.txt".to_vec()] {
        assert!(outcome.skipped.contains(&skipped));
    }

    // Index: the skip-worktree bit is set exactly on the paths that left.
    let index = read_index(&git_dir);
    let skip_bit = |path: &[u8]| index_entry_skip_worktree(index_entry_for(&index, path));
    assert!(!skip_bit(b"in/keep.txt"));
    assert!(skip_bit(b"out/drop.txt"));
    assert!(skip_bit(b"top.txt"));
    // Entries that left the worktree are still present in the index.
    assert_eq!(index.entries.len(), 3);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
14749
#[test]
fn apply_sparse_checkout_toggle_rematerializes() {
    let root = temp_root();
    let git_dir = root.join(".git");
    let file_a = root.join("a").join("file.txt");
    let file_b = root.join("b").join("file.txt");

    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(root.join("a")).expect("test operation should succeed");
    fs::create_dir_all(root.join("b")).expect("test operation should succeed");
    fs::write(&file_a, b"a\n").expect("test operation should succeed");
    fs::write(&file_b, b"b\n").expect("test operation should succeed");
    build_commit(&root, &git_dir, &["a/file.txt", "b/file.txt"]);

    // Applies a one-pattern, full-mode sparse spec to the fixture repository.
    let narrow_to = |pattern: &[u8]| {
        apply_sparse_checkout_with_mode(
            &root,
            &git_dir,
            ObjectFormat::Sha1,
            &full_sparse(&[pattern]),
            SparseCheckoutMode::Full,
        )
        .expect("test operation should succeed")
    };

    // Step 1: restrict the checkout to `a/`.
    narrow_to(b"/a/");
    assert!(file_a.exists());
    assert!(!file_b.exists());
    let after_first = read_index(&git_dir);
    assert!(index_entry_skip_worktree(index_entry_for(
        &after_first,
        b"b/file.txt"
    )));

    // Step 2: move the selection to `b/`. `a/` has to disappear, `b/` has
    // to return with its committed content, and both skip-worktree bits
    // must flip.
    narrow_to(b"/b/");
    assert!(!file_a.exists());
    assert!(file_b.exists());
    let restored = fs::read(&file_b).expect("test operation should succeed");
    assert_eq!(restored, b"b\n");
    let after_second = read_index(&git_dir);
    assert!(index_entry_skip_worktree(index_entry_for(
        &after_second,
        b"a/file.txt"
    )));
    assert!(!index_entry_skip_worktree(index_entry_for(
        &after_second,
        b"b/file.txt"
    )));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
14805
#[test]
fn apply_sparse_checkout_cone_mode_matches_directory_prefixes() {
    let root = temp_root();
    let git_dir = root.join(".git");
    let kept_dir = root.join("kept");
    let kept_a = kept_dir.join("a.txt");
    let kept_nested_b = kept_dir.join("nested").join("b.txt");
    let other_c = root.join("other").join("c.txt");
    let root_file = root.join("root.txt");

    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(kept_dir.join("nested")).expect("test operation should succeed");
    fs::create_dir_all(root.join("other")).expect("test operation should succeed");
    fs::write(&kept_a, b"a\n").expect("test operation should succeed");
    fs::write(&kept_nested_b, b"b\n").expect("test operation should succeed");
    fs::write(&other_c, b"c\n").expect("test operation should succeed");
    fs::write(&root_file, b"r\n").expect("test operation should succeed");
    build_commit(
        &root,
        &git_dir,
        &["kept/a.txt", "kept/nested/b.txt", "other/c.txt", "root.txt"],
    );

    // The standard cone pattern set: files at the top level plus the whole
    // `kept/` subtree.
    let cone_lines: [&[u8]; 3] = [b"/*", b"!/*/", b"/kept/"];
    let sparse = SparseCheckout {
        patterns: cone_lines.iter().map(|line| line.to_vec()).collect(),
        sparse_index: false,
    };
    // No mode is passed explicitly: auto-detection has to see the cone shape.
    assert!(patterns_are_cone(&sparse.patterns));
    apply_sparse_checkout(&root, &git_dir, ObjectFormat::Sha1, &sparse)
        .expect("test operation should succeed");

    // Worktree: everything except `other/` is present.
    assert!(root_file.exists());
    assert!(kept_a.exists());
    assert!(kept_nested_b.exists());
    assert!(!other_c.exists());

    // Index: only the `other/` entry carries the skip-worktree bit.
    let index = read_index(&git_dir);
    let skip_bit = |path: &[u8]| index_entry_skip_worktree(index_entry_for(&index, path));
    assert!(!skip_bit(b"root.txt"));
    assert!(!skip_bit(b"kept/a.txt"));
    assert!(!skip_bit(b"kept/nested/b.txt"));
    assert!(skip_bit(b"other/c.txt"));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
14859
#[test]
fn apply_sparse_checkout_honors_preexisting_skip_worktree_via_idempotence() {
    let root = temp_root();
    let git_dir = root.join(".git");
    let kept_file = root.join("in").join("keep.txt");
    let dropped_file = root.join("out").join("drop.txt");

    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(root.join("in")).expect("test operation should succeed");
    fs::create_dir_all(root.join("out")).expect("test operation should succeed");
    fs::write(&kept_file, b"keep\n").expect("test operation should succeed");
    fs::write(&dropped_file, b"drop\n").expect("test operation should succeed");
    build_commit(&root, &git_dir, &["in/keep.txt", "out/drop.txt"]);

    let sparse = full_sparse(&[b"/in/"]);
    // Runs the same full-mode spec against the fixture each time it is called.
    let apply = || {
        apply_sparse_checkout_with_mode(
            &root,
            &git_dir,
            ObjectFormat::Sha1,
            &sparse,
            SparseCheckoutMode::Full,
        )
        .expect("test operation should succeed")
    };

    // First application removes the file outside the selection.
    apply();
    assert!(!dropped_file.exists());

    // Second application of the identical spec changes nothing: the file
    // that was already skipped stays absent (it is not resurrected) and its
    // bit stays set.
    let second = apply();
    assert!(!dropped_file.exists());
    assert!(kept_file.exists());
    assert!(second.skipped.contains(&b"out/drop.txt".to_vec()));
    let index = read_index(&git_dir);
    let dropped_entry = index_entry_for(&index, b"out/drop.txt");
    assert!(index_entry_skip_worktree(dropped_entry));

    fs::remove_dir_all(root).expect("test operation should succeed");
}
14904
#[test]
fn checkout_detached_sparse_only_writes_in_cone_paths() {
    let root = temp_root();
    let git_dir = root.join(".git");
    let kept_file = root.join("keep").join("a.txt");
    let skipped_file = root.join("skip").join("b.txt");

    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(root.join("keep")).expect("test operation should succeed");
    fs::create_dir_all(root.join("skip")).expect("test operation should succeed");
    fs::write(&kept_file, b"a\n").expect("test operation should succeed");
    fs::write(&skipped_file, b"b\n").expect("test operation should succeed");
    let commit = build_commit(&root, &git_dir, &["keep/a.txt", "skip/b.txt"]);

    // Starting from a clean worktree that matches the commit, a sparse
    // detached checkout has to keep the selected file and evict the other.
    let sparse = full_sparse(&[b"/keep/"]);
    let identity = b"Test <test@example.com> 0 +0000".to_vec();
    let message = b"checkout".to_vec();
    let outcome = checkout_detached_sparse(
        &root,
        &git_dir,
        ObjectFormat::Sha1,
        &commit,
        identity,
        message,
        &sparse,
    )
    .expect("test operation should succeed");
    assert_eq!(outcome.files, 2);

    // Worktree: the kept file exists with its committed bytes, the other is gone.
    assert!(kept_file.exists());
    let kept_bytes = fs::read(&kept_file).expect("test operation should succeed");
    assert_eq!(kept_bytes, b"a\n");
    assert!(!skipped_file.exists());

    // Index: both entries remain; only the evicted one has skip-worktree set.
    let index = read_index(&git_dir);
    assert_eq!(index.entries.len(), 2);
    let kept_entry = index_entry_for(&index, b"keep/a.txt");
    assert!(!index_entry_skip_worktree(kept_entry));
    let skipped_entry = index_entry_for(&index, b"skip/b.txt");
    assert!(index_entry_skip_worktree(skipped_entry));
    // The evicted entry still records the regular-file mode.
    assert_eq!(skipped_entry.mode, 0o100644);

    fs::remove_dir_all(root).expect("test operation should succeed");
}
14950
14951    // ----- content filtering: EOL / autocrlf + clean/smudge drivers -----
14952
14953    /// Build a [`GitConfig`] from raw config text.
14954    fn config_from(text: &str) -> GitConfig {
14955        GitConfig::parse(text.as_bytes()).expect("test operation should succeed")
14956    }
14957
14958    /// Conformance grid for git's `output_eol(crlf_action)` decision table
14959    /// (convert.c) on the smudge side, exercised across the same
14960    /// attr × autocrlf × eol × content matrix as upstream t0027/t0026.
14961    ///
14962    /// Each row asserts the smudge output for a representative content shape.
14963    /// The cases that historically under-converted are the non-`auto` `text`
14964    /// paths (the auto-only safety guard must NOT fire) and the
14965    /// `autocrlf=true overrides core.eol` precedence rows.
14966    #[test]
14967    fn smudge_output_eol_decision_table() {
14968        // Naked-LF-only blob (the canonical "should gain CRLF" case).
14969        const LF: &[u8] = b"a\nb\nc\n";
14970        // Mixed CRLF + naked LF: a non-auto crlf action converts the naked LFs
14971        // to CRLF (whole file becomes CRLF); an auto action leaves it untouched.
14972        const CRLF_MIX_LF: &[u8] = b"a\r\nb\nc\r\n";
14973        // Naked LF plus a lone CR: non-auto converts LFs, keeping the lone CR.
14974        const LF_MIX_CR: &[u8] = b"a\nb\rc\n";
14975
14976        let smudge = |cfg: &str, attrline: Option<&[u8]>, input: &[u8]| -> Vec<u8> {
14977            let config = config_from(cfg);
14978            let checks = match attrline {
14979                Some(line) => {
14980                    let mut matcher = AttributeMatcher::default();
14981                    read_attribute_patterns_from_bytes(line, &mut matcher, &[]);
14982                    matcher.attributes_for_path(b"f.txt", &filter_attribute_names(), false)
14983                }
14984                None => Vec::new(),
14985            };
14986            apply_smudge_filter_with_attributes(&config, &checks, b"f.txt", input)
14987                .expect("smudge must succeed")
14988        };
14989
14990        // --- attr=text (CRLF_TEXT_*): non-auto, the safety guard must not fire.
14991        // text + eol=crlf => CRLF_TEXT_CRLF: every naked LF gains CR.
14992        let attr_text_crlf: &[u8] = b"*.txt text eol=crlf";
14993        for cfg in [
14994            "[core]\n\tautocrlf = false\n\teol = lf\n",
14995            "[core]\n\tautocrlf = false\n\teol = crlf\n",
14996            "[core]\n\tautocrlf = true\n\teol = lf\n",
14997            "[core]\n\tautocrlf = input\n",
14998        ] {
14999            assert_eq!(
15000                smudge(cfg, Some(attr_text_crlf), LF),
15001                b"a\r\nb\r\nc\r\n",
15002                "text eol=crlf must add CR to naked LF (cfg={cfg:?})"
15003            );
15004            assert_eq!(
15005                smudge(cfg, Some(attr_text_crlf), CRLF_MIX_LF),
15006                b"a\r\nb\r\nc\r\n",
15007                "text eol=crlf must convert mixed content fully (cfg={cfg:?})"
15008            );
15009            assert_eq!(
15010                smudge(cfg, Some(attr_text_crlf), LF_MIX_CR),
15011                b"a\r\nb\rc\r\n",
15012                "text eol=crlf keeps the lone CR but adds CR to naked LF (cfg={cfg:?})"
15013            );
15014        }
15015
15016        // --- attr=text, no eol attr: CRLF_TEXT, resolved by text_eol_is_crlf().
15017        // autocrlf=true wins over core.eol=lf (the precedence fix).
15018        assert_eq!(
15019            smudge(
15020                "[core]\n\tautocrlf = true\n\teol = lf\n",
15021                Some(b"*.txt text"),
15022                LF
15023            ),
15024            b"a\r\nb\r\nc\r\n",
15025            "autocrlf=true must override core.eol=lf for plain text attr"
15026        );
15027        // autocrlf unset, core.eol=crlf => CRLF.
15028        assert_eq!(
15029            smudge("[core]\n\teol = crlf\n", Some(b"*.txt text"), LF),
15030            b"a\r\nb\r\nc\r\n",
15031            "core.eol=crlf adds CR to naked LF for plain text attr"
15032        );
15033        // autocrlf unset, core.eol=lf (and native LF on this host) => no CR.
15034        assert_eq!(
15035            smudge("[core]\n\teol = lf\n", Some(b"*.txt text"), LF),
15036            LF,
15037            "core.eol=lf leaves naked LF untouched on smudge"
15038        );
15039        // text + autocrlf=input => CRLF_TEXT_INPUT: no CR on smudge.
15040        assert_eq!(
15041            smudge("[core]\n\tautocrlf = input\n", Some(b"*.txt text"), LF),
15042            LF,
15043            "autocrlf=input overrides core.eol; no CR on smudge"
15044        );
15045
15046        // --- attr=text=auto (CRLF_AUTO_*): the safety guard DOES fire.
15047        // auto + autocrlf=true + naked-LF-only => convert.
15048        assert_eq!(
15049            smudge("[core]\n\tautocrlf = true\n", Some(b"*.txt text=auto"), LF),
15050            b"a\r\nb\r\nc\r\n",
15051            "text=auto converts a clean naked-LF file"
15052        );
15053        // auto + already has a CR/CRLF => leave untouched (irreversible guard).
15054        assert_eq!(
15055            smudge(
15056                "[core]\n\tautocrlf = true\n",
15057                Some(b"*.txt text=auto"),
15058                CRLF_MIX_LF
15059            ),
15060            CRLF_MIX_LF,
15061            "text=auto must not touch content that already has CRLF"
15062        );
15063        assert_eq!(
15064            smudge(
15065                "[core]\n\tautocrlf = true\n",
15066                Some(b"*.txt text=auto"),
15067                LF_MIX_CR
15068            ),
15069            LF_MIX_CR,
15070            "text=auto must not touch content that already has a lone CR"
15071        );
15072
15073        // --- no attr, autocrlf=true => CRLF_AUTO_CRLF (auto guard applies).
15074        assert_eq!(
15075            smudge("[core]\n\tautocrlf = true\n\teol = lf\n", None, LF),
15076            b"a\r\nb\r\nc\r\n",
15077            "autocrlf=true (no attr) converts clean naked-LF and overrides core.eol=lf"
15078        );
15079        // --- no attr, autocrlf=false => CRLF_BINARY: never convert.
15080        assert_eq!(
15081            smudge("[core]\n\teol = crlf\n", None, LF),
15082            LF,
15083            "no attr + autocrlf=false leaves content untouched even with core.eol=crlf"
15084        );
15085        // --- -text (CRLF_BINARY): never convert regardless of config.
15086        assert_eq!(
15087            smudge("[core]\n\tautocrlf = true\n", Some(b"*.txt -text"), LF),
15088            LF,
15089            "-text is binary: never convert"
15090        );
15091    }
15092
15093    /// Resolve attribute checks against an on-disk `.gitattributes` in `root`.
15094    fn attrs(root: &Path, path: &[u8]) -> Vec<AttributeCheck> {
15095        filter_attribute_checks(root, path).expect("test operation should succeed")
15096    }
15097
15098    #[test]
15099    fn standard_attribute_matcher_matches_per_path_lookup() {
15100        let root = temp_root();
15101        fs::create_dir_all(root.join(".git").join("info")).expect("test operation should succeed");
15102        fs::create_dir_all(root.join("src").join("nested")).expect("test operation should succeed");
15103        fs::write(root.join(".gitattributes"), b"*.rs diff=rust\n")
15104            .expect("test operation should succeed");
15105        fs::write(
15106            root.join("src").join(".gitattributes"),
15107            b"*.rs diff=python\n",
15108        )
15109        .expect("test operation should succeed");
15110        fs::write(
15111            root.join(".git").join("info").join("attributes"),
15112            b"src/nested/*.rs diff=java\n",
15113        )
15114        .expect("test operation should succeed");
15115
15116        let requested = vec![b"diff".to_vec()];
15117        let path = b"src/nested/file.rs";
15118        let per_path = standard_attributes_for_path(&root, path, &requested, false)
15119            .expect("test operation should succeed");
15120        let matcher = StandardAttributeMatcher::from_worktree_root(&root)
15121            .expect("test operation should succeed");
15122        assert_eq!(
15123            matcher.attributes_for_path(path, &requested, false),
15124            per_path
15125        );
15126
15127        fs::remove_dir_all(root).expect("test operation should succeed");
15128    }
15129
15130    #[test]
15131    fn filter_attribute_lookup_reads_only_path_chain() {
15132        let root = temp_root();
15133        fs::create_dir_all(root.join(".git").join("info")).expect("test operation should succeed");
15134        fs::create_dir_all(root.join("src").join("nested")).expect("test operation should succeed");
15135        fs::create_dir_all(root.join("sibling")).expect("test operation should succeed");
15136        fs::write(root.join(".gitattributes"), b"*.txt text\n")
15137            .expect("test operation should succeed");
15138        fs::write(root.join("src").join(".gitattributes"), b"*.txt -text\n")
15139            .expect("test operation should succeed");
15140        fs::write(
15141            root.join("sibling").join(".gitattributes"),
15142            b"*.txt eol=crlf\n",
15143        )
15144        .expect("test operation should succeed");
15145        fs::write(
15146            root.join(".git").join("info").join("attributes"),
15147            b"src/nested/*.txt eol=lf\n",
15148        )
15149        .expect("test operation should succeed");
15150
15151        let path = b"src/nested/file.txt";
15152        let full = standard_attributes_for_path(&root, path, &filter_attribute_names(), false)
15153            .expect("test operation should succeed");
15154        assert_eq!(filter_attribute_checks(&root, path).unwrap(), full);
15155
15156        fs::remove_dir_all(root).expect("test operation should succeed");
15157    }
15158
15159    #[test]
15160    fn crlf_to_lf_collapses_only_pairs() {
15161        assert_eq!(
15162            convert_crlf_to_lf_cow(Cow::Borrowed(b"a\r\nb\r\n")).as_ref(),
15163            b"a\nb\n"
15164        );
15165        // A lone CR (no following LF) is preserved.
15166        assert_eq!(
15167            convert_crlf_to_lf_cow(Cow::Borrowed(b"a\rb")).as_ref(),
15168            b"a\rb"
15169        );
15170        // An already-LF stream is unchanged.
15171        assert!(matches!(
15172            convert_crlf_to_lf_cow(Cow::Borrowed(b"a\nb\n")),
15173            Cow::Borrowed(_)
15174        ));
15175    }
15176
15177    #[test]
15178    fn lf_to_crlf_does_not_double_convert() {
15179        assert_eq!(convert_lf_to_crlf(b"a\nb\n"), b"a\r\nb\r\n");
15180        // Existing CRLF is left intact (no extra CR added).
15181        assert_eq!(convert_lf_to_crlf(b"a\r\nb\r\n"), b"a\r\nb\r\n");
15182    }
15183
15184    #[test]
15185    fn autocrlf_round_trip_clean_then_smudge() {
15186        // autocrlf=true: worktree CRLF -> blob LF on clean, blob LF -> worktree
15187        // CRLF on smudge.
15188        let config = config_from("[core]\n\tautocrlf = true\n");
15189        let checks: Vec<AttributeCheck> = Vec::new();
15190        let worktree = b"line1\r\nline2\r\n";
15191        let blob = apply_clean_filter_with_attributes(&config, &checks, b"file.txt", worktree)
15192            .expect("test operation should succeed");
15193        assert_eq!(blob, b"line1\nline2\n", "clean must normalize CRLF to LF");
15194        let restored = apply_smudge_filter_with_attributes(&config, &checks, b"file.txt", &blob)
15195            .expect("test operation should succeed");
15196        assert_eq!(
15197            restored, worktree,
15198            "smudge must restore CRLF from the LF blob"
15199        );
15200    }
15201
15202    #[test]
15203    fn conv_flags_from_config_matches_git_defaults() {
15204        // Unset core.safecrlf defaults to WARN (git's global_conv_flags_eol).
15205        assert_eq!(ConvFlags::from_config(&config_from("")), ConvFlags::Warn);
15206        assert_eq!(
15207            ConvFlags::from_config(&config_from("[core]\n\tsafecrlf = warn\n")),
15208            ConvFlags::Warn
15209        );
15210        assert_eq!(
15211            ConvFlags::from_config(&config_from("[core]\n\tsafecrlf = WARN\n")),
15212            ConvFlags::Warn
15213        );
15214        assert_eq!(
15215            ConvFlags::from_config(&config_from("[core]\n\tsafecrlf = true\n")),
15216            ConvFlags::Die
15217        );
15218        assert_eq!(
15219            ConvFlags::from_config(&config_from("[core]\n\tsafecrlf = false\n")),
15220            ConvFlags::Off
15221        );
15222    }
15223
15224    #[test]
15225    fn safecrlf_warn_does_not_change_clean_bytes() {
15226        // The warning is purely additive: byte output is identical whether
15227        // safecrlf is off or warn.
15228        let config = config_from("[core]\n\tautocrlf = true\n");
15229        let checks: Vec<AttributeCheck> = Vec::new();
15230        let worktree = b"a\nb\nc\n";
15231        let plain = apply_clean_filter_with_attributes(&config, &checks, b"f.txt", worktree)
15232            .expect("clean");
15233        let warned = apply_clean_filter_with_attributes_cow_safecrlf(
15234            &config,
15235            &checks,
15236            b"f.txt",
15237            worktree,
15238            ConvFlags::Warn,
15239            SafeCrlfIndexBlob::None,
15240        )
15241        .expect("clean with safecrlf")
15242        .into_owned();
15243        assert_eq!(plain, warned, "safecrlf must not alter the cleaned bytes");
15244    }
15245
15246    #[test]
15247    fn safecrlf_die_errors_on_lf_to_crlf_round_trip() {
15248        // autocrlf=true on a pure-LF file: checkout would add CRLF, so the
15249        // round-trip is irreversible and safecrlf=true dies (exit 128).
15250        let config = config_from("[core]\n\tautocrlf = true\n");
15251        let checks: Vec<AttributeCheck> = Vec::new();
15252        let err = apply_clean_filter_with_attributes_cow_safecrlf(
15253            &config,
15254            &checks,
15255            b"f.txt",
15256            b"a\nb\n",
15257            ConvFlags::Die,
15258            SafeCrlfIndexBlob::None,
15259        )
15260        .expect_err("die must error");
15261        assert!(matches!(err, GitError::Exit(128)));
15262    }
15263
15264    #[test]
15265    fn safecrlf_die_errors_on_crlf_to_lf_round_trip() {
15266        // autocrlf=input on a CRLF file: clean strips CRLF and checkout never
15267        // restores it, so safecrlf=true dies.
15268        let config = config_from("[core]\n\tautocrlf = input\n");
15269        let checks: Vec<AttributeCheck> = Vec::new();
15270        let err = apply_clean_filter_with_attributes_cow_safecrlf(
15271            &config,
15272            &checks,
15273            b"f.txt",
15274            b"a\r\nb\r\n",
15275            ConvFlags::Die,
15276            SafeCrlfIndexBlob::None,
15277        )
15278        .expect_err("die must error");
15279        assert!(matches!(err, GitError::Exit(128)));
15280    }
15281
15282    #[test]
15283    fn safecrlf_reversible_round_trip_does_not_warn_or_die() {
15284        // A CRLF file under autocrlf=true survives the round trip (clean to LF,
15285        // smudge back to CRLF), so even safecrlf=true is silent.
15286        let config = config_from("[core]\n\tautocrlf = true\n");
15287        let checks: Vec<AttributeCheck> = Vec::new();
15288        let out = apply_clean_filter_with_attributes_cow_safecrlf(
15289            &config,
15290            &checks,
15291            b"f.txt",
15292            b"a\r\nb\r\n",
15293            ConvFlags::Die,
15294            SafeCrlfIndexBlob::None,
15295        )
15296        .expect("reversible round trip must not die");
15297        assert_eq!(out.as_ref(), b"a\nb\n");
15298    }
15299
15300    #[test]
15301    fn safecrlf_binary_content_is_silent() {
15302        // autocrlf=true with NUL-containing (binary) content: no conversion and
15303        // no warning/die, mirroring git's early-return in crlf_to_git.
15304        let config = config_from("[core]\n\tautocrlf = true\n");
15305        let checks: Vec<AttributeCheck> = Vec::new();
15306        let body: &[u8] = b"a\nb\0c\n";
15307        let out = apply_clean_filter_with_attributes_cow_safecrlf(
15308            &config,
15309            &checks,
15310            b"f.bin",
15311            body,
15312            ConvFlags::Die,
15313            SafeCrlfIndexBlob::None,
15314        )
15315        .expect("binary content must not die");
15316        assert_eq!(out.as_ref(), body, "binary content is never converted");
15317    }
15318
15319    #[test]
15320    fn safecrlf_off_is_silent_even_on_irreversible_round_trip() {
15321        let config = config_from("[core]\n\tautocrlf = true\n");
15322        let checks: Vec<AttributeCheck> = Vec::new();
15323        let out = apply_clean_filter_with_attributes_cow_safecrlf(
15324            &config,
15325            &checks,
15326            b"f.txt",
15327            b"a\nb\n",
15328            ConvFlags::Off,
15329            SafeCrlfIndexBlob::None,
15330        )
15331        .expect("safecrlf=off never errors");
15332        // autocrlf=true does not convert on clean (only smudge), so bytes pass through.
15333        assert_eq!(out.as_ref(), b"a\nb\n");
15334    }
15335
15336    #[test]
15337    fn autocrlf_input_normalizes_on_clean_but_not_smudge() {
15338        // autocrlf=input: clean normalizes to LF, smudge leaves LF as-is.
15339        let config = config_from("[core]\n\tautocrlf = input\n");
15340        let checks: Vec<AttributeCheck> = Vec::new();
15341        let blob = apply_clean_filter_with_attributes(&config, &checks, b"file.txt", b"a\r\nb\r\n")
15342            .expect("test operation should succeed");
15343        assert_eq!(blob, b"a\nb\n");
15344        let smudged = apply_smudge_filter_with_attributes(&config, &checks, b"file.txt", &blob)
15345            .expect("test operation should succeed");
15346        assert_eq!(
15347            smudged, b"a\nb\n",
15348            "input mode must not add carriage returns"
15349        );
15350    }
15351
15352    #[test]
15353    fn eol_crlf_attribute_drives_conversion_without_config() {
15354        // No core.autocrlf; the `eol=crlf` attribute alone forces conversion.
15355        let config = config_from("");
15356        let checks = vec![AttributeCheck {
15357            attribute: b"eol".to_vec(),
15358            state: Some(AttributeState::Value(b"crlf".to_vec())),
15359        }];
15360        let blob = apply_clean_filter_with_attributes(&config, &checks, b"a.txt", b"x\r\ny\r\n")
15361            .expect("test operation should succeed");
15362        assert_eq!(blob, b"x\ny\n");
15363        let smudged = apply_smudge_filter_with_attributes(&config, &checks, b"a.txt", &blob)
15364            .expect("test operation should succeed");
15365        assert_eq!(smudged, b"x\r\ny\r\n");
15366    }
15367
15368    #[test]
15369    fn binary_attribute_disables_eol_conversion() {
15370        // `-text` (binary) must leave CRLF/NUL content untouched in both
15371        // directions even when autocrlf=true.
15372        let config = config_from("[core]\n\tautocrlf = true\n");
15373        let checks = vec![AttributeCheck {
15374            attribute: b"text".to_vec(),
15375            state: Some(AttributeState::Unset),
15376        }];
15377        let content = b"\x00\x01\r\n\x02\r\n".to_vec();
15378        let blob = apply_clean_filter_with_attributes(&config, &checks, b"data.bin", &content)
15379            .expect("test operation should succeed");
15380        assert_eq!(blob, content, "binary file must not be CRLF-normalized");
15381        let smudged = apply_smudge_filter_with_attributes(&config, &checks, b"data.bin", &blob)
15382            .expect("test operation should succeed");
15383        assert_eq!(
15384            smudged, content,
15385            "binary file must not gain carriage returns"
15386        );
15387    }
15388
15389    #[test]
15390    fn autocrlf_auto_skips_binary_looking_content() {
15391        // text=auto (via autocrlf) must not convert content that contains NUL.
15392        let config = config_from("[core]\n\tautocrlf = true\n");
15393        let checks: Vec<AttributeCheck> = Vec::new();
15394        let content = b"a\r\n\x00b\r\n".to_vec();
15395        let blob = apply_clean_filter_with_attributes(&config, &checks, b"f", &content)
15396            .expect("test operation should succeed");
15397        assert_eq!(blob, content, "binary-looking content stays untouched");
15398    }
15399
15400    #[test]
15401    fn autocrlf_via_add_and_checkout_round_trips() {
15402        // End-to-end: a CRLF worktree file is stored as an LF blob by the
15403        // filtered add path, and restored as CRLF by the filtered checkout.
15404        let root = temp_root();
15405        let git_dir = root.join(".git");
15406        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
15407        let config = config_from("[core]\n\tautocrlf = true\n");
15408
15409        fs::write(root.join("crlf.txt"), b"alpha\r\nbeta\r\n")
15410            .expect("test operation should succeed");
15411        add_paths_to_index_filtered(
15412            &root,
15413            &git_dir,
15414            ObjectFormat::Sha1,
15415            &[PathBuf::from("crlf.txt")],
15416            &config,
15417        )
15418        .expect("test operation should succeed");
15419
15420        // The stored blob must be LF-normalized.
15421        let index = read_index(&git_dir);
15422        let entry = index_entry_for(&index, b"crlf.txt");
15423        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
15424        let blob = odb
15425            .read_object(&entry.oid)
15426            .expect("test operation should succeed");
15427        assert_eq!(blob.body, b"alpha\nbeta\n");
15428
15429        // Commit and point HEAD at it, then re-checkout with smudge filtering.
15430        let tree = write_tree_from_index(&git_dir, ObjectFormat::Sha1)
15431            .expect("test operation should succeed");
15432        let mut body = Vec::new();
15433        body.extend_from_slice(format!("tree {tree}\n").as_bytes());
15434        body.extend_from_slice(b"author T <t@e> 0 +0000\ncommitter T <t@e> 0 +0000\n\nm\n");
15435        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
15436        let commit = odb
15437            .write_object(EncodedObject::new(ObjectType::Commit, body))
15438            .expect("test operation should succeed");
15439        let refs = FileRefStore::new(&git_dir, ObjectFormat::Sha1);
15440        let mut tx = refs.transaction();
15441        tx.update(RefUpdate {
15442            name: "HEAD".into(),
15443            expected: None,
15444            new: RefTarget::Direct(commit),
15445            reflog: None,
15446        });
15447        tx.commit().expect("test operation should succeed");
15448
15449        // Make the worktree match the committed (LF) blob so the tree is clean
15450        // for checkout; `short_status`/`worktree_entries` compare by content
15451        // hash and are not filter-aware. Checkout will then smudge it to CRLF.
15452        fs::write(root.join("crlf.txt"), b"alpha\nbeta\n").expect("test operation should succeed");
15453        checkout_detached_filtered(
15454            &root,
15455            &git_dir,
15456            ObjectFormat::Sha1,
15457            &commit,
15458            b"T <t@e> 0 +0000".to_vec(),
15459            b"co".to_vec(),
15460            &config,
15461        )
15462        .expect("test operation should succeed");
15463        assert_eq!(
15464            fs::read(root.join("crlf.txt")).expect("test operation should succeed"),
15465            b"alpha\r\nbeta\r\n",
15466            "checkout must restore CRLF line endings"
15467        );
15468        fs::remove_dir_all(root).expect("test operation should succeed");
15469    }
15470
15471    #[test]
15472    fn driver_filter_clean_and_smudge_transform_both_directions() {
15473        // filter=case: clean upper-cases (worktree -> blob), smudge lower-cases
15474        // (blob -> worktree).
15475        let config =
15476            config_from("[filter \"case\"]\n\tclean = tr a-z A-Z\n\tsmudge = tr A-Z a-z\n");
15477        let checks = vec![AttributeCheck {
15478            attribute: b"filter".to_vec(),
15479            state: Some(AttributeState::Value(b"case".to_vec())),
15480        }];
15481        let blob = apply_clean_filter_with_attributes(&config, &checks, b"f.txt", b"Hello World")
15482            .expect("test operation should succeed");
15483        assert_eq!(blob, b"HELLO WORLD", "clean driver must upper-case");
15484        let worktree =
15485            apply_smudge_filter_with_attributes(&config, &checks, b"f.txt", b"HELLO WORLD")
15486                .expect("test operation should succeed");
15487        assert_eq!(worktree, b"hello world", "smudge driver must lower-case");
15488    }
15489
15490    #[test]
15491    fn driver_filter_resolved_from_gitattributes_file() {
15492        // The filter name is read from a real `.gitattributes`, the commands from
15493        // config; exercises the public worktree-rooted entry points.
15494        let root = temp_root();
15495        let git_dir = root.join(".git");
15496        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
15497        fs::write(root.join(".gitattributes"), b"*.dat filter=rot\n")
15498            .expect("test operation should succeed");
15499        let config =
15500            config_from("[filter \"rot\"]\n\tclean = sed s/a/b/g\n\tsmudge = sed s/b/a/g\n");
15501        // Clean reads attributes from the live worktree `.gitattributes`.
15502        let blob = apply_clean_filter(&root, &git_dir, &config, b"x.dat", b"banana")
15503            .expect("test operation should succeed");
15504        assert_eq!(blob, b"bbnbnb");
15505        // Smudge reads attributes from the index (the worktree file may not
15506        // exist yet during checkout), so stage `.gitattributes` first.
15507        add_paths_to_index(
15508            &root,
15509            &git_dir,
15510            ObjectFormat::Sha1,
15511            &[PathBuf::from(".gitattributes")],
15512        )
15513        .expect("test operation should succeed");
15514        let smudged = apply_smudge_filter(
15515            &root,
15516            &git_dir,
15517            ObjectFormat::Sha1,
15518            &config,
15519            b"x.dat",
15520            &blob,
15521        )
15522        .expect("test operation should succeed");
15523        // sed s/b/a/g is not a perfect inverse, but verifies the smudge command
15524        // ran on the blob bytes.
15525        assert_eq!(smudged, b"aanana");
15526        fs::remove_dir_all(root).expect("test operation should succeed");
15527    }
15528
15529    #[test]
15530    fn required_filter_failure_is_fatal() {
15531        // A required filter whose command fails must surface an error.
15532        let config = config_from("[filter \"boom\"]\n\tclean = false\n\trequired = true\n");
15533        let checks = vec![AttributeCheck {
15534            attribute: b"filter".to_vec(),
15535            state: Some(AttributeState::Value(b"boom".to_vec())),
15536        }];
15537        let err = apply_clean_filter_with_attributes(&config, &checks, b"f", b"data")
15538            .expect_err("required filter failure must error");
15539        assert!(matches!(err, GitError::Command(_)), "got {err:?}");
15540    }
15541
15542    #[test]
15543    fn required_filter_missing_command_is_fatal() {
15544        // required=true but no clean command for this direction is also fatal.
15545        let config = config_from("[filter \"need\"]\n\tsmudge = cat\n\trequired = true\n");
15546        let checks = vec![AttributeCheck {
15547            attribute: b"filter".to_vec(),
15548            state: Some(AttributeState::Value(b"need".to_vec())),
15549        }];
15550        let err = apply_clean_filter_with_attributes(&config, &checks, b"f", b"data")
15551            .expect_err("required filter without a clean command must error");
15552        assert!(matches!(err, GitError::Command(_)), "got {err:?}");
15553    }
15554
15555    #[test]
15556    fn non_required_filter_failure_passes_through() {
15557        // A non-required filter that fails must pass the content through
15558        // unchanged rather than erroring.
15559        let config = config_from("[filter \"opt\"]\n\tclean = false\n");
15560        let checks = vec![AttributeCheck {
15561            attribute: b"filter".to_vec(),
15562            state: Some(AttributeState::Value(b"opt".to_vec())),
15563        }];
15564        let out = apply_clean_filter_with_attributes(&config, &checks, b"f", b"keepme")
15565            .expect("test operation should succeed");
15566        assert_eq!(
15567            out, b"keepme",
15568            "optional filter failure passes content through"
15569        );
15570    }
15571
15572    #[test]
15573    fn filter_with_no_command_is_noop() {
15574        // filter=name with no configured commands and not required is ignored.
15575        let config = config_from("");
15576        let checks = vec![AttributeCheck {
15577            attribute: b"filter".to_vec(),
15578            state: Some(AttributeState::Value(b"ghost".to_vec())),
15579        }];
15580        let out = apply_clean_filter_with_attributes(&config, &checks, b"f", b"unchanged")
15581            .expect("test operation should succeed");
15582        assert_eq!(out, b"unchanged");
15583    }
15584
15585    #[test]
15586    fn driver_and_eol_compose_on_clean_and_smudge() {
15587        // filter=case + autocrlf=true: clean runs the driver then CRLF->LF;
15588        // smudge runs LF->CRLF then the driver.
15589        let config = config_from(
15590            "[core]\n\tautocrlf = true\n[filter \"case\"]\n\tclean = tr a-z A-Z\n\tsmudge = tr A-Z a-z\n",
15591        );
15592        let checks = vec![
15593            AttributeCheck {
15594                attribute: b"filter".to_vec(),
15595                state: Some(AttributeState::Value(b"case".to_vec())),
15596            },
15597            AttributeCheck {
15598                attribute: b"text".to_vec(),
15599                state: Some(AttributeState::Set),
15600            },
15601        ];
15602        let blob = apply_clean_filter_with_attributes(&config, &checks, b"f.txt", b"ab\r\ncd\r\n")
15603            .expect("test operation should succeed");
15604        assert_eq!(blob, b"AB\nCD\n", "clean: upper-case then CRLF->LF");
15605        let worktree = apply_smudge_filter_with_attributes(&config, &checks, b"f.txt", &blob)
15606            .expect("test operation should succeed");
15607        assert_eq!(
15608            worktree, b"ab\r\ncd\r\n",
15609            "smudge: LF->CRLF then lower-case"
15610        );
15611    }
15612
15613    #[test]
15614    fn attrs_helper_reads_filter_from_disk() {
15615        let root = temp_root();
15616        fs::write(root.join(".gitattributes"), b"*.txt text\n*.bin -text\n")
15617            .expect("test operation should succeed");
15618        let text = attrs(&root, b"a.txt");
15619        assert!(
15620            text.iter()
15621                .any(|c| c.attribute == b"text" && c.state == Some(AttributeState::Set))
15622        );
15623        let bin = attrs(&root, b"a.bin");
15624        assert!(
15625            bin.iter()
15626                .any(|c| c.attribute == b"text" && c.state == Some(AttributeState::Unset))
15627        );
15628        fs::remove_dir_all(root).expect("test operation should succeed");
15629    }
15630
15631    /// Builds a stat cache holding a single stage-0 entry whose size+mtime match
15632    /// `file`'s real metadata, with the index-file mtime placed strictly after
15633    /// the entry mtime so the entry reads as non-racy by default. The entry's oid
15634    /// is `oid` and its mode is `mode`.
15635    fn stat_cache_for(file: &Path, oid: ObjectId, mode: u32) -> (IndexStatCache, IndexEntry) {
15636        let metadata = fs::metadata(file).expect("test operation should succeed");
15637        let mut entry = index_entry_from_metadata(b"f.txt".to_vec(), oid, &metadata);
15638        entry.mode = mode;
15639        let index_mtime = Some((u64::from(entry.mtime_seconds) + 10, 0));
15640        let mut entries = HashMap::new();
15641        entries.insert(entry.path.as_bytes().to_vec(), entry.clone());
15642        (
15643            IndexStatCache {
15644                entries,
15645                index_mtime,
15646            },
15647            entry,
15648        )
15649    }
15650
15651    #[test]
15652    fn reuse_tracked_entry_only_reuses_clean_non_racy_match() {
15653        let root = temp_root();
15654        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
15655        let file = root.join("f.txt");
15656        let metadata = fs::metadata(&file).expect("test operation should succeed");
15657        let real_mode = file_mode(&metadata);
15658        let oid = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec())
15659            .object_id(ObjectFormat::Sha1)
15660            .expect("test operation should succeed");
15661
15662        // Clean, non-racy, matching stat + mode -> reuse the cached oid.
15663        let (cache, _) = stat_cache_for(&file, oid, real_mode);
15664        let reused = cache.reuse_tracked_entry(b"f.txt", &metadata);
15665        assert_eq!(
15666            reused,
15667            Some(TrackedEntry {
15668                mode: real_mode,
15669                oid,
15670            }),
15671            "a clean non-racy stat+mode match must reuse the staged oid"
15672        );
15673
15674        // No stage-0 entry for the path -> must hash.
15675        assert_eq!(
15676            cache.reuse_tracked_entry(b"other.txt", &metadata),
15677            None,
15678            "a path with no cached entry must fall through to hashing"
15679        );
15680
15681        // Size differs from the file -> must hash.
15682        let (mut size_cache, mut shrunk) = stat_cache_for(&file, oid, real_mode);
15683        shrunk.size = shrunk.size.saturating_sub(1);
15684        size_cache.entries.insert(shrunk.path.to_vec(), shrunk);
15685        assert_eq!(
15686            size_cache.reuse_tracked_entry(b"f.txt", &metadata),
15687            None,
15688            "a size mismatch must fall through to hashing"
15689        );
15690
15691        // Mode differs (e.g. a chmod that did not move mtime) -> must hash.
15692        let (mode_cache, _) = stat_cache_for(&file, oid, 0o100755);
15693        assert_eq!(
15694            mode_cache.reuse_tracked_entry(b"f.txt", &metadata),
15695            None,
15696            "a mode mismatch must fall through to hashing"
15697        );
15698
15699        // Racily clean (index mtime not strictly after the entry mtime) -> hash.
15700        let (mut racy_cache, entry) = stat_cache_for(&file, oid, real_mode);
15701        racy_cache.index_mtime = Some((
15702            u64::from(entry.mtime_seconds),
15703            u64::from(entry.mtime_nanoseconds),
15704        ));
15705        assert_eq!(
15706            racy_cache.reuse_tracked_entry(b"f.txt", &metadata),
15707            None,
15708            "a racily-clean entry must always be re-hashed"
15709        );
15710
15711        // Unknown index mtime is treated as racy -> hash.
15712        let (mut unknown_cache, _) = stat_cache_for(
15713            &file,
15714            EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec())
15715                .object_id(ObjectFormat::Sha1)
15716                .expect("test operation should succeed"),
15717            real_mode,
15718        );
15719        unknown_cache.index_mtime = None;
15720        assert_eq!(
15721            unknown_cache.reuse_tracked_entry(b"f.txt", &metadata),
15722            None,
15723            "an unknown index mtime must be treated conservatively as racy"
15724        );
15725
15726        fs::remove_dir_all(root).expect("test operation should succeed");
15727    }
15728
15729    #[test]
15730    fn index_stat_probe_cache_serves_many_paths_from_one_index_parse() {
15731        let root = temp_root();
15732        let git_dir = root.join(".git");
15733        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
15734        fs::write(root.join("a.txt"), b"alpha\n").expect("test operation should succeed");
15735        fs::write(root.join("b.txt"), b"bravo\n").expect("test operation should succeed");
15736        build_commit(&root, &git_dir, &["a.txt", "b.txt"]);
15737
15738        let cache = IndexStatProbeCache::from_repository_index(&git_dir, ObjectFormat::Sha1)
15739            .expect("probe cache");
15740        assert_eq!(cache.len(), 2);
15741        assert!(cache.contains_git_path(b"a.txt"));
15742        assert!(cache.contains_git_path(b"b.txt"));
15743        let a = cache.probe_for_git_path(b"a.txt").expect("a probe");
15744        let b = cache.probe_for_git_path(b"b.txt").expect("b probe");
15745        assert_eq!(a.entry().path, b"a.txt");
15746        assert_eq!(b.entry().path, b"b.txt");
15747        assert_eq!(a.index_mtime(), cache.index_mtime());
15748        assert_eq!(b.index_mtime(), cache.index_mtime());
15749        assert!(
15750            cache.probe_for_git_path(b"missing.txt").is_none(),
15751            "missing paths should not allocate probes"
15752        );
15753
15754        let one_shot =
15755            IndexStatProbe::from_repository_index(&git_dir, ObjectFormat::Sha1, b"a.txt")
15756                .expect("legacy one-shot probe")
15757                .expect("a probe");
15758        assert_eq!(one_shot.entry().path, b"a.txt");
15759        assert_eq!(one_shot.index_mtime(), cache.index_mtime());
15760
15761        fs::remove_dir_all(root).expect("test operation should succeed");
15762    }
15763
15764    #[test]
15765    fn short_status_detects_same_length_content_change() {
15766        let root = temp_root();
15767        let git_dir = root.join(".git");
15768        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
15769        fs::write(root.join("f.txt"), b"aaaa\n").expect("test operation should succeed");
15770        build_commit(&root, &git_dir, &["f.txt"]);
15771        // Overwrite with the SAME byte length but different content. Right after
15772        // staging the entry is racily clean (index mtime >= entry mtime), so the
15773        // stat shortcut must not be trusted and the change must surface as M.
15774        fs::write(root.join("f.txt"), b"bbbb\n").expect("test operation should succeed");
15775        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
15776            .expect("test operation should succeed");
15777        assert_eq!(
15778            status
15779                .iter()
15780                .map(ShortStatusEntry::line)
15781                .collect::<Vec<_>>(),
15782            vec![" M f.txt"],
15783            "a same-length content change must be reported modified"
15784        );
15785        fs::remove_dir_all(root).expect("test operation should succeed");
15786    }
15787
15788    #[test]
15789    fn short_status_clean_after_byte_identical_rewrite() {
15790        let root = temp_root();
15791        let git_dir = root.join(".git");
15792        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
15793        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
15794        build_commit(&root, &git_dir, &["f.txt"]);
15795        // Rewrite with byte-identical content; the mtime moves so the stat
15796        // shortcut declines to reuse and the fallback hash proves it clean.
15797        std::thread::sleep(std::time::Duration::from_millis(20));
15798        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
15799        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
15800            .expect("test operation should succeed");
15801        assert!(
15802            status.is_empty(),
15803            "a byte-identical rewrite must be clean via the fallback hash, got {status:?}"
15804        );
15805        fs::remove_dir_all(root).expect("test operation should succeed");
15806    }
15807
15808    #[test]
15809    fn short_status_trusts_stat_cache_and_skips_rehash() {
15810        let root = temp_root();
15811        let git_dir = root.join(".git");
15812        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
15813        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
15814        build_commit(&root, &git_dir, &["f.txt"]);
15815
15816        // Plant a BOGUS oid in the stage-0 entry while preserving its size+mtime,
15817        // so a real re-hash of the (unchanged) worktree file would NOT match it.
15818        let index_path = repository_index_path(&git_dir);
15819        let mut index = read_index(&git_dir);
15820        let bogus = ObjectId::from_hex(ObjectFormat::Sha1, &"0".repeat(40))
15821            .expect("test operation should succeed");
15822        let real_oid = index_entry_for(&index, b"f.txt").oid;
15823        assert_ne!(
15824            real_oid, bogus,
15825            "fixture oid should differ from the bogus oid"
15826        );
15827        index
15828            .entries
15829            .iter_mut()
15830            .find(|entry| entry.path == b"f.txt")
15831            .expect("test operation should succeed")
15832            .oid = bogus.clone();
15833        fs::write(
15834            &index_path,
15835            index
15836                .write(ObjectFormat::Sha1)
15837                .expect("test operation should succeed"),
15838        )
15839        .expect("test operation should succeed");
15840
15841        // Make the index file STRICTLY newer than the entry mtime (non-racy) by
15842        // waiting past one-second filesystem granularity and rewriting it, so the
15843        // racy-clean guard does not force a re-hash.
15844        std::thread::sleep(std::time::Duration::from_millis(1100));
15845        fs::write(
15846            &index_path,
15847            fs::read(&index_path).expect("test operation should succeed"),
15848        )
15849        .expect("test operation should succeed");
15850
15851        // The file is unchanged on disk, so a trusted stat reuses the bogus index
15852        // oid for the worktree entry: worktree-oid == index-oid == bogus, so the
15853        // WORKTREE column is clean. Had status re-hashed the file, the real oid
15854        // would differ from the bogus index oid and the worktree column would be
15855        // 'M'. (The index-vs-HEAD column is 'M' because we corrupted the index
15856        // oid away from HEAD; that is expected and not what this test asserts.)
15857        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
15858            .expect("test operation should succeed");
15859        let entry = status
15860            .iter()
15861            .find(|entry| entry.path == b"f.txt")
15862            .expect("f.txt should appear (its index oid now differs from HEAD)");
15863        assert_eq!(
15864            entry.worktree, b' ',
15865            "non-racy stat match must trust the cached oid (no re-hash); worktree column was {}",
15866            entry.worktree as char
15867        );
15868        assert_eq!(
15869            entry.index_oid.as_ref(),
15870            Some(&bogus),
15871            "the worktree entry must have reused the planted bogus index oid, not the real hash"
15872        );
15873
15874        fs::remove_dir_all(root).expect("test operation should succeed");
15875    }
15876
15877    #[test]
15878    fn worktree_entry_state_detects_same_size_content_change() {
15879        let root = temp_root();
15880        let git_dir = root.join(".git");
15881        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
15882        fs::write(root.join("f.txt"), b"aaaa\n").expect("test operation should succeed");
15883        build_commit(&root, &git_dir, &["f.txt"]);
15884        let index = read_index(&git_dir);
15885        let entry = index_entry_for(&index, b"f.txt").clone();
15886        let probe = IndexStatProbe::from_index_entry_and_index_path(
15887            entry.clone(),
15888            repository_index_path(&git_dir),
15889        );
15890
15891        fs::write(root.join("f.txt"), b"bbbb\n").expect("test operation should succeed");
15892        let state = worktree_entry_state(
15893            &root,
15894            &git_dir,
15895            ObjectFormat::Sha1,
15896            Path::new("f.txt"),
15897            &entry.oid,
15898            entry.mode,
15899            Some(&probe),
15900        )
15901        .expect("test operation should succeed");
15902        assert_eq!(state, WorktreeEntryState::Modified);
15903
15904        fs::remove_dir_all(root).expect("test operation should succeed");
15905    }
15906
15907    #[test]
15908    fn worktree_entry_state_reports_deleted_for_missing_and_parent_not_directory() {
15909        let root = temp_root();
15910        let git_dir = root.join(".git");
15911        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
15912        fs::create_dir_all(root.join("dir")).expect("test operation should succeed");
15913        fs::write(root.join("dir").join("f.txt"), b"hello\n")
15914            .expect("test operation should succeed");
15915        build_commit(&root, &git_dir, &["dir/f.txt"]);
15916        let index = read_index(&git_dir);
15917        let entry = index_entry_for(&index, b"dir/f.txt").clone();
15918
15919        fs::remove_file(root.join("dir").join("f.txt")).expect("test operation should succeed");
15920        let missing = worktree_entry_state_by_git_path(
15921            &root,
15922            &git_dir,
15923            ObjectFormat::Sha1,
15924            b"dir/f.txt",
15925            &entry.oid,
15926            entry.mode,
15927            None,
15928        )
15929        .expect("test operation should succeed");
15930        assert_eq!(missing, WorktreeEntryState::Deleted);
15931
15932        fs::remove_dir(root.join("dir")).expect("test operation should succeed");
15933        fs::write(root.join("dir"), b"not a directory").expect("test operation should succeed");
15934        let parent_not_directory = worktree_entry_state_by_git_path(
15935            &root,
15936            &git_dir,
15937            ObjectFormat::Sha1,
15938            b"dir/f.txt",
15939            &entry.oid,
15940            entry.mode,
15941            None,
15942        )
15943        .expect("test operation should succeed");
15944        assert_eq!(parent_not_directory, WorktreeEntryState::Deleted);
15945
15946        fs::remove_dir_all(root).expect("test operation should succeed");
15947    }
15948
15949    #[test]
15950    fn worktree_entry_state_trusts_clean_non_racy_probe() {
15951        let root = temp_root();
15952        let git_dir = root.join(".git");
15953        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
15954        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
15955        build_commit(&root, &git_dir, &["f.txt"]);
15956        let index_path = repository_index_path(&git_dir);
15957        let mut index = read_index(&git_dir);
15958        let bogus = ObjectId::from_hex(ObjectFormat::Sha1, &"1".repeat(40))
15959            .expect("test operation should succeed");
15960        index
15961            .entries
15962            .iter_mut()
15963            .find(|entry| entry.path == b"f.txt")
15964            .expect("test operation should succeed")
15965            .oid = bogus;
15966        fs::write(
15967            &index_path,
15968            index
15969                .write(ObjectFormat::Sha1)
15970                .expect("test operation should succeed"),
15971        )
15972        .expect("test operation should succeed");
15973        std::thread::sleep(std::time::Duration::from_millis(1100));
15974        fs::write(
15975            &index_path,
15976            fs::read(&index_path).expect("test operation should succeed"),
15977        )
15978        .expect("test operation should succeed");
15979        let index = read_index(&git_dir);
15980        let entry = index_entry_for(&index, b"f.txt").clone();
15981        let probe = IndexStatProbe::from_index_entry_and_index_path(
15982            entry.clone(),
15983            repository_index_path(&git_dir),
15984        );
15985
15986        let state = worktree_entry_state(
15987            &root,
15988            &git_dir,
15989            ObjectFormat::Sha1,
15990            Path::new("f.txt"),
15991            &entry.oid,
15992            entry.mode,
15993            Some(&probe),
15994        )
15995        .expect("test operation should succeed");
15996        assert_eq!(
15997            state,
15998            WorktreeEntryState::Clean,
15999            "a non-racy stat match must be enough to prove this path clean"
16000        );
16001
16002        fs::remove_dir_all(root).expect("test operation should succeed");
16003    }
16004
16005    #[test]
16006    fn worktree_entry_state_rehashes_racy_probe() {
16007        let root = temp_root();
16008        let git_dir = root.join(".git");
16009        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
16010        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
16011        build_commit(&root, &git_dir, &["f.txt"]);
16012        let index = read_index(&git_dir);
16013        let mut entry = index_entry_for(&index, b"f.txt").clone();
16014        entry.oid = ObjectId::from_hex(ObjectFormat::Sha1, &"2".repeat(40))
16015            .expect("test operation should succeed");
16016        let probe = IndexStatProbe::from_index_entry(
16017            entry.clone(),
16018            Some((
16019                u64::from(entry.mtime_seconds),
16020                u64::from(entry.mtime_nanoseconds),
16021            )),
16022        );
16023
16024        let state = worktree_entry_state(
16025            &root,
16026            &git_dir,
16027            ObjectFormat::Sha1,
16028            Path::new("f.txt"),
16029            &entry.oid,
16030            entry.mode,
16031            Some(&probe),
16032        )
16033        .expect("test operation should succeed");
16034        assert_eq!(
16035            state,
16036            WorktreeEntryState::Modified,
16037            "a racily-clean stat match must fall through to hashing"
16038        );
16039
16040        fs::remove_dir_all(root).expect("test operation should succeed");
16041    }
16042
16043    #[cfg(unix)]
16044    #[test]
16045    fn worktree_entry_state_detects_chmod_only_change() {
16046        use std::os::unix::fs::PermissionsExt;
16047
16048        let root = temp_root();
16049        let git_dir = root.join(".git");
16050        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
16051        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
16052        build_commit(&root, &git_dir, &["f.txt"]);
16053        let index = read_index(&git_dir);
16054        let entry = index_entry_for(&index, b"f.txt").clone();
16055
16056        let file = root.join("f.txt");
16057        let mut permissions = fs::metadata(&file)
16058            .expect("test operation should succeed")
16059            .permissions();
16060        permissions.set_mode(permissions.mode() | 0o111);
16061        fs::set_permissions(&file, permissions).expect("test operation should succeed");
16062        let state = worktree_entry_state(
16063            &root,
16064            &git_dir,
16065            ObjectFormat::Sha1,
16066            Path::new("f.txt"),
16067            &entry.oid,
16068            entry.mode,
16069            None,
16070        )
16071        .expect("test operation should succeed");
16072        assert_eq!(state, WorktreeEntryState::Modified);
16073
16074        fs::remove_dir_all(root).expect("test operation should succeed");
16075    }
16076
16077    #[cfg(unix)]
16078    #[test]
16079    fn worktree_entry_state_detects_symlink_target_change() {
16080        use std::os::unix::fs::symlink;
16081
16082        let root = temp_root();
16083        let git_dir = root.join(".git");
16084        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
16085        symlink("one", root.join("link")).expect("test operation should succeed");
16086        build_commit(&root, &git_dir, &["link"]);
16087        let index = read_index(&git_dir);
16088        let entry = index_entry_for(&index, b"link").clone();
16089
16090        fs::remove_file(root.join("link")).expect("test operation should succeed");
16091        symlink("two", root.join("link")).expect("test operation should succeed");
16092        let state = worktree_entry_state(
16093            &root,
16094            &git_dir,
16095            ObjectFormat::Sha1,
16096            Path::new("link"),
16097            &entry.oid,
16098            entry.mode,
16099            None,
16100        )
16101        .expect("test operation should succeed");
16102        assert_eq!(state, WorktreeEntryState::Modified);
16103
16104        fs::remove_dir_all(root).expect("test operation should succeed");
16105    }
16106
16107    #[test]
16108    fn worktree_entry_state_treats_present_unpopulated_gitlink_directory_as_clean() {
16109        let root = temp_root();
16110        let git_dir = root.join(".git");
16111        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
16112        fs::create_dir_all(root.join("submodule")).expect("test operation should succeed");
16113        let oid = ObjectId::from_hex(ObjectFormat::Sha1, &"3".repeat(40))
16114            .expect("test operation should succeed");
16115
16116        let state = worktree_entry_state(
16117            &root,
16118            &git_dir,
16119            ObjectFormat::Sha1,
16120            Path::new("submodule"),
16121            &oid,
16122            sley_index::GITLINK_MODE,
16123            None,
16124        )
16125        .expect("test operation should succeed");
16126        assert_eq!(state, WorktreeEntryState::Clean);
16127
16128        fs::remove_dir_all(root).expect("test operation should succeed");
16129    }
16130
16131    #[test]
16132    fn short_status_empty_on_unborn_repository() {
16133        let root = temp_root();
16134        let git_dir = root.join(".git");
16135        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
16136        fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n")
16137            .expect("test operation should succeed");
16138        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
16139            .expect("test operation should succeed");
16140        assert!(
16141            status.is_empty(),
16142            "an unborn repository with an empty worktree must be clean, got {status:?}"
16143        );
16144        fs::remove_dir_all(root).expect("test operation should succeed");
16145    }
16146
16147    #[test]
16148    fn untracked_paths_skips_embedded_git_internals() {
16149        let root = temp_root();
16150        let git_dir = root.join(".git");
16151        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
16152        fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n")
16153            .expect("test operation should succeed");
16154        let nested = root.join("not-a-submodule");
16155        fs::create_dir_all(nested.join(".git")).expect("test operation should succeed");
16156        fs::write(nested.join(".git/HEAD"), "ref: refs/heads/main\n")
16157            .expect("test operation should succeed");
16158        fs::write(nested.join("file.txt"), b"inside\n").expect("test operation should succeed");
16159        let paths = untracked_paths(&root, &git_dir, ObjectFormat::Sha1)
16160            .expect("test operation should succeed");
16161        assert!(
16162            paths.iter().any(|path| path == b"not-a-submodule/"),
16163            "embedded repository directory should be listed, got {paths:?}"
16164        );
16165        assert!(
16166            !paths
16167                .iter()
16168                .any(|path| path.starts_with(b"not-a-submodule/.git")),
16169            "embedded .git internals must not be listed, got {paths:?}"
16170        );
16171        fs::remove_dir_all(root).expect("test operation should succeed");
16172    }
16173
16174    #[cfg(unix)]
16175    #[test]
16176    fn untracked_paths_lists_symlink() {
16177        use std::os::unix::fs::symlink;
16178
16179        let root = temp_root();
16180        let git_dir = root.join(".git");
16181        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
16182        fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n")
16183            .expect("test operation should succeed");
16184        fs::write(root.join("target.txt"), b"target\n").expect("test operation should succeed");
16185        symlink(root.join("target.txt"), root.join("path1")).expect("create symlink");
16186        let paths = untracked_paths(&root, &git_dir, ObjectFormat::Sha1)
16187            .expect("test operation should succeed");
16188        assert!(
16189            paths.contains(&b"path1".to_vec()),
16190            "untracked symlink must be listed, got {paths:?}"
16191        );
16192        fs::remove_dir_all(root).expect("test operation should succeed");
16193    }
16194}