Skip to main content

sley_worktree/
lib.rs

1use sley_config::GitConfig;
2use sley_core::{
3    BString, GitError, MissingObjectContext, MissingObjectKind, ObjectFormat, ObjectId, RepoPath,
4    Result,
5};
6use sley_index::{BorrowedIndex, CacheTree, Index, IndexEntry, IndexEntryRef, Stage};
7use sley_object::{Commit, EncodedObject, ObjectType, Tree, TreeEntry, tree_entry_object_type};
8use sley_odb::{FileObjectDatabase, ObjectPresenceChecker, ObjectReader, ObjectWriter};
9use sley_refs::{FileRefStore, RefTarget, RefUpdate, ReflogEntry, branch_ref_name};
10use std::borrow::Cow;
11use std::cell::RefCell;
12use std::cmp::Ordering;
13use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
14use std::io::Write;
15use std::path::{Path, PathBuf};
16use std::process::{Command, Stdio};
17use std::sync::{Mutex, OnceLock};
18use std::time::UNIX_EPOCH;
19use std::{env, fs};
20
/// One status classification for the worktree, as returned (in a list) by
/// [`WorktreeScanner::status`].
///
/// Every variant except [`WorktreeStatus::Clean`] carries the [`RepoPath`] it
/// refers to.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WorktreeStatus {
    /// Nothing to report; this variant carries no path.
    Clean,
    /// The path is reported as modified.
    Modified(RepoPath),
    /// The path is reported as added.
    Added(RepoPath),
    /// The path is reported as deleted.
    Deleted(RepoPath),
    /// The path is reported as untracked.
    Untracked(RepoPath),
}
29
/// Something that can report the status of a worktree.
pub trait WorktreeScanner {
    /// Return the collected [`WorktreeStatus`] values, or an error if the scan
    /// failed.
    fn status(&self) -> Result<Vec<WorktreeStatus>>;
}
33
/// A sparse-checkout definition: the raw pattern lines plus the sparse-index
/// flag. How the patterns are interpreted is selected separately through
/// [`SparseCheckoutMode`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SparseCheckout {
    /// The patterns, one byte string per pattern.
    pub patterns: Vec<Vec<u8>>,
    /// NOTE(review): presumably mirrors git's sparse-index setting — confirm
    /// against the code that reads and writes this flag.
    pub sparse_index: bool,
}
39
/// Selects how the patterns in a [`SparseCheckout`] are interpreted when
/// deciding which index paths are "in cone" (kept in the worktree).
///
/// * [`SparseCheckoutMode::Full`] interprets the patterns exactly like
///   `.gitignore` lines (full pattern matching, including `*`, `?`, `**`,
///   character classes, anchoring with a leading `/`, directory-only `/`
///   suffixes, and `!` negation). A path is *included* when the last pattern
///   that matches it is not negated. This mirrors upstream Git's non-cone
///   `core.sparseCheckout` behaviour.
/// * [`SparseCheckoutMode::Cone`] interprets the patterns as the restricted
///   directory-prefix form Git emits for `core.sparseCheckoutCone`: a literal
///   `/*` (top-level files), the recursive-parent guard `!/*/`, and anchored
///   directory patterns such as `/dir/` (everything under `dir/`) plus the
///   parent guards `/dir/*` and `!/dir/*/`. Matching is purely prefix based,
///   so glob metacharacters are treated literally.
/// * [`SparseCheckoutMode::Auto`] inspects the patterns and uses cone matching
///   when every pattern fits the cone grammar above, otherwise full matching.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SparseCheckoutMode {
    /// Pick cone or full matching by inspecting the patterns; this is the
    /// `Default` variant.
    #[default]
    Auto,
    /// Always use full `.gitignore`-style pattern matching.
    Full,
    /// Always use the restricted, prefix-based cone matching.
    Cone,
}
64
/// Outcome of applying a sparse checkout, partitioning the affected paths into
/// the three lists below.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ApplySparseResult {
    /// Paths whose worktree file was (re)materialized because they are in cone.
    pub materialized: Vec<Vec<u8>>,
    /// Paths that were taken out of the worktree because they are out of cone;
    /// their index entry now has the skip-worktree bit set.
    pub skipped: Vec<Vec<u8>>,
    /// Out-of-cone paths whose worktree file was *not* up to date with the index
    /// and was therefore left in place (and its skip-worktree bit left clear),
    /// matching git's data-loss-avoiding behavior. The caller surfaces these as
    /// git's "The following paths are not up to date …" warning. Sorted by path.
    pub not_up_to_date: Vec<Vec<u8>>,
}
78
/// Summary returned by the `update_index_*` / `add_paths_to_index*` entry
/// points in this file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UpdateIndexResult {
    /// NOTE(review): presumably the number of entries in the index after the
    /// update — confirm against `update_index_paths_impl`.
    pub entries: usize,
    /// NOTE(review): presumably the object ids written or staged by this
    /// update — confirm against `update_index_paths_impl`.
    pub updated: Vec<ObjectId>,
}
84
/// One action reported for a tracked path; `add_update_all_tracked_filtered`
/// returns a list of these.
///
/// Both variants carry a raw byte path (presumably the repository-relative git
/// path — TODO confirm against the producers).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AddUpdateTrackedAction {
    /// The path was (re)added.
    Add(Vec<u8>),
    /// The path was removed.
    Remove(Vec<u8>),
}
90
/// Result of attempting to handle a single exact tracked path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AddExactTrackedPathResult {
    /// The path was handled; the payload is the action taken, if any
    /// (presumably `None` means nothing needed to change — TODO confirm).
    Handled(Option<AddUpdateTrackedAction>),
    /// The path could not be handled by this route (presumably the caller
    /// should fall back to a more general one — TODO confirm).
    Unsupported,
}
96
/// An explicit index entry description: mode, object id, path and stage.
///
/// NOTE(review): presumably the parsed form of `git update-index --cacheinfo`
/// / `--index-info` input — confirm against the callers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CacheInfoEntry {
    /// File mode for the entry.
    pub mode: u32,
    /// Object id the entry points at.
    pub oid: ObjectId,
    /// Raw byte path of the entry.
    pub path: Vec<u8>,
    /// Stage number of the entry.
    pub stage: u16,
}
104
/// One index-info record: either an entry to add or a path to remove.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexInfoRecord {
    /// Add the described entry.
    Add(CacheInfoEntry),
    /// Remove the entry at `path` (raw bytes).
    Remove { path: Vec<u8> },
}
110
/// Batch-wide options for the `git add`-style callers that apply one uniform
/// mode to every path. The positional `add`/`remove`/`force_remove`/`info_only`/
/// `chmod` fields describe that uniform mode; `ignore_skip_worktree_entries` is
/// a genuine whole-invocation toggle (it is not positional in git either).
///
/// `git update-index <flag> <path>...` does NOT use this for its per-path mode —
/// it builds [`UpdateIndexPath`] values directly, each carrying the sticky mode
/// in effect when that path was parsed. See [`UpdateIndexPath`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct UpdateIndexOptions {
    /// Uniform `--add` mode flag.
    pub add: bool,
    /// Uniform `--remove` mode flag.
    pub remove: bool,
    /// Uniform `--force-remove` mode flag.
    pub force_remove: bool,
    /// Uniform `--chmod` mode; `None` when no `--chmod` applies.
    pub chmod: Option<bool>,
    /// Uniform `--info-only` mode flag.
    pub info_only: bool,
    /// Whole-invocation toggle; it is not part of the per-path mode.
    pub ignore_skip_worktree_entries: bool,
}
128
129impl UpdateIndexOptions {
130    /// The uniform per-path mode this batch applies to every path.
131    fn path_mode(&self) -> UpdateIndexPathMode {
132        UpdateIndexPathMode {
133            add: self.add,
134            remove: self.remove,
135            force_remove: self.force_remove,
136            info_only: self.info_only,
137            chmod: self.chmod,
138        }
139    }
140}
141
/// The *mode* that was active at the point a single positional `update-index`
/// path was seen on the command line. It travels with the path inside
/// [`UpdateIndexPath`].
///
/// git's `update-index` processes argv left-to-right with `parse_options_step`
/// (`PARSE_OPT_STOP_AT_NON_OPTION`): the mode flags `--add`/`--remove`/
/// `--force-remove`/`--info-only`/`--chmod` set sticky global state, and each
/// non-option path is handed to `update_one()` under whatever state is in
/// effect *at that point*. So `--add foo --force-remove bar` ADDs `foo` and
/// FORCE-REMOVEs `bar` — the flags are positional, not global. We mirror that
/// by snapshotting the mode onto each path as it is parsed, rather than
/// applying one batch-wide `UpdateIndexOptions` to every path.
///
/// `--chmod=(+|-)x` is likewise sticky (`--chmod=+x A --chmod=-x B` flips A
/// executable and B non-executable). Each path reports its action
/// (`add '<p>'`, `remove '<p>'`, `chmod (+|-)x '<p>'`) inline under `--verbose`,
/// interleaved in command-line order — which is why the mode must travel with
/// the path.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct UpdateIndexPathMode {
    /// `--add` was in effect for this path.
    pub add: bool,
    /// `--remove` was in effect for this path.
    pub remove: bool,
    /// `--force-remove` was in effect for this path.
    pub force_remove: bool,
    /// `--info-only` was in effect for this path.
    pub info_only: bool,
    /// `--chmod=+x` → `Some(true)`, `--chmod=-x` → `Some(false)`, else `None`.
    pub chmod: Option<bool>,
}
168
/// A path to update in the index together with the [`UpdateIndexPathMode`]
/// that applies to that specific path.
#[derive(Debug, Clone)]
pub struct UpdateIndexPath {
    /// The path to process.
    pub path: PathBuf,
    /// The mode snapshot to apply to `path`.
    pub mode: UpdateIndexPathMode,
}
174
/// Options for writing a tree from the index.
///
/// NOTE(review): the fields presumably mirror `git write-tree --missing-ok`
/// and `--prefix=<prefix>/` — confirm against the consumer.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct WriteTreeOptions {
    /// Defaults to `false`.
    pub missing_ok: bool,
    /// Optional raw byte prefix; defaults to `None`.
    pub prefix: Option<Vec<u8>>,
}
180
/// One entry of a short-format status listing.
///
/// `index` and `worktree` are the two status bytes rendered (as characters) at
/// the start of [`ShortStatusEntry::line`]; an entry with `b'?'` in both
/// columns is an untracked path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ShortStatusEntry {
    /// Status byte for the index column.
    pub index: u8,
    /// Status byte for the worktree column.
    pub worktree: u8,
    /// Raw byte path of the entry.
    pub path: Vec<u8>,
    /// Mode of the path in HEAD, when known.
    pub head_mode: Option<u32>,
    /// Mode of the path in the index, when known.
    pub index_mode: Option<u32>,
    /// Mode of the path in the worktree, when known.
    pub worktree_mode: Option<u32>,
    /// Object id of the path in HEAD, when known.
    pub head_oid: Option<ObjectId>,
    /// Object id of the path in the index, when known.
    pub index_oid: Option<ObjectId>,
    /// For a tracked gitlink (submodule) path: how the submodule's working
    /// state differs from the staged gitlink. `None` for ordinary paths.
    pub submodule: Option<SubmoduleStatus>,
}
195
/// Per-entry submodule change detail, modelled on upstream's
/// `wt_status_change_data` trio: `new_submodule_commits` together with the
/// `DIRTY_SUBMODULE_MODIFIED` / `DIRTY_SUBMODULE_UNTRACKED` dirty bits.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct SubmoduleStatus {
    /// Set when the submodule's checked-out HEAD is not the staged gitlink oid.
    pub new_commits: bool,
    /// Set when tracked files in the submodule have staged or unstaged changes.
    pub modified_content: bool,
    /// Set when the submodule contains untracked files.
    pub untracked_content: bool,
}

impl SubmoduleStatus {
    /// Whether at least one of the three change flags is set.
    pub fn any(&self) -> bool {
        let flags = [
            self.new_commits,
            self.modified_content,
            self.untracked_content,
        ];
        flags.iter().any(|&flag| flag)
    }
}
214
/// Dirt-mask bit for a submodule with staged or unstaged changes to tracked
/// files (upstream `DIRTY_SUBMODULE_MODIFIED`).
pub const DIRTY_SUBMODULE_MODIFIED: u8 = 1 << 0;
/// Dirt-mask bit for a submodule that contains untracked files (upstream
/// `DIRTY_SUBMODULE_UNTRACKED`).
pub const DIRTY_SUBMODULE_UNTRACKED: u8 = 1 << 1;
221
222/// Inspect the working state of the submodule whose worktree is at `sub_root`
223/// and report its dirt mask: [`DIRTY_SUBMODULE_MODIFIED`] for staged/unstaged
224/// changes to tracked files, [`DIRTY_SUBMODULE_UNTRACKED`] for untracked
225/// files. Returns 0 for a clean submodule — and for a directory that is not a
226/// populated repository at all (upstream treats an unpopulated gitlink as
227/// always unchanged). The native equivalent of upstream's
228/// `is_submodule_modified()` (which runs `git status --porcelain=2` inside the
229/// submodule and classifies `?` lines as untracked, everything else as
230/// modified).
231pub fn submodule_dirt(sub_root: &Path) -> u8 {
232    let Some(git_dir) = sley_diff_merge::gitlink_git_dir(sub_root) else {
233        return 0;
234    };
235    let Ok(config) = sley_config::read_repo_config(&git_dir, None) else {
236        return 0;
237    };
238    let Ok(format) = config.repository_object_format() else {
239        return 0;
240    };
241    let Ok(entries) = short_status_with_options(
242        sub_root,
243        &git_dir,
244        format,
245        ShortStatusOptions {
246            include_ignored: false,
247            ignored_mode: StatusIgnoredMode::Traditional,
248            untracked_mode: StatusUntrackedMode::Normal,
249        },
250    ) else {
251        return 0;
252    };
253    let mut dirt = 0;
254    for entry in entries {
255        if entry.index == b'?' && entry.worktree == b'?' {
256            dirt |= DIRTY_SUBMODULE_UNTRACKED;
257        } else {
258            dirt |= DIRTY_SUBMODULE_MODIFIED;
259        }
260    }
261    dirt
262}
263
/// How untracked files are reported by status.
///
/// NOTE(review): presumably mirrors `git status --untracked-files=<mode>`
/// (`all` / `normal` / `no`) — confirm against the status implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StatusUntrackedMode {
    /// The `Default` variant.
    #[default]
    All,
    /// The mode `submodule_dirt` uses when inspecting a submodule.
    Normal,
    /// NOTE(review): presumably suppresses untracked reporting — confirm.
    None,
}
271
/// How ignored files are reported by status.
///
/// NOTE(review): presumably mirrors `git status --ignored=<mode>`
/// (`traditional` / `matching`) — confirm against the status implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StatusIgnoredMode {
    /// The `Default` variant.
    #[default]
    Traditional,
    /// The alternative, non-default mode.
    Matching,
}
278
/// Options accepted by `short_status_with_options`.
///
/// The derived `Default` is `include_ignored: false` with the default
/// [`StatusIgnoredMode`] and [`StatusUntrackedMode`] variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct ShortStatusOptions {
    /// Whether ignored paths are included in the listing.
    pub include_ignored: bool,
    /// How ignored paths are reported.
    pub ignored_mode: StatusIgnoredMode,
    /// How untracked paths are reported.
    pub untracked_mode: StatusUntrackedMode,
}
285
/// The worktree state of one tracked path relative to an expected index/tree
/// entry.
///
/// NOTE(review): presumably the result type of the `worktree_entry_state*`
/// helpers referenced in the [`IndexStatProbe`] documentation — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WorktreeEntryState {
    /// The path exists in the worktree and matches the expected mode/object id.
    Clean,
    /// The path exists, but its type, mode, filtered content, symlink target, or
    /// gitlink HEAD differs from the expected entry.
    Modified,
    /// The path, or one of its parents, is missing from the worktree.
    Deleted,
}
298
/// Durability options for an atomic metadata write. Both flags default to
/// `false`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct AtomicMetadataWriteOptions {
    /// NOTE(review): presumably requests an fsync of the written file —
    /// confirm against the writer.
    pub fsync_file: bool,
    /// NOTE(review): presumably requests an fsync of the containing
    /// directory — confirm against the writer.
    pub fsync_dir: bool,
}
304
/// What an atomic metadata write reports back about the file it produced.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AtomicMetadataWriteResult {
    /// Path of the written file.
    pub path: PathBuf,
    /// Length of the written file in bytes.
    pub len: u64,
    /// Modification time of the written file, when available (presumably split
    /// as `(seconds, nanoseconds)` like the index mtimes in this file — TODO
    /// confirm).
    pub mtime: Option<(u64, u64)>,
}
311
/// Stage-0 index stat data that can prove a worktree path clean without
/// re-reading and re-hashing it.
///
/// This is the public carrier for sley's racy-git shortcut. Callers that already
/// parsed `.git/index` can build a probe from the matching [`IndexEntry`] and
/// the index file's mtime, then pass it to [`worktree_entry_state`] or
/// [`worktree_entry_state_by_git_path`]. The probe is trusted only when its path,
/// mode, and object id match the expected entry and the cached stat is not
/// racily clean; otherwise the helper falls back to the same content hashing
/// path used by [`short_status_with_options`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexStatProbe {
    // The parsed index entry whose cached stat data backs the probe.
    entry: IndexEntry,
    // Index file mtime as `(seconds, nanoseconds)`; the reference timestamp
    // for racy-clean checks. `None` when it could not be determined.
    index_mtime: Option<(u64, u64)>,
}
327
/// Reusable stage-0 index stat probes for many worktree paths.
///
/// Prefer this over repeated [`IndexStatProbe::from_repository_index`] calls
/// when an embedder needs to verify many paths. It parses `.git/index` once,
/// records the index file mtime used for racy-git checks, and serves cheap
/// per-path probes from memory.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct IndexStatProbeCache {
    // Stage-0 index entries keyed by their raw git path bytes.
    entries: HashMap<Vec<u8>, IndexEntry>,
    // Index file mtime shared by every probe handed out from this cache.
    index_mtime: Option<(u64, u64)>,
}
339
340impl IndexStatProbe {
341    /// Build a probe from a parsed stage-0 index entry and the index file's mtime
342    /// split as `(seconds, nanoseconds)`.
343    pub fn from_index_entry(entry: IndexEntry, index_mtime: Option<(u64, u64)>) -> Self {
344        Self { entry, index_mtime }
345    }
346
347    /// Build a probe from a parsed index entry and the path of the index file on
348    /// disk, using that file's mtime as the racy-clean reference timestamp.
349    pub fn from_index_entry_and_index_path(
350        entry: IndexEntry,
351        index_path: impl AsRef<Path>,
352    ) -> Self {
353        let index_mtime = fs::metadata(index_path.as_ref())
354            .ok()
355            .and_then(|metadata| file_mtime_parts(&metadata));
356        Self { entry, index_mtime }
357    }
358
359    /// Read this repository's index and return a probe for `git_path` when a
360    /// stage-0 entry exists.
361    ///
362    /// For repeated lookups prefer [`IndexStatProbeCache::from_repository_index`]
363    /// and [`IndexStatProbeCache::probe_for_git_path`]. This one-shot helper
364    /// keeps a small process-local cache for back-to-back calls against an
365    /// unchanged index, but the explicit cache makes ownership and invalidation
366    /// clearer for high-volume embedders.
367    pub fn from_repository_index(
368        git_dir: impl AsRef<Path>,
369        format: ObjectFormat,
370        git_path: &[u8],
371    ) -> Result<Option<Self>> {
372        let index_path = repository_index_path(git_dir);
373        cached_repository_index_stat_probe(&index_path, format, git_path)
374    }
375
376    /// The parsed index entry this probe was built from.
377    pub fn entry(&self) -> &IndexEntry {
378        &self.entry
379    }
380
381    /// The index file mtime used as the racy-clean reference timestamp.
382    pub fn index_mtime(&self) -> Option<(u64, u64)> {
383        self.index_mtime
384    }
385
386    fn stat_cache_for(
387        &self,
388        git_path: &[u8],
389        expected_oid: &ObjectId,
390        expected_mode: u32,
391    ) -> Option<IndexStatCache> {
392        if index_entry_stage(&self.entry) != 0
393            || self.entry.path.as_bytes() != git_path
394            || self.entry.oid != *expected_oid
395            || self.entry.mode != expected_mode
396        {
397            return None;
398        }
399        let mut entries = HashMap::new();
400        entries.insert(git_path.to_vec(), self.entry.clone());
401        Some(IndexStatCache {
402            entries,
403            index_mtime: self.index_mtime,
404        })
405    }
406}
407
408impl IndexStatProbeCache {
409    /// Build a reusable probe cache from an already parsed index and index-file
410    /// mtime.
411    pub fn from_index(index: &Index, index_mtime: Option<(u64, u64)>) -> Self {
412        Self {
413            entries: stage0_index_entries(index),
414            index_mtime,
415        }
416    }
417
418    /// Read this repository's index once and build reusable stat probes.
419    ///
420    /// A missing index returns an empty cache, matching the one-shot helper's
421    /// `Ok(None)` result for every path.
422    pub fn from_repository_index(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Result<Self> {
423        let index_path = repository_index_path(git_dir);
424        read_index_stat_probe_cache(&index_path, format)
425    }
426
427    /// Return a per-path probe for a stage-0 entry, if present.
428    pub fn probe_for_git_path(&self, git_path: &[u8]) -> Option<IndexStatProbe> {
429        self.entries
430            .get(git_path)
431            .cloned()
432            .map(|entry| IndexStatProbe {
433                entry,
434                index_mtime: self.index_mtime,
435            })
436    }
437
438    /// Whether this cache has a stage-0 entry for `git_path`.
439    pub fn contains_git_path(&self, git_path: &[u8]) -> bool {
440        self.entries.contains_key(git_path)
441    }
442
443    /// Number of stage-0 entries in the cache.
444    pub fn len(&self) -> usize {
445        self.entries.len()
446    }
447
448    /// Whether the cache has no stage-0 entries.
449    pub fn is_empty(&self) -> bool {
450        self.entries.is_empty()
451    }
452
453    /// The index file mtime used as the racy-clean reference timestamp.
454    pub fn index_mtime(&self) -> Option<(u64, u64)> {
455        self.index_mtime
456    }
457}
458
/// The single slot of the process-local probe cache: the probes parsed from
/// one index file plus the key they were parsed under.
///
/// `index_path`, `format`, `len` and `mtime` together form the cache key; the
/// slot is reused only when all four match the current lookup.
#[derive(Clone)]
struct CachedRepositoryIndexStatProbes {
    // Path of the index file the probes were read from.
    index_path: PathBuf,
    // Object format the index was parsed with.
    format: ObjectFormat,
    // Size of the index file when it was parsed.
    len: u64,
    // Mtime of the index file when it was parsed.
    mtime: Option<(u64, u64)>,
    // The parsed stage-0 probes.
    probes: IndexStatProbeCache,
}
467
// Process-local, single-slot cache behind
// `IndexStatProbe::from_repository_index`. The mutex is created lazily on the
// first lookup that reaches an existing index file; the slot holds the probes
// of the most recently parsed index, or `None`.
static REPOSITORY_INDEX_STAT_PROBES: OnceLock<Mutex<Option<CachedRepositoryIndexStatProbes>>> =
    OnceLock::new();
470
471fn cached_repository_index_stat_probe(
472    index_path: &Path,
473    format: ObjectFormat,
474    git_path: &[u8],
475) -> Result<Option<IndexStatProbe>> {
476    let metadata = match fs::metadata(index_path) {
477        Ok(metadata) => metadata,
478        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
479            if let Some(cache) = REPOSITORY_INDEX_STAT_PROBES.get()
480                && let Ok(mut guard) = cache.lock()
481            {
482                *guard = None;
483            }
484            return Ok(None);
485        }
486        Err(err) => return Err(err.into()),
487    };
488    let len = metadata.len();
489    let mtime = file_mtime_parts(&metadata);
490    let cache = REPOSITORY_INDEX_STAT_PROBES.get_or_init(|| Mutex::new(None));
491    if let Ok(guard) = cache.lock()
492        && let Some(cached) = guard.as_ref()
493        && cached.index_path == index_path
494        && cached.format == format
495        && cached.len == len
496        && cached.mtime == mtime
497    {
498        return Ok(cached.probes.probe_for_git_path(git_path));
499    }
500
501    let probes = read_index_stat_probe_cache_with_metadata(index_path, format, mtime)?;
502    let probe = probes.probe_for_git_path(git_path);
503    if let Ok(mut guard) = cache.lock() {
504        *guard = Some(CachedRepositoryIndexStatProbes {
505            index_path: index_path.to_path_buf(),
506            format,
507            len,
508            mtime,
509            probes: probes.clone(),
510        });
511    }
512    Ok(probe)
513}
514
515fn read_index_stat_probe_cache(
516    index_path: &Path,
517    format: ObjectFormat,
518) -> Result<IndexStatProbeCache> {
519    let metadata = match fs::metadata(index_path) {
520        Ok(metadata) => metadata,
521        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
522            return Ok(IndexStatProbeCache::default());
523        }
524        Err(err) => return Err(err.into()),
525    };
526    read_index_stat_probe_cache_with_metadata(index_path, format, file_mtime_parts(&metadata))
527}
528
529fn read_index_stat_probe_cache_with_metadata(
530    index_path: &Path,
531    format: ObjectFormat,
532    index_mtime: Option<(u64, u64)>,
533) -> Result<IndexStatProbeCache> {
534    let bytes = fs::read(index_path)?;
535    let index = Index::parse(&bytes, format)?;
536    Ok(IndexStatProbeCache::from_index(&index, index_mtime))
537}
538
539fn stage0_index_entries(index: &Index) -> HashMap<Vec<u8>, IndexEntry> {
540    let mut entries = HashMap::new();
541    for entry in &index.entries {
542        if index_entry_stage(entry) == 0 {
543            entries.insert(entry.path.as_bytes().to_vec(), entry.clone());
544        }
545    }
546    entries
547}
548
/// Summary of a checkout.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CheckoutResult {
    /// Name of the branch involved in the checkout.
    pub branch: String,
    /// Object id associated with the checkout (presumably the commit that was
    /// checked out — TODO confirm against the producer).
    pub oid: ObjectId,
    /// A file count (presumably the number of files written — TODO confirm).
    pub files: usize,
}
555
/// Summary of a restore operation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RestoreResult {
    /// Count of restored items (presumably paths — TODO confirm).
    pub restored: usize,
}
560
/// Summary of a remove operation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RemoveResult {
    /// Raw byte paths that were removed.
    pub removed: Vec<Vec<u8>>,
}
565
/// Outcome of moving one source to one destination.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MoveResult {
    /// Raw byte path of the move source.
    pub source: Vec<u8>,
    /// Raw byte path of the move destination.
    pub destination: Vec<u8>,
    /// Whether this move was skipped.
    pub skipped: bool,
    /// A fatal error message, if one occurred.
    pub fatal: Option<String>,
    /// Per-path details belonging to this move (presumably one per file when a
    /// directory is moved — TODO confirm against the producer).
    pub details: Vec<MoveDetail>,
}
574
/// One source/destination pair reported inside a [`MoveResult`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MoveDetail {
    /// Raw byte path of the source.
    pub source: Vec<u8>,
    /// Raw byte path of the destination.
    pub destination: Vec<u8>,
    /// Whether this pair was skipped.
    pub skipped: bool,
}
581
/// Locate the index file for the repository at `git_dir`.
///
/// The `GIT_INDEX_FILE` environment variable, when set, overrides the location
/// and is used as given; otherwise the path is `<git_dir>/index`.
pub fn repository_index_path(git_dir: impl AsRef<Path>) -> PathBuf {
    match env::var_os("GIT_INDEX_FILE") {
        Some(overridden) => PathBuf::from(overridden),
        None => git_dir.as_ref().join("index"),
    }
}
587
588pub fn read_repository_index(
589    git_dir: impl AsRef<Path>,
590    format: ObjectFormat,
591) -> Result<Option<Index>> {
592    let index_path = repository_index_path(git_dir);
593    if !index_path.exists() {
594        return Ok(None);
595    }
596    Ok(Some(Index::parse(&fs::read(index_path)?, format)?))
597}
598
599fn empty_index() -> Index {
600    Index {
601        version: 2,
602        entries: Vec::new(),
603        extensions: Vec::new(),
604        checksum: None,
605    }
606}
607
608/// Resolve the working-tree root for a repository identified by its git
609/// directory, returning `Ok(None)` for a bare repository.
610///
611/// This is the repository-intrinsic worktree resolution (it does *not* consult
612/// `GIT_WORK_TREE`/`GIT_DIR` or CLI overrides — those are the caller's job):
613///
614/// 0. if `core.bare` is true the repository is bare and `Ok(None)` is returned
615///    immediately — `core.bare` takes precedence, so a bare repo ignores
616///    `core.worktree` and the `.git`-parent fallback;
617/// 1. otherwise, a `core.worktree` setting in `<git_dir>/config` (absolute, or
618///    relative to the git directory), canonicalised;
619/// 2. otherwise, for a linked worktree (a git directory that has both a
620///    `commondir` and a `gitdir` administrative file), the directory containing
621///    the worktree's `.git` link, canonicalised;
622/// 3. otherwise, when the git directory is a `.git` directory, its parent (the
623///    ordinary non-bare layout) — returned verbatim, not canonicalised;
624/// 4. otherwise the repository is bare and `Ok(None)` is returned.
625///
626/// `Ok(None)` means specifically "bare" (case 0 or case 4). A [`GitError::Io`] is
627/// returned if a path that should exist cannot be canonicalised, and a
628/// [`GitError::InvalidPath`] if a `.git` directory has no parent (a malformed
629/// layout).
630pub fn worktree_root_for_git_dir(git_dir: &Path) -> Result<Option<PathBuf>> {
631    if let Ok(config) = sley_config::read_repo_config(git_dir, None) {
632        // A bare repository has no working tree, and `core.bare` takes precedence:
633        // a bare repo ignores `core.worktree`. Check it before any worktree
634        // resolution so a bare `.git`-named directory does not fall through to the
635        // "parent of .git" case below.
636        if config.get_bool("core", None, "bare") == Some(true) {
637            return Ok(None);
638        }
639        if let Some(worktree) = config.get("core", None, "worktree") {
640            let worktree = PathBuf::from(worktree);
641            let worktree = if worktree.is_absolute() {
642                worktree
643            } else {
644                git_dir.join(worktree)
645            };
646            return fs::canonicalize(worktree)
647                .map(Some)
648                .map_err(|err| GitError::Io(err.to_string()));
649        }
650    }
651    if git_dir.join("commondir").is_file() {
652        let gitdir_file = git_dir.join("gitdir");
653        if gitdir_file.is_file() {
654            let value = fs::read_to_string(&gitdir_file)?;
655            let worktree_git_file = resolve_worktree_admin_path(git_dir, value.trim());
656            if let Some(worktree) = worktree_git_file.parent() {
657                return fs::canonicalize(worktree)
658                    .map(Some)
659                    .map_err(|err| GitError::Io(err.to_string()));
660            }
661        }
662    }
663    if git_dir.file_name().and_then(|name| name.to_str()) != Some(".git") {
664        return Ok(None);
665    }
666    git_dir
667        .parent()
668        .map(Path::to_path_buf)
669        .map(Some)
670        .ok_or_else(|| GitError::InvalidPath("git dir has no parent worktree".into()))
671}
672
/// Turn a path read from a git-directory administrative file (such as the
/// `gitdir` link of a linked worktree) into a usable path: an absolute value
/// is taken as-is, a relative one is anchored at `admin_dir`.
fn resolve_worktree_admin_path(admin_dir: &Path, value: &str) -> PathBuf {
    // `Path::join` already implements exactly this rule: joining an absolute
    // path replaces the base, joining a relative one appends to it.
    admin_dir.join(value)
}
684
/// Report whether the repository at `git_dir` is shallow, i.e. whether it has
/// a `shallow` file recording grafted commit boundaries (`git clone --depth`).
///
/// Any failure to stat the file counts as "not shallow".
pub fn is_shallow_repository(git_dir: &Path) -> bool {
    let shallow_marker = git_dir.join("shallow");
    fs::metadata(shallow_marker).is_ok()
}
690
/// Options for a remove operation.
///
/// NOTE(review): the fields presumably mirror `git rm`'s `-r`, `--cached`,
/// `--force`, `--dry-run` and `--ignore-unmatch` — confirm against the
/// consumer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RemoveOptions {
    /// Recursive removal flag.
    pub recursive: bool,
    /// Cached (index-only) flag.
    pub cached: bool,
    /// Force flag.
    pub force: bool,
    /// Dry-run flag.
    pub dry_run: bool,
    /// Ignore-unmatch flag.
    pub ignore_unmatch: bool,
}
699
/// Options for a move operation.
///
/// NOTE(review): the fields presumably mirror `git mv`'s `-f`, `-n` and `-k` —
/// confirm against the consumer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MoveOptions {
    /// Force flag.
    pub force: bool,
    /// Dry-run flag.
    pub dry_run: bool,
    /// Skip-errors flag.
    pub skip_errors: bool,
}
706
707impl ShortStatusEntry {
708    pub fn line(&self) -> String {
709        format!(
710            "{}{} {}",
711            self.index as char,
712            self.worktree as char,
713            String::from_utf8_lossy(&self.path)
714        )
715    }
716}
717
718pub fn add_paths_to_index(
719    worktree_root: impl AsRef<Path>,
720    git_dir: impl AsRef<Path>,
721    format: ObjectFormat,
722    paths: &[PathBuf],
723) -> Result<UpdateIndexResult> {
724    update_index_paths(
725        worktree_root,
726        git_dir,
727        format,
728        paths,
729        UpdateIndexOptions {
730            add: true,
731            remove: false,
732            force_remove: false,
733            chmod: None,
734            info_only: false,
735            ignore_skip_worktree_entries: false,
736        },
737    )
738}
739
740pub fn update_index_paths(
741    worktree_root: impl AsRef<Path>,
742    git_dir: impl AsRef<Path>,
743    format: ObjectFormat,
744    paths: &[PathBuf],
745    options: UpdateIndexOptions,
746) -> Result<UpdateIndexResult> {
747    let git_dir = git_dir.as_ref();
748    let index = read_repository_index(git_dir, format)?.unwrap_or_else(empty_index);
749    update_index_paths_with_index(worktree_root, git_dir, format, index, paths, options)
750}
751
752pub fn update_index_paths_with_index(
753    worktree_root: impl AsRef<Path>,
754    git_dir: impl AsRef<Path>,
755    format: ObjectFormat,
756    index: Index,
757    paths: &[PathBuf],
758    options: UpdateIndexOptions,
759) -> Result<UpdateIndexResult> {
760    let ordered = ordered_paths_from_plain(paths, options);
761    update_index_paths_impl(
762        worktree_root.as_ref(),
763        git_dir.as_ref(),
764        format,
765        index,
766        &ordered,
767        options,
768        None,
769        false,
770    )
771}
772
773/// Stamp a single uniform mode (from a batch-wide [`UpdateIndexOptions`]) onto
774/// every path. Used by the `git add`-style callers that genuinely apply one
775/// mode to all paths; the positional `git update-index <flag> <path>...` path
776/// instead snapshots a distinct mode per path in the CLI parse walk.
777fn ordered_paths_from_plain(paths: &[PathBuf], options: UpdateIndexOptions) -> Vec<UpdateIndexPath> {
778    let mode = options.path_mode();
779    paths
780        .iter()
781        .map(|path| UpdateIndexPath {
782            path: path.clone(),
783            mode,
784        })
785        .collect()
786}
787
788/// Stage an ordered list of paths, each carrying its own `--chmod` state, and
789/// (under `verbose`) print the `add`/`remove`/`chmod` action lines inline in
790/// command-line order. This is the entry point `git update-index <path>...`
791/// uses so that `--chmod=+x A --chmod=-x B --verbose` produces the interleaved
792/// `add 'A'` / `chmod +x 'A'` / `add 'B'` / `chmod -x 'B'` output git emits.
793pub fn update_index_ordered_paths_filtered(
794    worktree_root: impl AsRef<Path>,
795    git_dir: impl AsRef<Path>,
796    format: ObjectFormat,
797    paths: &[UpdateIndexPath],
798    options: UpdateIndexOptions,
799    config: &GitConfig,
800    verbose: bool,
801) -> Result<UpdateIndexResult> {
802    let git_dir = git_dir.as_ref();
803    let index = read_repository_index(git_dir, format)?.unwrap_or_else(empty_index);
804    update_index_ordered_paths_filtered_with_index(
805        worktree_root,
806        git_dir,
807        format,
808        index,
809        paths,
810        options,
811        config,
812        verbose,
813    )
814}
815
816pub fn update_index_ordered_paths_filtered_with_index(
817    worktree_root: impl AsRef<Path>,
818    git_dir: impl AsRef<Path>,
819    format: ObjectFormat,
820    index: Index,
821    paths: &[UpdateIndexPath],
822    options: UpdateIndexOptions,
823    config: &GitConfig,
824    verbose: bool,
825) -> Result<UpdateIndexResult> {
826    update_index_paths_impl(
827        worktree_root.as_ref(),
828        git_dir.as_ref(),
829        format,
830        index,
831        paths,
832        options,
833        Some(config),
834        verbose,
835    )
836}
837
838/// Like [`add_paths_to_index`], but runs the configured content filters
839/// (`core.autocrlf`/`text`/`eol` EOL conversion and `filter.<name>.clean`
840/// drivers) on each file's contents before hashing it into the object store.
841///
842/// `config` is the repository config used to resolve the filters; pass the
843/// parsed `<git_dir>/config` (the orchestrator typically already has this).
844pub fn add_paths_to_index_filtered(
845    worktree_root: impl AsRef<Path>,
846    git_dir: impl AsRef<Path>,
847    format: ObjectFormat,
848    paths: &[PathBuf],
849    config: &GitConfig,
850) -> Result<UpdateIndexResult> {
851    update_index_paths_filtered(
852        worktree_root,
853        git_dir,
854        format,
855        paths,
856        UpdateIndexOptions {
857            add: true,
858            remove: false,
859            force_remove: false,
860            chmod: None,
861            info_only: false,
862            ignore_skip_worktree_entries: false,
863        },
864        config,
865    )
866}
867
868/// Like [`update_index_paths`], but applies the clean-side content filters (see
869/// [`apply_clean_filter`]) to file contents before they are hashed/written.
870pub fn update_index_paths_filtered(
871    worktree_root: impl AsRef<Path>,
872    git_dir: impl AsRef<Path>,
873    format: ObjectFormat,
874    paths: &[PathBuf],
875    options: UpdateIndexOptions,
876    config: &GitConfig,
877) -> Result<UpdateIndexResult> {
878    let git_dir = git_dir.as_ref();
879    let index = read_repository_index(git_dir, format)?.unwrap_or_else(empty_index);
880    update_index_paths_filtered_with_index(
881        worktree_root,
882        git_dir,
883        format,
884        index,
885        paths,
886        options,
887        config,
888    )
889}
890
891pub fn update_index_paths_filtered_with_index(
892    worktree_root: impl AsRef<Path>,
893    git_dir: impl AsRef<Path>,
894    format: ObjectFormat,
895    index: Index,
896    paths: &[PathBuf],
897    options: UpdateIndexOptions,
898    config: &GitConfig,
899) -> Result<UpdateIndexResult> {
900    let ordered = ordered_paths_from_plain(paths, options);
901    update_index_paths_impl(
902        worktree_root.as_ref(),
903        git_dir.as_ref(),
904        format,
905        index,
906        &ordered,
907        options,
908        Some(config),
909        false,
910    )
911}
912
913pub fn add_update_all_tracked_filtered(
914    worktree_root: impl AsRef<Path>,
915    git_dir: impl AsRef<Path>,
916    format: ObjectFormat,
917    clean_config: &GitConfig,
918) -> Result<Vec<AddUpdateTrackedAction>> {
919    let worktree_root = worktree_root.as_ref();
920    let git_dir = git_dir.as_ref();
921    let index_path = repository_index_path(git_dir);
922    if !index_path.exists() {
923        return Ok(Vec::new());
924    }
925    let mut index = Index::parse(&fs::read(&index_path)?, format)?;
926    let index_mtime = fs::metadata(&index_path)
927        .ok()
928        .and_then(|metadata| file_mtime_parts(&metadata));
929    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
930    let prechecks = tracked_only_non_clean_prechecks_parallel(worktree_root, &index, &stat_cache)?;
931    if prechecks.is_empty() {
932        return Ok(Vec::new());
933    }
934
935    let pending = prechecks
936        .into_iter()
937        .map(|precheck| match precheck {
938            TrackedOnlyPrecheck::Deleted(idx) => {
939                (precheck, index.entries[idx].path.as_bytes().to_vec())
940            }
941            TrackedOnlyPrecheck::Slow(idx) => {
942                (precheck, index.entries[idx].path.as_bytes().to_vec())
943            }
944        })
945        .collect::<Vec<_>>();
946    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
947    let mut actions = Vec::new();
948    let mut index_dirty = false;
949    let mut clean_filter = None;
950    for (precheck, path) in pending {
951        match precheck {
952            TrackedOnlyPrecheck::Deleted(_) => {
953                if remove_index_entries_with_path(&mut index.entries, &path) {
954                    actions.push(AddUpdateTrackedAction::Remove(path));
955                    index_dirty = true;
956                }
957            }
958            TrackedOnlyPrecheck::Slow(_) => {
959                let (action, dirty) = add_update_tracked_path(
960                    worktree_root,
961                    git_dir,
962                    format,
963                    Some(clean_config),
964                    &odb,
965                    &stat_cache,
966                    &mut clean_filter,
967                    &mut index,
968                    &path,
969                )?;
970                index_dirty |= dirty;
971                if let Some(action) = action {
972                    actions.push(action);
973                }
974            }
975        }
976    }
977
978    if index_dirty {
979        normalize_index_version_for_extended_flags(&mut index);
980        index.extensions = index_extensions_without_cache_tree(&index.extensions);
981        fs::write(index_path, index.write(format)?)?;
982    }
983    Ok(actions)
984}
985
986pub fn add_exact_tracked_path_from_disk(
987    worktree_root: impl AsRef<Path>,
988    git_dir: impl AsRef<Path>,
989    format: ObjectFormat,
990    git_path: &[u8],
991    ignore_removal: bool,
992    config_parameters_env: Option<&str>,
993) -> Result<AddExactTrackedPathResult> {
994    let worktree_root = worktree_root.as_ref();
995    let git_dir = git_dir.as_ref();
996    let index_path = repository_index_path(git_dir);
997    let index_metadata = match fs::metadata(&index_path) {
998        Ok(metadata) => metadata,
999        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
1000            return Ok(AddExactTrackedPathResult::Unsupported);
1001        }
1002        Err(err) => return Err(err.into()),
1003    };
1004    let mut index_bytes = fs::read(&index_path)?;
1005    let Some(raw) = raw_exact_index_entry(&index_bytes, format, git_path)? else {
1006        return Ok(AddExactTrackedPathResult::Unsupported);
1007    };
1008    if !raw_exact_entry_can_patch(&raw, git_path) {
1009        return Ok(AddExactTrackedPathResult::Unsupported);
1010    }
1011    if !raw_index_extensions_are_filterable(&index_bytes, raw.entries_end, raw.checksum_offset) {
1012        return Ok(AddExactTrackedPathResult::Unsupported);
1013    }
1014
1015    let entry = raw.entry.clone();
1016    if entry.stage() != Stage::Normal || index_entry_skip_worktree(&entry) || entry.mode == 0o160000
1017    {
1018        return Ok(AddExactTrackedPathResult::Unsupported);
1019    }
1020    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
1021    let metadata = match fs::symlink_metadata(&absolute) {
1022        Ok(metadata) => metadata,
1023        Err(err)
1024            if matches!(
1025                err.kind(),
1026                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
1027            ) =>
1028        {
1029            return Ok(if ignore_removal {
1030                AddExactTrackedPathResult::Handled(None)
1031            } else {
1032                AddExactTrackedPathResult::Unsupported
1033            });
1034        }
1035        Err(err) => return Err(err.into()),
1036    };
1037    let file_type = metadata.file_type();
1038    if metadata.is_dir() || !(file_type.is_file() || file_type.is_symlink()) {
1039        return Ok(AddExactTrackedPathResult::Unsupported);
1040    }
1041    let index_mtime = file_mtime_parts(&index_metadata);
1042    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
1043    if stat_cache.reuse_index_entry(&entry, &metadata).is_some() {
1044        return Ok(AddExactTrackedPathResult::Handled(None));
1045    }
1046
1047    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
1048    let is_symlink = file_type.is_symlink();
1049    let body = if is_symlink {
1050        symlink_target_bytes(&absolute)?
1051    } else {
1052        let body = fs::read(&absolute)?;
1053        // Resolve the effective config WITH command-line `-c` / `--config-env`
1054        // overrides folded in (e.g. upstream t0027's `git -c core.autocrlf=true
1055        // add`); the plain repo-config reader would drop them and the fast path
1056        // would convert/warn against the wrong EOL policy.
1057        let config = sley_config::read_repo_config(git_dir, config_parameters_env)
1058            .unwrap_or_default();
1059        let mut clean_filter = None;
1060        let clean_filter =
1061            tracked_only_clean_filter_with_config(&mut clean_filter, worktree_root, &config);
1062        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
1063        let checks =
1064            clean_filter
1065                .matcher
1066                .attributes_for_path(git_path, &clean_filter.requested, false);
1067        // git's index update folds in `global_conv_flags_eol`, so `git add`
1068        // emits the `core.safecrlf` round-trip warning (default: warn). The
1069        // current index blob (`entry.oid`) drives the auto-crlf
1070        // `has_crlf_in_index` decision. Mirror the slow `add_update_tracked_path`
1071        // path here so the exact-patch fast path does not silently drop the
1072        // warning (upstream t0020 'safecrlf: print warning only once').
1073        let conv_flags = ConvFlags::from_config(&clean_filter.config);
1074        let index_blob = match conv_flags {
1075            ConvFlags::Off => SafeCrlfIndexBlob::None,
1076            _ => SafeCrlfIndexBlob::Lookup {
1077                odb: &odb,
1078                oid: entry.oid,
1079            },
1080        };
1081        apply_clean_filter_with_attributes_cow_safecrlf(
1082            &clean_filter.config,
1083            &checks,
1084            git_path,
1085            &body,
1086            conv_flags,
1087            index_blob,
1088        )?
1089        .into_owned()
1090    };
1091    let object = EncodedObject::new(ObjectType::Blob, body);
1092    let oid = object.object_id(format)?;
1093    if oid != entry.oid {
1094        odb.write_object(object)?;
1095    }
1096
1097    let mut updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
1098    if is_symlink {
1099        updated_entry.mode = 0o120000;
1100    }
1101    if updated_entry == entry {
1102        return Ok(AddExactTrackedPathResult::Handled(None));
1103    }
1104    if !raw_updated_entry_can_patch(&entry, &updated_entry, git_path) {
1105        return Ok(AddExactTrackedPathResult::Unsupported);
1106    }
1107    patch_raw_index_entry(&mut index_bytes, format, &raw, &updated_entry)?;
1108    fs::write(index_path, index_bytes)?;
1109    let changed = updated_entry.oid != entry.oid || updated_entry.mode != entry.mode;
1110    Ok(AddExactTrackedPathResult::Handled(
1111        changed.then(|| AddUpdateTrackedAction::Add(git_path.to_vec())),
1112    ))
1113}
1114
1115pub fn add_exact_tracked_path_with_index(
1116    worktree_root: impl AsRef<Path>,
1117    git_dir: impl AsRef<Path>,
1118    format: ObjectFormat,
1119    mut index: Index,
1120    git_path: &[u8],
1121) -> Result<Option<AddUpdateTrackedAction>> {
1122    let worktree_root = worktree_root.as_ref();
1123    let git_dir = git_dir.as_ref();
1124    let range = index_entries_path_range(&index.entries, git_path);
1125    if range.len() != 1 {
1126        return Ok(None);
1127    }
1128    let entry = &index.entries[range.start];
1129    if entry.stage() != Stage::Normal || index_entry_skip_worktree(entry) {
1130        return Ok(None);
1131    }
1132    let index_path = repository_index_path(git_dir);
1133    let index_mtime = fs::metadata(&index_path)
1134        .ok()
1135        .and_then(|metadata| file_mtime_parts(&metadata));
1136    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
1137    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
1138    let mut clean_filter = None;
1139    let (action, dirty) = add_update_tracked_path(
1140        worktree_root,
1141        git_dir,
1142        format,
1143        None,
1144        &odb,
1145        &stat_cache,
1146        &mut clean_filter,
1147        &mut index,
1148        git_path,
1149    )?;
1150    if dirty {
1151        normalize_index_version_for_extended_flags(&mut index);
1152        index.extensions = index_extensions_without_cache_tree(&index.extensions);
1153        fs::write(index_path, index.write(format)?)?;
1154    }
1155    Ok(action)
1156}
1157
/// One index entry located by scanning the raw index bytes (see
/// `raw_exact_index_entry`), together with the byte offsets needed to patch
/// it in place.
struct RawExactIndexEntry {
    /// Index format version read from bytes 4..8 of the index header.
    version: u32,
    /// The matching entry, decoded from the raw bytes.
    entry: IndexEntry,
    /// Byte offset at which the entry's fixed-size fields begin.
    entry_start: usize,
    /// Byte offset just past the last entry (including its padding), i.e.
    /// where the extension area starts.
    entries_end: usize,
    /// Byte offset of the trailing checksum (`bytes.len() - hash_len`).
    checksum_offset: usize,
}
1165
1166fn raw_exact_index_entry(
1167    bytes: &[u8],
1168    format: ObjectFormat,
1169    git_path: &[u8],
1170) -> Result<Option<RawExactIndexEntry>> {
1171    let hash_len = format.raw_len();
1172    if bytes.len() < 12 + hash_len {
1173        return Err(GitError::InvalidFormat("index header too short".into()));
1174    }
1175    let checksum_offset = bytes.len() - hash_len;
1176    let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
1177    let expected_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
1178    if actual_checksum != expected_checksum {
1179        return Err(GitError::InvalidFormat(format!(
1180            "index checksum mismatch: expected {expected_checksum}, got {actual_checksum}"
1181        )));
1182    }
1183    if &bytes[..4] != b"DIRC" {
1184        return Err(GitError::InvalidFormat("missing DIRC signature".into()));
1185    }
1186    let version = u32_from_be(&bytes[4..8]);
1187    if !(2..=3).contains(&version) {
1188        return Ok(None);
1189    }
1190    let count = u32_from_be(&bytes[8..12]) as usize;
1191    let mut offset = 12;
1192    let mut found = None;
1193    for _ in 0..count {
1194        let entry_header_len = 40 + hash_len + 2;
1195        if checksum_offset.saturating_sub(offset) < entry_header_len {
1196            return Err(GitError::InvalidFormat("truncated index entry".into()));
1197        }
1198        let start = offset;
1199        let oid_start = offset + 40;
1200        let oid_end = oid_start + hash_len;
1201        let flags = u16_from_be(&bytes[oid_end..oid_end + 2]);
1202        offset = oid_end + 2;
1203        let flags_extended = if flags & INDEX_FLAG_EXTENDED != 0 {
1204            if checksum_offset.saturating_sub(offset) < 2 {
1205                return Err(GitError::InvalidFormat(
1206                    "truncated index extended flags".into(),
1207                ));
1208            }
1209            let flags_extended = u16_from_be(&bytes[offset..offset + 2]);
1210            offset += 2;
1211            flags_extended
1212        } else {
1213            0
1214        };
1215        let path_start = offset;
1216        while bytes.get(offset).copied() != Some(0) {
1217            offset += 1;
1218            if offset >= checksum_offset {
1219                return Err(GitError::InvalidFormat("unterminated index path".into()));
1220            }
1221        }
1222        let path = &bytes[path_start..offset];
1223        offset += 1;
1224        while (offset - start) % 8 != 0 {
1225            offset += 1;
1226            if offset > checksum_offset {
1227                return Err(GitError::InvalidFormat("truncated index padding".into()));
1228            }
1229        }
1230        if path == git_path {
1231            if found.is_some() {
1232                return Ok(None);
1233            }
1234            let oid = ObjectId::from_raw(format, &bytes[oid_start..oid_end])?;
1235            found = Some(RawExactIndexEntry {
1236                version,
1237                entry: IndexEntry {
1238                    ctime_seconds: u32_from_be(&bytes[start..start + 4]),
1239                    ctime_nanoseconds: u32_from_be(&bytes[start + 4..start + 8]),
1240                    mtime_seconds: u32_from_be(&bytes[start + 8..start + 12]),
1241                    mtime_nanoseconds: u32_from_be(&bytes[start + 12..start + 16]),
1242                    dev: u32_from_be(&bytes[start + 16..start + 20]),
1243                    ino: u32_from_be(&bytes[start + 20..start + 24]),
1244                    mode: u32_from_be(&bytes[start + 24..start + 28]),
1245                    uid: u32_from_be(&bytes[start + 28..start + 32]),
1246                    gid: u32_from_be(&bytes[start + 32..start + 36]),
1247                    size: u32_from_be(&bytes[start + 36..start + 40]),
1248                    oid,
1249                    flags,
1250                    flags_extended,
1251                    path: BString::from(path),
1252                },
1253                entry_start: start,
1254                entries_end: 0,
1255                checksum_offset,
1256            });
1257        } else if found.is_none() && path > git_path {
1258            return Ok(None);
1259        }
1260    }
1261    if let Some(mut found) = found {
1262        found.entries_end = offset;
1263        Ok(Some(found))
1264    } else {
1265        Ok(None)
1266    }
1267}
1268
1269fn raw_exact_entry_can_patch(raw: &RawExactIndexEntry, git_path: &[u8]) -> bool {
1270    raw.version == 2
1271        && raw.entry.flags_extended == 0
1272        && raw.entry.flags & INDEX_FLAG_EXTENDED == 0
1273        && raw.entry.flags == index_flags(git_path.len(), 0)
1274        && raw.entry.path.as_bytes() == git_path
1275}
1276
1277fn raw_updated_entry_can_patch(
1278    previous: &IndexEntry,
1279    updated: &IndexEntry,
1280    git_path: &[u8],
1281) -> bool {
1282    updated.path.as_bytes() == git_path
1283        && updated.flags_extended == 0
1284        && updated.flags & INDEX_FLAG_EXTENDED == 0
1285        && updated.flags == previous.flags
1286}
1287
1288fn raw_index_extensions_are_filterable(bytes: &[u8], entries_end: usize, checksum_offset: usize) -> bool {
1289    let mut offset = entries_end;
1290    while offset < checksum_offset {
1291        if checksum_offset.saturating_sub(offset) < 8 {
1292            return false;
1293        }
1294        let size = u32_from_be(&bytes[offset + 4..offset + 8]) as usize;
1295        let Some(end) = offset.checked_add(8).and_then(|offset| offset.checked_add(size)) else {
1296            return false;
1297        };
1298        if end > checksum_offset {
1299            return false;
1300        }
1301        offset = end;
1302    }
1303    true
1304}
1305
1306fn patch_raw_index_entry(
1307    bytes: &mut Vec<u8>,
1308    format: ObjectFormat,
1309    raw: &RawExactIndexEntry,
1310    entry: &IndexEntry,
1311) -> Result<()> {
1312    let hash_len = format.raw_len();
1313    let start = raw.entry_start;
1314    bytes[start..start + 4].copy_from_slice(&entry.ctime_seconds.to_be_bytes());
1315    bytes[start + 4..start + 8].copy_from_slice(&entry.ctime_nanoseconds.to_be_bytes());
1316    bytes[start + 8..start + 12].copy_from_slice(&entry.mtime_seconds.to_be_bytes());
1317    bytes[start + 12..start + 16].copy_from_slice(&entry.mtime_nanoseconds.to_be_bytes());
1318    bytes[start + 16..start + 20].copy_from_slice(&entry.dev.to_be_bytes());
1319    bytes[start + 20..start + 24].copy_from_slice(&entry.ino.to_be_bytes());
1320    bytes[start + 24..start + 28].copy_from_slice(&entry.mode.to_be_bytes());
1321    bytes[start + 28..start + 32].copy_from_slice(&entry.uid.to_be_bytes());
1322    bytes[start + 32..start + 36].copy_from_slice(&entry.gid.to_be_bytes());
1323    bytes[start + 36..start + 40].copy_from_slice(&entry.size.to_be_bytes());
1324    bytes[start + 40..start + 40 + hash_len].copy_from_slice(entry.oid.as_bytes());
1325    bytes[start + 40 + hash_len..start + 40 + hash_len + 2]
1326        .copy_from_slice(&entry.flags.to_be_bytes());
1327
1328    let mut extension_offset = raw.entries_end;
1329    let mut removed_cache_tree = false;
1330    let mut rewritten = Vec::new();
1331    while extension_offset < raw.checksum_offset {
1332        let signature = &bytes[extension_offset..extension_offset + 4];
1333        let size = u32_from_be(&bytes[extension_offset + 4..extension_offset + 8]) as usize;
1334        let end = extension_offset + 8 + size;
1335        if signature == b"TREE" {
1336            removed_cache_tree = true;
1337        } else {
1338            rewritten.extend_from_slice(&bytes[extension_offset..end]);
1339        }
1340        extension_offset = end;
1341    }
1342
1343    if removed_cache_tree {
1344        bytes.truncate(raw.entries_end);
1345        bytes.extend_from_slice(&rewritten);
1346        let checksum = sley_core::digest_bytes(format, bytes)?;
1347        bytes.extend_from_slice(checksum.as_bytes());
1348    } else {
1349        let checksum = sley_core::digest_bytes(format, &bytes[..raw.checksum_offset])?;
1350        bytes[raw.checksum_offset..raw.checksum_offset + hash_len]
1351            .copy_from_slice(checksum.as_bytes());
1352    }
1353    Ok(())
1354}
1355
/// Decodes the first four bytes of `bytes` as a big-endian `u32`; any further
/// bytes are ignored.
///
/// # Panics
///
/// Panics when `bytes` holds fewer than four bytes.
fn u32_from_be(bytes: &[u8]) -> u32 {
    let mut word = [0u8; 4];
    word.copy_from_slice(&bytes[..4]);
    u32::from_be_bytes(word)
}
1359
/// Decodes the first two bytes of `bytes` as a big-endian `u16`; any further
/// bytes are ignored.
///
/// # Panics
///
/// Panics when `bytes` holds fewer than two bytes.
fn u16_from_be(bytes: &[u8]) -> u16 {
    let mut half = [0u8; 2];
    half.copy_from_slice(&bytes[..2]);
    u16::from_be_bytes(half)
}
1363
1364fn add_update_tracked_path(
1365    worktree_root: &Path,
1366    git_dir: &Path,
1367    format: ObjectFormat,
1368    clean_config: Option<&GitConfig>,
1369    odb: &FileObjectDatabase,
1370    stat_cache: &IndexStatCache,
1371    clean_filter: &mut Option<TrackedOnlyCleanFilter>,
1372    index: &mut Index,
1373    git_path: &[u8],
1374) -> Result<(Option<AddUpdateTrackedAction>, bool)> {
1375    let range = index_entries_path_range(&index.entries, git_path);
1376    if range.is_empty() {
1377        return Ok((None, false));
1378    }
1379    let entry = index.entries[range.start].clone();
1380    if entry.stage() != Stage::Normal {
1381        return Ok((None, false));
1382    }
1383    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
1384    let metadata = match fs::symlink_metadata(&absolute) {
1385        Ok(metadata) => metadata,
1386        Err(err)
1387            if matches!(
1388                err.kind(),
1389                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
1390            ) =>
1391        {
1392            if remove_index_entries_with_path(&mut index.entries, git_path) {
1393                return Ok((
1394                    Some(AddUpdateTrackedAction::Remove(git_path.to_vec())),
1395                    true,
1396                ));
1397            }
1398            return Ok((None, false));
1399        }
1400        Err(err) => return Err(err.into()),
1401    };
1402    if metadata.is_dir() {
1403        if entry.mode != 0o160000 {
1404            return Ok((None, false));
1405        }
1406        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(entry.oid);
1407        let mut updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
1408        updated_entry.mode = 0o160000;
1409        let changed = updated_entry.oid != entry.oid || updated_entry.mode != entry.mode;
1410        if updated_entry != entry {
1411            replace_index_entries_with_entry(&mut index.entries, updated_entry);
1412            return Ok((
1413                changed.then(|| AddUpdateTrackedAction::Add(git_path.to_vec())),
1414                true,
1415            ));
1416        }
1417        return Ok((None, false));
1418    }
1419    if !(metadata.is_file() || metadata.file_type().is_symlink()) {
1420        return Ok((None, false));
1421    }
1422    if stat_cache.reuse_index_entry(&entry, &metadata).is_some() {
1423        return Ok((None, false));
1424    }
1425
1426    let is_symlink = metadata.file_type().is_symlink();
1427    let body = if is_symlink {
1428        symlink_target_bytes(&absolute)?
1429    } else {
1430        let body = fs::read(&absolute)?;
1431        let clean_filter = match clean_config {
1432            Some(config) => {
1433                tracked_only_clean_filter_with_config(clean_filter, worktree_root, config)
1434            }
1435            None => tracked_only_clean_filter(clean_filter, worktree_root, git_dir),
1436        };
1437        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
1438        let checks =
1439            clean_filter
1440                .matcher
1441                .attributes_for_path(git_path, &clean_filter.requested, false);
1442        // git's `add -u` index update folds in `global_conv_flags_eol`, so emit
1443        // the `core.safecrlf` round-trip warning (default: warn). The current
1444        // index blob (`entry.oid`) drives the auto-crlf `has_crlf_in_index`
1445        // decision.
1446        let conv_flags = ConvFlags::from_config(&clean_filter.config);
1447        let index_blob = match conv_flags {
1448            ConvFlags::Off => SafeCrlfIndexBlob::None,
1449            _ => SafeCrlfIndexBlob::Lookup {
1450                odb,
1451                oid: entry.oid,
1452            },
1453        };
1454        apply_clean_filter_with_attributes_cow_safecrlf(
1455            &clean_filter.config,
1456            &checks,
1457            git_path,
1458            &body,
1459            conv_flags,
1460            index_blob,
1461        )?
1462        .into_owned()
1463    };
1464    let object = EncodedObject::new(ObjectType::Blob, body);
1465    let oid = object.object_id(format)?;
1466    if oid != entry.oid {
1467        odb.write_object(object)?;
1468    }
1469    let mut updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
1470    if is_symlink {
1471        updated_entry.mode = 0o120000;
1472    }
1473    let changed = updated_entry.oid != entry.oid || updated_entry.mode != entry.mode;
1474    if updated_entry != entry {
1475        replace_index_entries_with_entry(&mut index.entries, updated_entry);
1476        return Ok((
1477            changed.then(|| AddUpdateTrackedAction::Add(git_path.to_vec())),
1478            true,
1479        ));
1480    }
1481    Ok((None, false))
1482}
1483
/// How `update_index_paths_impl` obtains gitattributes when clean filtering
/// is enabled.
enum UpdateIndexCleanFilter {
    /// A matcher built once for the whole worktree
    /// (`AttributeMatcher::from_worktree_root`); selected for batches of 64
    /// or more paths.
    Full(AttributeMatcher),
    /// No up-front matcher; selected for smaller batches so that only each
    /// path's own `.gitattributes` chain has to be read.
    PathLocal,
}
1488
1489fn index_entries_path_range(entries: &[IndexEntry], path: &[u8]) -> std::ops::Range<usize> {
1490    let mut start = match entries.binary_search_by(|entry| entry.path.as_bytes().cmp(path)) {
1491        Ok(index) => index,
1492        Err(insert) => return insert..insert,
1493    };
1494    while start > 0 && entries[start - 1].path.as_bytes() == path {
1495        start -= 1;
1496    }
1497    let mut end = start;
1498    while end < entries.len() && entries[end].path.as_bytes() == path {
1499        end += 1;
1500    }
1501    start..end
1502}
1503
1504fn remove_index_entries_with_path(entries: &mut Vec<IndexEntry>, path: &[u8]) -> bool {
1505    let range = index_entries_path_range(entries, path);
1506    if range.is_empty() {
1507        return false;
1508    }
1509    entries.drain(range);
1510    true
1511}
1512
1513fn replace_index_entries_with_entry(entries: &mut Vec<IndexEntry>, entry: IndexEntry) {
1514    let path = entry.path.as_bytes().to_vec();
1515    let range = index_entries_path_range(entries, &path);
1516    if range.is_empty() {
1517        entries.insert(range.start, entry);
1518    } else {
1519        entries.splice(range, [entry]);
1520    }
1521}
1522
1523fn update_index_paths_impl(
1524    worktree_root: &Path,
1525    git_dir: &Path,
1526    format: ObjectFormat,
1527    mut index: Index,
1528    paths: &[UpdateIndexPath],
1529    options: UpdateIndexOptions,
1530    clean_config: Option<&GitConfig>,
1531    verbose: bool,
1532) -> Result<UpdateIndexResult> {
1533    let index_path = repository_index_path(git_dir);
1534    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
1535    // For small batches, read only each path's `.gitattributes` chain; a
1536    // whole-worktree matcher can dominate `add -u` when only a few files are
1537    // dirty in a huge checkout. Large batches still amortize the full matcher.
1538    let clean_filter = match clean_config {
1539        Some(_) if paths.len() >= 64 => Some(UpdateIndexCleanFilter::Full(
1540            AttributeMatcher::from_worktree_root(worktree_root)?,
1541        )),
1542        Some(_) => Some(UpdateIndexCleanFilter::PathLocal),
1543        None => None,
1544    };
1545    // git's index-update path (object-file.c `get_conv_flags`) folds in
1546    // `global_conv_flags_eol`, so `git add`/`commit` emit the `core.safecrlf`
1547    // round-trip warning (default: warn). It only applies when content filters
1548    // run at all (i.e. when we have a config).
1549    let conv_flags = clean_config.map_or(ConvFlags::Off, ConvFlags::from_config);
1550    let requested_filter_attrs = filter_attribute_names();
1551    let mut updated = Vec::new();
1552    let mut reports: Vec<String> = Vec::new();
1553    for update_path in paths {
1554        let path = &update_path.path;
1555        // Each path carries the sticky mode that was in effect when it was
1556        // parsed on the command line (git processes argv left-to-right). Read
1557        // the action from the path's own mode, NOT a batch-wide flag, so
1558        // `--add foo --force-remove bar` adds foo and force-removes bar.
1559        let path_mode = update_path.mode;
1560        let path_chmod = path_mode.chmod;
1561        let absolute = if path.is_absolute() {
1562            path.clone()
1563        } else {
1564            worktree_root.join(path)
1565        };
1566        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
1567            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
1568        })?;
1569        let git_path = git_path_bytes(relative)?;
1570        if path_mode.force_remove {
1571            remove_index_entries_with_path(&mut index.entries, &git_path);
1572            // git's update_one() reports `remove` for a --force-remove path.
1573            reports.push(format!("remove '{}'", String::from_utf8_lossy(&git_path)));
1574            continue;
1575        }
1576        let existing_range = index_entries_path_range(&index.entries, &git_path);
1577        if index.entries[existing_range.clone()]
1578            .iter()
1579            .any(index_entry_skip_worktree)
1580        {
1581            if path_mode.remove && !options.ignore_skip_worktree_entries {
1582                index.entries.drain(existing_range);
1583            }
1584            continue;
1585        }
1586        // lstat (not stat): a symlink must be inspected as the link itself, never
1587        // followed to its target. `Path::exists`/`fs::metadata` both stat through
1588        // the link, which makes a symlink-to-directory look like a directory
1589        // (fs::read then fails with "Is a directory") and a symlink-to-file get
1590        // staged with the target's content + a regular-file mode. git stages a
1591        // symlink as mode 120000 whose blob is the link target string, regardless
1592        // of what (if anything) the target resolves to.
1593        let symlink_metadata = match fs::symlink_metadata(&absolute) {
1594            Ok(metadata) => Some(metadata),
1595            Err(err) if err.kind() == std::io::ErrorKind::NotFound => None,
1596            Err(err) => return Err(err.into()),
1597        };
1598        let Some(metadata) = symlink_metadata else {
1599            if path_mode.remove {
1600                remove_index_entries_with_path(&mut index.entries, &git_path);
1601                // git's update_one() unconditionally reports `add '<path>'`
1602                // after process_path(), even when the missing file was removed
1603                // from the index via the `--remove` (not --force-remove) path.
1604                reports.push(format!("add '{}'", String::from_utf8_lossy(&git_path)));
1605                continue;
1606            }
1607            print_update_index_path_error(&git_path, "does not exist and --remove not passed");
1608            return Err(GitError::Exit(128));
1609        };
1610        if !path_mode.add && index_entries_path_range(&index.entries, &git_path).is_empty() {
1611            print_update_index_path_error(
1612                &git_path,
1613                "cannot add to the index - missing --add option?",
1614            );
1615            return Err(GitError::Exit(128));
1616        }
1617        if metadata.is_dir() {
1618            // A directory is stageable only as a gitlink: when it is an
1619            // embedded repository with a commit checked out, git records a
1620            // mode-160000 entry whose oid is that commit (no object is
1621            // written). Otherwise it errors — with upstream's exact messages
1622            // for the embedded-repo-without-commit and plain-directory cases
1623            // (object-file.c index_path / builtin/update-index.c
1624            // process_directory).
1625            let display = String::from_utf8_lossy(&git_path).into_owned();
1626            let has_dot_git = absolute.join(".git").exists();
1627            let Some(head_oid) = sley_diff_merge::gitlink_head_oid(&absolute, format) else {
1628                if has_dot_git {
1629                    eprintln!("error: '{display}' does not have a commit checked out");
1630                } else {
1631                    eprintln!("error: {display}: is a directory - add files inside instead");
1632                }
1633                eprintln!("fatal: Unable to process path {display}");
1634                return Err(GitError::Exit(128));
1635            };
1636            if path_chmod.is_some() {
1637                eprintln!(
1638                    "fatal: git update-index: cannot chmod {}x '{display}'",
1639                    if path_chmod == Some(true) { '+' } else { '-' },
1640                );
1641                return Err(GitError::Exit(128));
1642            }
1643            let mut entry = index_entry_from_metadata(git_path.clone(), head_oid, &metadata);
1644            entry.mode = 0o160000;
1645            reports.push(format!("add '{display}'"));
1646            replace_index_entries_with_entry(&mut index.entries, entry);
1647            updated.push(head_oid);
1648            continue;
1649        }
1650        let is_symlink = metadata.file_type().is_symlink();
1651        let body = if is_symlink {
1652            // The blob is the raw link target bytes; clean filters never apply to
1653            // a symlink (git treats it as binary content, not a text path).
1654            symlink_target_bytes(&absolute)?
1655        } else {
1656            let body = fs::read(&absolute)?;
1657            // The safecrlf auto-crlf decision needs the path's *current* index
1658            // blob (git's `has_crlf_in_index`); the stage-0 entry, if any, has it.
1659            let index_blob = match conv_flags {
1660                ConvFlags::Off => SafeCrlfIndexBlob::None,
1661                _ => stage0_oid_in_range(&index.entries, existing_range.clone())
1662                    .map_or(SafeCrlfIndexBlob::None, |oid| SafeCrlfIndexBlob::Lookup {
1663                        odb: &odb,
1664                        oid,
1665                    }),
1666            };
1667            match (clean_config, &clean_filter) {
1668                (Some(config), Some(UpdateIndexCleanFilter::Full(matcher))) => {
1669                    // Identical to `apply_clean_filter`, but reuses the batch's
1670                    // matcher instead of rebuilding it (and re-walking the tree)
1671                    // for this path.
1672                    let checks =
1673                        matcher.attributes_for_path(&git_path, &requested_filter_attrs, false);
1674                    apply_clean_filter_with_attributes_cow_safecrlf(
1675                        config, &checks, &git_path, &body, conv_flags, index_blob,
1676                    )?
1677                    .into_owned()
1678                }
1679                (Some(config), Some(UpdateIndexCleanFilter::PathLocal)) => {
1680                    let checks = filter_attribute_checks(worktree_root, &git_path)?;
1681                    apply_clean_filter_with_attributes_cow_safecrlf(
1682                        config, &checks, &git_path, &body, conv_flags, index_blob,
1683                    )?
1684                    .into_owned()
1685                }
1686                _ => body,
1687            }
1688        };
1689        let object = EncodedObject::new(ObjectType::Blob, body);
1690        let oid = if path_mode.info_only {
1691            object.object_id(format)?
1692        } else {
1693            odb.write_object(object)?
1694        };
1695        let mut entry = index_entry_from_metadata(git_path.clone(), oid, &metadata);
1696        if is_symlink {
1697            entry.mode = 0o120000;
1698        }
1699        // git's update_one() reports `add` for every staged path (whether the
1700        // entry is new or an update), then chmod_path() reports the chmod after.
1701        reports.push(format!("add '{}'", String::from_utf8_lossy(&git_path)));
1702        if let Some(executable) = path_chmod {
1703            // git's chmod_path() refuses to flip the executable bit on anything
1704            // that is not a regular file (a symlink/gitlink has no such bit). It
1705            // writes the blob first, then errors with this exact message and
1706            // leaves the index untouched.
1707            if is_symlink {
1708                eprintln!(
1709                    "fatal: git update-index: cannot chmod {}x '{}'",
1710                    if executable { '+' } else { '-' },
1711                    String::from_utf8_lossy(&git_path)
1712                );
1713                return Err(GitError::Exit(128));
1714            }
1715            entry.mode = if executable { 0o100755 } else { 0o100644 };
1716            reports.push(format!(
1717                "chmod {}x '{}'",
1718                if executable { '+' } else { '-' },
1719                String::from_utf8_lossy(&git_path)
1720            ));
1721        }
1722        replace_index_entries_with_entry(&mut index.entries, entry);
1723        updated.push(oid);
1724    }
1725    normalize_index_version_for_extended_flags(&mut index);
1726    index.extensions = index_extensions_without_cache_tree(&index.extensions);
1727    fs::write(index_path, index.write(format)?)?;
1728    if verbose {
1729        let mut stdout = std::io::stdout().lock();
1730        for line in &reports {
1731            writeln!(stdout, "{line}")?;
1732        }
1733        stdout.flush()?;
1734    }
1735    Ok(UpdateIndexResult {
1736        entries: index.entries.len(),
1737        updated,
1738    })
1739}
1740
1741pub fn refresh_index_paths(
1742    worktree_root: impl AsRef<Path>,
1743    git_dir: impl AsRef<Path>,
1744    format: ObjectFormat,
1745    paths: &[PathBuf],
1746    quiet: bool,
1747    ignore_missing: bool,
1748    really_refresh: bool,
1749) -> Result<UpdateIndexResult> {
1750    let worktree_root = worktree_root.as_ref();
1751    let git_dir = git_dir.as_ref();
1752    let index_path = repository_index_path(git_dir);
1753    if !index_path.exists() {
1754        return Ok(UpdateIndexResult {
1755            entries: 0,
1756            updated: Vec::new(),
1757        });
1758    }
1759    let mut index = Index::parse(&fs::read(&index_path)?, format)?;
1760    // git's `update-index --refresh` trusts the cached stat: a stage-0 entry
1761    // whose size+mtime still match the worktree file (and is not racily clean) is
1762    // known unchanged, so its content is NOT re-read or re-hashed
1763    // (read-cache.c `refresh_cache_ent` → `ie_match_stat`). Without this shortcut
1764    // sley re-hashed every tracked file on every refresh — the 3.2x slowdown in
1765    // sley#27. We build the cache from the same parsed index + the index file's
1766    // own mtime (the racy-clean reference) so no extra parse is needed.
1767    let index_mtime = fs::metadata(&index_path)
1768        .ok()
1769        .and_then(|metadata| file_mtime_parts(&metadata));
1770    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
1771    let selected_paths = paths
1772        .iter()
1773        .map(|path| {
1774            let absolute = if path.is_absolute() {
1775                path.clone()
1776            } else {
1777                worktree_root.join(path)
1778            };
1779            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
1780                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
1781            })?;
1782            git_path_bytes(relative)
1783        })
1784        .collect::<Result<Vec<_>>>()?;
1785    let selected_paths = selected_paths.into_iter().collect::<BTreeSet<_>>();
1786    if selected_paths.is_empty()
1787        && !really_refresh
1788        && !index
1789            .entries
1790            .iter()
1791            .any(|entry| entry.flags & INDEX_FLAG_ASSUME_UNCHANGED != 0)
1792    {
1793        return refresh_all_index_paths_parallel(
1794            worktree_root,
1795            &index_path,
1796            format,
1797            index,
1798            stat_cache,
1799            quiet,
1800            ignore_missing,
1801        );
1802    }
1803    let mut needs_update = false;
1804    let mut index_dirty = false;
1805    for entry in &mut index.entries {
1806        if index_entry_stage(entry) != 0 {
1807            continue;
1808        }
1809        let selected_for_update =
1810            !selected_paths.is_empty() && selected_paths.contains(entry.path.as_bytes());
1811        if entry.flags & INDEX_FLAG_ASSUME_UNCHANGED != 0 {
1812            if !really_refresh {
1813                continue;
1814            }
1815            entry.flags &= !INDEX_FLAG_ASSUME_UNCHANGED;
1816            index_dirty = true;
1817        }
1818        let absolute = worktree_root.join(repo_path_to_os_path(entry.path.as_bytes())?);
1819        let Ok(metadata) = fs::metadata(&absolute) else {
1820            if ignore_missing {
1821                continue;
1822            }
1823            if !quiet {
1824                print_update_index_needs_update(entry.path.as_bytes());
1825            }
1826            needs_update = true;
1827            continue;
1828        };
1829        if !metadata.is_file() {
1830            if !quiet {
1831                print_update_index_needs_update(entry.path.as_bytes());
1832            }
1833            needs_update = true;
1834            continue;
1835        }
1836        // Stat shortcut: when the cached stat proves the file is unchanged since
1837        // it was staged, its content hashes to the cached oid by construction
1838        // (see `IndexStatCache`'s safety invariant). Skip the read+hash and just
1839        // refresh the stat fields from current metadata — byte-identical to the
1840        // clean arm below, since the oid stamped is the cached one and the
1841        // metadata is the same one that re-stamp would read.
1842        if stat_cache
1843            .reuse_index_entry(entry, &metadata)
1844            .is_some()
1845        {
1846            continue;
1847        }
1848        let body = fs::read(&absolute)?;
1849        let object = EncodedObject::new(ObjectType::Blob, body);
1850        let oid = object.object_id(format)?;
1851        if oid != entry.oid || file_mode(&metadata) != entry.mode {
1852            if !quiet {
1853                print_update_index_needs_update(entry.path.as_bytes());
1854            }
1855            needs_update = true;
1856            if selected_for_update {
1857                let updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
1858                if updated_entry != *entry {
1859                    *entry = updated_entry;
1860                    index_dirty = true;
1861                }
1862            }
1863            continue;
1864        }
1865        let updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
1866        if updated_entry != *entry {
1867            *entry = updated_entry;
1868            index_dirty = true;
1869        }
1870    }
1871    if index_dirty {
1872        fs::write(&index_path, index.write(format)?)?;
1873    }
1874    if needs_update && !quiet {
1875        return Err(GitError::Exit(1));
1876    }
1877    Ok(UpdateIndexResult {
1878        entries: index.entries.len(),
1879        updated: Vec::new(),
1880    })
1881}
1882
1883fn refresh_all_index_paths_parallel(
1884    worktree_root: &Path,
1885    index_path: &Path,
1886    format: ObjectFormat,
1887    mut index: Index,
1888    stat_cache: IndexStatCache,
1889    quiet: bool,
1890    ignore_missing: bool,
1891) -> Result<UpdateIndexResult> {
1892    let prechecks = tracked_only_non_clean_prechecks_parallel(worktree_root, &index, &stat_cache)?;
1893    let mut needs_update = false;
1894    let mut index_dirty = false;
1895    for precheck in prechecks {
1896        match precheck {
1897            TrackedOnlyPrecheck::Deleted(idx) => {
1898                if ignore_missing {
1899                    continue;
1900                }
1901                if !quiet {
1902                    print_update_index_needs_update(index.entries[idx].path.as_bytes());
1903                }
1904                needs_update = true;
1905            }
1906            TrackedOnlyPrecheck::Slow(idx) => {
1907                let entry = &mut index.entries[idx];
1908                let path = entry.path.as_bytes().to_vec();
1909                let absolute = worktree_root.join(repo_path_to_os_path(&path)?);
1910                let Ok(metadata) = fs::metadata(&absolute) else {
1911                    if ignore_missing {
1912                        continue;
1913                    }
1914                    if !quiet {
1915                        print_update_index_needs_update(&path);
1916                    }
1917                    needs_update = true;
1918                    continue;
1919                };
1920                if !metadata.is_file() {
1921                    if !quiet {
1922                        print_update_index_needs_update(&path);
1923                    }
1924                    needs_update = true;
1925                    continue;
1926                }
1927                if stat_cache.reuse_index_entry(entry, &metadata).is_some() {
1928                    continue;
1929                }
1930                let body = fs::read(&absolute)?;
1931                let object = EncodedObject::new(ObjectType::Blob, body);
1932                let oid = object.object_id(format)?;
1933                if oid != entry.oid || file_mode(&metadata) != entry.mode {
1934                    if !quiet {
1935                        print_update_index_needs_update(&path);
1936                    }
1937                    needs_update = true;
1938                    continue;
1939                }
1940                let updated_entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
1941                if updated_entry != *entry {
1942                    *entry = updated_entry;
1943                    index_dirty = true;
1944                }
1945            }
1946        }
1947    }
1948    if index_dirty {
1949        fs::write(index_path, index.write(format)?)?;
1950    }
1951    if needs_update && !quiet {
1952        return Err(GitError::Exit(1));
1953    }
1954    Ok(UpdateIndexResult {
1955        entries: index.entries.len(),
1956        updated: Vec::new(),
1957    })
1958}
1959
1960pub fn update_index_again(
1961    worktree_root: impl AsRef<Path>,
1962    git_dir: impl AsRef<Path>,
1963    format: ObjectFormat,
1964    paths: &[PathBuf],
1965    options: UpdateIndexOptions,
1966) -> Result<UpdateIndexResult> {
1967    let worktree_root = worktree_root.as_ref();
1968    let git_dir = git_dir.as_ref();
1969    let index_path = repository_index_path(git_dir);
1970    if !index_path.exists() {
1971        return Ok(UpdateIndexResult {
1972            entries: 0,
1973            updated: Vec::new(),
1974        });
1975    }
1976    let index = Index::parse(&fs::read(&index_path)?, format)?;
1977    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1978    let head_entries = head_tree_entries(git_dir, format, &db)?;
1979    let selected_paths = selected_git_paths(worktree_root, paths)?;
1980    let mut again_paths = Vec::new();
1981    for entry in &index.entries {
1982        if index_entry_stage(entry) != 0 {
1983            continue;
1984        }
1985        if !selected_paths.is_empty() && !git_path_selected(entry.path.as_bytes(), &selected_paths)
1986        {
1987            continue;
1988        }
1989        let differs_from_head = match head_entries.get(entry.path.as_bytes()) {
1990            Some(head_entry) => head_entry.oid != entry.oid || head_entry.mode != entry.mode,
1991            None => true,
1992        };
1993        if differs_from_head {
1994            again_paths.push(worktree_root.join(repo_path_to_os_path(entry.path.as_bytes())?));
1995        }
1996    }
1997    if again_paths.is_empty() {
1998        return Ok(UpdateIndexResult {
1999            entries: index.entries.len(),
2000            updated: Vec::new(),
2001        });
2002    }
2003    update_index_paths(worktree_root, git_dir, format, &again_paths, options)
2004}
2005
2006pub fn set_index_assume_unchanged_paths(
2007    worktree_root: impl AsRef<Path>,
2008    git_dir: impl AsRef<Path>,
2009    format: ObjectFormat,
2010    paths: &[PathBuf],
2011    assume_unchanged: bool,
2012) -> Result<UpdateIndexResult> {
2013    let worktree_root = worktree_root.as_ref();
2014    let git_dir = git_dir.as_ref();
2015    let index_path = repository_index_path(git_dir);
2016    let mut index = if index_path.exists() {
2017        Index::parse(&fs::read(&index_path)?, format)?
2018    } else {
2019        Index {
2020            version: 2,
2021            entries: Vec::new(),
2022            extensions: Vec::new(),
2023            checksum: None,
2024        }
2025    };
2026    let selected_paths = paths
2027        .iter()
2028        .map(|path| {
2029            let absolute = if path.is_absolute() {
2030                path.clone()
2031            } else {
2032                worktree_root.join(path)
2033            };
2034            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2035                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2036            })?;
2037            git_path_bytes(relative)
2038        })
2039        .collect::<Result<Vec<_>>>()?;
2040    for path in selected_paths {
2041        if let Some(entry) = index.entries.iter_mut().find(|entry| entry.path == path) {
2042            if assume_unchanged {
2043                entry.flags |= INDEX_FLAG_ASSUME_UNCHANGED;
2044            } else {
2045                entry.flags &= !INDEX_FLAG_ASSUME_UNCHANGED;
2046            }
2047        }
2048    }
2049    normalize_index_version_for_extended_flags(&mut index);
2050    fs::write(index_path, index.write(format)?)?;
2051    Ok(UpdateIndexResult {
2052        entries: index.entries.len(),
2053        updated: Vec::new(),
2054    })
2055}
2056
2057fn selected_git_paths(worktree_root: &Path, paths: &[PathBuf]) -> Result<BTreeSet<Vec<u8>>> {
2058    paths
2059        .iter()
2060        .map(|path| {
2061            let absolute = if path.is_absolute() {
2062                path.clone()
2063            } else {
2064                worktree_root.join(path)
2065            };
2066            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2067                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2068            })?;
2069            git_path_bytes(relative)
2070        })
2071        .collect()
2072}
2073
2074fn git_path_selected(path: &[u8], selected_paths: &BTreeSet<Vec<u8>>) -> bool {
2075    selected_paths
2076        .iter()
2077        .any(|selected| path == selected || index_entry_is_under_path(path, selected))
2078}
2079
2080pub fn set_index_skip_worktree_paths(
2081    worktree_root: impl AsRef<Path>,
2082    git_dir: impl AsRef<Path>,
2083    format: ObjectFormat,
2084    paths: &[PathBuf],
2085    skip_worktree: bool,
2086) -> Result<UpdateIndexResult> {
2087    let worktree_root = worktree_root.as_ref();
2088    let git_dir = git_dir.as_ref();
2089    let index_path = repository_index_path(git_dir);
2090    let mut index = if index_path.exists() {
2091        Index::parse(&fs::read(&index_path)?, format)?
2092    } else {
2093        Index {
2094            version: 2,
2095            entries: Vec::new(),
2096            extensions: Vec::new(),
2097            checksum: None,
2098        }
2099    };
2100    let selected_paths = paths
2101        .iter()
2102        .map(|path| {
2103            let absolute = if path.is_absolute() {
2104                path.clone()
2105            } else {
2106                worktree_root.join(path)
2107            };
2108            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2109                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2110            })?;
2111            git_path_bytes(relative)
2112        })
2113        .collect::<Result<Vec<_>>>()?;
2114    for path in selected_paths {
2115        if let Some(entry) = index.entries.iter_mut().find(|entry| entry.path == path) {
2116            if skip_worktree {
2117                entry.flags |= INDEX_FLAG_EXTENDED;
2118                entry.flags_extended |= INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
2119            } else {
2120                entry.flags_extended &= !INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
2121                if entry.flags_extended == 0 {
2122                    entry.flags &= !INDEX_FLAG_EXTENDED;
2123                }
2124            }
2125        }
2126    }
2127    normalize_index_version_for_extended_flags(&mut index);
2128    fs::write(index_path, index.write(format)?)?;
2129    Ok(UpdateIndexResult {
2130        entries: index.entries.len(),
2131        updated: Vec::new(),
2132    })
2133}
2134
2135pub fn set_index_fsmonitor_valid_paths(
2136    worktree_root: impl AsRef<Path>,
2137    git_dir: impl AsRef<Path>,
2138    format: ObjectFormat,
2139    paths: &[PathBuf],
2140    _fsmonitor_valid: bool,
2141) -> Result<UpdateIndexResult> {
2142    let worktree_root = worktree_root.as_ref();
2143    let git_dir = git_dir.as_ref();
2144    let index_path = repository_index_path(git_dir);
2145    let index = if index_path.exists() {
2146        Index::parse(&fs::read(&index_path)?, format)?
2147    } else {
2148        Index {
2149            version: 2,
2150            entries: Vec::new(),
2151            extensions: Vec::new(),
2152            checksum: None,
2153        }
2154    };
2155    let selected_paths = paths
2156        .iter()
2157        .map(|path| {
2158            let absolute = if path.is_absolute() {
2159                path.clone()
2160            } else {
2161                worktree_root.join(path)
2162            };
2163            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2164                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2165            })?;
2166            git_path_bytes(relative)
2167        })
2168        .collect::<Result<Vec<_>>>()?;
2169    for path in selected_paths {
2170        if !index.entries.iter().any(|entry| entry.path == path) {
2171            eprintln!(
2172                "fatal: Unable to mark file {}",
2173                String::from_utf8_lossy(&path)
2174            );
2175            return Err(GitError::Exit(128));
2176        }
2177    }
2178    Ok(UpdateIndexResult {
2179        entries: index.entries.len(),
2180        updated: Vec::new(),
2181    })
2182}
2183
2184pub fn set_index_version(
2185    git_dir: impl AsRef<Path>,
2186    format: ObjectFormat,
2187    version: u32,
2188    verbose: bool,
2189) -> Result<UpdateIndexResult> {
2190    if !matches!(version, 2..=4) {
2191        return Err(GitError::Unsupported(format!(
2192            "update-index currently supports --index-version 2, 3, or 4, got {version}"
2193        )));
2194    }
2195    let git_dir = git_dir.as_ref();
2196    let index_path = repository_index_path(git_dir);
2197    let mut index = if index_path.exists() {
2198        Index::parse(&fs::read(&index_path)?, format)?
2199    } else {
2200        Index {
2201            version: 2,
2202            entries: Vec::new(),
2203            extensions: Vec::new(),
2204            checksum: None,
2205        }
2206    };
2207    // git reports the transition unconditionally under --verbose, even when the
2208    // requested version equals the current one ("was 4, set to 4").
2209    let previous = index.version;
2210    if verbose {
2211        println!("index-version: was {previous}, set to {version}");
2212    }
2213    index.version = version;
2214    normalize_index_version_for_extended_flags(&mut index);
2215    fs::write(index_path, index.write(format)?)?;
2216    Ok(UpdateIndexResult {
2217        entries: index.entries.len(),
2218        updated: Vec::new(),
2219    })
2220}
2221
2222pub fn force_write_index(
2223    git_dir: impl AsRef<Path>,
2224    format: ObjectFormat,
2225) -> Result<UpdateIndexResult> {
2226    let git_dir = git_dir.as_ref();
2227    let index_path = repository_index_path(git_dir);
2228    let mut index = if index_path.exists() {
2229        Index::parse(&fs::read(&index_path)?, format)?
2230    } else {
2231        Index {
2232            version: 2,
2233            entries: Vec::new(),
2234            extensions: Vec::new(),
2235            checksum: None,
2236        }
2237    };
2238    normalize_index_version_for_extended_flags(&mut index);
2239    fs::write(index_path, index.write(format)?)?;
2240    Ok(UpdateIndexResult {
2241        entries: index.entries.len(),
2242        updated: Vec::new(),
2243    })
2244}
2245
/// Returns a copy of the raw index `extensions` bytes with every `TREE`
/// (cache-tree) extension removed.
///
/// `extensions` is read as a sequence of records, each made of a 4-byte
/// signature, a 4-byte big-endian payload size, and the payload itself. All
/// records other than `TREE` are copied through unchanged and in order.
///
/// If the data is malformed (a truncated header, or a payload size that runs
/// past the end of the buffer) an empty vector is returned, i.e. all
/// extensions are dropped.
///
/// The buffer is consumed by slice splitting rather than by computing
/// `offset + 8 + size`, so a huge size field cannot overflow `usize` on
/// 32-bit targets.
fn index_extensions_without_cache_tree(extensions: &[u8]) -> Vec<u8> {
    const HEADER_LEN: usize = 8;

    let mut filtered = Vec::with_capacity(extensions.len());
    let mut rest = extensions;
    while !rest.is_empty() {
        if rest.len() < HEADER_LEN {
            return Vec::new();
        }
        let (header, after_header) = rest.split_at(HEADER_LEN);
        // u32 -> usize is lossless on every target with >= 32-bit pointers.
        let size = u32::from_be_bytes([header[4], header[5], header[6], header[7]]) as usize;
        if size > after_header.len() {
            return Vec::new();
        }
        let (payload, remaining) = after_header.split_at(size);
        if &header[..4] != b"TREE" {
            filtered.extend_from_slice(header);
            filtered.extend_from_slice(payload);
        }
        rest = remaining;
    }
    filtered
}
2271
2272pub fn update_index_cacheinfo(
2273    git_dir: impl AsRef<Path>,
2274    format: ObjectFormat,
2275    entries: &[CacheInfoEntry],
2276    add: bool,
2277    verbose: bool,
2278) -> Result<UpdateIndexResult> {
2279    let git_dir = git_dir.as_ref();
2280    let index_path = repository_index_path(git_dir);
2281    let mut index = if index_path.exists() {
2282        Index::parse(&fs::read(&index_path)?, format)?
2283    } else {
2284        Index {
2285            version: 2,
2286            entries: Vec::new(),
2287            extensions: Vec::new(),
2288            checksum: None,
2289        }
2290    };
2291    let mut updated = Vec::new();
2292    let mut reports: Vec<String> = Vec::new();
2293    for cacheinfo in entries {
2294        if !add
2295            && !index
2296                .entries
2297                .iter()
2298                .any(|existing| existing.path == cacheinfo.path)
2299        {
2300            let path = String::from_utf8_lossy(&cacheinfo.path);
2301            eprintln!("error: {path}: cannot add to the index - missing --add option?");
2302            eprintln!("fatal: git update-index: --cacheinfo cannot add {path}");
2303            return Err(GitError::Exit(128));
2304        }
2305        let flags = index_flags(cacheinfo.path.len(), cacheinfo.stage);
2306        let entry = IndexEntry {
2307            ctime_seconds: 0,
2308            ctime_nanoseconds: 0,
2309            mtime_seconds: 0,
2310            mtime_nanoseconds: 0,
2311            dev: 0,
2312            ino: 0,
2313            mode: cacheinfo.mode,
2314            uid: 0,
2315            gid: 0,
2316            size: 0,
2317            oid: cacheinfo.oid,
2318            flags,
2319            flags_extended: 0,
2320            path: BString::from(cacheinfo.path.as_slice()),
2321        };
2322        index.entries.retain(|existing| {
2323            existing.path != cacheinfo.path || index_entry_stage(existing) != cacheinfo.stage
2324        });
2325        index.entries.push(entry);
2326        updated.push(cacheinfo.oid);
2327        // git's add_cacheinfo() calls report("add '%s'") *after* the entry is
2328        // staged, regardless of whether the subsequent index write succeeds.
2329        reports.push(format!(
2330            "add '{}'",
2331            String::from_utf8_lossy(&cacheinfo.path)
2332        ));
2333    }
2334    index
2335        .entries
2336        .sort_by(|left, right| left.path.cmp(&right.path));
2337    // git refuses to write an index entry whose object id is the null oid:
2338    // do_write_index() emits `error: cache entry has null sha1: <path>` and
2339    // returns nonzero, leaving the on-disk index untouched. The verbose `add`
2340    // line has already been printed by then.
2341    let null_entry = index.entries.iter().find(|entry| entry.oid.is_null());
2342    if let Some(entry) = null_entry {
2343        if verbose {
2344            flush_update_index_reports(&reports)?;
2345        }
2346        eprintln!(
2347            "error: cache entry has null sha1: {}",
2348            String::from_utf8_lossy(&entry.path)
2349        );
2350        return Err(GitError::Exit(128));
2351    }
2352    fs::write(index_path, index.write(format)?)?;
2353    if verbose {
2354        flush_update_index_reports(&reports)?;
2355    }
2356    Ok(UpdateIndexResult {
2357        entries: index.entries.len(),
2358        updated,
2359    })
2360}
2361
2362fn flush_update_index_reports(reports: &[String]) -> Result<()> {
2363    let mut stdout = std::io::stdout().lock();
2364    for line in reports {
2365        writeln!(stdout, "{line}")?;
2366    }
2367    stdout.flush()?;
2368    Ok(())
2369}
2370
2371pub fn update_index_index_info(
2372    git_dir: impl AsRef<Path>,
2373    format: ObjectFormat,
2374    records: &[IndexInfoRecord],
2375) -> Result<UpdateIndexResult> {
2376    let git_dir = git_dir.as_ref();
2377    let index_path = repository_index_path(git_dir);
2378    let mut index = if index_path.exists() {
2379        Index::parse(&fs::read(&index_path)?, format)?
2380    } else {
2381        Index {
2382            version: 2,
2383            entries: Vec::new(),
2384            extensions: Vec::new(),
2385            checksum: None,
2386        }
2387    };
2388    let mut updated = Vec::new();
2389    for record in records {
2390        match record {
2391            IndexInfoRecord::Remove { path } => {
2392                index.entries.retain(|existing| existing.path != *path);
2393            }
2394            IndexInfoRecord::Add(cacheinfo) => {
2395                let flags = index_flags(cacheinfo.path.len(), cacheinfo.stage);
2396                let entry = IndexEntry {
2397                    ctime_seconds: 0,
2398                    ctime_nanoseconds: 0,
2399                    mtime_seconds: 0,
2400                    mtime_nanoseconds: 0,
2401                    dev: 0,
2402                    ino: 0,
2403                    mode: cacheinfo.mode,
2404                    uid: 0,
2405                    gid: 0,
2406                    size: 0,
2407                    oid: cacheinfo.oid,
2408                    flags,
2409                    flags_extended: 0,
2410                    path: BString::from(cacheinfo.path.as_slice()),
2411                };
2412                if cacheinfo.stage == 0 {
2413                    index
2414                        .entries
2415                        .retain(|existing| existing.path != cacheinfo.path);
2416                } else {
2417                    index.entries.retain(|existing| {
2418                        existing.path != cacheinfo.path
2419                            || index_entry_stage(existing) != cacheinfo.stage
2420                    });
2421                }
2422                index.entries.push(entry);
2423                updated.push(cacheinfo.oid);
2424            }
2425        }
2426    }
2427    index.entries.sort_by(|left, right| {
2428        left.path
2429            .cmp(&right.path)
2430            .then_with(|| index_entry_stage(left).cmp(&index_entry_stage(right)))
2431    });
2432    fs::write(index_path, index.write(format)?)?;
2433    Ok(UpdateIndexResult {
2434        entries: index.entries.len(),
2435        updated,
2436    })
2437}
2438
/// Packs a merge stage and a path length into the 16-bit index entry flags word.
///
/// Only the low two bits of `stage` are kept and placed in bits 12-13. The path
/// length fills the low 12 bits; lengths above `0xfff` are stored as `0xfff`.
fn index_flags(path_len: usize, stage: u16) -> u16 {
    const NAME_LENGTH_MASK: u16 = 0x0fff;
    const STAGE_SHIFT: u32 = 12;

    let stage_bits = (stage & 0x3) << STAGE_SHIFT;
    let capped_len = path_len.min(usize::from(NAME_LENGTH_MASK));
    let name_bits = (capped_len as u16) & NAME_LENGTH_MASK;
    stage_bits | name_bits
}
2442
/// Top bit of an entry's `flags` word.
/// NOTE(review): presumably git's "assume valid"/assume-unchanged bit; it is not
/// read anywhere in this part of the file — confirm against its users.
const INDEX_FLAG_ASSUME_UNCHANGED: u16 = 0x8000;
/// Bit of an entry's `flags` word that is tested before `flags_extended` is
/// trusted (see `index_entry_skip_worktree`).
const INDEX_FLAG_EXTENDED: u16 = 0x4000;
/// Skip-worktree bit. Note that it is tested against `flags_extended`, not
/// `flags`, even though it has the same numeric value as `INDEX_FLAG_EXTENDED`.
const INDEX_EXTENDED_FLAG_SKIP_WORKTREE: u16 = 0x4000;
2446
2447fn normalize_index_version_for_extended_flags(index: &mut Index) {
2448    let has_extended_flags = index
2449        .entries
2450        .iter()
2451        .any(|entry| entry.flags & INDEX_FLAG_EXTENDED != 0 || entry.flags_extended != 0);
2452    if has_extended_flags && index.version < 3 {
2453        index.version = 3;
2454    } else if !has_extended_flags && index.version == 3 {
2455        index.version = 2;
2456    }
2457}
2458
2459fn index_entry_stage(entry: &IndexEntry) -> u16 {
2460    (entry.flags >> 12) & 0x3
2461}
2462
2463/// The oid of the stage-0 entry in `range` (the path's currently-tracked blob),
2464/// if any. Used by the safecrlf check to fetch `has_crlf_in_index`.
2465fn stage0_oid_in_range(
2466    entries: &[IndexEntry],
2467    range: std::ops::Range<usize>,
2468) -> Option<ObjectId> {
2469    entries[range]
2470        .iter()
2471        .find(|entry| index_entry_stage(entry) == 0)
2472        .map(|entry| entry.oid)
2473}
2474
2475fn index_entry_skip_worktree(entry: &IndexEntry) -> bool {
2476    entry.flags & INDEX_FLAG_EXTENDED != 0
2477        && entry.flags_extended & INDEX_EXTENDED_FLAG_SKIP_WORKTREE != 0
2478}
2479
/// Writes the two-line `error:` / `fatal:` diagnostic for a path that could not
/// be processed to stderr. Non-UTF-8 path bytes are rendered lossily.
fn print_update_index_path_error(path: &[u8], message: &str) {
    let shown = String::from_utf8_lossy(path);
    eprintln!("error: {}: {}", shown, message);
    eprintln!("fatal: Unable to process path {}", shown);
}
2485
/// Prints the `<path>: needs update` line to stdout. Non-UTF-8 path bytes are
/// rendered lossily.
fn print_update_index_needs_update(path: &[u8]) {
    println!("{}: needs update", String::from_utf8_lossy(path));
}
2490
2491pub fn write_tree_from_index(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Result<ObjectId> {
2492    write_tree_from_index_with_options(git_dir, format, WriteTreeOptions::default())
2493}
2494
2495pub fn write_tree_from_index_with_odb(
2496    git_dir: impl AsRef<Path>,
2497    format: ObjectFormat,
2498    odb: &FileObjectDatabase,
2499) -> Result<ObjectId> {
2500    write_tree_from_index_with_options_and_odb(
2501        git_dir.as_ref(),
2502        format,
2503        WriteTreeOptions::default(),
2504        odb,
2505    )
2506}
2507
2508pub fn write_tree_from_index_with_options(
2509    git_dir: impl AsRef<Path>,
2510    format: ObjectFormat,
2511    options: WriteTreeOptions,
2512) -> Result<ObjectId> {
2513    let git_dir = git_dir.as_ref();
2514    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
2515    write_tree_from_index_with_options_and_odb(git_dir, format, options, &odb)
2516}
2517
2518fn write_tree_from_index_with_options_and_odb(
2519    git_dir: &Path,
2520    format: ObjectFormat,
2521    options: WriteTreeOptions,
2522    odb: &FileObjectDatabase,
2523) -> Result<ObjectId> {
2524    let index_path = repository_index_path(git_dir);
2525    // A repository with no index file yet (fresh init, nothing staged) is an
2526    // empty index: `git write-tree` / `git commit --allow-empty` produce the
2527    // empty tree rather than erroring.
2528    let index_bytes = match fs::read(&index_path) {
2529        Ok(bytes) => bytes,
2530        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
2531            let mut checker = odb.presence_checker();
2532            let empty: &[WriteTreeEntry<'_>] = &[];
2533            return write_tree_entries_stream(
2534                empty,
2535                b"",
2536                None,
2537                odb,
2538                &mut checker,
2539                options.missing_ok,
2540            );
2541        }
2542        Err(err) => return Err(err.into()),
2543    };
2544    let mut checker = odb.presence_checker();
2545    match BorrowedIndex::parse(&index_bytes, format) {
2546        Ok(index) => write_tree_from_borrowed_index(&index, format, &options, odb, &mut checker),
2547        Err(GitError::Unsupported(_)) => {
2548            let index = Index::parse(&index_bytes, format)?;
2549            write_tree_from_owned_index(&index, format, &options, odb, &mut checker)
2550        }
2551        Err(err) => Err(err),
2552    }
2553}
2554
2555fn write_tree_from_borrowed_index(
2556    index: &BorrowedIndex<'_>,
2557    format: ObjectFormat,
2558    options: &WriteTreeOptions,
2559    odb: &FileObjectDatabase,
2560    checker: &mut ObjectPresenceChecker,
2561) -> Result<ObjectId> {
2562    let cache_tree = if options.prefix.is_none() {
2563        index.cache_tree(format).ok().flatten()
2564    } else {
2565        None
2566    };
2567    if options.prefix.is_none() && !index.entries.iter().any(|entry| entry.is_intent_to_add()) {
2568        return write_tree_entries_stream(
2569            &index.entries,
2570            b"",
2571            cache_tree.as_ref(),
2572            odb,
2573            checker,
2574            options.missing_ok,
2575        );
2576    }
2577    // intent-to-add entries (`git add -N`, `git reset -N`) are placeholders that do
2578    // NOT belong in a written tree — git's cache_tree_update skips CE_INTENT_TO_ADD.
2579    // Drop them before building, so `write-tree` succeeds and the tree omits them
2580    // (their empty-blob oid is also typically absent from the odb).
2581    let entries = write_tree_entries_for_prefix(
2582        index
2583            .entries
2584            .iter()
2585            .filter(|entry| !entry.is_intent_to_add()),
2586        options.prefix.as_deref(),
2587    )?;
2588    write_tree_entries_stream(
2589        &entries,
2590        b"",
2591        cache_tree.as_ref(),
2592        odb,
2593        checker,
2594        options.missing_ok,
2595    )
2596}
2597
2598fn write_tree_from_owned_index(
2599    index: &Index,
2600    format: ObjectFormat,
2601    options: &WriteTreeOptions,
2602    odb: &FileObjectDatabase,
2603    checker: &mut ObjectPresenceChecker,
2604) -> Result<ObjectId> {
2605    let cache_tree = if options.prefix.is_none() {
2606        index.cache_tree(format).ok().flatten()
2607    } else {
2608        None
2609    };
2610    if options.prefix.is_none() && !index.entries.iter().any(|entry| entry.is_intent_to_add()) {
2611        return write_tree_entries_stream(
2612            &index.entries,
2613            b"",
2614            cache_tree.as_ref(),
2615            odb,
2616            checker,
2617            options.missing_ok,
2618        );
2619    }
2620    let entries = write_tree_entries_for_prefix(
2621        index
2622            .entries
2623            .iter()
2624            .filter(|entry| !entry.is_intent_to_add()),
2625        options.prefix.as_deref(),
2626    )?;
2627    write_tree_entries_stream(
2628        &entries,
2629        b"",
2630        cache_tree.as_ref(),
2631        odb,
2632        checker,
2633        options.missing_ok,
2634    )
2635}
2636
/// Borrowed (path, mode, oid) triple handed to the tree writer.
///
/// Built by `write_tree_entries_for_prefix`; when a prefix is applied, `path`
/// is the part of the index path below that prefix.
#[derive(Clone, Copy)]
struct WriteTreeEntry<'a> {
    /// Entry path as raw bytes, borrowed from the entry it was taken from.
    path: &'a [u8],
    /// File mode recorded for the entry.
    mode: u32,
    /// Object id recorded for the entry.
    oid: ObjectId,
}
2643
/// The minimal view of an index entry that tree writing needs.
///
/// Implemented for owned entries, borrowed entries and `WriteTreeEntry`, so the
/// tree writer can be generic over all three.
trait WriteTreeIndexEntry {
    /// Entry path as raw bytes.
    fn write_tree_path(&self) -> &[u8];
    /// File mode recorded for the entry.
    fn write_tree_mode(&self) -> u32;
    /// Object id recorded for the entry.
    fn write_tree_oid(&self) -> ObjectId;
}
2649
/// Owned index entries expose their stored path, mode and oid unchanged.
impl WriteTreeIndexEntry for IndexEntry {
    fn write_tree_path(&self) -> &[u8] {
        // `path` is a `BString`; hand out its raw bytes.
        self.path.as_bytes()
    }

    fn write_tree_mode(&self) -> u32 {
        self.mode
    }

    fn write_tree_oid(&self) -> ObjectId {
        self.oid
    }
}
2663
/// Borrowed index entries expose their stored path, mode and oid unchanged.
impl WriteTreeIndexEntry for IndexEntryRef<'_> {
    fn write_tree_path(&self) -> &[u8] {
        // The borrowed entry's `path` is returned as-is as a byte slice.
        self.path
    }

    fn write_tree_mode(&self) -> u32 {
        self.mode
    }

    fn write_tree_oid(&self) -> ObjectId {
        self.oid
    }
}
2677
/// Lets already-projected `WriteTreeEntry` values flow through the same generic
/// tree writer as raw index entries.
impl WriteTreeIndexEntry for WriteTreeEntry<'_> {
    fn write_tree_path(&self) -> &[u8] {
        self.path
    }

    fn write_tree_mode(&self) -> u32 {
        self.mode
    }

    fn write_tree_oid(&self) -> ObjectId {
        self.oid
    }
}
2691
2692fn write_tree_entries_for_prefix<'a, E>(
2693    entries: impl IntoIterator<Item = &'a E>,
2694    prefix: Option<&[u8]>,
2695) -> Result<Vec<WriteTreeEntry<'a>>>
2696where
2697    E: WriteTreeIndexEntry + 'a,
2698{
2699    let Some(prefix) = prefix else {
2700        return Ok(entries
2701            .into_iter()
2702            .map(|entry| WriteTreeEntry {
2703                path: entry.write_tree_path(),
2704                mode: entry.write_tree_mode(),
2705                oid: entry.write_tree_oid(),
2706            })
2707            .collect());
2708    };
2709    let trimmed_len = prefix
2710        .iter()
2711        .rposition(|byte| *byte != b'/')
2712        .map(|idx| idx + 1)
2713        .unwrap_or(0);
2714    let trimmed = &prefix[..trimmed_len];
2715    if trimmed.is_empty() {
2716        return Ok(entries
2717            .into_iter()
2718            .map(|entry| WriteTreeEntry {
2719                path: entry.write_tree_path(),
2720                mode: entry.write_tree_mode(),
2721                oid: entry.write_tree_oid(),
2722            })
2723            .collect());
2724    }
2725    let mut prefixed = Vec::new();
2726    for entry in entries {
2727        let Some(remainder) = entry.write_tree_path().strip_prefix(trimmed) else {
2728            continue;
2729        };
2730        let Some(stripped) = remainder.strip_prefix(b"/") else {
2731            continue;
2732        };
2733        if stripped.is_empty() {
2734            continue;
2735        }
2736        prefixed.push(WriteTreeEntry {
2737            path: stripped,
2738            mode: entry.write_tree_mode(),
2739            oid: entry.write_tree_oid(),
2740        });
2741    }
2742    if prefixed.is_empty() {
2743        eprintln!(
2744            "fatal: git-write-tree: prefix {} not found",
2745            String::from_utf8_lossy(prefix)
2746        );
2747        return Err(GitError::Exit(128));
2748    }
2749    Ok(prefixed)
2750}
2751
2752fn write_tree_entries_stream<E>(
2753    entries: &[E],
2754    prefix: &[u8],
2755    cache_tree: Option<&CacheTree>,
2756    odb: &FileObjectDatabase,
2757    checker: &mut ObjectPresenceChecker,
2758    missing_ok: bool,
2759) -> Result<ObjectId>
2760where
2761    E: WriteTreeIndexEntry,
2762{
2763    if let Some(oid) = valid_cache_tree_oid(cache_tree, entries.len()) {
2764        return Ok(oid);
2765    }
2766
2767    let mut tree_entries = Vec::new();
2768    let mut index = 0usize;
2769    while index < entries.len() {
2770        let entry = &entries[index];
2771        let path = entry.write_tree_path();
2772        let Some(remainder) = path.strip_prefix(prefix) else {
2773            return Err(GitError::InvalidPath(format!(
2774                "invalid index path {}",
2775                String::from_utf8_lossy(path)
2776            )));
2777        };
2778        if remainder.is_empty() || remainder[0] == b'/' {
2779            return Err(GitError::InvalidPath(format!(
2780                "invalid index path {}",
2781                String::from_utf8_lossy(path)
2782            )));
2783        }
2784
2785        if let Some(slash) = remainder.iter().position(|byte| *byte == b'/') {
2786            let name = &remainder[..slash];
2787            if name.is_empty() {
2788                return Err(GitError::InvalidPath(format!(
2789                    "invalid index path {}",
2790                    String::from_utf8_lossy(path)
2791                )));
2792            }
2793            let start = index;
2794            let child_cache = cache_tree.and_then(|tree| {
2795                tree.subtrees
2796                    .iter()
2797                    .find(|child| child.name.as_slice() == name)
2798                    .map(|child| &child.tree)
2799            });
2800            if let Some(cached_count) = valid_cache_tree_entry_count(child_cache) {
2801                let end = start.saturating_add(cached_count);
2802                if cached_count > 0
2803                    && end <= entries.len()
2804                    && same_tree_component(entries[end - 1].write_tree_path(), prefix, name)?
2805                    && (end == entries.len()
2806                        || !same_tree_component(entries[end].write_tree_path(), prefix, name)?)
2807                {
2808                    index = end;
2809                } else {
2810                    index += 1;
2811                    while index < entries.len()
2812                        && same_tree_component(entries[index].write_tree_path(), prefix, name)?
2813                    {
2814                        index += 1;
2815                    }
2816                }
2817            } else {
2818                index += 1;
2819                while index < entries.len()
2820                    && same_tree_component(entries[index].write_tree_path(), prefix, name)?
2821                {
2822                    index += 1;
2823                }
2824            }
2825            if let Some(oid) = valid_cache_tree_oid(child_cache, index - start) {
2826                tree_entries.push(TreeEntry {
2827                    mode: 0o040000,
2828                    name: BString::from(name),
2829                    oid,
2830                });
2831                continue;
2832            }
2833            let mut child_prefix = Vec::with_capacity(prefix.len() + name.len() + 1);
2834            child_prefix.extend_from_slice(prefix);
2835            child_prefix.extend_from_slice(name);
2836            child_prefix.push(b'/');
2837            let oid = write_tree_entries_stream(
2838                &entries[start..index],
2839                &child_prefix,
2840                child_cache,
2841                odb,
2842                checker,
2843                missing_ok,
2844            )?;
2845            tree_entries.push(TreeEntry {
2846                mode: 0o040000,
2847                name: BString::from(name),
2848                oid,
2849            });
2850            continue;
2851        }
2852
2853        let mode = entry.write_tree_mode();
2854        let oid = entry.write_tree_oid();
2855        if !missing_ok && mode != 0o160000 && !checker.contains(&oid)? {
2856            eprintln!(
2857                "error: invalid object {:o} {} for '{}'",
2858                mode,
2859                oid,
2860                String::from_utf8_lossy(path)
2861            );
2862            eprintln!("fatal: git-write-tree: error building trees");
2863            return Err(GitError::Exit(128));
2864        }
2865        tree_entries.push(TreeEntry {
2866            mode,
2867            name: BString::from(remainder),
2868            oid,
2869        });
2870        index += 1;
2871    }
2872
2873    tree_entries.sort_by(|left, right| {
2874        git_tree_entry_cmp(
2875            left.name.as_bytes(),
2876            left.mode,
2877            right.name.as_bytes(),
2878            right.mode,
2879        )
2880    });
2881    odb.write_object(EncodedObject::new(
2882        ObjectType::Tree,
2883        Tree {
2884            entries: tree_entries,
2885        }
2886        .write(),
2887    ))
2888}
2889
2890fn valid_cache_tree_oid(tree: Option<&CacheTree>, entry_count: usize) -> Option<ObjectId> {
2891    let tree = tree?;
2892    if valid_cache_tree_entry_count(Some(tree))? != entry_count {
2893        return None;
2894    }
2895    tree.oid
2896}
2897
2898fn valid_cache_tree_entry_count(tree: Option<&CacheTree>) -> Option<usize> {
2899    let tree = tree?;
2900    if tree.entry_count < 0 || tree.oid.is_none() {
2901        return None;
2902    }
2903    Some(tree.entry_count as usize)
2904}
2905
2906fn same_tree_component(path: &[u8], prefix: &[u8], name: &[u8]) -> Result<bool> {
2907    let Some(remainder) = path.strip_prefix(prefix) else {
2908        return Err(GitError::InvalidPath(format!(
2909            "invalid index path {}",
2910            String::from_utf8_lossy(path)
2911        )));
2912    };
2913    Ok(remainder.starts_with(name) && remainder.get(name.len()) == Some(&b'/'))
2914}
2915
2916pub fn short_status(
2917    worktree_root: impl AsRef<Path>,
2918    git_dir: impl AsRef<Path>,
2919    format: ObjectFormat,
2920) -> Result<Vec<ShortStatusEntry>> {
2921    short_status_with_options(
2922        worktree_root,
2923        git_dir,
2924        format,
2925        ShortStatusOptions::default(),
2926    )
2927}
2928
2929/// Compare one expected tracked entry to the worktree path named by `path`.
2930///
2931/// `path` is repository-relative and uses the platform path representation. For
2932/// callers that already carry git's byte path form, use
2933/// [`worktree_entry_state_by_git_path`].
2934pub fn worktree_entry_state(
2935    worktree_root: impl AsRef<Path>,
2936    git_dir: impl AsRef<Path>,
2937    format: ObjectFormat,
2938    path: impl AsRef<Path>,
2939    expected_oid: &ObjectId,
2940    expected_mode: u32,
2941    index_probe: Option<&IndexStatProbe>,
2942) -> Result<WorktreeEntryState> {
2943    let path = path.as_ref();
2944    if path.is_absolute() {
2945        return Err(GitError::InvalidPath(format!(
2946            "worktree entry path {} is absolute",
2947            path.display()
2948        )));
2949    }
2950    let git_path = git_path_bytes(path)?;
2951    worktree_entry_state_by_git_path(
2952        worktree_root,
2953        git_dir,
2954        format,
2955        &git_path,
2956        expected_oid,
2957        expected_mode,
2958        index_probe,
2959    )
2960}
2961
2962/// Compare one expected tracked entry to the worktree path named by a
2963/// repository-relative git path (`/` separators, raw bytes).
2964///
2965/// The comparison uses the same clean-filter, symlink-target, gitlink, and
2966/// racy-clean stat shortcut rules as [`short_status_with_options`].
2967pub fn worktree_entry_state_by_git_path(
2968    worktree_root: impl AsRef<Path>,
2969    git_dir: impl AsRef<Path>,
2970    format: ObjectFormat,
2971    git_path: &[u8],
2972    expected_oid: &ObjectId,
2973    expected_mode: u32,
2974    index_probe: Option<&IndexStatProbe>,
2975) -> Result<WorktreeEntryState> {
2976    let worktree_root = worktree_root.as_ref();
2977    let git_dir = git_dir.as_ref();
2978    let stat_cache =
2979        index_probe.and_then(|probe| probe.stat_cache_for(git_path, expected_oid, expected_mode));
2980    let Some(worktree_entry) = worktree_entry_for_git_path(
2981        worktree_root,
2982        git_dir,
2983        format,
2984        git_path,
2985        expected_oid,
2986        expected_mode,
2987        stat_cache.as_ref(),
2988    )?
2989    else {
2990        return Ok(WorktreeEntryState::Deleted);
2991    };
2992    if worktree_entry.mode == expected_mode && worktree_entry.oid == *expected_oid {
2993        Ok(WorktreeEntryState::Clean)
2994    } else {
2995        Ok(WorktreeEntryState::Modified)
2996    }
2997}
2998
2999pub fn short_status_with_options(
3000    worktree_root: impl AsRef<Path>,
3001    git_dir: impl AsRef<Path>,
3002    format: ObjectFormat,
3003    options: ShortStatusOptions,
3004) -> Result<Vec<ShortStatusEntry>> {
3005    let worktree_root = worktree_root.as_ref();
3006    let git_dir = git_dir.as_ref();
3007    let db = FileObjectDatabase::from_git_dir(git_dir, format);
3008    if !options.include_ignored
3009        && let Some(entries) = short_status_borrowed_head_matches_index_if_possible(
3010            worktree_root,
3011            git_dir,
3012            format,
3013            &db,
3014            options.untracked_mode,
3015        )?
3016    {
3017        return Ok(entries);
3018    }
3019    // Parse the index once: the stat cache lets the worktree walk skip
3020    // re-hashing files whose stat proves they are unchanged since staging
3021    // (git's racy-git shortcut). When HEAD matches the index, the status
3022    // comparison can stream directly from the parsed index and avoid building a
3023    // second path-sorted copy of every tracked entry.
3024    let (parsed_index, stat_cache, head_matches_index) =
3025        read_index_with_stat_cache(git_dir, format, &db)?;
3026    if head_matches_index && !options.include_ignored {
3027        let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
3028        let entries = short_status_tracked_only(
3029            worktree_root,
3030            git_dir,
3031            format,
3032            &db,
3033            &parsed_index,
3034            &stat_cache,
3035            true,
3036            options.untracked_mode,
3037        );
3038        let mut entries = entries?;
3039        let untracked_paths = status_untracked_paths_from_index(
3040            worktree_root,
3041            git_dir,
3042            &parsed_index,
3043            &stat_cache,
3044            &mut ignores,
3045            options.untracked_mode,
3046        )?;
3047        for path in untracked_paths {
3048            entries.push(ShortStatusEntry {
3049                index: b'?',
3050                worktree: b'?',
3051                path,
3052                head_mode: None,
3053                index_mode: None,
3054                worktree_mode: None,
3055                head_oid: None,
3056                index_oid: None,
3057                submodule: None,
3058            });
3059        }
3060        entries.sort_by(|left, right| {
3061            status_sort_category(left)
3062                .cmp(&status_sort_category(right))
3063                .then_with(|| left.path.cmp(&right.path))
3064        });
3065        return Ok(entries);
3066    }
3067    let index = index_entries_from_index(parsed_index);
3068    let head = if head_matches_index {
3069        None
3070    } else {
3071        Some(head_tree_entries(git_dir, format, &db)?)
3072    };
3073    let tracked_paths = if options.untracked_mode == StatusUntrackedMode::None {
3074        Some(index.keys().cloned().collect::<BTreeSet<_>>())
3075    } else {
3076        None
3077    };
3078    let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
3079    let (worktree, submodule_dirt_map, tracked_presence) =
3080        status_worktree_entries_with_submodule_dirt(
3081            worktree_root,
3082            git_dir,
3083            format,
3084            &stat_cache,
3085            tracked_paths.as_ref(),
3086            Some(&mut ignores),
3087        )?;
3088    let mut entries = Vec::new();
3089    if head_matches_index {
3090        collect_status_entries_head_matches_index(
3091            &index,
3092            &worktree,
3093            &tracked_presence,
3094            &submodule_dirt_map,
3095            options.untracked_mode,
3096            &mut entries,
3097        );
3098    } else if let Some(head) = head.as_ref() {
3099        collect_status_entries_with_head(
3100            StatusComparisonInputs {
3101                head,
3102                index: &index,
3103                worktree: &worktree,
3104                tracked_presence: &tracked_presence,
3105                submodule_dirt_map: &submodule_dirt_map,
3106                ignores: &ignores,
3107            },
3108            options.untracked_mode,
3109            &mut entries,
3110        );
3111    }
3112    if options.include_ignored {
3113        let ignored_paths = ignored_untracked_paths(worktree_root, git_dir, &index, &ignores, true)?;
3114        let ignored_paths: Vec<Vec<u8>> = match options.ignored_mode {
3115            StatusIgnoredMode::Matching => ignored_paths,
3116            StatusIgnoredMode::Traditional => {
3117                let mut rolled = BTreeSet::new();
3118                for path in ignored_paths {
3119                    let path = untracked_normal_rollup_path(&path, &index, &ignores);
3120                    if ignored_traditional_path_is_empty_directory(worktree_root, &path)? {
3121                        continue;
3122                    }
3123                    rolled.insert(path);
3124                }
3125                rolled.into_iter().collect()
3126            }
3127        };
3128        for path in ignored_paths {
3129            entries.push(ShortStatusEntry {
3130                index: b'!',
3131                worktree: b'!',
3132                path,
3133                head_mode: None,
3134                index_mode: None,
3135                worktree_mode: None,
3136                head_oid: None,
3137                index_oid: None,
3138                submodule: None,
3139            });
3140        }
3141    }
3142    let untracked_paths: Vec<Vec<u8>> = match options.untracked_mode {
3143        StatusUntrackedMode::All => worktree
3144            .keys()
3145            .filter(|path| !index.contains_key(*path) && !ignores.is_ignored(path, false))
3146            .cloned()
3147            .collect(),
3148        StatusUntrackedMode::Normal => {
3149            normal_untracked_paths_from_worktree(&worktree, &index, &ignores)
3150        }
3151        StatusUntrackedMode::None => Vec::new(),
3152    };
3153    for path in untracked_paths {
3154        entries.push(ShortStatusEntry {
3155            index: b'?',
3156            worktree: b'?',
3157            path,
3158            head_mode: None,
3159            index_mode: None,
3160            worktree_mode: None,
3161            head_oid: None,
3162            index_oid: None,
3163            submodule: None,
3164        });
3165    }
3166    entries.sort_by(|left, right| {
3167        status_sort_category(left)
3168            .cmp(&status_sort_category(right))
3169            .then_with(|| left.path.cmp(&right.path))
3170    });
3171    Ok(entries)
3172}
3173
3174fn collect_status_entries_head_matches_index(
3175    index: &BTreeMap<Vec<u8>, TrackedEntry>,
3176    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
3177    tracked_presence: &HashSet<Vec<u8>>,
3178    submodule_dirt_map: &BTreeMap<Vec<u8>, u8>,
3179    untracked_mode: StatusUntrackedMode,
3180    entries: &mut Vec<ShortStatusEntry>,
3181) {
3182    for (path, index_entry) in index {
3183        let worktree_entry = worktree.get(path);
3184        let worktree_present =
3185            worktree_entry.is_some() || tracked_presence.contains(path.as_slice());
3186        let submodule = status_submodule_from_entries(
3187            path,
3188            index_entry,
3189            worktree_entry,
3190            submodule_dirt_map,
3191            untracked_mode,
3192        );
3193        let worktree_code = match worktree_entry {
3194            None if !worktree_present => b'D',
3195            Some(worktree_entry) if worktree_entry != index_entry => b'M',
3196            _ if submodule.is_some_and(|sub| sub.any()) => b'M',
3197            _ => b' ',
3198        };
3199        if worktree_code != b' ' {
3200            entries.push(ShortStatusEntry {
3201                index: b' ',
3202                worktree: worktree_code,
3203                path: path.clone(),
3204                head_mode: Some(index_entry.mode),
3205                index_mode: Some(index_entry.mode),
3206                worktree_mode: status_worktree_mode(
3207                    Some(index_entry),
3208                    worktree_entry,
3209                    worktree_present,
3210                ),
3211                head_oid: Some(index_entry.oid),
3212                index_oid: Some(index_entry.oid),
3213                submodule: submodule.filter(|sub| sub.any()),
3214            });
3215        }
3216    }
3217}
3218
/// Borrowed inputs for `collect_status_entries_with_head`, bundled so the
/// function does not need one parameter per map.
struct StatusComparisonInputs<'a> {
    /// Tracked entries of the HEAD tree, keyed by path.
    head: &'a BTreeMap<Vec<u8>, TrackedEntry>,
    /// Tracked entries of the index, keyed by path.
    index: &'a BTreeMap<Vec<u8>, TrackedEntry>,
    /// Entries found in the worktree, keyed by path.
    worktree: &'a BTreeMap<Vec<u8>, TrackedEntry>,
    /// Paths counted as present in the worktree even when they have no entry in
    /// `worktree`.
    tracked_presence: &'a HashSet<Vec<u8>>,
    /// Per-submodule dirt bits, keyed by path.
    submodule_dirt_map: &'a BTreeMap<Vec<u8>, u8>,
    /// Ignore rules consulted for paths known only to the worktree.
    ignores: &'a IgnoreMatcher,
}
3227
3228fn collect_status_entries_with_head(
3229    inputs: StatusComparisonInputs<'_>,
3230    untracked_mode: StatusUntrackedMode,
3231    entries: &mut Vec<ShortStatusEntry>,
3232) {
3233    let mut paths = BTreeSet::new();
3234    paths.extend(inputs.head.keys().cloned());
3235    paths.extend(inputs.index.keys().cloned());
3236    paths.extend(
3237        inputs
3238            .worktree
3239            .keys()
3240            .filter(|path| inputs.index.contains_key(*path))
3241            .cloned(),
3242    );
3243
3244    for path in paths {
3245        let head_entry = inputs.head.get(&path);
3246        let index_entry = inputs.index.get(&path);
3247        let worktree_entry = inputs.worktree.get(&path);
3248        let worktree_present =
3249            worktree_entry.is_some() || inputs.tracked_presence.contains(path.as_slice());
3250        if head_entry.is_none()
3251            && index_entry.is_none()
3252            && worktree_entry.is_some()
3253            && inputs.ignores.is_ignored(&path, false)
3254        {
3255            continue;
3256        }
3257        let submodule = match index_entry {
3258            Some(index_entry) => status_submodule_from_entries(
3259                &path,
3260                index_entry,
3261                worktree_entry,
3262                inputs.submodule_dirt_map,
3263                untracked_mode,
3264            ),
3265            None => None,
3266        };
3267        let (index_code, worktree_code) =
3268            if head_entry.is_none() && index_entry.is_none() && worktree_entry.is_some() {
3269                (b'?', b'?')
3270            } else {
3271                let index_code = match (head_entry, index_entry) {
3272                    (None, Some(_)) => b'A',
3273                    (Some(_), None) => b'D',
3274                    (Some(left), Some(right)) if left != right => b'M',
3275                    _ => b' ',
3276                };
3277                let worktree_code = match (index_entry, worktree_entry) {
3278                    (None, Some(_)) => b'?',
3279                    (Some(_), None) if !worktree_present => b'D',
3280                    (Some(left), Some(right)) if left != right => b'M',
3281                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
3282                    _ => b' ',
3283                };
3284                (index_code, worktree_code)
3285            };
3286        if index_code != b' ' || worktree_code != b' ' {
3287            entries.push(ShortStatusEntry {
3288                index: index_code,
3289                worktree: worktree_code,
3290                path,
3291                head_mode: head_entry.map(|entry| entry.mode),
3292                index_mode: index_entry.map(|entry| entry.mode),
3293                worktree_mode: status_worktree_mode(index_entry, worktree_entry, worktree_present),
3294                head_oid: head_entry.map(|entry| entry.oid),
3295                index_oid: index_entry.map(|entry| entry.oid),
3296                submodule: submodule.filter(|sub| sub.any()),
3297            });
3298        }
3299    }
3300}
3301
3302fn status_worktree_mode(
3303    index_entry: Option<&TrackedEntry>,
3304    worktree_entry: Option<&TrackedEntry>,
3305    worktree_present: bool,
3306) -> Option<u32> {
3307    worktree_entry.map(|entry| entry.mode).or_else(|| {
3308        worktree_present
3309            .then(|| index_entry.map(|entry| entry.mode))
3310            .flatten()
3311    })
3312}
3313
3314fn status_submodule_from_entries(
3315    path: &[u8],
3316    index_entry: &TrackedEntry,
3317    worktree_entry: Option<&TrackedEntry>,
3318    submodule_dirt_map: &BTreeMap<Vec<u8>, u8>,
3319    untracked_mode: StatusUntrackedMode,
3320) -> Option<SubmoduleStatus> {
3321    let worktree_entry = worktree_entry?;
3322    if index_entry.mode != 0o160000 || worktree_entry.mode != 0o160000 {
3323        return None;
3324    }
3325    let dirt = submodule_dirt_map.get(path).copied().unwrap_or(0);
3326    Some(SubmoduleStatus {
3327        new_commits: index_entry.oid != worktree_entry.oid,
3328        modified_content: dirt & DIRTY_SUBMODULE_MODIFIED != 0,
3329        untracked_content: dirt & DIRTY_SUBMODULE_UNTRACKED != 0
3330            && !matches!(untracked_mode, StatusUntrackedMode::None),
3331    })
3332}
3333
3334fn short_status_tracked_only(
3335    worktree_root: &Path,
3336    git_dir: &Path,
3337    format: ObjectFormat,
3338    db: &FileObjectDatabase,
3339    index: &Index,
3340    stat_cache: &IndexStatCache,
3341    head_matches_index: bool,
3342    untracked_mode: StatusUntrackedMode,
3343) -> Result<Vec<ShortStatusEntry>> {
3344    let normal_entry_count = index
3345        .entries
3346        .iter()
3347        .filter(|entry| entry.stage() == Stage::Normal)
3348        .count();
3349    if head_matches_index && normal_entry_count >= 512 {
3350        return short_status_tracked_only_head_matches_index_parallel(
3351            worktree_root,
3352            git_dir,
3353            format,
3354            index,
3355            stat_cache,
3356            untracked_mode,
3357        );
3358    }
3359    let head = if head_matches_index {
3360        None
3361    } else {
3362        Some(head_tree_entries(git_dir, format, db)?)
3363    };
3364    if !head_matches_index && normal_entry_count >= 512 {
3365        if let Some(head) = head.as_ref() {
3366            return short_status_tracked_only_with_head_parallel(
3367                worktree_root,
3368                git_dir,
3369                format,
3370                index,
3371                stat_cache,
3372                head,
3373                untracked_mode,
3374            );
3375        }
3376    }
3377    let mut clean_filter = None;
3378    let mut entries = Vec::new();
3379    for entry in index
3380        .entries
3381        .iter()
3382        .filter(|entry| entry.stage() == Stage::Normal)
3383    {
3384        let path = entry.path.as_bytes();
3385        let index_entry = TrackedEntry {
3386            mode: entry.mode,
3387            oid: entry.oid,
3388        };
3389        let head_entry = if head_matches_index {
3390            Some(&index_entry)
3391        } else {
3392            head.as_ref().and_then(|head| head.get(path))
3393        };
3394        let worktree_entry = worktree_entry_for_index_entry_with_attributes(
3395            worktree_root,
3396            git_dir,
3397            format,
3398            entry,
3399            stat_cache,
3400            &mut clean_filter,
3401        )?;
3402        let submodule = tracked_only_submodule_status(
3403            worktree_root,
3404            path,
3405            &index_entry,
3406            worktree_entry.as_ref(),
3407            untracked_mode,
3408        )?;
3409        let index_code = match head_entry {
3410            None => b'A',
3411            Some(head_entry) if *head_entry != index_entry => b'M',
3412            _ => b' ',
3413        };
3414        let worktree_code = match worktree_entry.as_ref() {
3415            None => b'D',
3416            Some(worktree_entry) if *worktree_entry != index_entry => b'M',
3417            _ if submodule.is_some_and(|sub| sub.any()) => b'M',
3418            _ => b' ',
3419        };
3420        if index_code != b' ' || worktree_code != b' ' {
3421            entries.push(ShortStatusEntry {
3422                index: index_code,
3423                worktree: worktree_code,
3424                path: path.to_vec(),
3425                head_mode: head_entry.map(|entry| entry.mode),
3426                index_mode: Some(index_entry.mode),
3427                worktree_mode: worktree_entry.as_ref().map(|entry| entry.mode),
3428                head_oid: head_entry.map(|entry| entry.oid),
3429                index_oid: Some(index_entry.oid),
3430                submodule: submodule.filter(|sub| sub.any()),
3431            });
3432        }
3433    }
3434    if let Some(head) = head.as_ref() {
3435        let index_paths = index
3436            .entries
3437            .iter()
3438            .filter(|entry| entry.stage() == Stage::Normal)
3439            .map(|entry| entry.path.as_bytes().to_vec())
3440            .collect::<HashSet<_>>();
3441        for (path, head_entry) in head {
3442            if index_paths.contains(path.as_slice()) {
3443                continue;
3444            }
3445            entries.push(ShortStatusEntry {
3446                index: b'D',
3447                worktree: b' ',
3448                path: path.clone(),
3449                head_mode: Some(head_entry.mode),
3450                index_mode: None,
3451                worktree_mode: None,
3452                head_oid: Some(head_entry.oid),
3453                index_oid: None,
3454                submodule: None,
3455            });
3456        }
3457    }
3458    entries.sort_by(|left, right| {
3459        status_sort_category(left)
3460            .cmp(&status_sort_category(right))
3461            .then_with(|| left.path.cmp(&right.path))
3462    });
3463    Ok(entries)
3464}
3465
3466fn short_status_borrowed_head_matches_index_if_possible(
3467    worktree_root: &Path,
3468    git_dir: &Path,
3469    format: ObjectFormat,
3470    db: &FileObjectDatabase,
3471    untracked_mode: StatusUntrackedMode,
3472) -> Result<Option<Vec<ShortStatusEntry>>> {
3473    let index_path = repository_index_path(git_dir);
3474    let index_metadata = match fs::metadata(&index_path) {
3475        Ok(metadata) => metadata,
3476        Err(err)
3477            if err.kind() == std::io::ErrorKind::NotFound
3478                && matches!(untracked_mode, StatusUntrackedMode::None) =>
3479        {
3480            return Ok(Some(Vec::new()));
3481        }
3482        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
3483        Err(err) => return Err(err.into()),
3484    };
3485    let index_bytes = fs::read(&index_path)?;
3486    let borrowed = match BorrowedIndex::parse(&index_bytes, format) {
3487        Ok(index) => index,
3488        Err(GitError::Unsupported(_)) => return Ok(None),
3489        Err(err) => return Err(err),
3490    };
3491    let Some(head_tree_oid) = resolve_head_tree_oid(git_dir, format, db)? else {
3492        return Ok(None);
3493    };
3494    let stage0_entry_count = borrowed
3495        .entries
3496        .iter()
3497        .filter(|entry| entry.stage() == Stage::Normal)
3498        .count();
3499    if !head_matches_borrowed_index_from_cache_tree(
3500        &borrowed,
3501        format,
3502        &head_tree_oid,
3503        stage0_entry_count,
3504    )? {
3505        return Ok(None);
3506    }
3507
3508    let index_mtime = file_mtime_parts(&index_metadata);
3509    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
3510    let mut entries = short_status_borrowed_tracked_only_head_matches_index_parallel(
3511        worktree_root,
3512        git_dir,
3513        format,
3514        &borrowed,
3515        &stat_cache,
3516        untracked_mode,
3517    )?;
3518    if !matches!(untracked_mode, StatusUntrackedMode::None) {
3519        let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
3520        let untracked_paths = status_untracked_paths_from_borrowed_index(
3521            worktree_root,
3522            git_dir,
3523            &borrowed,
3524            &mut ignores,
3525            untracked_mode,
3526        )?;
3527        for path in untracked_paths {
3528            entries.push(ShortStatusEntry {
3529                index: b'?',
3530                worktree: b'?',
3531                path,
3532                head_mode: None,
3533                index_mode: None,
3534                worktree_mode: None,
3535                head_oid: None,
3536                index_oid: None,
3537                submodule: None,
3538            });
3539        }
3540        entries.sort_by(|left, right| {
3541            status_sort_category(left)
3542                .cmp(&status_sort_category(right))
3543                .then_with(|| left.path.cmp(&right.path))
3544        });
3545    }
3546    Ok(Some(entries))
3547}
3548
/// A stage-0 index entry that the stat precheck could not prove clean.
///
/// The payload of either variant is the entry's position in the index entry
/// list it was taken from.
#[derive(Clone, Copy, Debug)]
enum TrackedOnlyPrecheck {
    /// The worktree file is missing.
    Deleted(usize),
    /// The entry needs the full (slow) worktree comparison.
    Slow(usize),
}
3554
/// Result of the stat-only precheck for a single index entry.
#[derive(Debug)]
enum TrackedOnlyPrecheckOutcome {
    /// The stat cache accepted the entry; no further inspection is needed.
    Clean,
    /// The worktree file does not exist.
    Deleted,
    /// The precheck was inconclusive; the full comparison has to run.
    Slow,
}
3561
3562fn short_status_tracked_only_head_matches_index_parallel(
3563    worktree_root: &Path,
3564    git_dir: &Path,
3565    format: ObjectFormat,
3566    index: &Index,
3567    stat_cache: &IndexStatCache,
3568    untracked_mode: StatusUntrackedMode,
3569) -> Result<Vec<ShortStatusEntry>> {
3570    let prechecks = tracked_only_non_clean_prechecks_parallel(worktree_root, index, stat_cache)?;
3571
3572    let mut clean_filter = None;
3573    let mut entries = Vec::new();
3574    for precheck in prechecks {
3575        match precheck {
3576            TrackedOnlyPrecheck::Deleted(idx) => {
3577                let entry = &index.entries[idx];
3578                let path = entry.path.as_bytes();
3579                entries.push(ShortStatusEntry {
3580                    index: b' ',
3581                    worktree: b'D',
3582                    path: path.to_vec(),
3583                    head_mode: Some(entry.mode),
3584                    index_mode: Some(entry.mode),
3585                    worktree_mode: None,
3586                    head_oid: Some(entry.oid),
3587                    index_oid: Some(entry.oid),
3588                    submodule: None,
3589                });
3590            }
3591            TrackedOnlyPrecheck::Slow(idx) => {
3592                let entry = &index.entries[idx];
3593                let path = entry.path.as_bytes();
3594                let index_entry = TrackedEntry {
3595                    mode: entry.mode,
3596                    oid: entry.oid,
3597                };
3598                let worktree_entry = worktree_entry_for_index_entry_with_attributes(
3599                    worktree_root,
3600                    git_dir,
3601                    format,
3602                    entry,
3603                    stat_cache,
3604                    &mut clean_filter,
3605                )?;
3606                let submodule = tracked_only_submodule_status(
3607                    worktree_root,
3608                    path,
3609                    &index_entry,
3610                    worktree_entry.as_ref(),
3611                    untracked_mode,
3612                )?;
3613                let worktree_code = match worktree_entry.as_ref() {
3614                    None => b'D',
3615                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
3616                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
3617                    _ => b' ',
3618                };
3619                if worktree_code != b' ' {
3620                    entries.push(ShortStatusEntry {
3621                        index: b' ',
3622                        worktree: worktree_code,
3623                        path: path.to_vec(),
3624                        head_mode: Some(index_entry.mode),
3625                        index_mode: Some(index_entry.mode),
3626                        worktree_mode: worktree_entry.as_ref().map(|entry| entry.mode),
3627                        head_oid: Some(index_entry.oid),
3628                        index_oid: Some(index_entry.oid),
3629                        submodule: submodule.filter(|sub| sub.any()),
3630                    });
3631                }
3632            }
3633        }
3634    }
3635    entries.sort_by(|left, right| {
3636        status_sort_category(left)
3637            .cmp(&status_sort_category(right))
3638            .then_with(|| left.path.cmp(&right.path))
3639    });
3640    Ok(entries)
3641}
3642
3643fn short_status_borrowed_tracked_only_head_matches_index_parallel(
3644    worktree_root: &Path,
3645    git_dir: &Path,
3646    format: ObjectFormat,
3647    index: &BorrowedIndex<'_>,
3648    stat_cache: &IndexStatCache,
3649    untracked_mode: StatusUntrackedMode,
3650) -> Result<Vec<ShortStatusEntry>> {
3651    let prechecks =
3652        tracked_only_borrowed_non_clean_prechecks_parallel(worktree_root, index, stat_cache)?;
3653
3654    let mut clean_filter = None;
3655    let mut entries = Vec::new();
3656    for precheck in prechecks {
3657        match precheck {
3658            TrackedOnlyPrecheck::Deleted(idx) => {
3659                let entry = &index.entries[idx];
3660                entries.push(ShortStatusEntry {
3661                    index: b' ',
3662                    worktree: b'D',
3663                    path: entry.path.to_vec(),
3664                    head_mode: Some(entry.mode),
3665                    index_mode: Some(entry.mode),
3666                    worktree_mode: None,
3667                    head_oid: Some(entry.oid),
3668                    index_oid: Some(entry.oid),
3669                    submodule: None,
3670                });
3671            }
3672            TrackedOnlyPrecheck::Slow(idx) => {
3673                let entry = &index.entries[idx];
3674                let index_entry = TrackedEntry {
3675                    mode: entry.mode,
3676                    oid: entry.oid,
3677                };
3678                let worktree_entry = worktree_entry_for_index_entry_ref_with_attributes(
3679                    worktree_root,
3680                    git_dir,
3681                    format,
3682                    entry,
3683                    stat_cache,
3684                    &mut clean_filter,
3685                )?;
3686                let submodule = tracked_only_submodule_status(
3687                    worktree_root,
3688                    entry.path,
3689                    &index_entry,
3690                    worktree_entry.as_ref(),
3691                    untracked_mode,
3692                )?;
3693                let worktree_code = match worktree_entry.as_ref() {
3694                    None => b'D',
3695                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
3696                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
3697                    _ => b' ',
3698                };
3699                if worktree_code != b' ' {
3700                    entries.push(ShortStatusEntry {
3701                        index: b' ',
3702                        worktree: worktree_code,
3703                        path: entry.path.to_vec(),
3704                        head_mode: Some(index_entry.mode),
3705                        index_mode: Some(index_entry.mode),
3706                        worktree_mode: worktree_entry.as_ref().map(|entry| entry.mode),
3707                        head_oid: Some(index_entry.oid),
3708                        index_oid: Some(index_entry.oid),
3709                        submodule: submodule.filter(|sub| sub.any()),
3710                    });
3711                }
3712            }
3713        }
3714    }
3715    entries.sort_by(|left, right| {
3716        status_sort_category(left)
3717            .cmp(&status_sort_category(right))
3718            .then_with(|| left.path.cmp(&right.path))
3719    });
3720    Ok(entries)
3721}
3722
3723fn short_status_tracked_only_with_head_parallel(
3724    worktree_root: &Path,
3725    git_dir: &Path,
3726    format: ObjectFormat,
3727    index: &Index,
3728    stat_cache: &IndexStatCache,
3729    head: &BTreeMap<Vec<u8>, TrackedEntry>,
3730    untracked_mode: StatusUntrackedMode,
3731) -> Result<Vec<ShortStatusEntry>> {
3732    let prechecks = tracked_only_non_clean_prechecks_parallel(worktree_root, index, stat_cache)?;
3733    let mut precheck_cursor = 0usize;
3734    let mut clean_filter = None;
3735    let mut entries = Vec::new();
3736
3737    for (idx, entry) in index.entries.iter().enumerate() {
3738        if entry.stage() != Stage::Normal {
3739            continue;
3740        }
3741        let path = entry.path.as_bytes();
3742        let index_entry = TrackedEntry {
3743            mode: entry.mode,
3744            oid: entry.oid,
3745        };
3746        let head_entry = head.get(path);
3747        let index_code = match head_entry {
3748            None => b'A',
3749            Some(head_entry) if *head_entry != index_entry => b'M',
3750            _ => b' ',
3751        };
3752        let precheck = prechecks
3753            .get(precheck_cursor)
3754            .copied()
3755            .and_then(|precheck| {
3756                if tracked_only_precheck_index(precheck) == idx {
3757                    precheck_cursor += 1;
3758                    Some(precheck)
3759                } else {
3760                    None
3761                }
3762            });
3763        let (worktree_code, worktree_mode, submodule) = match precheck {
3764            None => (b' ', Some(index_entry.mode), None),
3765            Some(TrackedOnlyPrecheck::Deleted(_)) => (b'D', None, None),
3766            Some(TrackedOnlyPrecheck::Slow(_)) => {
3767                let worktree_entry = worktree_entry_for_index_entry_with_attributes(
3768                    worktree_root,
3769                    git_dir,
3770                    format,
3771                    entry,
3772                    stat_cache,
3773                    &mut clean_filter,
3774                )?;
3775                let submodule = tracked_only_submodule_status(
3776                    worktree_root,
3777                    path,
3778                    &index_entry,
3779                    worktree_entry.as_ref(),
3780                    untracked_mode,
3781                )?;
3782                let worktree_code = match worktree_entry.as_ref() {
3783                    None => b'D',
3784                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
3785                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
3786                    _ => b' ',
3787                };
3788                (
3789                    worktree_code,
3790                    worktree_entry.as_ref().map(|entry| entry.mode),
3791                    submodule.filter(|sub| sub.any()),
3792                )
3793            }
3794        };
3795        if index_code != b' ' || worktree_code != b' ' {
3796            entries.push(ShortStatusEntry {
3797                index: index_code,
3798                worktree: worktree_code,
3799                path: path.to_vec(),
3800                head_mode: head_entry.map(|entry| entry.mode),
3801                index_mode: Some(index_entry.mode),
3802                worktree_mode,
3803                head_oid: head_entry.map(|entry| entry.oid),
3804                index_oid: Some(index_entry.oid),
3805                submodule,
3806            });
3807        }
3808    }
3809
3810    let index_paths = index
3811        .entries
3812        .iter()
3813        .filter(|entry| entry.stage() == Stage::Normal)
3814        .map(|entry| entry.path.as_bytes().to_vec())
3815        .collect::<HashSet<_>>();
3816    for (path, head_entry) in head {
3817        if index_paths.contains(path.as_slice()) {
3818            continue;
3819        }
3820        entries.push(ShortStatusEntry {
3821            index: b'D',
3822            worktree: b' ',
3823            path: path.clone(),
3824            head_mode: Some(head_entry.mode),
3825            index_mode: None,
3826            worktree_mode: None,
3827            head_oid: Some(head_entry.oid),
3828            index_oid: None,
3829            submodule: None,
3830        });
3831    }
3832    entries.sort_by(|left, right| {
3833        status_sort_category(left)
3834            .cmp(&status_sort_category(right))
3835            .then_with(|| left.path.cmp(&right.path))
3836    });
3837    Ok(entries)
3838}
3839
3840fn tracked_only_precheck_index(precheck: TrackedOnlyPrecheck) -> usize {
3841    match precheck {
3842        TrackedOnlyPrecheck::Deleted(idx) | TrackedOnlyPrecheck::Slow(idx) => idx,
3843    }
3844}
3845
3846fn tracked_only_non_clean_prechecks_parallel(
3847    worktree_root: &Path,
3848    index: &Index,
3849    stat_cache: &IndexStatCache,
3850) -> Result<Vec<TrackedOnlyPrecheck>> {
3851    let normal_indices = index
3852        .entries
3853        .iter()
3854        .enumerate()
3855        .filter_map(|(idx, entry)| (entry.stage() == Stage::Normal).then_some(idx))
3856        .collect::<Vec<_>>();
3857    if normal_indices.is_empty() {
3858        return Ok(Vec::new());
3859    }
3860    let max_workers = std::thread::available_parallelism()
3861        .map(|count| count.get())
3862        .unwrap_or(1)
3863        .min(16);
3864    let worker_count = max_workers.min(normal_indices.len().div_ceil(512)).max(1);
3865    if worker_count == 1 {
3866        let mut prechecks = Vec::new();
3867        let mut absolute = PathBuf::new();
3868        for idx in normal_indices {
3869            let entry = &index.entries[idx];
3870            match tracked_only_stat_precheck(worktree_root, entry, stat_cache, &mut absolute)? {
3871                TrackedOnlyPrecheckOutcome::Clean => {}
3872                TrackedOnlyPrecheckOutcome::Deleted => {
3873                    prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
3874                }
3875                TrackedOnlyPrecheckOutcome::Slow => {
3876                    prechecks.push(TrackedOnlyPrecheck::Slow(idx));
3877                }
3878            }
3879        }
3880        return Ok(prechecks);
3881    }
3882    let chunk_size = normal_indices.len().div_ceil(worker_count);
3883    let mut prechecks = std::thread::scope(|scope| -> Result<Vec<TrackedOnlyPrecheck>> {
3884        let mut handles = Vec::new();
3885        for chunk in normal_indices.chunks(chunk_size) {
3886            handles.push(scope.spawn(move || -> Result<Vec<TrackedOnlyPrecheck>> {
3887                let mut prechecks = Vec::new();
3888                let mut absolute = PathBuf::new();
3889                for &idx in chunk {
3890                    let entry = &index.entries[idx];
3891                    match tracked_only_stat_precheck(
3892                        worktree_root,
3893                        entry,
3894                        stat_cache,
3895                        &mut absolute,
3896                    )? {
3897                        TrackedOnlyPrecheckOutcome::Clean => {}
3898                        TrackedOnlyPrecheckOutcome::Deleted => {
3899                            prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
3900                        }
3901                        TrackedOnlyPrecheckOutcome::Slow => {
3902                            prechecks.push(TrackedOnlyPrecheck::Slow(idx));
3903                        }
3904                    }
3905                }
3906                Ok(prechecks)
3907            }));
3908        }
3909        let mut prechecks = Vec::new();
3910        for handle in handles {
3911            let mut chunk = handle
3912                .join()
3913                .map_err(|_| GitError::Command("status worker panicked".into()))??;
3914            prechecks.append(&mut chunk);
3915        }
3916        Ok(prechecks)
3917    })?;
3918    prechecks.sort_by_key(|precheck| match precheck {
3919        TrackedOnlyPrecheck::Deleted(idx) | TrackedOnlyPrecheck::Slow(idx) => *idx,
3920    });
3921    Ok(prechecks)
3922}
3923
3924fn tracked_only_borrowed_non_clean_prechecks_parallel(
3925    worktree_root: &Path,
3926    index: &BorrowedIndex<'_>,
3927    stat_cache: &IndexStatCache,
3928) -> Result<Vec<TrackedOnlyPrecheck>> {
3929    let normal_indices = index
3930        .entries
3931        .iter()
3932        .enumerate()
3933        .filter_map(|(idx, entry)| (entry.stage() == Stage::Normal).then_some(idx))
3934        .collect::<Vec<_>>();
3935    if normal_indices.is_empty() {
3936        return Ok(Vec::new());
3937    }
3938    let max_workers = std::thread::available_parallelism()
3939        .map(|count| count.get())
3940        .unwrap_or(1)
3941        .min(16);
3942    let worker_count = max_workers.min(normal_indices.len().div_ceil(512)).max(1);
3943    if worker_count == 1 {
3944        let mut prechecks = Vec::new();
3945        let mut absolute = PathBuf::new();
3946        for idx in normal_indices {
3947            let entry = &index.entries[idx];
3948            match tracked_only_borrowed_stat_precheck(
3949                worktree_root,
3950                entry,
3951                stat_cache,
3952                &mut absolute,
3953            )? {
3954                TrackedOnlyPrecheckOutcome::Clean => {}
3955                TrackedOnlyPrecheckOutcome::Deleted => {
3956                    prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
3957                }
3958                TrackedOnlyPrecheckOutcome::Slow => {
3959                    prechecks.push(TrackedOnlyPrecheck::Slow(idx));
3960                }
3961            }
3962        }
3963        return Ok(prechecks);
3964    }
3965    let chunk_size = normal_indices.len().div_ceil(worker_count);
3966    let mut prechecks = std::thread::scope(|scope| -> Result<Vec<TrackedOnlyPrecheck>> {
3967        let mut handles = Vec::new();
3968        for chunk in normal_indices.chunks(chunk_size) {
3969            handles.push(scope.spawn(move || -> Result<Vec<TrackedOnlyPrecheck>> {
3970                let mut prechecks = Vec::new();
3971                let mut absolute = PathBuf::new();
3972                for &idx in chunk {
3973                    let entry = &index.entries[idx];
3974                    match tracked_only_borrowed_stat_precheck(
3975                        worktree_root,
3976                        entry,
3977                        stat_cache,
3978                        &mut absolute,
3979                    )? {
3980                        TrackedOnlyPrecheckOutcome::Clean => {}
3981                        TrackedOnlyPrecheckOutcome::Deleted => {
3982                            prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
3983                        }
3984                        TrackedOnlyPrecheckOutcome::Slow => {
3985                            prechecks.push(TrackedOnlyPrecheck::Slow(idx));
3986                        }
3987                    }
3988                }
3989                Ok(prechecks)
3990            }));
3991        }
3992        let mut prechecks = Vec::new();
3993        for handle in handles {
3994            let mut chunk = handle
3995                .join()
3996                .map_err(|_| GitError::Command("status worker panicked".into()))??;
3997            prechecks.append(&mut chunk);
3998        }
3999        Ok(prechecks)
4000    })?;
4001    prechecks.sort_by_key(|precheck| match precheck {
4002        TrackedOnlyPrecheck::Deleted(idx) | TrackedOnlyPrecheck::Slow(idx) => *idx,
4003    });
4004    Ok(prechecks)
4005}
4006
4007fn tracked_only_stat_precheck(
4008    worktree_root: &Path,
4009    index_entry: &IndexEntry,
4010    stat_cache: &IndexStatCache,
4011    absolute: &mut PathBuf,
4012) -> Result<TrackedOnlyPrecheckOutcome> {
4013    if index_entry.mode == 0o160000 {
4014        return Ok(TrackedOnlyPrecheckOutcome::Slow);
4015    }
4016    let git_path = index_entry.path.as_bytes();
4017    set_worktree_path_from_repo_path(worktree_root, git_path, absolute)?;
4018    let metadata = match fs::symlink_metadata(&absolute) {
4019        Ok(metadata) => metadata,
4020        Err(err)
4021            if matches!(
4022                err.kind(),
4023                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
4024            ) =>
4025        {
4026            return Ok(TrackedOnlyPrecheckOutcome::Deleted);
4027        }
4028        Err(err) => return Err(err.into()),
4029    };
4030    let file_type = metadata.file_type();
4031    if file_type.is_dir() || !(file_type.is_file() || file_type.is_symlink()) {
4032        return Ok(TrackedOnlyPrecheckOutcome::Slow);
4033    }
4034    if stat_cache
4035        .reuse_index_entry(index_entry, &metadata)
4036        .is_some()
4037    {
4038        Ok(TrackedOnlyPrecheckOutcome::Clean)
4039    } else {
4040        Ok(TrackedOnlyPrecheckOutcome::Slow)
4041    }
4042}
4043
4044fn tracked_only_borrowed_stat_precheck(
4045    worktree_root: &Path,
4046    index_entry: &IndexEntryRef<'_>,
4047    stat_cache: &IndexStatCache,
4048    absolute: &mut PathBuf,
4049) -> Result<TrackedOnlyPrecheckOutcome> {
4050    if index_entry.mode == 0o160000 {
4051        return Ok(TrackedOnlyPrecheckOutcome::Slow);
4052    }
4053    set_worktree_path_from_repo_path(worktree_root, index_entry.path, absolute)?;
4054    let metadata = match fs::symlink_metadata(&absolute) {
4055        Ok(metadata) => metadata,
4056        Err(err)
4057            if matches!(
4058                err.kind(),
4059                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
4060            ) =>
4061        {
4062            return Ok(TrackedOnlyPrecheckOutcome::Deleted);
4063        }
4064        Err(err) => return Err(err.into()),
4065    };
4066    let file_type = metadata.file_type();
4067    if file_type.is_dir() || !(file_type.is_file() || file_type.is_symlink()) {
4068        return Ok(TrackedOnlyPrecheckOutcome::Slow);
4069    }
4070    if stat_cache
4071        .reuse_index_entry_ref(index_entry, &metadata)
4072        .is_some()
4073    {
4074        Ok(TrackedOnlyPrecheckOutcome::Clean)
4075    } else {
4076        Ok(TrackedOnlyPrecheckOutcome::Slow)
4077    }
4078}
4079
4080fn set_worktree_path_from_repo_path(
4081    worktree_root: &Path,
4082    git_path: &[u8],
4083    out: &mut PathBuf,
4084) -> Result<()> {
4085    out.clear();
4086    out.push(worktree_root);
4087    push_repo_path(out, git_path)
4088}
4089
4090#[cfg(unix)]
4091fn push_repo_path(out: &mut PathBuf, path: &[u8]) -> Result<()> {
4092    use std::os::unix::ffi::OsStrExt;
4093
4094    out.push(Path::new(std::ffi::OsStr::from_bytes(path)));
4095    Ok(())
4096}
4097
/// Appends a repository path to `out` on non-Unix targets.
///
/// The path must be valid UTF-8; it is split on `/` and each component is
/// pushed separately.
///
/// # Errors
/// `GitError::InvalidPath` when `path` is not valid UTF-8.
#[cfg(not(unix))]
fn push_repo_path(out: &mut PathBuf, path: &[u8]) -> Result<()> {
    let Ok(text) = std::str::from_utf8(path) else {
        return Err(GitError::InvalidPath("index path is not utf8".into()));
    };
    out.extend(text.split('/'));
    Ok(())
}
4107
4108fn tracked_only_submodule_status(
4109    worktree_root: &Path,
4110    path: &[u8],
4111    index_entry: &TrackedEntry,
4112    worktree_entry: Option<&TrackedEntry>,
4113    untracked_mode: StatusUntrackedMode,
4114) -> Result<Option<SubmoduleStatus>> {
4115    let Some(worktree_entry) = worktree_entry else {
4116        return Ok(None);
4117    };
4118    if index_entry.mode != 0o160000 || worktree_entry.mode != 0o160000 {
4119        return Ok(None);
4120    }
4121    let absolute = worktree_root.join(repo_path_to_os_path(path)?);
4122    let dirt = if absolute.is_dir() {
4123        submodule_dirt(&absolute)
4124    } else {
4125        0
4126    };
4127    Ok(Some(SubmoduleStatus {
4128        new_commits: index_entry.oid != worktree_entry.oid,
4129        modified_content: dirt & DIRTY_SUBMODULE_MODIFIED != 0,
4130        untracked_content: dirt & DIRTY_SUBMODULE_UNTRACKED != 0
4131            && !matches!(untracked_mode, StatusUntrackedMode::None),
4132    }))
4133}
4134
4135fn status_sort_category(entry: &ShortStatusEntry) -> u8 {
4136    match (entry.index, entry.worktree) {
4137        (b'?', b'?') => 1,
4138        (b'!', b'!') => 2,
4139        _ => 0,
4140    }
4141}
4142
4143pub fn untracked_paths(
4144    worktree_root: impl AsRef<Path>,
4145    git_dir: impl AsRef<Path>,
4146    format: ObjectFormat,
4147) -> Result<Vec<Vec<u8>>> {
4148    untracked_paths_with_options(
4149        worktree_root,
4150        git_dir,
4151        format,
4152        UntrackedPathOptions::default(),
4153    )
4154}
4155
/// One pathspec used to filter untracked-path collection.
///
/// Mirrors git `ls-files` pathspec semantics: a pathspec can name a literal
/// path, act as a recursive directory prefix, or be an fnmatch-style glob.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct UntrackedPathspecFilter {
    /// Pathspec bytes; an empty pathspec matches every path.
    pub path: Vec<u8>,
    /// When set, paths strictly below `path/` also match.
    pub recursive: bool,
    /// When set, `path` is additionally matched as a glob pattern.
    pub is_glob: bool,
}
4164
4165#[derive(Debug, Clone, PartialEq, Eq, Default)]
4166pub struct UntrackedPathOptions {
4167    pub directory: bool,
4168    pub no_empty_directory: bool,
4169    pub preserve_ignored_directories: bool,
4170    pub exclude_standard: bool,
4171    pub ignored_only: bool,
4172    pub exclude_patterns: Vec<Vec<u8>>,
4173    pub exclude_per_directory: Vec<String>,
4174    pub pathspecs: Vec<UntrackedPathspecFilter>,
4175}
4176
4177// The wildmatch engine and the single-item pathspec matcher now live in the
4178// shared `sley-pathspec` crate. Re-export them so existing `sley-worktree`
4179// callers (and the t3070 `ls-files` path) keep their public surface unchanged.
4180pub use sley_pathspec::{
4181    PathspecMatchMagic, WM_CASEFOLD, WM_PATHNAME, pathspec_is_glob, pathspec_item_matches,
4182    wildmatch,
4183};
4184
4185/// Whether `path` matches an `ls-files` pathspec (literal, directory prefix, or glob).
4186pub fn untracked_pathspec_matches(spec: &UntrackedPathspecFilter, path: &[u8]) -> bool {
4187    if spec.path.is_empty() {
4188        return true;
4189    }
4190    let path_no_slash = path.strip_suffix(b"/").unwrap_or(path);
4191    if path == spec.path.as_slice() || path_no_slash == spec.path.as_slice() {
4192        return true;
4193    }
4194    if spec.recursive
4195        && let Some(rest) = path
4196            .strip_prefix(spec.path.as_slice())
4197            .and_then(|rest| rest.strip_prefix(b"/"))
4198        && !rest.is_empty()
4199    {
4200        return true;
4201    }
4202    if spec.is_glob {
4203        return untracked_wildmatch(&spec.path, path)
4204            || untracked_wildmatch(&spec.path, path_no_slash);
4205    }
4206    false
4207}
4208
4209/// Whether a directory walk must descend into `parent` to satisfy active pathspecs.
4210pub fn untracked_pathspec_needs_descent(parent: &[u8], specs: &[UntrackedPathspecFilter]) -> bool {
4211    if specs.is_empty() {
4212        return false;
4213    }
4214    let parent_prefix = if parent.is_empty() {
4215        Vec::new()
4216    } else {
4217        let mut prefix = parent.to_vec();
4218        prefix.push(b'/');
4219        prefix
4220    };
4221    for spec in specs {
4222        if !parent.is_empty()
4223            && spec.path.starts_with(&parent_prefix)
4224            && spec.path.as_slice() != parent
4225        {
4226            return true;
4227        }
4228        if spec.is_glob && glob_pathspec_may_match_under(&spec.path, parent) {
4229            return true;
4230        }
4231        if spec.recursive
4232            && !parent.is_empty()
4233            && parent.starts_with(spec.path.as_slice())
4234            && parent != spec.path.as_slice()
4235        {
4236            return true;
4237        }
4238    }
4239    false
4240}
4241
4242/// Whether some pathspec selects the directory `git_path` *as a whole* (so an
4243/// untracked directory can roll up to `dir/` under `--directory`), as opposed to
4244/// only matching something strictly below it (which forces descent). A
4245/// directory-prefix pathspec covering the directory, an exact directory match, or
4246/// a glob matching the directory's own name all count; a deeper glob such as
4247/// `dir/*.c` or an exact file path inside the directory does not.
4248fn untracked_pathspec_selects_directory(
4249    specs: &[UntrackedPathspecFilter],
4250    git_path: &[u8],
4251) -> bool {
4252    specs
4253        .iter()
4254        .any(|spec| untracked_pathspec_matches(spec, git_path))
4255}
4256
4257fn glob_pathspec_may_match_under(pattern: &[u8], dir: &[u8]) -> bool {
4258    let literal_prefix = literal_prefix_before_glob(pattern);
4259    if literal_prefix.is_empty() {
4260        return true;
4261    }
4262    if dir.is_empty() {
4263        return true;
4264    }
4265    let mut dir_prefix = dir.to_vec();
4266    dir_prefix.push(b'/');
4267    if literal_prefix.starts_with(&dir_prefix) {
4268        return true;
4269    }
4270    if dir_prefix.starts_with(&literal_prefix) {
4271        return true;
4272    }
4273    literal_prefix
4274        .strip_suffix(b"/")
4275        .is_some_and(|prefix| prefix == dir)
4276}
4277
/// Returns the leading bytes of `pattern` that precede its first glob-special
/// byte: `*`, `?`, `[`, or the escape character `\`.
///
/// The backslash is treated as special because an escape sequence such as
/// `\[` stands for a single literal byte in the matched path. Copying the
/// backslash into the "literal" prefix would produce bytes that can never be
/// a prefix of a real path, and callers comparing this prefix against
/// directory names would then wrongly conclude the pattern cannot match.
/// Stopping early only shortens the prefix, which keeps callers conservative.
///
/// A pattern with no special byte is returned whole; an empty pattern yields
/// an empty prefix.
fn literal_prefix_before_glob(pattern: &[u8]) -> Vec<u8> {
    let literal_len = pattern
        .iter()
        .position(|&byte| matches!(byte, b'*' | b'?' | b'[' | b'\\'))
        .unwrap_or(pattern.len());
    pattern[..literal_len].to_vec()
}
4288
/// Inserts `git_path` into `paths` as a directory entry, i.e. with exactly one
/// trailing `/` appended when the path does not already end in `/`.
fn insert_untracked_directory(paths: &mut BTreeSet<Vec<u8>>, git_path: &[u8]) {
    let needs_slash = !git_path.ends_with(b"/");
    let mut entry = Vec::with_capacity(git_path.len() + usize::from(needs_slash));
    entry.extend_from_slice(git_path);
    if needs_slash {
        entry.push(b'/');
    }
    paths.insert(entry);
}
4296
4297/// fnmatch-style glob where `*` and `?` match any byte including `/`.
4298fn untracked_wildmatch(pattern: &[u8], text: &[u8]) -> bool {
4299    // Untracked-walk pathspec globs match with PATHMATCH semantics (`*` crosses
4300    // `/`), matching git's default (non-GLOB-magic) pathspec behavior.
4301    wildmatch(pattern, text, 0)
4302}
4303
/// Describes the ignore pattern that decided the fate of a path.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct IgnoreMatch {
    /// Name of the source the pattern was read from.
    pub source: Vec<u8>,
    /// Line number of the pattern within `source`.
    pub line_number: usize,
    /// The pattern bytes.
    pub pattern: Vec<u8>,
    /// Whether the pattern marks the path as ignored.
    pub ignored: bool,
}
4311
/// The state an attribute can be given for a path.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum AttributeState {
    /// The attribute is set.
    Set,
    /// The attribute is explicitly unset.
    Unset,
    /// The attribute carries the given value bytes.
    Value(Vec<u8>),
}
4318
4319#[derive(Debug, Clone, PartialEq, Eq)]
4320pub struct AttributeCheck {
4321    pub attribute: Vec<u8>,
4322    pub state: Option<AttributeState>,
4323}
4324
4325pub fn untracked_paths_with_options(
4326    worktree_root: impl AsRef<Path>,
4327    git_dir: impl AsRef<Path>,
4328    format: ObjectFormat,
4329    options: UntrackedPathOptions,
4330) -> Result<Vec<Vec<u8>>> {
4331    let worktree_root = worktree_root.as_ref();
4332    let git_dir = git_dir.as_ref();
4333    let db = FileObjectDatabase::from_git_dir(git_dir, format);
4334    let (index, stat_cache, _) = read_index_entries_with_stat_cache(git_dir, format, &db)?;
4335    let ignores = IgnoreMatcher::from_sources(
4336        worktree_root,
4337        options.exclude_standard,
4338        &options.exclude_patterns,
4339        &options.exclude_per_directory,
4340    )?;
4341    if options.ignored_only {
4342        return ignored_untracked_paths(
4343            worktree_root,
4344            git_dir,
4345            &index,
4346            &ignores,
4347            options.directory,
4348        );
4349    }
4350    if options.directory {
4351        let mut paths = BTreeSet::new();
4352        collect_untracked_directory_paths(
4353            worktree_root,
4354            git_dir,
4355            worktree_root,
4356            &index,
4357            &ignores,
4358            &options,
4359            &mut paths,
4360        )?;
4361        return Ok(paths.into_iter().collect());
4362    }
4363    let worktree = worktree_entries_with_stat_cache(
4364        worktree_root,
4365        git_dir,
4366        format,
4367        Some(&stat_cache),
4368        None,
4369        None,
4370    )?;
4371    Ok(ls_files_untracked_paths_from_worktree(
4372        &worktree, &index, &ignores,
4373    ))
4374}
4375
4376/// Untracked paths for `ls-files --others` (without `--directory`): every
4377/// untracked file is listed individually, except embedded-repository boundaries
4378/// which are emitted as `dir/` to match git's non-submodule `.git` handling.
4379fn ls_files_untracked_paths_from_worktree(
4380    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
4381    index: &BTreeMap<Vec<u8>, TrackedEntry>,
4382    ignores: &IgnoreMatcher,
4383) -> Vec<Vec<u8>> {
4384    let mut paths = BTreeSet::new();
4385    for (path, entry) in worktree {
4386        if index.contains_key(path) || ignores.is_ignored(path, false) {
4387            continue;
4388        }
4389        if entry.mode == 0o040000 && entry.oid.is_null() {
4390            insert_untracked_directory(&mut paths, path);
4391            continue;
4392        }
4393        paths.insert(path.clone());
4394    }
4395    paths.into_iter().collect()
4396}
4397
4398pub fn path_matches_standard_ignore(
4399    worktree_root: impl AsRef<Path>,
4400    path: &[u8],
4401    is_dir: bool,
4402) -> Result<bool> {
4403    path_matches_ignore(worktree_root, path, is_dir, true, &[])
4404}
4405
4406pub fn standard_ignore_match(
4407    worktree_root: impl AsRef<Path>,
4408    path: &[u8],
4409    is_dir: bool,
4410) -> Result<Option<IgnoreMatch>> {
4411    let ignores = IgnoreMatcher::from_worktree_root(worktree_root.as_ref())?;
4412    Ok(ignores.match_for(path, is_dir).map(IgnorePattern::to_match))
4413}
4414
4415pub fn standard_attributes_for_path(
4416    worktree_root: impl AsRef<Path>,
4417    path: &[u8],
4418    requested: &[Vec<u8>],
4419    all: bool,
4420) -> Result<Vec<AttributeCheck>> {
4421    let matcher = AttributeMatcher::from_worktree_root(worktree_root.as_ref())?;
4422    Ok(matcher.attributes_for_path(path, requested, all))
4423}
4424
4425/// A reusable matcher for standard worktree attributes (global or
4426/// `core.attributesFile`, every in-tree `.gitattributes`, and
4427/// `$GIT_DIR/info/attributes`).
4428///
4429/// This is behaviourally identical to [`standard_attributes_for_path`] except
4430/// the attribute sources are read once and reused for each path.
4431pub struct StandardAttributeMatcher {
4432    matcher: AttributeMatcher,
4433}
4434
4435impl StandardAttributeMatcher {
4436    pub fn from_worktree_root(worktree_root: impl AsRef<Path>) -> Result<Self> {
4437        Ok(Self {
4438            matcher: AttributeMatcher::from_worktree_root(worktree_root.as_ref())?,
4439        })
4440    }
4441
4442    pub fn attributes_for_path(
4443        &self,
4444        path: &[u8],
4445        requested: &[Vec<u8>],
4446        all: bool,
4447    ) -> Vec<AttributeCheck> {
4448        self.matcher.attributes_for_path(path, requested, all)
4449    }
4450}
4451
4452pub fn standard_attributes_for_path_from_tree(
4453    worktree_root: impl AsRef<Path>,
4454    db: &FileObjectDatabase,
4455    format: ObjectFormat,
4456    tree_oid: &ObjectId,
4457    path: &[u8],
4458    requested: &[Vec<u8>],
4459    all: bool,
4460) -> Result<Vec<AttributeCheck>> {
4461    let mut matcher = AttributeMatcher::default();
4462    let worktree_root = worktree_root.as_ref();
4463    if !matcher.read_configured_attributes(worktree_root) {
4464        matcher.read_default_global_attributes();
4465    }
4466    collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
4467    read_attribute_patterns(
4468        worktree_root.join(".git").join("info").join("attributes"),
4469        &mut matcher,
4470        &[],
4471        b".git/info/attributes",
4472    );
4473    Ok(matcher.attributes_for_path(path, requested, all))
4474}
4475
4476pub fn standard_attributes_for_path_from_index(
4477    worktree_root: impl AsRef<Path>,
4478    git_dir: impl AsRef<Path>,
4479    format: ObjectFormat,
4480    path: &[u8],
4481    requested: &[Vec<u8>],
4482    all: bool,
4483) -> Result<Vec<AttributeCheck>> {
4484    let worktree_root = worktree_root.as_ref();
4485    let git_dir = git_dir.as_ref();
4486    let mut matcher = AttributeMatcher::default();
4487    if !matcher.read_configured_attributes(worktree_root) {
4488        matcher.read_default_global_attributes();
4489    }
4490    let db = FileObjectDatabase::from_git_dir(git_dir, format);
4491    collect_attribute_patterns_from_index(git_dir, format, &db, &mut matcher)?;
4492    read_attribute_patterns(
4493        worktree_root.join(".git").join("info").join("attributes"),
4494        &mut matcher,
4495        &[],
4496        b".git/info/attributes",
4497    );
4498    Ok(matcher.attributes_for_path(path, requested, all))
4499}
4500
4501pub fn path_matches_ignore(
4502    worktree_root: impl AsRef<Path>,
4503    path: &[u8],
4504    is_dir: bool,
4505    exclude_standard: bool,
4506    exclude_patterns: &[Vec<u8>],
4507) -> Result<bool> {
4508    path_matches_ignore_with_per_directory(
4509        worktree_root,
4510        path,
4511        is_dir,
4512        exclude_standard,
4513        exclude_patterns,
4514        &[],
4515    )
4516}
4517
4518pub fn path_matches_ignore_with_per_directory(
4519    worktree_root: impl AsRef<Path>,
4520    path: &[u8],
4521    is_dir: bool,
4522    exclude_standard: bool,
4523    exclude_patterns: &[Vec<u8>],
4524    exclude_per_directory: &[String],
4525) -> Result<bool> {
4526    let ignores = IgnoreMatcher::from_sources(
4527        worktree_root.as_ref(),
4528        exclude_standard,
4529        exclude_patterns,
4530        exclude_per_directory,
4531    )?;
4532    Ok(ignores.is_ignored(path, is_dir))
4533}
4534
4535pub fn ignored_index_entries<'a>(
4536    worktree_root: impl AsRef<Path>,
4537    entries: &'a [IndexEntry],
4538    exclude_standard: bool,
4539    exclude_patterns: &[Vec<u8>],
4540    exclude_per_directory: &[String],
4541) -> Result<Vec<&'a IndexEntry>> {
4542    let ignores = IgnoreMatcher::from_sources(
4543        worktree_root.as_ref(),
4544        exclude_standard,
4545        exclude_patterns,
4546        exclude_per_directory,
4547    )?;
4548    Ok(entries
4549        .iter()
4550        .filter(|entry| ignores.is_ignored(entry.path.as_bytes(), false))
4551        .collect())
4552}
4553
4554fn collect_untracked_directory_paths(
4555    root: &Path,
4556    git_dir: &Path,
4557    dir: &Path,
4558    index: &BTreeMap<Vec<u8>, TrackedEntry>,
4559    ignores: &IgnoreMatcher,
4560    options: &UntrackedPathOptions,
4561    paths: &mut BTreeSet<Vec<u8>>,
4562) -> Result<()> {
4563    if is_same_path(dir, git_dir) {
4564        return Ok(());
4565    }
4566    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
4567    entries.sort_by_key(|entry| entry.file_name());
4568    for entry in entries {
4569        let path = entry.path();
4570        if is_dot_git_entry(&path) {
4571            continue;
4572        }
4573        if is_embedded_git_internals(root, &path) {
4574            continue;
4575        }
4576        if is_same_path(&path, git_dir) {
4577            continue;
4578        }
4579        let metadata = entry.metadata()?;
4580        let relative = path.strip_prefix(root).map_err(|_| {
4581            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
4582        })?;
4583        let git_path = git_path_bytes(relative)?;
4584        if ignores.is_ignored(&git_path, metadata.is_dir()) {
4585            continue;
4586        }
4587        if metadata.is_dir() {
4588            if is_nested_repository_boundary(&path) {
4589                insert_untracked_directory(paths, &git_path);
4590                continue;
4591            }
4592            let has_tracked_below = index_has_path_under(index, &git_path);
4593            let needs_descent = untracked_pathspec_needs_descent(&git_path, &options.pathspecs);
4594            if has_tracked_below {
4595                collect_untracked_directory_paths(
4596                    root, git_dir, &path, index, ignores, options, paths,
4597                )?;
4598            } else if needs_descent {
4599                // A pathspec reaches into this wholly-untracked directory. Git's
4600                // `--directory` still rolls it up to `dir/` when a pathspec selects
4601                // the directory *as a whole* (a directory-prefix that covers it, or
4602                // a glob matching its name). It descends only when a pathspec
4603                // targets something strictly below it that does not select the
4604                // directory itself (e.g. a deeper glob like `dir/*.c` or an exact
4605                // file path).
4606                if untracked_pathspec_selects_directory(&options.pathspecs, &git_path) {
4607                    insert_untracked_directory(paths, &git_path);
4608                    continue;
4609                }
4610                collect_untracked_directory_paths(
4611                    root, git_dir, &path, index, ignores, options, paths,
4612                )?;
4613            } else if options.preserve_ignored_directories
4614                && directory_has_ignored(&path, root, git_dir, ignores)?
4615            {
4616                collect_untracked_directory_paths(
4617                    root, git_dir, &path, index, ignores, options, paths,
4618                )?;
4619            } else if !options.no_empty_directory
4620                || directory_has_file(&path, root, git_dir, ignores)?
4621            {
4622                insert_untracked_directory(paths, &git_path);
4623            }
4624        } else if !index.contains_key(&git_path)
4625            && (metadata.is_file() || metadata.file_type().is_symlink())
4626            && (options.pathspecs.is_empty()
4627                || options
4628                    .pathspecs
4629                    .iter()
4630                    .any(|spec| untracked_pathspec_matches(spec, &git_path)))
4631        {
4632            // A file reached here was found by descending into its parent
4633            // directory, which happens only when that directory is not eligible
4634            // for rollup (it contains tracked content, has ignored entries `-d`
4635            // must preserve, or a pathspec selects something strictly below it).
4636            // Git's `--directory` rollup is a directory-level decision made when
4637            // the whole directory matches; an individually-reached file is always
4638            // listed individually.
4639            paths.insert(git_path);
4640        }
4641    }
4642    Ok(())
4643}
4644
4645fn index_has_path_under(index: &BTreeMap<Vec<u8>, TrackedEntry>, directory: &[u8]) -> bool {
4646    // The index map is sorted, so a single range query finds whether any tracked
4647    // path lives under `directory/` in O(log n) — scanning every key was O(n) per
4648    // untracked directory (quadratic over a deep untracked tree).
4649    let mut prefix = directory.to_vec();
4650    prefix.push(b'/');
4651    index
4652        .range::<[u8], _>((
4653            std::ops::Bound::Included(prefix.as_slice()),
4654            std::ops::Bound::Unbounded,
4655        ))
4656        .next()
4657        .is_some_and(|(path, _)| path.starts_with(&prefix))
4658}
4659
4660/// Derives normal-mode untracked paths (directory rollup) from the worktree map
4661/// produced by the single status walk, avoiding a third filesystem traversal.
4662fn normal_untracked_paths_from_worktree(
4663    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
4664    index: &BTreeMap<Vec<u8>, TrackedEntry>,
4665    ignores: &IgnoreMatcher,
4666) -> Vec<Vec<u8>> {
4667    let mut paths = BTreeSet::new();
4668    for (path, entry) in worktree {
4669        if index.contains_key(path) || ignores.is_ignored(path, false) {
4670            continue;
4671        }
4672        if entry.mode == 0o040000 && entry.oid.is_null() {
4673            insert_untracked_directory(&mut paths, path);
4674            continue;
4675        }
4676        paths.insert(untracked_normal_rollup_path(path, index, ignores));
4677    }
4678    paths.into_iter().collect()
4679}
4680
4681fn status_untracked_paths_from_index(
4682    root: &Path,
4683    git_dir: &Path,
4684    index: &Index,
4685    stat_cache: &IndexStatCache,
4686    ignores: &mut IgnoreMatcher,
4687    untracked_mode: StatusUntrackedMode,
4688) -> Result<Vec<Vec<u8>>> {
4689    if matches!(untracked_mode, StatusUntrackedMode::None) {
4690        return Ok(Vec::new());
4691    }
4692    let mut paths = BTreeSet::new();
4693    let tracked_dirs = stage0_tracked_directories(index);
4694    let mut context = StatusUntrackedWalk {
4695        git_dir,
4696        tracked: stat_cache,
4697        tracked_dirs: &tracked_dirs,
4698        ignores,
4699        untracked_mode,
4700    };
4701    collect_status_untracked_paths(&mut context, root, &[], &mut paths)?;
4702    Ok(paths.into_iter().collect())
4703}
4704
4705fn status_untracked_paths_from_borrowed_index(
4706    root: &Path,
4707    git_dir: &Path,
4708    index: &BorrowedIndex<'_>,
4709    ignores: &mut IgnoreMatcher,
4710    untracked_mode: StatusUntrackedMode,
4711) -> Result<Vec<Vec<u8>>> {
4712    if matches!(untracked_mode, StatusUntrackedMode::None) {
4713        return Ok(Vec::new());
4714    }
4715    let mut paths = BTreeSet::new();
4716    let tracked = BorrowedIndexLookup::new(&index.entries);
4717    let mut context = StatusUntrackedWalk {
4718        git_dir,
4719        tracked: &tracked,
4720        tracked_dirs: &tracked.tracked_dirs,
4721        ignores,
4722        untracked_mode,
4723    };
4724    collect_status_untracked_paths(&mut context, root, &[], &mut paths)?;
4725    Ok(paths.into_iter().collect())
4726}
4727
/// Tracked-path queries needed by the status untracked walk, abstracted so
/// the walk can run against different index representations.
trait StatusTrackedLookup {
    /// Whether `git_path` is a tracked path.
    fn contains_tracked(&self, git_path: &[u8]) -> bool;
    /// Whether `git_path` is tracked as a gitlink.
    fn is_tracked_gitlink(&self, git_path: &[u8]) -> bool;
}
4732
4733impl StatusTrackedLookup for IndexStatCache {
4734    fn contains_tracked(&self, git_path: &[u8]) -> bool {
4735        self.contains(git_path)
4736    }
4737
4738    fn is_tracked_gitlink(&self, git_path: &[u8]) -> bool {
4739        self.gitlink_entry(git_path).is_some()
4740    }
4741}
4742
4743struct BorrowedIndexLookup<'a> {
4744    tracked: HashSet<&'a [u8]>,
4745    gitlinks: HashSet<&'a [u8]>,
4746    tracked_dirs: HashSet<&'a [u8]>,
4747}
4748
4749impl<'a> BorrowedIndexLookup<'a> {
4750    fn new(entries: &'a [IndexEntryRef<'a>]) -> Self {
4751        let mut tracked = HashSet::with_capacity(entries.len());
4752        let mut gitlinks = HashSet::new();
4753        let mut tracked_dirs = HashSet::new();
4754        for entry in entries {
4755            if entry.stage() != Stage::Normal {
4756                continue;
4757            }
4758            let path = entry.path;
4759            tracked.insert(path);
4760            if entry.mode == 0o160000 {
4761                gitlinks.insert(path);
4762            }
4763            for (idx, byte) in path.iter().enumerate() {
4764                if *byte == b'/' && idx > 0 {
4765                    tracked_dirs.insert(&path[..idx]);
4766                }
4767            }
4768        }
4769        Self {
4770            tracked,
4771            gitlinks,
4772            tracked_dirs,
4773        }
4774    }
4775}
4776
4777impl StatusTrackedLookup for BorrowedIndexLookup<'_> {
4778    fn contains_tracked(&self, git_path: &[u8]) -> bool {
4779        self.tracked.contains(git_path)
4780    }
4781
4782    fn is_tracked_gitlink(&self, git_path: &[u8]) -> bool {
4783        self.gitlinks.contains(git_path)
4784    }
4785}
4786
4787struct StatusUntrackedWalk<'a, T: StatusTrackedLookup + ?Sized> {
4788    git_dir: &'a Path,
4789    tracked: &'a T,
4790    tracked_dirs: &'a HashSet<&'a [u8]>,
4791    ignores: &'a mut IgnoreMatcher,
4792    untracked_mode: StatusUntrackedMode,
4793}
4794
4795fn collect_status_untracked_paths<T: StatusTrackedLookup + ?Sized>(
4796    context: &mut StatusUntrackedWalk<'_, T>,
4797    dir: &Path,
4798    dir_git_path: &[u8],
4799    paths: &mut BTreeSet<Vec<u8>>,
4800) -> Result<()> {
4801    if is_same_path(dir, context.git_dir) {
4802        return Ok(());
4803    }
4804    let ignore_len = context.ignores.patterns.len();
4805    let entries = read_dir_entries_with_ignore_patterns(dir, dir_git_path, context.ignores)?;
4806    let result = (|| -> Result<()> {
4807        let mut git_path = dir_git_path.to_vec();
4808        for entry in entries {
4809            let file_name = entry.file_name();
4810            if file_name == std::ffi::OsStr::new(".git") {
4811                continue;
4812            }
4813            let file_type = entry.file_type()?;
4814            let is_dir = file_type.is_dir();
4815            let path_len = git_path_push_component(&mut git_path, &file_name);
4816            let entry_result = (|| -> Result<()> {
4817                if file_type.is_file() || file_type.is_symlink() {
4818                    if !context.tracked.contains_tracked(&git_path)
4819                        && !context.ignores.is_ignored(&git_path, false)
4820                    {
4821                        paths.insert(git_path.clone());
4822                    }
4823                    return Ok(());
4824                } else if is_dir {
4825                    if context.ignores.is_ignored(&git_path, true) {
4826                        return Ok(());
4827                    }
4828                    let path = entry.path();
4829                    if is_same_path(&path, context.git_dir) {
4830                        return Ok(());
4831                    }
4832                    if context.tracked.is_tracked_gitlink(&git_path) {
4833                        return Ok(());
4834                    }
4835                    match context.untracked_mode {
4836                        StatusUntrackedMode::All => {
4837                            if !context.tracked_dirs.contains(git_path.as_slice())
4838                                && is_nested_repository_boundary(&path)
4839                            {
4840                                insert_untracked_directory(paths, &git_path);
4841                            } else {
4842                                collect_status_untracked_paths(context, &path, &git_path, paths)?;
4843                            }
4844                        }
4845                        StatusUntrackedMode::Normal => {
4846                            if context.tracked_dirs.contains(git_path.as_slice()) {
4847                                collect_status_untracked_paths(context, &path, &git_path, paths)?;
4848                            } else if is_nested_repository_boundary(&path) {
4849                                insert_untracked_directory(paths, &git_path);
4850                            } else if status_untracked_directory_has_file(
4851                                context, &path, &git_path,
4852                            )? {
4853                                insert_untracked_directory(paths, &git_path);
4854                            }
4855                        }
4856                        StatusUntrackedMode::None => {}
4857                    }
4858                }
4859                Ok(())
4860            })();
4861            git_path.truncate(path_len);
4862            entry_result?;
4863        }
4864        Ok(())
4865    })();
4866    context.ignores.patterns.truncate(ignore_len);
4867    result
4868}
4869
4870fn stage0_tracked_directories(index: &Index) -> HashSet<&[u8]> {
4871    let mut directories = HashSet::new();
4872    for entry in index
4873        .entries
4874        .iter()
4875        .filter(|entry| entry.stage() == Stage::Normal)
4876    {
4877        let path = entry.path.as_bytes();
4878        for (idx, byte) in path.iter().enumerate() {
4879            if *byte == b'/' && idx > 0 {
4880                directories.insert(&path[..idx]);
4881            }
4882        }
4883    }
4884    directories
4885}
4886
4887fn status_untracked_directory_has_file<T: StatusTrackedLookup + ?Sized>(
4888    context: &mut StatusUntrackedWalk<'_, T>,
4889    dir: &Path,
4890    dir_git_path: &[u8],
4891) -> Result<bool> {
4892    if is_same_path(dir, context.git_dir) {
4893        return Ok(false);
4894    }
4895    let ignore_len = context.ignores.patterns.len();
4896    let entries = read_dir_entries_with_ignore_patterns(dir, dir_git_path, context.ignores)?;
4897    let result = (|| -> Result<bool> {
4898        let mut git_path = dir_git_path.to_vec();
4899        for entry in entries {
4900            let file_name = entry.file_name();
4901            if file_name == std::ffi::OsStr::new(".git") {
4902                continue;
4903            }
4904            let file_type = entry.file_type()?;
4905            let is_dir = file_type.is_dir();
4906            let path_len = git_path_push_component(&mut git_path, &file_name);
4907            let entry_result = (|| -> Result<Option<bool>> {
4908                if context.ignores.is_ignored(&git_path, is_dir) {
4909                    return Ok(None);
4910                }
4911                if file_type.is_file() || file_type.is_symlink() {
4912                    return Ok(Some(!context.tracked.contains_tracked(&git_path)));
4913                }
4914                if is_dir {
4915                    let path = entry.path();
4916                    if is_same_path(&path, context.git_dir) {
4917                        return Ok(None);
4918                    }
4919                    if is_nested_repository_boundary(&path) {
4920                        return Ok(Some(true));
4921                    }
4922                    if status_untracked_directory_has_file(context, &path, &git_path)? {
4923                        return Ok(Some(true));
4924                    }
4925                }
4926                Ok(None)
4927            })();
4928            git_path.truncate(path_len);
4929            if let Some(has_file) = entry_result? {
4930                return Ok(has_file);
4931            }
4932        }
4933        Ok(false)
4934    })();
4935    context.ignores.patterns.truncate(ignore_len);
4936    result
4937}
4938
4939fn read_dir_entries_with_ignore_patterns(
4940    dir: &Path,
4941    base: &[u8],
4942    matcher: &mut IgnoreMatcher,
4943) -> Result<Vec<fs::DirEntry>> {
4944    let mut entries = Vec::new();
4945    let mut ignore_path = None;
4946    for entry in fs::read_dir(dir)? {
4947        let entry = entry?;
4948        if entry.file_name() == std::ffi::OsStr::new(".gitignore") {
4949            ignore_path = Some(entry.path());
4950        }
4951        entries.push(entry);
4952    }
4953    if let Some(path) = ignore_path {
4954        let mut source = base.to_vec();
4955        if !source.is_empty() {
4956            source.push(b'/');
4957        }
4958        source.extend_from_slice(b".gitignore");
4959        read_ignore_patterns(path, &mut matcher.patterns, base, &source);
4960    }
4961    Ok(entries)
4962}
4963
4964fn untracked_normal_rollup_path(
4965    file_path: &[u8],
4966    index: &BTreeMap<Vec<u8>, TrackedEntry>,
4967    ignores: &IgnoreMatcher,
4968) -> Vec<u8> {
4969    let segments = file_path
4970        .split(|byte| *byte == b'/')
4971        .filter(|segment| !segment.is_empty())
4972        .collect::<Vec<_>>();
4973    if segments.len() <= 1 {
4974        return file_path.to_vec();
4975    }
4976    let mut prefix = Vec::new();
4977    for segment in &segments[..segments.len() - 1] {
4978        if !prefix.is_empty() {
4979            prefix.push(b'/');
4980        }
4981        prefix.extend_from_slice(segment);
4982        if index_has_path_under(index, &prefix) {
4983            break;
4984        }
4985        if !ignores.is_ignored(&prefix, true) {
4986            let mut directory = prefix;
4987            directory.push(b'/');
4988            return directory;
4989        }
4990    }
4991    file_path.to_vec()
4992}
4993
4994fn directory_has_file(
4995    dir: &Path,
4996    root: &Path,
4997    git_dir: &Path,
4998    ignores: &IgnoreMatcher,
4999) -> Result<bool> {
5000    if is_same_path(dir, git_dir) {
5001        return Ok(false);
5002    }
5003    for entry in fs::read_dir(dir)? {
5004        let entry = entry?;
5005        let path = entry.path();
5006        if is_dot_git_entry(&path) {
5007            continue;
5008        }
5009        if is_embedded_git_internals(root, &path) {
5010            continue;
5011        }
5012        if is_same_path(&path, git_dir) {
5013            continue;
5014        }
5015        let metadata = entry.metadata()?;
5016        let relative = path.strip_prefix(root).map_err(|_| {
5017            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
5018        })?;
5019        let git_path = git_path_bytes(relative)?;
5020        if ignores.is_ignored(&git_path, metadata.is_dir()) {
5021            continue;
5022        }
5023        if metadata.is_file() || metadata.file_type().is_symlink() {
5024            return Ok(true);
5025        }
5026        if metadata.is_dir() {
5027            if is_nested_repository_boundary(&path) {
5028                continue;
5029            }
5030            if directory_has_file(&path, root, git_dir, ignores)? {
5031                return Ok(true);
5032            }
5033        }
5034    }
5035    Ok(false)
5036}
5037
5038fn directory_has_ignored(
5039    dir: &Path,
5040    root: &Path,
5041    git_dir: &Path,
5042    ignores: &IgnoreMatcher,
5043) -> Result<bool> {
5044    if is_same_path(dir, git_dir) {
5045        return Ok(false);
5046    }
5047    for entry in fs::read_dir(dir)? {
5048        let entry = entry?;
5049        let path = entry.path();
5050        if is_dot_git_entry(&path) {
5051            continue;
5052        }
5053        if is_same_path(&path, git_dir) {
5054            continue;
5055        }
5056        let metadata = entry.metadata()?;
5057        let relative = path.strip_prefix(root).map_err(|_| {
5058            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
5059        })?;
5060        let git_path = git_path_bytes(relative)?;
5061        if ignores.is_ignored(&git_path, metadata.is_dir()) {
5062            return Ok(true);
5063        }
5064        if metadata.is_dir() && directory_has_ignored(&path, root, git_dir, ignores)? {
5065            return Ok(true);
5066        }
5067    }
5068    Ok(false)
5069}
5070
5071fn ignored_untracked_paths(
5072    root: &Path,
5073    git_dir: &Path,
5074    index: &BTreeMap<Vec<u8>, TrackedEntry>,
5075    ignores: &IgnoreMatcher,
5076    directory: bool,
5077) -> Result<Vec<Vec<u8>>> {
5078    let mut paths = BTreeSet::new();
5079    let context = IgnoredUntrackedContext {
5080        root,
5081        git_dir,
5082        index,
5083        ignores,
5084        directory,
5085    };
5086    collect_ignored_untracked_paths(&context, root, false, &mut paths)?;
5087    Ok(paths.into_iter().collect())
5088}
5089
5090fn ignored_traditional_path_is_empty_directory(root: &Path, path: &[u8]) -> Result<bool> {
5091    let Some(path) = path.strip_suffix(b"/") else {
5092        return Ok(false);
5093    };
5094    let mut absolute = PathBuf::new();
5095    set_worktree_path_from_repo_path(root, path, &mut absolute)?;
5096    match fs::read_dir(&absolute) {
5097        Ok(mut entries) => Ok(entries.next().is_none()),
5098        Err(err) if err.kind() == std::io::ErrorKind::NotADirectory => Ok(false),
5099        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
5100        Err(err) => Err(err.into()),
5101    }
5102}
5103
/// Read-only state shared by every level of the
/// [`collect_ignored_untracked_paths`] recursion.
struct IgnoredUntrackedContext<'a> {
    /// Worktree root; absolute paths are made repository-relative against it.
    root: &'a Path,
    /// Git directory, which is never entered or reported.
    git_dir: &'a Path,
    /// Tracked entries keyed by repository path; tracked paths are not reported.
    index: &'a BTreeMap<Vec<u8>, TrackedEntry>,
    /// Ignore rules deciding which untracked paths are collected.
    ignores: &'a IgnoreMatcher,
    /// When `true`, an ignored directory without tracked content is reported
    /// once as `dir/` instead of being descended into.
    directory: bool,
}
5111
5112fn collect_ignored_untracked_paths(
5113    context: &IgnoredUntrackedContext<'_>,
5114    dir: &Path,
5115    parent_ignored: bool,
5116    paths: &mut BTreeSet<Vec<u8>>,
5117) -> Result<()> {
5118    if is_same_path(dir, context.git_dir) {
5119        return Ok(());
5120    }
5121    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
5122    entries.sort_by_key(|entry| entry.file_name());
5123    for entry in entries {
5124        let path = entry.path();
5125        if is_dot_git_entry(&path) {
5126            continue;
5127        }
5128        if is_same_path(&path, context.git_dir) {
5129            continue;
5130        }
5131        let metadata = entry.metadata()?;
5132        let relative = path.strip_prefix(context.root).map_err(|_| {
5133            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
5134        })?;
5135        let git_path = git_path_bytes(relative)?;
5136        if metadata.is_dir() {
5137            let ignored = parent_ignored || context.ignores.is_ignored(&git_path, true);
5138            if ignored && !index_has_path_under(context.index, &git_path) {
5139                if context.directory {
5140                    let mut directory_path = git_path;
5141                    directory_path.push(b'/');
5142                    paths.insert(directory_path);
5143                } else {
5144                    collect_ignored_untracked_paths(context, &path, true, paths)?;
5145                }
5146            } else {
5147                if is_nested_repository_boundary(&path) {
5148                    continue;
5149                }
5150                collect_ignored_untracked_paths(context, &path, ignored, paths)?;
5151            }
5152        } else if !context.index.contains_key(&git_path)
5153            && (metadata.is_file() || metadata.file_type().is_symlink())
5154            && (parent_ignored || context.ignores.is_ignored(&git_path, false))
5155        {
5156            paths.insert(git_path);
5157        }
5158    }
5159    Ok(())
5160}
5161
/// Ordered collection of compiled ignore patterns.
///
/// Lookups scan `patterns` from the last entry to the first and stop at the
/// first match, so patterns pushed later take precedence over earlier ones.
#[derive(Debug, Default)]
struct IgnoreMatcher {
    /// Compiled patterns, lowest precedence first.
    patterns: Vec<IgnorePattern>,
}
5166
/// One compiled ignore (or sparse-checkout) pattern line.
#[derive(Debug)]
struct IgnorePattern {
    /// Repository-relative directory the pattern is scoped to; a path must be
    /// `base/...` to match. Empty when the pattern applies from the root.
    base: Vec<u8>,
    /// Pattern text after removing the negation marker, a trailing `/`, a
    /// leading `/` and a collapsible leading `**/`.
    pattern: Vec<u8>,
    /// The line as written, after CR and trailing-space normalisation; this is
    /// what match reports show.
    original: Vec<u8>,
    /// Label of the place the pattern came from (e.g. a file path).
    source: Vec<u8>,
    /// 1-based line number within `source`; `0` for patterns that were not
    /// read from a file.
    line_number: usize,
    /// `true` for `!pattern` lines, which re-include instead of ignoring.
    negated: bool,
    /// `true` when the line ended in `/` and therefore only applies to
    /// directories (and, for positive patterns, to what lives inside them).
    directory_only: bool,
    /// `true` when the line started with `/`.
    anchored: bool,
    /// `true` when `pattern` still contains a `/`, so it is matched against
    /// the path relative to `base` rather than against the basename.
    has_slash: bool,
    /// How `pattern` should be matched against a slash-free segment. Most
    /// `.gitignore` entries are literals or simple `*.ext` / `prefix*` globs, all
    /// of which match without the allocating wildcard DP engine; only genuinely
    /// complex globs fall through to [`wildcard_path_matches`].
    match_kind: MatchKind,
}
5184
/// Classification of an [`IgnorePattern`] that lets common shapes skip the
/// general wildcard matcher.
///
/// The [`MatchKind::Suffix`] and [`MatchKind::Prefix`] fast paths are only
/// chosen when the literal part is slash-free, so they are exact for a single
/// path segment. A pattern without any metacharacter is [`MatchKind::Literal`]
/// even when it contains `/`; byte equality is exact for it either way.
/// Everything else is [`MatchKind::Glob`].
#[derive(Debug)]
enum MatchKind {
    /// No metacharacters: matches by byte equality.
    Literal,
    /// `*X` with `X` literal: matches a segment ending in `X`.
    Suffix,
    /// `X*` with `X` literal: matches a segment starting with `X`.
    Prefix,
    /// Anything else: defer to [`wildcard_path_matches`].
    Glob,
}
5200
5201/// Classify `pattern` for [`MatchKind`]. `*X`/`X*` fast paths require the literal
5202/// part to be slash-free so that `ends_with`/`starts_with` on a single segment is
5203/// exactly equivalent to the glob (`*` never crosses `/`).
5204fn classify_ignore_pattern(pattern: &[u8]) -> MatchKind {
5205    let stars = pattern.iter().filter(|byte| **byte == b'*').count();
5206    let other_meta = pattern
5207        .iter()
5208        .any(|byte| matches!(byte, b'?' | b'[' | b'\\'));
5209    if stars == 0 && !other_meta {
5210        return MatchKind::Literal;
5211    }
5212    if stars == 1 && !other_meta {
5213        let literal = if pattern.first() == Some(&b'*') {
5214            Some((&pattern[1..], MatchKind::Suffix))
5215        } else if pattern.last() == Some(&b'*') {
5216            Some((&pattern[..pattern.len() - 1], MatchKind::Prefix))
5217        } else {
5218            None
5219        };
5220        if let Some((literal, kind)) = literal
5221            && !literal.is_empty()
5222            && !literal.contains(&b'/')
5223        {
5224            return kind;
5225        }
5226    }
5227    MatchKind::Glob
5228}
5229
5230impl IgnoreMatcher {
5231    fn from_sources(
5232        root: &Path,
5233        exclude_standard: bool,
5234        patterns: &[Vec<u8>],
5235        per_directory: &[String],
5236    ) -> Result<Self> {
5237        let mut matcher = if exclude_standard {
5238            Self::from_worktree_root(root)?
5239        } else {
5240            Self::default()
5241        };
5242        matcher.extend_patterns(patterns);
5243        matcher.extend_per_directory_patterns(root, per_directory)?;
5244        Ok(matcher)
5245    }
5246
5247    /// Builds only the repository-wide ignore sources — `core.excludesFile` (or the
5248    /// default global) and `$GIT_DIR/info/exclude` — *without* walking the worktree
5249    /// for `.gitignore`. The caller folds each directory's `.gitignore` into the
5250    /// matcher as it descends (see [`read_dir_ignore_patterns`]), so status reads
5251    /// the tree exactly once instead of doing a separate full-tree ignore pass.
5252    fn from_worktree_base(root: &Path) -> Result<Self> {
5253        let mut patterns = Vec::new();
5254        read_ignore_patterns(
5255            root.join(".git").join("info").join("exclude"),
5256            &mut patterns,
5257            &[],
5258            b".git/info/exclude",
5259        );
5260        if !read_core_excludes_file(root, &mut patterns) {
5261            read_default_global_excludes_file(&mut patterns);
5262        }
5263        Ok(Self { patterns })
5264    }
5265
5266    fn from_worktree_root(root: &Path) -> Result<Self> {
5267        let mut patterns = Vec::new();
5268        read_ignore_patterns(
5269            root.join(".git").join("info").join("exclude"),
5270            &mut patterns,
5271            &[],
5272            b".git/info/exclude",
5273        );
5274        if !read_core_excludes_file(root, &mut patterns) {
5275            read_default_global_excludes_file(&mut patterns);
5276        }
5277        collect_per_directory_patterns(root, root, &[String::from(".gitignore")], &mut patterns)?;
5278        Ok(Self { patterns })
5279    }
5280
5281    fn extend_patterns(&mut self, patterns: &[Vec<u8>]) {
5282        for pattern in patterns {
5283            push_ignore_pattern(&mut self.patterns, pattern, &[], &[], 0);
5284        }
5285    }
5286
5287    fn extend_per_directory_patterns(&mut self, root: &Path, names: &[String]) -> Result<()> {
5288        if names.is_empty() {
5289            return Ok(());
5290        }
5291        collect_per_directory_patterns(root, root, names, &mut self.patterns)
5292    }
5293
5294    fn is_ignored(&self, path: &[u8], is_dir: bool) -> bool {
5295        let basename = path.rsplit(|byte| *byte == b'/').next().unwrap_or(path);
5296        for pattern in self.patterns.iter().rev() {
5297            if pattern.matches_with_basename(path, basename, is_dir) {
5298                return !pattern.negated;
5299            }
5300        }
5301        false
5302    }
5303
5304    fn match_for(&self, path: &[u8], is_dir: bool) -> Option<&IgnorePattern> {
5305        let basename = path.rsplit(|byte| *byte == b'/').next().unwrap_or(path);
5306        for pattern in self.patterns.iter().rev() {
5307            if pattern.matches_with_basename(path, basename, is_dir) {
5308                return Some(pattern);
5309            }
5310        }
5311        None
5312    }
5313}
5314
/// Decides whether a worktree path is included by a [`SparseCheckout`].
///
/// In [`SparseCheckoutMode::Full`] the sparse patterns are compiled with the
/// same `.gitignore` grammar used elsewhere in this crate ([`IgnorePattern`]);
/// a path is *in cone* when the last matching pattern is positive. In
/// [`SparseCheckoutMode::Cone`] the patterns are reduced to a set of recursive
/// directory prefixes plus a flag for whether top-level files are kept, and
/// inclusion is decided by literal prefix containment.
#[derive(Debug)]
enum SparseMatcher {
    /// Full pattern matching; `patterns` keeps the order of the sparse
    /// specification, and the last matching pattern decides.
    Full { patterns: Vec<IgnorePattern> },
    /// Directory-prefix matching over the compiled cone.
    Cone(ConeMatcher),
}
5328
/// Compiled form of a cone-shaped sparse specification: inclusion is decided
/// by comparing directory prefixes, never by globbing.
#[derive(Debug, Default)]
struct ConeMatcher {
    /// `true` when files directly at the repository root are in cone (`/*`).
    root_files: bool,
    /// Directory prefixes (without leading or trailing `/`) whose entire
    /// subtree is in cone, e.g. `dir1/dir2`.
    recursive_dirs: Vec<Vec<u8>>,
    /// Parent directories that are in cone only for their direct files, so
    /// that intermediate directories keep their own files without pulling in
    /// sibling subtrees. Stored without leading or trailing `/`.
    parent_dirs: Vec<Vec<u8>>,
}
5341
5342impl SparseMatcher {
5343    fn new(sparse: &SparseCheckout, mode: SparseCheckoutMode) -> Self {
5344        let resolved = match mode {
5345            SparseCheckoutMode::Auto => {
5346                if patterns_are_cone(&sparse.patterns) {
5347                    SparseCheckoutMode::Cone
5348                } else {
5349                    SparseCheckoutMode::Full
5350                }
5351            }
5352            other => other,
5353        };
5354        match resolved {
5355            SparseCheckoutMode::Cone => SparseMatcher::Cone(ConeMatcher::compile(&sparse.patterns)),
5356            // `Auto` has been resolved above; everything else is full matching.
5357            _ => {
5358                let mut patterns = Vec::new();
5359                for pattern in &sparse.patterns {
5360                    push_ignore_pattern(&mut patterns, pattern, &[], b"sparse-checkout", 0);
5361                }
5362                SparseMatcher::Full { patterns }
5363            }
5364        }
5365    }
5366
5367    /// Returns `true` when the given file path should be present in the
5368    /// worktree under this sparse specification.
5369    fn includes_file(&self, path: &[u8]) -> bool {
5370        match self {
5371            SparseMatcher::Full { patterns } => {
5372                let mut included = false;
5373                for pattern in patterns {
5374                    if pattern.matches(path, false) {
5375                        included = !pattern.negated;
5376                    }
5377                }
5378                included
5379            }
5380            SparseMatcher::Cone(cone) => cone.includes_file(path),
5381        }
5382    }
5383}
5384
5385impl ConeMatcher {
5386    fn compile(patterns: &[Vec<u8>]) -> Self {
5387        let mut matcher = ConeMatcher::default();
5388        for raw in patterns {
5389            let line = sparse_clean_line(raw);
5390            if line.is_empty() || line.starts_with(b"#") {
5391                continue;
5392            }
5393            // Negated guards such as `!/*/` and `!/dir/*/` only exist to stop a
5394            // recursive match from pulling in nested directories; the positive
5395            // patterns already capture the cone, so we ignore the negations.
5396            if line.starts_with(b"!") {
5397                continue;
5398            }
5399            if line == b"/*" {
5400                matcher.root_files = true;
5401                continue;
5402            }
5403            // `/dir/` -> recursive subtree.
5404            if let Some(rest) = line.strip_prefix(b"/")
5405                && let Some(dir) = rest.strip_suffix(b"/")
5406                && !dir.is_empty()
5407            {
5408                matcher.recursive_dirs.push(dir.to_vec());
5409                continue;
5410            }
5411            // `/dir/*` -> direct files of `dir` only (parent guard).
5412            if let Some(rest) = line.strip_prefix(b"/")
5413                && let Some(dir) = rest.strip_suffix(b"/*")
5414                && !dir.is_empty()
5415            {
5416                matcher.parent_dirs.push(dir.to_vec());
5417                continue;
5418            }
5419        }
5420        matcher
5421    }
5422
5423    fn includes_file(&self, path: &[u8]) -> bool {
5424        let parent = match path.iter().rposition(|byte| *byte == b'/') {
5425            Some(index) => &path[..index],
5426            None => {
5427                // A path with no slash is a top-level file.
5428                return self.root_files;
5429            }
5430        };
5431        if self
5432            .recursive_dirs
5433            .iter()
5434            .any(|dir| path_is_under_dir(path, dir))
5435        {
5436            return true;
5437        }
5438        self.parent_dirs.iter().any(|dir| dir.as_slice() == parent)
5439    }
5440}
5441
5442/// Strips a CR, leading/trailing whitespace, and an optional trailing slash is
5443/// preserved (cone patterns are slash sensitive) from a raw sparse line.
5444fn sparse_clean_line(raw: &[u8]) -> &[u8] {
5445    let line = raw.strip_suffix(b"\r").unwrap_or(raw);
5446    trim_ascii_whitespace(line)
5447}
5448
/// Returns `true` when `path` lies strictly beneath the directory `dir`, i.e.
/// it starts with `dir` followed by `/`. An empty `dir` contains every path;
/// a `path` equal to `dir` itself does not count as being under it.
fn path_is_under_dir(path: &[u8], dir: &[u8]) -> bool {
    if dir.is_empty() {
        return true;
    }
    matches!(path.strip_prefix(dir), Some([b'/', ..]))
}
5458
5459/// Heuristic used by [`SparseCheckoutMode::Auto`]: the pattern set is cone
5460/// shaped when every (non-comment, non-blank) line is one of the restricted
5461/// cone forms Git emits.
5462fn patterns_are_cone(patterns: &[Vec<u8>]) -> bool {
5463    let mut saw_pattern = false;
5464    for raw in patterns {
5465        let line = sparse_clean_line(raw);
5466        if line.is_empty() || line.starts_with(b"#") {
5467            continue;
5468        }
5469        saw_pattern = true;
5470        let body = line.strip_prefix(b"!").unwrap_or(line);
5471        let is_cone_shaped = body == b"/*"
5472            || body == b"/*/"
5473            || (body.starts_with(b"/")
5474                && (body.ends_with(b"/") || body.ends_with(b"/*"))
5475                && !sparse_has_glob_meta(body));
5476        if !is_cone_shaped {
5477            return false;
5478        }
5479    }
5480    saw_pattern
5481}
5482
/// Reports whether `body` contains a glob metacharacter (`*`, `?`, `[`, `]`
/// or `\`) that rules out cone interpretation. One trailing `/*` is removed
/// before scanning because the caller accepts that form on its own.
fn sparse_has_glob_meta(body: &[u8]) -> bool {
    let scanned = match body.strip_suffix(b"/*") {
        Some(head) => head,
        None => body,
    };
    scanned.iter().any(|byte| b"*?[]\\".contains(byte))
}
5491
5492fn read_core_excludes_file(root: &Path, patterns: &mut Vec<IgnorePattern>) -> bool {
5493    let Ok(config) = sley_config::read_repo_config(&root.join(".git"), None) else {
5494        return false;
5495    };
5496    let Some(value) = config.get("core", None, "excludesFile") else {
5497        return false;
5498    };
5499    let path = expand_core_excludes_file(root, value);
5500    read_ignore_patterns(path, patterns, &[], value.as_bytes());
5501    true
5502}
5503
/// Resolves a `core.excludesFile` value to a filesystem path.
///
/// Absolute values are returned as they are. A value starting with `~/` is
/// resolved against `$HOME` when that variable is set. Everything else,
/// including `~/...` when `$HOME` is unset, is joined onto `root`.
fn expand_core_excludes_file(root: &Path, value: &str) -> PathBuf {
    let configured = Path::new(value);
    if configured.is_absolute() {
        return configured.to_path_buf();
    }
    value
        .strip_prefix("~/")
        .and_then(|rest| std::env::var_os("HOME").map(|home| PathBuf::from(home).join(rest)))
        .unwrap_or_else(|| root.join(configured))
}
5516
5517fn read_default_global_excludes_file(patterns: &mut Vec<IgnorePattern>) {
5518    if let Some(config_home) = std::env::var_os("XDG_CONFIG_HOME")
5519        && !config_home.is_empty()
5520    {
5521        let path = PathBuf::from(config_home).join("git").join("ignore");
5522        let source = path.to_string_lossy().into_owned();
5523        read_ignore_patterns(path, patterns, &[], source.as_bytes());
5524        return;
5525    }
5526    if let Some(home) = std::env::var_os("HOME") {
5527        let path = PathBuf::from(home)
5528            .join(".config")
5529            .join("git")
5530            .join("ignore");
5531        let source = path.to_string_lossy().into_owned();
5532        read_ignore_patterns(path, patterns, &[], source.as_bytes());
5533    }
5534}
5535
5536fn collect_per_directory_patterns(
5537    root: &Path,
5538    dir: &Path,
5539    names: &[String],
5540    patterns: &mut Vec<IgnorePattern>,
5541) -> Result<()> {
5542    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
5543    entries.sort_by_key(|entry| entry.file_name());
5544    for entry in entries {
5545        let path = entry.path();
5546        if path.file_name().and_then(|name| name.to_str()) == Some(".git") {
5547            continue;
5548        }
5549        let metadata = entry.metadata()?;
5550        if metadata.is_dir() {
5551            collect_per_directory_patterns(root, &path, names, patterns)?;
5552            continue;
5553        }
5554        if !metadata.is_file() {
5555            continue;
5556        }
5557        let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
5558            continue;
5559        };
5560        if !names.iter().any(|name| name == file_name) {
5561            continue;
5562        }
5563        let parent = path.parent().unwrap_or(root);
5564        let relative = parent.strip_prefix(root).map_err(|_| {
5565            GitError::InvalidPath(format!("path {} is outside worktree", parent.display()))
5566        })?;
5567        let base = git_path_bytes(relative)?;
5568        let mut source = base.clone();
5569        if !source.is_empty() {
5570            source.push(b'/');
5571        }
5572        source.extend_from_slice(file_name.as_bytes());
5573        read_ignore_patterns(&path, patterns, &base, &source);
5574    }
5575    Ok(())
5576}
5577
5578fn read_ignore_patterns(
5579    path: impl AsRef<Path>,
5580    patterns: &mut Vec<IgnorePattern>,
5581    base: &[u8],
5582    source: &[u8],
5583) {
5584    let Ok(contents) = fs::read(path) else {
5585        return;
5586    };
5587    for (line, raw) in contents.split(|byte| *byte == b'\n').enumerate() {
5588        push_ignore_pattern(patterns, raw, base, source, line + 1);
5589    }
5590}
5591
5592fn push_ignore_pattern(
5593    patterns: &mut Vec<IgnorePattern>,
5594    raw: &[u8],
5595    base: &[u8],
5596    source: &[u8],
5597    line_number: usize,
5598) {
5599    let mut line = raw.strip_suffix(b"\r").unwrap_or(raw).to_vec();
5600    normalize_ignore_trailing_spaces(&mut line);
5601    let original = line.clone();
5602    let mut line = line.as_slice();
5603    if line.is_empty() || line.starts_with(b"#") {
5604        return;
5605    }
5606    let negated = if line.starts_with(b"\\#") || line.starts_with(b"\\!") {
5607        line = &line[1..];
5608        false
5609    } else if let Some(pattern) = line.strip_prefix(b"!") {
5610        line = pattern;
5611        true
5612    } else {
5613        false
5614    };
5615    let directory_only = line.ends_with(b"/");
5616    let pattern = if directory_only {
5617        line.strip_suffix(b"/").unwrap_or(line)
5618    } else {
5619        line
5620    };
5621    let (anchored, pattern) = if let Some(pattern) = pattern.strip_prefix(b"/") {
5622        (true, pattern)
5623    } else {
5624        (false, pattern)
5625    };
5626    // A leading `**/` followed by a slash-free segment is, per gitignore,
5627    // identical to the bare segment ("match in all directories"): `**/Pods` ≡
5628    // `Pods`, `**/*.jks` ≡ `*.jks`. Collapse it so the pattern matches the
5629    // basename directly (a literal/suffix compare) instead of paying for the
5630    // `**` wildcard engine on the full path — verified against `git check-ignore`.
5631    let pattern = match pattern.strip_prefix(b"**/") {
5632        Some(rest) if !rest.is_empty() && !rest.contains(&b'/') => rest,
5633        _ => pattern,
5634    };
5635    if pattern.is_empty() {
5636        return;
5637    }
5638    patterns.push(IgnorePattern {
5639        base: base.to_vec(),
5640        pattern: pattern.to_vec(),
5641        original,
5642        source: source.to_vec(),
5643        line_number,
5644        negated,
5645        directory_only,
5646        anchored,
5647        has_slash: pattern.contains(&b'/'),
5648        match_kind: classify_ignore_pattern(pattern),
5649    });
5650}
5651
/// Removes trailing spaces from a pattern line in place.
///
/// Unescaped trailing spaces are dropped one at a time. When the last space
/// is preceded by an odd number of backslashes it is an escaped space: the
/// escaping backslash is removed, the space is kept, and trimming stops.
fn normalize_ignore_trailing_spaces(line: &mut Vec<u8>) {
    while line.ends_with(b" ") {
        let body_len = line.len() - 1;
        let preceding_backslashes = line[..body_len]
            .iter()
            .rev()
            .take_while(|&&byte| byte == b'\\')
            .count();
        if preceding_backslashes % 2 == 0 {
            line.truncate(body_len);
            continue;
        }
        // Odd count: the final backslash escapes this space.
        line.remove(body_len - 1);
        return;
    }
}
5667
5668impl IgnorePattern {
5669    fn to_match(&self) -> IgnoreMatch {
5670        IgnoreMatch {
5671            source: self.source.clone(),
5672            line_number: self.line_number,
5673            pattern: self.original.clone(),
5674            ignored: !self.negated,
5675        }
5676    }
5677
5678    fn matches(&self, path: &[u8], is_dir: bool) -> bool {
5679        let basename = path.rsplit(|byte| *byte == b'/').next().unwrap_or(path);
5680        self.matches_with_basename(path, basename, is_dir)
5681    }
5682
5683    fn matches_with_basename(&self, path: &[u8], basename: &[u8], is_dir: bool) -> bool {
5684        let path = if self.base.is_empty() {
5685            path
5686        } else {
5687            let Some(rest) = path
5688                .strip_prefix(self.base.as_slice())
5689                .and_then(|rest| rest.strip_prefix(b"/"))
5690            else {
5691                return false;
5692            };
5693            rest
5694        };
5695        if self.directory_only {
5696            return self.matches_directory(path, is_dir);
5697        }
5698        if self.anchored || self.has_slash {
5699            return self.match_segment(path);
5700        }
5701        self.match_segment(basename)
5702    }
5703
5704    fn matches_directory(&self, path: &[u8], is_dir: bool) -> bool {
5705        if self.anchored || self.has_slash {
5706            if is_dir && self.match_path(path) {
5707                return true;
5708            }
5709            // For a *file* path, a directory-only pattern can only apply
5710            // through an *ancestor* directory of the file: the leaf is matched
5711            // only because it lives inside a directory the pattern excludes
5712            // (e.g. `/tmp-*/` excludes `tmp-info-only`, so `tmp-info-only/x`
5713            // is excluded too). Upstream git models this through directory
5714            // traversal — `last_matching_pattern` skips a MUSTBEDIR pattern for
5715            // a non-directory leaf (`dtype != DT_DIR`), and a file is excluded
5716            // only when one of its parent directories is excluded.
5717            //
5718            // A *negated* directory-only pattern (`!data/**/`) re-includes a
5719            // directory but, per git, does NOT re-include the files inside it
5720            // (git's docs: "it is not possible to re-include a file if a parent
5721            // directory of that file is excluded" — re-including the dir with
5722            // `!dir/` still requires an explicit `!dir/*` to reach its files).
5723            // So a negated directory-only pattern must never match a file via
5724            // its ancestor, otherwise it wrongly wins the leaf scan and
5725            // un-ignores a file that an earlier positive pattern ignored
5726            // (t0008-ignores "directories and ** matches": `data/**` +
5727            // `!data/**/` must leave `data/data1/file1` ignored).
5728            if self.negated {
5729                return false;
5730            }
5731            return path
5732                .iter()
5733                .enumerate()
5734                .any(|(idx, byte)| *byte == b'/' && self.match_path(&path[..idx]));
5735        }
5736        let mut components = path.split(|byte| *byte == b'/').peekable();
5737        while let Some(component) = components.next() {
5738            if self.match_segment(component) && (is_dir || components.peek().is_some()) {
5739                return true;
5740            }
5741        }
5742        false
5743    }
5744
5745    fn match_path(&self, value: &[u8]) -> bool {
5746        match self.match_kind {
5747            MatchKind::Literal => self.pattern == value,
5748            MatchKind::Suffix => !value.contains(&b'/') && value.ends_with(&self.pattern[1..]),
5749            MatchKind::Prefix => {
5750                !value.contains(&b'/') && value.starts_with(&self.pattern[..self.pattern.len() - 1])
5751            }
5752            MatchKind::Glob => wildcard_path_matches(&self.pattern, value),
5753        }
5754    }
5755
5756    /// Match a slash-free `value` (a basename or path component) against this
5757    /// pattern. Literal and simple `*X`/`X*` patterns resolve with a direct
5758    /// comparison; only complex globs pay for the allocating wildcard engine.
5759    fn match_segment(&self, value: &[u8]) -> bool {
5760        self.match_path(value)
5761    }
5762}
5763
thread_local! {
    /// Reused dynamic-programming scratch for [`wildcard_path_matches`]. Flat
    /// `(pattern.len()+1) * (value.len()+1)` grid of memoised results, kept across
    /// calls so the matcher reuses one per-thread buffer (which only grows
    /// when a call needs a larger grid) instead of allocating a fresh one
    /// every time.
    static WILDCARD_MEMO: RefCell<Vec<Option<bool>>> = const { RefCell::new(Vec::new()) };
}
5770
5771fn wildcard_path_matches(pattern: &[u8], value: &[u8]) -> bool {
5772    let stride = value.len() + 1;
5773    let cells = (pattern.len() + 1) * stride;
5774    WILDCARD_MEMO.with_borrow_mut(|memo| {
5775        // One reused allocation; clearing then resizing fills the grid with `None`.
5776        memo.clear();
5777        memo.resize(cells, None);
5778        wildcard_path_matches_from(pattern, value, 0, 0, memo, stride)
5779    })
5780}
5781
5782fn wildcard_path_matches_from(
5783    pattern: &[u8],
5784    value: &[u8],
5785    pattern_index: usize,
5786    value_index: usize,
5787    memo: &mut [Option<bool>],
5788    stride: usize,
5789) -> bool {
5790    let cell = pattern_index * stride + value_index;
5791    if let Some(cached) = memo[cell] {
5792        return cached;
5793    }
5794    let matched = if pattern_index == pattern.len() {
5795        value_index == value.len()
5796    } else {
5797        match pattern[pattern_index] {
5798            b'*' if pattern.get(pattern_index + 1) == Some(&b'*') => wildcard_double_star_matches(
5799                pattern,
5800                value,
5801                pattern_index,
5802                value_index,
5803                memo,
5804                stride,
5805            ),
5806            b'*' => {
5807                if wildcard_path_matches_from(
5808                    pattern,
5809                    value,
5810                    pattern_index + 1,
5811                    value_index,
5812                    memo,
5813                    stride,
5814                ) {
5815                    true
5816                } else {
5817                    let mut next = value_index;
5818                    while next < value.len() && value[next] != b'/' {
5819                        next += 1;
5820                        if wildcard_path_matches_from(
5821                            pattern,
5822                            value,
5823                            pattern_index + 1,
5824                            next,
5825                            memo,
5826                            stride,
5827                        ) {
5828                            return true;
5829                        }
5830                    }
5831                    false
5832                }
5833            }
5834            b'?' => {
5835                value_index < value.len()
5836                    && value[value_index] != b'/'
5837                    && wildcard_path_matches_from(
5838                        pattern,
5839                        value,
5840                        pattern_index + 1,
5841                        value_index + 1,
5842                        memo,
5843                        stride,
5844                    )
5845            }
5846            b'[' => {
5847                if value_index < value.len() && value[value_index] != b'/' {
5848                    if let Some((class_matches, next_pattern_index)) =
5849                        wildcard_class_matches(pattern, pattern_index, value[value_index])
5850                    {
5851                        class_matches
5852                            && wildcard_path_matches_from(
5853                                pattern,
5854                                value,
5855                                next_pattern_index,
5856                                value_index + 1,
5857                                memo,
5858                                stride,
5859                            )
5860                    } else {
5861                        value[value_index] == b'['
5862                            && wildcard_path_matches_from(
5863                                pattern,
5864                                value,
5865                                pattern_index + 1,
5866                                value_index + 1,
5867                                memo,
5868                                stride,
5869                            )
5870                    }
5871                } else {
5872                    false
5873                }
5874            }
5875            b'\\' if pattern_index + 1 < pattern.len() => {
5876                value_index < value.len()
5877                    && pattern[pattern_index + 1] == value[value_index]
5878                    && wildcard_path_matches_from(
5879                        pattern,
5880                        value,
5881                        pattern_index + 2,
5882                        value_index + 1,
5883                        memo,
5884                        stride,
5885                    )
5886            }
5887            literal => {
5888                value_index < value.len()
5889                    && literal == value[value_index]
5890                    && wildcard_path_matches_from(
5891                        pattern,
5892                        value,
5893                        pattern_index + 1,
5894                        value_index + 1,
5895                        memo,
5896                        stride,
5897                    )
5898            }
5899        }
5900    };
5901    memo[cell] = Some(matched);
5902    matched
5903}
5904
5905fn wildcard_double_star_matches(
5906    pattern: &[u8],
5907    value: &[u8],
5908    pattern_index: usize,
5909    value_index: usize,
5910    memo: &mut [Option<bool>],
5911    stride: usize,
5912) -> bool {
5913    let after_stars = pattern_index + 2;
5914    if pattern.get(after_stars) == Some(&b'/') {
5915        if wildcard_path_matches_from(pattern, value, after_stars + 1, value_index, memo, stride) {
5916            return true;
5917        }
5918        for next in value_index..value.len() {
5919            if value[next] == b'/'
5920                && wildcard_path_matches_from(
5921                    pattern,
5922                    value,
5923                    after_stars + 1,
5924                    next + 1,
5925                    memo,
5926                    stride,
5927                )
5928            {
5929                return true;
5930            }
5931        }
5932        return false;
5933    }
5934    for next in value_index..=value.len() {
5935        if wildcard_path_matches_from(pattern, value, after_stars, next, memo, stride) {
5936            return true;
5937        }
5938    }
5939    false
5940}
5941
/// Evaluates the bracket expression that opens at `pattern[start]` against the
/// single byte `value`.
///
/// Returns `Some((matched, next))`, where `next` is the pattern index just
/// past the closing `]`, or `None` when there is no closing `]` or the class
/// has no members (`[]`, `[!]`). A leading `!` or `^` negates the class.
/// `a-z` style ranges are inclusive and accepted in either order; a `-` that
/// cannot form a range is an ordinary member.
fn wildcard_class_matches(pattern: &[u8], start: usize, value: u8) -> Option<(bool, usize)> {
    let (negated, first_member) = match pattern.get(start + 1) {
        Some(b'!' | b'^') => (true, start + 2),
        _ => (false, start + 1),
    };
    let close = first_member
        + pattern[first_member..]
            .iter()
            .position(|byte| *byte == b']')?;
    if close == first_member {
        return None;
    }

    let members = &pattern[first_member..close];
    let mut hit = false;
    let mut cursor = 0;
    while cursor < members.len() {
        match &members[cursor..] {
            &[first, b'-', last, ..] => {
                let (low, high) = (first.min(last), first.max(last));
                hit |= (low..=high).contains(&value);
                cursor += 3;
            }
            _ => {
                hit |= members[cursor] == value;
                cursor += 1;
            }
        }
    }
    // Negation flips the verdict.
    Some((hit != negated, close + 1))
}
5970
/// Accumulated gitattributes rules plus the bookkeeping needed to answer
/// attribute queries for a path.
#[derive(Debug, Default)]
struct AttributeMatcher {
    /// Every parsed pattern line, in the order it was read. Lookups walk this
    /// list front to back and let later matches overwrite earlier ones.
    patterns: Vec<AttributePattern>,
    /// Attribute name -> the index at which the name was first recorded; used
    /// to order the "all attributes" listing.
    attribute_order: BTreeMap<Vec<u8>, usize>,
    /// `[attr]name` macro definitions: macro name -> the assignments it expands
    /// to (the macro's own name, set, followed by its body).
    macros: BTreeMap<Vec<u8>, Vec<AttributeAssignment>>,
}
5977
/// One pattern line from an attributes file together with the assignments it
/// applies to matching paths.
#[derive(Debug)]
struct AttributePattern {
    /// Directory the pattern is scoped to, as a repository-relative byte path;
    /// empty when the pattern applies from the top of the tree. A path must
    /// start with `base` followed by `/` to be considered at all.
    base: Vec<u8>,
    /// The pattern text with any leading `/` already removed.
    pattern: Vec<u8>,
    /// Whether the pattern was written with a leading `/`.
    anchored: bool,
    /// Whether `pattern` contains a `/`. Anchored or slash-containing patterns
    /// are matched against the whole (base-relative) path; all others are
    /// matched against the final path component only.
    has_slash: bool,
    /// Attribute assignments applied, in order, when the pattern matches.
    assignments: Vec<AttributeAssignment>,
}
5986
/// A single `attribute` / state pair produced by one field of an attributes
/// line (or by expanding a macro).
#[derive(Debug, Clone, PartialEq, Eq)]
struct AttributeAssignment {
    /// Attribute name, without any `-` / `!` prefix or `=value` suffix.
    attribute: Vec<u8>,
    /// The assigned state; `None` records a `!attribute` field, which resets
    /// the attribute to "unspecified".
    state: Option<AttributeState>,
}
5992
5993impl AttributeMatcher {
5994    fn from_worktree_root(root: &Path) -> Result<Self> {
5995        let mut matcher = Self::default();
5996        if !matcher.read_configured_attributes(root) {
5997            matcher.read_default_global_attributes();
5998        }
5999        collect_attribute_patterns(root, root, &mut matcher)?;
6000        read_attribute_patterns(
6001            root.join(".git").join("info").join("attributes"),
6002            &mut matcher,
6003            &[],
6004            b".git/info/attributes",
6005        );
6006        Ok(matcher)
6007    }
6008
6009    /// Builds only the repository-wide attribute sources — `core.attributesFile`
6010    /// (or the default global) and `$GIT_DIR/info/attributes` — *without* walking
6011    /// the worktree for `.gitattributes`. The caller is expected to fold each
6012    /// directory's `.gitattributes` into the matcher as it descends (see
6013    /// [`read_dir_attribute_patterns`]), so status/diff read the tree exactly once
6014    /// instead of doing a separate full-tree attribute pass. Lower-priority sources
6015    /// are added first, so in-tree patterns added during the walk take precedence —
6016    /// matching git's lookup order.
6017    fn from_worktree_base(root: &Path) -> Self {
6018        let mut matcher = Self::default();
6019        if !matcher.read_configured_attributes(root) {
6020            matcher.read_default_global_attributes();
6021        }
6022        read_attribute_patterns(
6023            root.join(".git").join("info").join("attributes"),
6024            &mut matcher,
6025            &[],
6026            b".git/info/attributes",
6027        );
6028        matcher
6029    }
6030
6031    fn attributes_for_path(
6032        &self,
6033        path: &[u8],
6034        requested: &[Vec<u8>],
6035        all: bool,
6036    ) -> Vec<AttributeCheck> {
6037        let mut states = BTreeMap::<Vec<u8>, Option<AttributeState>>::new();
6038        for pattern in &self.patterns {
6039            if !pattern.matches(path) {
6040                continue;
6041            }
6042            for assignment in &pattern.assignments {
6043                states.insert(assignment.attribute.clone(), assignment.state.clone());
6044            }
6045        }
6046        if all {
6047            let mut checks = states
6048                .into_iter()
6049                .filter_map(|(attribute, state)| {
6050                    state.map(|state| AttributeCheck {
6051                        attribute,
6052                        state: Some(state),
6053                    })
6054                })
6055                .collect::<Vec<_>>();
6056            checks.sort_by(|left, right| {
6057                attribute_all_rank(&left.attribute, &self.attribute_order)
6058                    .cmp(&attribute_all_rank(&right.attribute, &self.attribute_order))
6059                    .then_with(|| left.attribute.cmp(&right.attribute))
6060            });
6061            return checks;
6062        }
6063        requested
6064            .iter()
6065            .map(|attribute| AttributeCheck {
6066                attribute: attribute.clone(),
6067                state: states.get(attribute).cloned().flatten(),
6068            })
6069            .collect()
6070    }
6071
6072    fn push_attribute_order(&mut self, attribute: &[u8]) {
6073        let next = self.attribute_order.len();
6074        self.attribute_order
6075            .entry(attribute.to_vec())
6076            .or_insert(next);
6077    }
6078
6079    fn read_configured_attributes(&mut self, root: &Path) -> bool {
6080        let Ok(config) = sley_config::read_repo_config(&root.join(".git"), None) else {
6081            return false;
6082        };
6083        let Some(value) = config.get("core", None, "attributesFile") else {
6084            return false;
6085        };
6086        let path = expand_core_excludes_file(root, value);
6087        read_attribute_patterns(path, self, &[], value.as_bytes());
6088        true
6089    }
6090
6091    fn read_default_global_attributes(&mut self) {
6092        if let Some(config_home) = std::env::var_os("XDG_CONFIG_HOME")
6093            && !config_home.is_empty()
6094        {
6095            let path = PathBuf::from(config_home).join("git").join("attributes");
6096            let source = path.to_string_lossy().into_owned();
6097            read_attribute_patterns(path, self, &[], source.as_bytes());
6098            return;
6099        }
6100        if let Some(home) = std::env::var_os("HOME") {
6101            let path = PathBuf::from(home)
6102                .join(".config")
6103                .join("git")
6104                .join("attributes");
6105            let source = path.to_string_lossy().into_owned();
6106            read_attribute_patterns(path, self, &[], source.as_bytes());
6107        }
6108    }
6109}
6110
6111fn read_dir_ignore_patterns_for_base(
6112    dir: &Path,
6113    base: &[u8],
6114    matcher: &mut IgnoreMatcher,
6115) -> Result<()> {
6116    let mut source = base.to_vec();
6117    if !source.is_empty() {
6118        source.push(b'/');
6119    }
6120    source.extend_from_slice(b".gitignore");
6121    read_ignore_patterns(dir.join(".gitignore"), &mut matcher.patterns, base, &source);
6122    Ok(())
6123}
6124
6125/// Fold `dir`'s `.gitattributes` (if any) into `matcher`, scoped to `dir`'s path
6126/// within `root`. Used both by the eager full-tree pass and by the status/diff
6127/// worktree walk as it descends, so the tree is read for attributes exactly once.
6128fn read_dir_attribute_patterns(
6129    root: &Path,
6130    dir: &Path,
6131    matcher: &mut AttributeMatcher,
6132) -> Result<()> {
6133    let relative = dir.strip_prefix(root).map_err(|_| {
6134        GitError::InvalidPath(format!("path {} is outside worktree", dir.display()))
6135    })?;
6136    let base = git_path_bytes(relative)?;
6137    read_dir_attribute_patterns_for_base(dir, &base, matcher)
6138}
6139
6140fn read_dir_attribute_patterns_for_base(
6141    dir: &Path,
6142    base: &[u8],
6143    matcher: &mut AttributeMatcher,
6144) -> Result<()> {
6145    let mut source = base.to_vec();
6146    if !source.is_empty() {
6147        source.push(b'/');
6148    }
6149    source.extend_from_slice(b".gitattributes");
6150    read_attribute_patterns(dir.join(".gitattributes"), matcher, base, &source);
6151    Ok(())
6152}
6153
6154fn collect_attribute_patterns(
6155    root: &Path,
6156    dir: &Path,
6157    matcher: &mut AttributeMatcher,
6158) -> Result<()> {
6159    read_dir_attribute_patterns(root, dir, matcher)?;
6160
6161    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
6162    entries.sort_by_key(|entry| entry.file_name());
6163    for entry in entries {
6164        let path = entry.path();
6165        if path.file_name().and_then(|name| name.to_str()) == Some(".git") {
6166            continue;
6167        }
6168        if entry.metadata()?.is_dir() {
6169            collect_attribute_patterns(root, &path, matcher)?;
6170        }
6171    }
6172    Ok(())
6173}
6174
6175fn read_attribute_patterns(
6176    path: impl AsRef<Path>,
6177    matcher: &mut AttributeMatcher,
6178    base: &[u8],
6179    _source: &[u8],
6180) {
6181    let Ok(contents) = fs::read(path) else {
6182        return;
6183    };
6184    read_attribute_patterns_from_bytes(&contents, matcher, base);
6185}
6186
6187fn read_attribute_patterns_from_bytes(
6188    contents: &[u8],
6189    matcher: &mut AttributeMatcher,
6190    base: &[u8],
6191) {
6192    for raw in contents.split(|byte| *byte == b'\n') {
6193        push_attribute_pattern(matcher, raw, base);
6194    }
6195}
6196
6197fn collect_attribute_patterns_from_tree(
6198    db: &FileObjectDatabase,
6199    format: ObjectFormat,
6200    tree_oid: &ObjectId,
6201    base: Vec<u8>,
6202    matcher: &mut AttributeMatcher,
6203) -> Result<()> {
6204    let object = read_expected_object(db, tree_oid, ObjectType::Tree)?;
6205    let mut entries = Tree::parse(format, &object.body)?.entries;
6206    entries.sort_by(|left, right| left.name.cmp(&right.name));
6207    for entry in &entries {
6208        if entry.name == b".gitattributes" && tree_entry_object_type(entry.mode) == ObjectType::Blob
6209        {
6210            let object = db.read_object(&entry.oid).map_err(|err| {
6211                expect_missing_object_kind(err, entry.oid, MissingObjectKind::Blob)
6212            })?;
6213            if object.object_type == ObjectType::Blob {
6214                read_attribute_patterns_from_bytes(&object.body, matcher, &base);
6215            }
6216        }
6217    }
6218    for entry in entries {
6219        if tree_entry_object_type(entry.mode) != ObjectType::Tree {
6220            continue;
6221        }
6222        let mut child_base = base.clone();
6223        if !child_base.is_empty() {
6224            child_base.push(b'/');
6225        }
6226        child_base.extend_from_slice(entry.name.as_bytes());
6227        collect_attribute_patterns_from_tree(db, format, &entry.oid, child_base, matcher)?;
6228    }
6229    Ok(())
6230}
6231
6232fn collect_attribute_patterns_from_index(
6233    git_dir: &Path,
6234    format: ObjectFormat,
6235    db: &FileObjectDatabase,
6236    matcher: &mut AttributeMatcher,
6237) -> Result<()> {
6238    let index_path = repository_index_path(git_dir);
6239    if !index_path.exists() {
6240        return Ok(());
6241    }
6242    let mut entries = Index::parse(&fs::read(index_path)?, format)?.entries;
6243    entries.sort_by(|left, right| left.path.cmp(&right.path));
6244    for entry in entries {
6245        let is_attributes_file =
6246            entry.path == b".gitattributes" || entry.path.as_bytes().ends_with(b"/.gitattributes");
6247        if index_entry_stage(&entry) != 0
6248            || tree_entry_object_type(entry.mode) != ObjectType::Blob
6249            || !is_attributes_file
6250        {
6251            continue;
6252        }
6253        let base = match entry.path.as_bytes().strip_suffix(b".gitattributes") {
6254            Some(b"") => Vec::new(),
6255            Some(parent) => parent.strip_suffix(b"/").unwrap_or(parent).to_vec(),
6256            None => continue,
6257        };
6258        let object = db
6259            .read_object(&entry.oid)
6260            .map_err(|err| expect_missing_object_kind(err, entry.oid, MissingObjectKind::Blob))?;
6261        if object.object_type == ObjectType::Blob {
6262            read_attribute_patterns_from_bytes(&object.body, matcher, &base);
6263        }
6264    }
6265    Ok(())
6266}
6267
6268fn push_attribute_pattern(matcher: &mut AttributeMatcher, raw: &[u8], base: &[u8]) {
6269    let line = raw.strip_suffix(b"\r").unwrap_or(raw);
6270    let line = trim_ascii_whitespace(line);
6271    if line.is_empty() || line.starts_with(b"#") {
6272        return;
6273    }
6274    let mut fields = line
6275        .split(|byte| byte.is_ascii_whitespace())
6276        .filter(|field| !field.is_empty());
6277    let Some(raw_pattern) = fields.next() else {
6278        return;
6279    };
6280    if let Some(macro_name) = raw_pattern.strip_prefix(b"[attr]") {
6281        if macro_name.is_empty() {
6282            return;
6283        }
6284        let mut assignments = vec![AttributeAssignment {
6285            attribute: macro_name.to_vec(),
6286            state: Some(AttributeState::Set),
6287        }];
6288        for field in fields {
6289            push_attribute_assignments(&mut assignments, field, &matcher.macros);
6290        }
6291        for assignment in &assignments {
6292            matcher.push_attribute_order(&assignment.attribute);
6293        }
6294        matcher.macros.insert(macro_name.to_vec(), assignments);
6295        return;
6296    }
6297    let mut assignments = Vec::new();
6298    for field in fields {
6299        push_attribute_assignments(&mut assignments, field, &matcher.macros);
6300    }
6301    if assignments.is_empty() {
6302        return;
6303    }
6304    for assignment in &assignments {
6305        matcher.push_attribute_order(&assignment.attribute);
6306    }
6307    let (anchored, pattern) = if let Some(pattern) = raw_pattern.strip_prefix(b"/") {
6308        (true, pattern)
6309    } else {
6310        (false, raw_pattern)
6311    };
6312    if pattern.is_empty() {
6313        return;
6314    }
6315    matcher.patterns.push(AttributePattern {
6316        base: base.to_vec(),
6317        pattern: pattern.to_vec(),
6318        anchored,
6319        has_slash: pattern.contains(&b'/'),
6320        assignments,
6321    });
6322}
6323
6324fn push_attribute_assignments(
6325    assignments: &mut Vec<AttributeAssignment>,
6326    field: &[u8],
6327    macros: &BTreeMap<Vec<u8>, Vec<AttributeAssignment>>,
6328) {
6329    if let Some(macro_assignments) = macros.get(field) {
6330        assignments.extend(macro_assignments.iter().cloned());
6331        return;
6332    }
6333    if field == b"binary" {
6334        assignments.push(AttributeAssignment {
6335            attribute: b"binary".to_vec(),
6336            state: Some(AttributeState::Set),
6337        });
6338        assignments.push(AttributeAssignment {
6339            attribute: b"diff".to_vec(),
6340            state: Some(AttributeState::Unset),
6341        });
6342        assignments.push(AttributeAssignment {
6343            attribute: b"merge".to_vec(),
6344            state: Some(AttributeState::Unset),
6345        });
6346        assignments.push(AttributeAssignment {
6347            attribute: b"text".to_vec(),
6348            state: Some(AttributeState::Unset),
6349        });
6350        return;
6351    }
6352    if let Some(attribute) = field.strip_prefix(b"-") {
6353        if !attribute.is_empty() {
6354            assignments.push(AttributeAssignment {
6355                attribute: attribute.to_vec(),
6356                state: Some(AttributeState::Unset),
6357            });
6358        }
6359        return;
6360    }
6361    if let Some(attribute) = field.strip_prefix(b"!") {
6362        if !attribute.is_empty() {
6363            assignments.push(AttributeAssignment {
6364                attribute: attribute.to_vec(),
6365                state: None,
6366            });
6367        }
6368        return;
6369    }
6370    if let Some(equal) = field.iter().position(|byte| *byte == b'=') {
6371        let attribute = &field[..equal];
6372        let value = &field[equal + 1..];
6373        if !attribute.is_empty() {
6374            assignments.push(AttributeAssignment {
6375                attribute: attribute.to_vec(),
6376                state: Some(AttributeState::Value(value.to_vec())),
6377            });
6378        }
6379        return;
6380    }
6381    assignments.push(AttributeAssignment {
6382        attribute: field.to_vec(),
6383        state: Some(AttributeState::Set),
6384    });
6385}
6386
/// Sort key used when listing all attributes of a path.
///
/// The key is `(group, first_seen, name)`: `group` puts `binary`, `diff`,
/// `merge` and `text` first (in that order), then every other attribute, and
/// `eol` last; `first_seen` is the attribute's position in `order`
/// (`usize::MAX` when it is not recorded there); `name` is a copy of the
/// attribute name, which breaks any remaining tie.
fn attribute_all_rank(
    attribute: &[u8],
    order: &BTreeMap<Vec<u8>, usize>,
) -> (usize, usize, Vec<u8>) {
    const OTHER_GROUP: usize = 4;
    const FIXED_GROUPS: [(&[u8], usize); 5] = [
        (b"binary", 0),
        (b"diff", 1),
        (b"merge", 2),
        (b"text", 3),
        (b"eol", 5),
    ];

    let group = FIXED_GROUPS
        .iter()
        .find(|(name, _)| *name == attribute)
        .map_or(OTHER_GROUP, |(_, group)| *group);
    let first_seen = order.get(attribute).map_or(usize::MAX, |position| *position);
    (group, first_seen, attribute.to_owned())
}
6402
/// Returns `value` without its leading and trailing ASCII whitespace (as
/// defined by [`u8::is_ascii_whitespace`]). Interior whitespace is kept; an
/// all-whitespace input yields an empty slice.
fn trim_ascii_whitespace(value: &[u8]) -> &[u8] {
    let is_content = |byte: &u8| !byte.is_ascii_whitespace();
    match value.iter().position(is_content) {
        // Nothing but whitespace (or nothing at all).
        None => &value[value.len()..],
        Some(start) => {
            let end = value
                .iter()
                .rposition(is_content)
                .map_or(start, |last| last + 1);
            &value[start..end]
        }
    }
}
6412
6413impl AttributePattern {
6414    fn matches(&self, path: &[u8]) -> bool {
6415        let path = if self.base.is_empty() {
6416            path
6417        } else {
6418            let Some(rest) = path
6419                .strip_prefix(self.base.as_slice())
6420                .and_then(|rest| rest.strip_prefix(b"/"))
6421            else {
6422                return false;
6423            };
6424            rest
6425        };
6426        if self.anchored || self.has_slash {
6427            return wildcard_path_matches(&self.pattern, path);
6428        }
6429        path.rsplit(|byte| *byte == b'/')
6430            .next()
6431            .is_some_and(|basename| wildcard_path_matches(&self.pattern, basename))
6432    }
6433}
6434
6435// ---------------------------------------------------------------------------
6436// Content filtering on the blob <-> worktree boundary
6437//
6438// Git runs two kinds of conversion when content crosses between the worktree
6439// and the object database:
6440//
6441//   * the line-ending / `core.autocrlf` conversion (driven by the `text`,
6442//     `eol` attributes and the `core.autocrlf` / `core.eol` config), and
//   * the `filter.<name>.clean` / `.smudge` driver filters
//     (selected by the `filter=<name>` attribute and configured commands).
6445//
6446// "clean" runs on the way *into* the object store (worktree -> blob), e.g. on
6447// `git add` / `git hash-object -w`. "smudge" runs on the way *out* (blob ->
6448// worktree), e.g. on checkout / restore. The driver filter, when present,
6449// wraps the EOL conversion: on clean git first runs the configured `clean`
6450// command and then applies CRLF->LF normalization; on smudge git first applies
6451// LF->CRLF and then runs the `smudge` command.
6452// ---------------------------------------------------------------------------
6453
/// The line-ending conversion that applies to a path, derived from its
/// attributes and the repository config.
///
/// The variant only describes the *direction* of conversion; whether any
/// conversion runs at all is additionally gated by the path's
/// [`TextDecision`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EolConversion {
    /// No conversion: binary content, or text with `core.autocrlf=false` and no
    /// `eol`/`text=auto` request to add carriage returns.
    None,
    /// Normalize to LF on clean; no carriage returns on smudge (`eol=lf`, or
    /// `core.autocrlf=input`).
    Lf,
    /// Normalize to LF on clean; emit CRLF on smudge (`eol=crlf`, or
    /// `core.autocrlf=true`). This is the only variant for which the smudge-side
    /// LF->CRLF check can ever answer "convert".
    Crlf,
}
6468
/// How git should decide whether a path is text for the purpose of EOL
/// conversion.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TextDecision {
    /// `-text` / `binary`: never convert.
    Binary,
    /// `text` is set explicitly: always treat as text.
    Text,
    /// `text=auto` (or implied by `core.autocrlf`): treat as text unless the
    /// content looks binary. The content is inspected each time conversion is
    /// considered.
    Auto,
    /// No opinion from attributes or config: leave content untouched. For EOL
    /// conversion this behaves like [`TextDecision::Binary`].
    Unspecified,
}
6483
/// The fully resolved set of conversions that apply to a single path.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ContentFilterPlan {
    /// Whether (and how) the path counts as text; gates every EOL conversion.
    text: TextDecision,
    /// The conversion to apply when `text` resolves to "this is text".
    eol: EolConversion,
    /// `filter.<name>` driver, if assigned via attributes and configured.
    driver: Option<FilterDriver>,
}
6493
/// A content filter driver selected for a path.
#[derive(Debug, Clone, PartialEq, Eq)]
struct FilterDriver {
    /// Driver name — presumably the `<name>` from the `filter=<name>` attribute;
    /// verify against the resolver.
    name: Vec<u8>,
    /// Command for the clean direction (worktree -> blob), if one is configured.
    /// NOTE(review): presumably `filter.<name>.clean` — confirm.
    clean: Option<String>,
    /// Command for the smudge direction (blob -> worktree), if one is
    /// configured. NOTE(review): presumably `filter.<name>.smudge` — confirm.
    smudge: Option<String>,
    /// NOTE(review): presumably mirrors `filter.<name>.required` (a failing or
    /// missing filter is then an error rather than a pass-through) — confirm
    /// against the code that runs the driver.
    required: bool,
}
6501
6502/// Decode one crlf-family attribute (`text` or its legacy alias `crlf`) into a
6503/// text decision, plus whether the value form forced an EOL direction.
6504///
6505/// Mirrors git's `git_path_check_crlf` (convert.c): a *set* attribute is text,
6506/// an *unset* one is binary, `=auto` is auto, `=input` forces LF while still
6507/// counting as text, and any other value is "undefined" — i.e. no opinion, so
6508/// the caller falls through to the next source (the `crlf` alias, then config).
6509fn decode_crlf_family_attribute(state: Option<&AttributeState>) -> (TextDecision, EolConversion) {
6510    match state {
6511        Some(AttributeState::Set) => (TextDecision::Text, EolConversion::None),
6512        Some(AttributeState::Unset) => (TextDecision::Binary, EolConversion::None),
6513        Some(AttributeState::Value(value)) if value == b"auto" => {
6514            (TextDecision::Auto, EolConversion::None)
6515        }
6516        // `crlf=input` / `text=input`: text content normalized to LF (no CR on
6517        // smudge), exactly like `core.autocrlf=input`.
6518        Some(AttributeState::Value(value)) if value == b"input" => {
6519            (TextDecision::Text, EolConversion::Lf)
6520        }
6521        // `=<other>` is CRLF_UNDEFINED in git for the `crlf` alias: no opinion.
6522        _ => (TextDecision::Unspecified, EolConversion::None),
6523    }
6524}
6525
6526impl ContentFilterPlan {
6527    /// Build the plan for `path` from the parsed attributes and repo config.
6528    fn resolve(config: &GitConfig, checks: &[AttributeCheck]) -> Self {
6529        let text_attr = checks.iter().find(|check| check.attribute == b"text");
6530        let crlf_attr = checks.iter().find(|check| check.attribute == b"crlf");
6531        let eol_attr = checks.iter().find(|check| check.attribute == b"eol");
6532        let filter_attr = checks.iter().find(|check| check.attribute == b"filter");
6533
6534        // Resolve the eol attribute first; `eol=crlf|lf` also forces text.
6535        let eol_value = eol_attr.and_then(|check| match &check.state {
6536            Some(AttributeState::Value(value)) => Some(value.clone()),
6537            _ => None,
6538        });
6539
6540        // The `text` attribute decides first; only when it is unspecified does
6541        // git consult the legacy `crlf` alias (convert.c `convert_attrs`).
6542        let mut forced_eol = EolConversion::None;
6543        let mut text = match text_attr.map(|check| &check.state) {
6544            Some(Some(AttributeState::Set)) => TextDecision::Text,
6545            Some(Some(AttributeState::Unset)) => TextDecision::Binary,
6546            Some(Some(AttributeState::Value(value))) if value == b"auto" => TextDecision::Auto,
6547            Some(Some(AttributeState::Value(value))) if value == b"input" => {
6548                forced_eol = EolConversion::Lf;
6549                TextDecision::Text
6550            }
6551            // `text=<other>` is treated by git as a set text attribute.
6552            Some(Some(AttributeState::Value(_))) => TextDecision::Text,
6553            // `!text` (unspecified) or no text attribute: fall through to `crlf`.
6554            _ => {
6555                let (decision, eol) =
6556                    decode_crlf_family_attribute(crlf_attr.and_then(|check| check.state.as_ref()));
6557                forced_eol = eol;
6558                decision
6559            }
6560        };
6561
6562        // A concrete `eol` attribute implies the path is text even when `text`
6563        // was left unspecified (git: `eol` without `text` is treated as
6564        // `text=auto`-ish; upstream forces conversion). We honour eol only when
6565        // text is not explicitly binary.
6566        let eol = match (&text, eol_value.as_deref()) {
6567            (TextDecision::Binary, _) => EolConversion::None,
6568            (_, Some(b"crlf")) => {
6569                if text == TextDecision::Unspecified {
6570                    text = TextDecision::Text;
6571                }
6572                EolConversion::Crlf
6573            }
6574            (_, Some(b"lf")) => {
6575                if text == TextDecision::Unspecified {
6576                    text = TextDecision::Text;
6577                }
6578                EolConversion::Lf
6579            }
6580            // No explicit `eol` attribute, but `text=input`/`crlf=input` already
6581            // forced the LF direction (git's CRLF_TEXT_INPUT). Honour it over the
6582            // config-derived default.
6583            _ if forced_eol == EolConversion::Lf => EolConversion::Lf,
6584            // No eol attribute: derive direction from config.
6585            _ => eol_from_config(config),
6586        };
6587
6588        // When the path is text but neither `eol` nor `core.autocrlf`/`core.eol`
6589        // asked for carriage returns, we still normalize to LF on clean. That is
6590        // modelled by `EolConversion::Lf` (clean strips CR, smudge adds none).
6591        let eol = match (&text, eol) {
6592            (TextDecision::Text | TextDecision::Auto, EolConversion::None) => EolConversion::Lf,
6593            (_, eol) => eol,
6594        };
6595
6596        // If config does not enable autocrlf and there is no eol/text opinion,
6597        // there is genuinely nothing to do.
6598        let text = match (text, eol_attr.is_some()) {
6599            (TextDecision::Unspecified, _) => {
6600                // Without any text/eol attribute, only `core.autocrlf` can make a
6601                // path eligible, and then it behaves like `text=auto`.
6602                if autocrlf_enabled(config) {
6603                    TextDecision::Auto
6604                } else {
6605                    TextDecision::Unspecified
6606                }
6607            }
6608            (text, _) => text,
6609        };
6610
6611        let driver = resolve_filter_driver(config, filter_attr);
6612
6613        ContentFilterPlan { text, eol, driver }
6614    }
6615
6616    /// Whether EOL conversion should run for the given content.
6617    fn convert_eol(&self, content: &[u8]) -> bool {
6618        match self.text {
6619            TextDecision::Binary | TextDecision::Unspecified => false,
6620            TextDecision::Text => self.eol != EolConversion::None,
6621            // `text=auto`: only when the blob does not look binary.
6622            TextDecision::Auto => self.eol != EolConversion::None && !looks_binary(content),
6623        }
6624    }
6625
6626    /// The smudge-side LF->CRLF safety check, mirroring convert.c
6627    /// `will_convert_lf_to_crlf`. Returns false (no conversion) when:
6628    ///   * there is no naked LF to convert, or
6629    ///   * the action is `text=auto`-derived (the "new safer autocrlf") AND the
6630    ///     content already contains a lone CR or a CRLF pair, or looks binary.
6631    ///
6632    /// An explicit `text`/`eol=crlf` (non-auto) path always converts naked LFs.
6633    fn will_convert_lf_to_crlf(&self, content: &[u8]) -> bool {
6634        self.will_convert_lf_to_crlf_stats(&gather_convert_stats(content))
6635    }
6636
6637    /// Stats-based variant of [`will_convert_lf_to_crlf`], mirroring convert.c
6638    /// `will_convert_lf_to_crlf(struct text_stat *, ...)`. Used by the safecrlf
6639    /// round-trip simulation, which mutates a copy of the stats rather than
6640    /// re-scanning the buffer.
6641    fn will_convert_lf_to_crlf_stats(&self, stats: &ConvertStats) -> bool {
6642        // `output_eol(crlf_action) != EOL_CRLF` short-circuits in git.
6643        if self.eol != EolConversion::Crlf {
6644            return false;
6645        }
6646        // No naked LF? Nothing to convert.
6647        if stats.lonelf == 0 {
6648            return false;
6649        }
6650        if self.text == TextDecision::Auto {
6651            // Any CR or CRLF already present: leave it untouched (irreversible).
6652            if stats.lonecr > 0 || stats.crlf > 0 {
6653                return false;
6654            }
6655            if convert_is_binary(stats) {
6656                return false;
6657            }
6658        }
6659        true
6660    }
6661
6662    /// Whether this path is a candidate for the `core.safecrlf` round-trip check
6663    /// at all: git only warns for non-`CRLF_BINARY` actions. `Binary` and
6664    /// `Unspecified` (with autocrlf off) correspond to git's `CRLF_BINARY`.
6665    fn safecrlf_applies(&self) -> bool {
6666        matches!(self.text, TextDecision::Text | TextDecision::Auto)
6667    }
6668
6669    /// Emit git's `core.safecrlf` round-trip warning for `path`, mirroring the
6670    /// stderr side-effect of convert.c `crlf_to_git` (the `CONV_EOL_RNDTRP_*`
6671    /// branch). `old_stats` are the stats of the *pre-conversion* worktree
6672    /// content (already gathered by the caller so the buffer is scanned once);
6673    /// `index_has_crlf` is whether the path's current index blob already has a
6674    /// CRLF (git's `has_crlf_in_index`, used only for the auto-crlf decision).
6675    ///
6676    /// This never inspects or alters the bytes written to the object store; it is
6677    /// purely the additive warning git prints alongside `git add`/`commit`.
6678    /// Returns `Err` only under `core.safecrlf=true` when the round-trip is
6679    /// irreversible (git `die`s).
6680    fn check_safe_crlf_stats(
6681        &self,
6682        old_stats: &ConvertStats,
6683        index_has_crlf: bool,
6684        flags: ConvFlags,
6685        path: &[u8],
6686    ) -> Result<()> {
6687        if flags == ConvFlags::Off || !self.safecrlf_applies() {
6688            return Ok(());
6689        }
6690
6691        // Replicate `crlf_to_git`'s `convert_crlf_into_lf` decision (the clean
6692        // direction). It starts as "there is a CRLF to collapse"; auto paths
6693        // suppress conversion for binary content or content whose index blob
6694        // already carries a CRLF (the "new safer autocrlf").
6695        let mut convert_crlf_into_lf = old_stats.crlf > 0;
6696        if self.text == TextDecision::Auto {
6697            if convert_is_binary(old_stats) {
6698                // git returns 0 here: no conversion *and* no warning.
6699                return Ok(());
6700            }
6701            if index_has_crlf {
6702                convert_crlf_into_lf = false;
6703            }
6704        }
6705
6706        // Simulate the round-trip on a copy of the stats.
6707        let mut new_stats = old_stats.clone();
6708        // Simulate "git add" (clean: CRLF -> LF).
6709        if convert_crlf_into_lf {
6710            new_stats.lonelf += new_stats.crlf;
6711            new_stats.crlf = 0;
6712        }
6713        // Simulate "git checkout" (smudge: LF -> CRLF).
6714        if self.will_convert_lf_to_crlf_stats(&new_stats) {
6715            new_stats.crlf += new_stats.lonelf;
6716            new_stats.lonelf = 0;
6717        }
6718        check_safe_crlf(old_stats, &new_stats, flags, path)
6719    }
6720}
6721
6722/// Derive the smudge-direction line ending from `core.autocrlf` / `core.eol`.
6723fn eol_from_config(config: &GitConfig) -> EolConversion {
6724    if let Some(value) = config.get("core", None, "autocrlf") {
6725        match value.to_ascii_lowercase().as_str() {
6726            "input" => return EolConversion::Lf,
6727            "true" | "yes" | "on" | "1" => return EolConversion::Crlf,
6728            _ => {}
6729        }
6730    }
6731    if config.get_bool("core", None, "autocrlf") == Some(true) {
6732        return EolConversion::Crlf;
6733    }
6734    match config
6735        .get("core", None, "eol")
6736        .map(|v| v.to_ascii_lowercase())
6737    {
6738        Some(ref v) if v == "crlf" => EolConversion::Crlf,
6739        Some(ref v) if v == "lf" => EolConversion::Lf,
6740        _ => EolConversion::None,
6741    }
6742}
6743
6744/// Whether `core.autocrlf` is set to anything that enables conversion
6745/// (`true` or `input`).
6746fn autocrlf_enabled(config: &GitConfig) -> bool {
6747    if let Some(value) = config.get("core", None, "autocrlf")
6748        && value.eq_ignore_ascii_case("input")
6749    {
6750        return true;
6751    }
6752    config.get_bool("core", None, "autocrlf") == Some(true)
6753}
6754
6755/// Resolve the `filter=<name>` attribute against `filter.<name>.*` config.
6756fn resolve_filter_driver(
6757    config: &GitConfig,
6758    filter_attr: Option<&AttributeCheck>,
6759) -> Option<FilterDriver> {
6760    let name = match filter_attr.map(|check| &check.state) {
6761        Some(Some(AttributeState::Value(value))) => value.clone(),
6762        // `filter` set/unset without a value selects no driver.
6763        _ => return None,
6764    };
6765    let subsection = String::from_utf8_lossy(&name).into_owned();
6766    let clean = config
6767        .get("filter", Some(&subsection), "clean")
6768        .filter(|cmd| !cmd.is_empty())
6769        .map(str::to_owned);
6770    let smudge = config
6771        .get("filter", Some(&subsection), "smudge")
6772        .filter(|cmd| !cmd.is_empty())
6773        .map(str::to_owned);
6774    let required = config
6775        .get_bool("filter", Some(&subsection), "required")
6776        .unwrap_or(false);
6777    // A filter with neither command and not required is a no-op.
6778    if clean.is_none() && smudge.is_none() && !required {
6779        return None;
6780    }
6781    Some(FilterDriver {
6782        name,
6783        clean,
6784        smudge,
6785        required,
6786    })
6787}
6788
/// Binary-content heuristic in the spirit of git's `buffer_is_binary`: the
/// content counts as binary when any of its first 8000 bytes is NUL. Bytes
/// beyond that window are never inspected.
fn looks_binary(content: &[u8]) -> bool {
    const FIRST_FEW_BYTES: usize = 8000;
    content
        .iter()
        .take(FIRST_FEW_BYTES)
        .any(|&byte| byte == 0)
}
6796
/// Collapse every CRLF pair to a single LF.
///
/// Only a CR that is immediately followed by an LF is removed; a lone CR
/// (old-Mac line ending) is kept, matching git. Content without any CRLF pair
/// is handed back untouched, so a borrowed input stays borrowed.
fn convert_crlf_to_lf_cow(content: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
    let has_crlf = content.windows(2).any(|pair| pair == b"\r\n");
    if !has_crlf {
        return content;
    }
    let mut stripped = Vec::with_capacity(content.len());
    let mut bytes = content.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        // Skip the CR of a CRLF pair; its LF is pushed on the next turn.
        let cr_before_lf = byte == b'\r' && bytes.peek() == Some(&b'\n');
        if !cr_before_lf {
            stripped.push(byte);
        }
    }
    Cow::Owned(stripped)
}
6818
/// Expand every naked LF to CRLF.
///
/// An LF whose preceding byte is already a CR is copied as-is, so existing
/// CRLF pairs are not double-converted (matching git). An LF at the very start
/// of the buffer has no predecessor and is therefore expanded.
fn convert_lf_to_crlf(content: &[u8]) -> Vec<u8> {
    // Reserve a little headroom for the inserted CRs.
    let mut converted = Vec::with_capacity(content.len() + content.len() / 16);
    for (position, &byte) in content.iter().enumerate() {
        let preceded_by_cr = position > 0 && content[position - 1] == b'\r';
        if byte == b'\n' && !preceded_by_cr {
            converted.push(b'\r');
        }
        converted.push(byte);
    }
    converted
}
6833
6834/// Run a configured `clean`/`smudge` command as a subprocess, feeding `content`
6835/// on stdin and returning its stdout. Errors carry enough context for the
6836/// caller to decide whether the failure is fatal (required filter) or should be
6837/// silently ignored (optional filter passthrough).
6838fn run_filter_command(command: &str, path: &[u8], content: &[u8]) -> Result<Vec<u8>> {
6839    // Git expands `%f` in the filter command to the path of the file being
6840    // filtered (quoted). We perform the same substitution.
6841    let display_path = String::from_utf8_lossy(path);
6842    let expanded = command.replace("%f", &shell_quote(&display_path));
6843    // Run through the platform shell so pipelines / arguments in the configured
6844    // command behave the same way git's `run_command`-with-shell does.
6845    let (shell, flag) = if cfg!(windows) {
6846        ("cmd", "/C")
6847    } else {
6848        ("/bin/sh", "-c")
6849    };
6850    let mut child = Command::new(shell)
6851        .arg(flag)
6852        .arg(&expanded)
6853        .stdin(Stdio::piped())
6854        .stdout(Stdio::piped())
6855        .stderr(Stdio::piped())
6856        .spawn()
6857        .map_err(|err| GitError::Command(format!("failed to spawn filter `{command}`: {err}")))?;
6858    // Write the content to the child's stdin on a separate thread so we never
6859    // deadlock against a filter that streams output before consuming all input.
6860    let mut stdin = child
6861        .stdin
6862        .take()
6863        .ok_or_else(|| GitError::Command(format!("filter `{command}` stdin unavailable")))?;
6864    let payload = content.to_vec();
6865    let writer = std::thread::spawn(move || {
6866        let _ = stdin.write_all(&payload);
6867        // Dropping `stdin` here closes the pipe so the child sees EOF.
6868    });
6869    let output = child
6870        .wait_with_output()
6871        .map_err(|err| GitError::Command(format!("filter `{command}` failed: {err}")))?;
6872    // Join the writer; its own errors (e.g. broken pipe) are non-fatal because
6873    // the child's exit status is the authoritative signal.
6874    let _ = writer.join();
6875    if !output.status.success() {
6876        let stderr = String::from_utf8_lossy(&output.stderr);
6877        return Err(GitError::Command(format!(
6878            "filter `{command}` exited with {}: {}",
6879            output.status,
6880            stderr.trim()
6881        )));
6882    }
6883    Ok(output.stdout)
6884}
6885
/// Wrap `value` in POSIX single quotes for use inside a shell command.
///
/// Each embedded `'` is rewritten as `'\''` (close quote, escaped quote,
/// reopen quote); every other character is copied verbatim. Used only for the
/// `%f` path substituted into driver filter commands.
fn shell_quote(value: &str) -> String {
    format!("'{}'", value.replace('\'', "'\\''"))
}
6901
6902/// Apply the *clean* conversion to `content` for `path` (worktree -> blob):
6903/// first the configured `filter.<name>.clean` driver (if any), then CRLF->LF
6904/// normalization when EOL conversion applies.
6905///
6906/// `config` is the repository config (`GitConfig`) and `path` is the
6907/// repository-relative path of the file (forward-slash separated, e.g.
6908/// `src/main.rs`). When no filter or EOL conversion applies the input is
6909/// returned unchanged.
6910///
6911/// A *required* driver (`filter.<name>.required=true`) whose `clean` command is
6912/// missing or fails produces a [`GitError::Command`]; a non-required driver
6913/// failure (or absence of a `clean` command) passes the content through
6914/// unfiltered, matching git.
6915pub fn apply_clean_filter(
6916    worktree_root: impl AsRef<Path>,
6917    git_dir: impl AsRef<Path>,
6918    config: &GitConfig,
6919    path: &[u8],
6920    content: &[u8],
6921) -> Result<Vec<u8>> {
6922    // On clean the worktree file exists, so the live `.gitattributes` chain is
6923    // authoritative. `git_dir` is accepted for symmetry with the smudge entry
6924    // point (which falls back to the index) and for future use.
6925    let _ = git_dir.as_ref();
6926    let checks = filter_attribute_checks(worktree_root.as_ref(), path)?;
6927    apply_clean_filter_with_attributes(config, &checks, path, content)
6928}
6929
6930/// A reusable handle that captures the worktree's `.gitattributes` chain once so
6931/// repeated clean-filter calls (e.g. `hash-object --stdin-paths` hashing many
6932/// paths in one process) don't re-walk the worktree and re-read every
6933/// `.gitattributes`/global config per path.
6934///
6935/// Build it once with [`WorktreeAttributes::from_worktree_root`], then call
6936/// [`WorktreeAttributes::apply_clean_filter`] per path. This mirrors
6937/// [`apply_clean_filter`] exactly except the expensive attribute-source scan is
6938/// amortized across calls.
6939pub struct WorktreeAttributes {
6940    matcher: AttributeMatcher,
6941}
6942
6943impl WorktreeAttributes {
6944    /// Read the worktree's attribute sources once (global/`core.attributesFile`,
6945    /// every in-tree `.gitattributes`, and `$GIT_DIR/info/attributes`).
6946    pub fn from_worktree_root(worktree_root: impl AsRef<Path>) -> Result<Self> {
6947        Ok(Self {
6948            matcher: AttributeMatcher::from_worktree_root(worktree_root.as_ref())?,
6949        })
6950    }
6951
6952    /// Apply the clean conversion to `content` for `path`, reusing the cached
6953    /// attribute chain. Behaviourally identical to [`apply_clean_filter`].
6954    pub fn apply_clean_filter(
6955        &self,
6956        config: &GitConfig,
6957        path: &[u8],
6958        content: &[u8],
6959    ) -> Result<Vec<u8>> {
6960        let checks = self
6961            .matcher
6962            .attributes_for_path(path, &filter_attribute_names(), false);
6963        apply_clean_filter_with_attributes(config, &checks, path, content)
6964    }
6965}
6966
6967/// A reusable handle that captures a *tree's* `.gitattributes` chain once so
6968/// repeated smudge-filter calls (e.g. `git archive` streaming every blob in a
6969/// tree) resolve attributes from the tree being processed rather than the live
6970/// worktree.
6971///
6972/// This is the attribute direction `git archive` uses: upstream unpacks the
6973/// archived tree into a scratch index and sets `GIT_ATTR_INDEX`, so the
6974/// `.gitattributes` that govern conversion come from the *archived tree* (plus
6975/// the global/`core.attributesFile` chain and `$GIT_DIR/info/attributes`), not
6976/// from whatever happens to be checked out. `--worktree-attributes` callers
6977/// should use [`WorktreeAttributes`] instead.
6978///
6979/// Build it once with [`TreeAttributes::from_tree`], then call
6980/// [`TreeAttributes::apply_smudge_filter`] per blob. Behaviourally this mirrors
6981/// [`apply_smudge_filter`] except the attribute source is the supplied tree and
6982/// the expensive source scan is amortized across calls.
6983pub struct TreeAttributes {
6984    matcher: AttributeMatcher,
6985}
6986
6987impl TreeAttributes {
6988    /// Read the attribute sources for `tree_oid` once: the global /
6989    /// `core.attributesFile` chain, every `.gitattributes` blob found while
6990    /// walking `tree_oid`, and `$GIT_DIR/info/attributes`.
6991    ///
6992    /// `attr_root` locates the global config (`read_configured_attributes`);
6993    /// pass the worktree root for a non-bare repo, or the git dir for a bare
6994    /// one. `git_dir` locates `info/attributes` directly (so this works for bare
6995    /// repos, where there is no nested `.git`). No worktree `.gitattributes`
6996    /// files are read — use [`WorktreeAttributes`] for the
6997    /// `--worktree-attributes` direction.
6998    pub fn from_tree(
6999        attr_root: impl AsRef<Path>,
7000        git_dir: impl AsRef<Path>,
7001        db: &FileObjectDatabase,
7002        format: ObjectFormat,
7003        tree_oid: &ObjectId,
7004    ) -> Result<Self> {
7005        let attr_root = attr_root.as_ref();
7006        let mut matcher = AttributeMatcher::default();
7007        if !matcher.read_configured_attributes(attr_root) {
7008            matcher.read_default_global_attributes();
7009        }
7010        collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
7011        read_attribute_patterns(
7012            git_dir.as_ref().join("info").join("attributes"),
7013            &mut matcher,
7014            &[],
7015            b"info/attributes",
7016        );
7017        Ok(Self { matcher })
7018    }
7019
7020    /// Apply the smudge conversion (blob -> worktree: EOL `LF`->`CRLF` plus any
7021    /// configured `filter.<name>.smudge` driver) to `content` for `path`,
7022    /// reusing the cached attribute chain. Behaviourally identical to
7023    /// [`apply_smudge_filter`] except attributes come from the tree this handle
7024    /// was built from.
7025    pub fn apply_smudge_filter(
7026        &self,
7027        config: &GitConfig,
7028        path: &[u8],
7029        content: &[u8],
7030    ) -> Result<Vec<u8>> {
7031        let checks = self
7032            .matcher
7033            .attributes_for_path(path, &filter_attribute_names(), false);
7034        apply_smudge_filter_with_attributes(config, &checks, path, content)
7035    }
7036}
7037
7038/// Like [`apply_clean_filter`] but takes already-resolved attribute checks,
7039/// letting callers that have computed attributes once reuse them.
7040pub fn apply_clean_filter_with_attributes(
7041    config: &GitConfig,
7042    attributes: &[AttributeCheck],
7043    path: &[u8],
7044    content: &[u8],
7045) -> Result<Vec<u8>> {
7046    Ok(apply_clean_filter_with_attributes_cow(config, attributes, path, content)?.into_owned())
7047}
7048
7049/// Borrow-first variant of [`apply_clean_filter_with_attributes`].
7050///
7051/// When no filter or EOL conversion changes the content, the returned value
7052/// borrows `content`; callers that can consume a [`Cow`] avoid allocating for
7053/// the common pass-through case.
7054pub fn apply_clean_filter_with_attributes_cow<'a>(
7055    config: &GitConfig,
7056    attributes: &[AttributeCheck],
7057    path: &[u8],
7058    content: &'a [u8],
7059) -> Result<Cow<'a, [u8]>> {
7060    apply_clean_filter_with_attributes_cow_safecrlf(
7061        config,
7062        attributes,
7063        path,
7064        content,
7065        ConvFlags::Off,
7066        SafeCrlfIndexBlob::None,
7067    )
7068}
7069
7070/// How the safecrlf check should learn whether this path's *current index blob*
7071/// already contains a CRLF (git's `has_crlf_in_index`). Only consulted on the
7072/// `text=auto` / `core.autocrlf` path.
7073pub enum SafeCrlfIndexBlob<'a> {
7074    /// No index blob is available (the staging caller has none, or safecrlf is
7075    /// off) — treated as "no CRLF in index".
7076    None,
7077    /// The path's current index blob, read on demand from this object database
7078    /// only when the auto-crlf decision actually needs it.
7079    Lookup {
7080        odb: &'a FileObjectDatabase,
7081        oid: ObjectId,
7082    },
7083}
7084
7085impl SafeCrlfIndexBlob<'_> {
7086    fn has_crlf(&self) -> bool {
7087        match self {
7088            SafeCrlfIndexBlob::None => false,
7089            SafeCrlfIndexBlob::Lookup { odb, oid } => has_crlf_in_index(odb, oid),
7090        }
7091    }
7092}
7093
7094/// [`apply_clean_filter_with_attributes_cow`] plus git's additive `core.safecrlf`
7095/// round-trip warning (convert.c `crlf_to_git`).
7096///
7097/// The conversion result is byte-for-byte identical to the plain variant;
7098/// `flags`/`index_blob` only drive the stderr warning git prints when a
7099/// CRLF<->LF round-trip would not be reversible. The warning is computed on the
7100/// *post-driver, pre-EOL-conversion* content, matching git's ordering in
7101/// `convert_to_git` (apply_filter -> crlf_to_git).
7102pub fn apply_clean_filter_with_attributes_cow_safecrlf<'a>(
7103    config: &GitConfig,
7104    attributes: &[AttributeCheck],
7105    path: &[u8],
7106    content: &'a [u8],
7107    flags: ConvFlags,
7108    index_blob: SafeCrlfIndexBlob<'_>,
7109) -> Result<Cow<'a, [u8]>> {
7110    let plan = ContentFilterPlan::resolve(config, attributes);
7111    let mut data = Cow::Borrowed(content);
7112    if let Some(driver) = &plan.driver {
7113        data = run_driver(driver, driver.clean.as_deref(), path, data)?;
7114    }
7115    // The safecrlf check scans the (post-driver) buffer once for line-ending
7116    // stats. Gate it tightly so the extra scan never runs on the dominant
7117    // pass-through paths: only when safecrlf is enabled, the path is a real
7118    // conversion candidate (not `CRLF_BINARY`), and the buffer is non-empty.
7119    if flags != ConvFlags::Off && !data.is_empty() && plan.safecrlf_applies() {
7120        let old_stats = gather_convert_stats(&data);
7121        plan.check_safe_crlf_stats(&old_stats, index_blob.has_crlf(), flags, path)?;
7122    }
7123    if plan.convert_eol(&data) {
7124        data = convert_crlf_to_lf_cow(data);
7125    }
7126    Ok(data)
7127}
7128
7129/// Apply the *smudge* conversion to `content` for `path` (blob -> worktree):
7130/// first LF->CRLF when EOL conversion applies, then the configured
7131/// `filter.<name>.smudge` driver (if any).
7132///
7133/// Semantics mirror [`apply_clean_filter`]: a required driver with a missing or
7134/// failing `smudge` command errors, while a non-required one passes the content
7135/// through.
7136pub fn apply_smudge_filter(
7137    worktree_root: impl AsRef<Path>,
7138    git_dir: impl AsRef<Path>,
7139    format: ObjectFormat,
7140    config: &GitConfig,
7141    path: &[u8],
7142    content: &[u8],
7143) -> Result<Vec<u8>> {
7144    // On smudge (checkout) the worktree file may not exist yet, so resolve the
7145    // attributes from the `.gitattributes` recorded in the index.
7146    let checks =
7147        smudge_attribute_checks_from_index(worktree_root.as_ref(), git_dir.as_ref(), format, path)?;
7148    apply_smudge_filter_with_attributes(config, &checks, path, content)
7149}
7150
7151/// Like [`apply_smudge_filter`] but takes already-resolved attribute checks.
7152pub fn apply_smudge_filter_with_attributes(
7153    config: &GitConfig,
7154    attributes: &[AttributeCheck],
7155    path: &[u8],
7156    content: &[u8],
7157) -> Result<Vec<u8>> {
7158    Ok(apply_smudge_filter_with_attributes_cow(config, attributes, path, content)?.into_owned())
7159}
7160
7161/// Borrow-first variant of [`apply_smudge_filter_with_attributes`].
7162///
7163/// When no filter or EOL conversion changes the content, the returned value
7164/// borrows `content`; callers that can consume a [`Cow`] avoid allocating for
7165/// the common pass-through case.
7166pub fn apply_smudge_filter_with_attributes_cow<'a>(
7167    config: &GitConfig,
7168    attributes: &[AttributeCheck],
7169    path: &[u8],
7170    content: &'a [u8],
7171) -> Result<Cow<'a, [u8]>> {
7172    let plan = ContentFilterPlan::resolve(config, attributes);
7173    let mut data = Cow::Borrowed(content);
7174    if plan.eol == EolConversion::Crlf
7175        && plan.convert_eol(&data)
7176        && plan.will_convert_lf_to_crlf(&data)
7177    {
7178        data = Cow::Owned(convert_lf_to_crlf(&data));
7179    }
7180    if let Some(driver) = &plan.driver {
7181        data = run_driver(driver, driver.smudge.as_deref(), path, data)?;
7182    }
7183    Ok(data)
7184}
7185
7186/// Execute one direction of a driver filter, honouring the `required` flag.
7187fn run_driver<'a>(
7188    driver: &FilterDriver,
7189    command: Option<&str>,
7190    path: &[u8],
7191    content: Cow<'a, [u8]>,
7192) -> Result<Cow<'a, [u8]>> {
7193    let Some(command) = command else {
7194        // No command in this direction. Required filters must error; optional
7195        // ones pass content through unchanged.
7196        if driver.required {
7197            return Err(GitError::Command(format!(
7198                "required filter `{}` has no configured command for this direction",
7199                String::from_utf8_lossy(&driver.name)
7200            )));
7201        }
7202        return Ok(content);
7203    };
7204    match run_filter_command(command, path, &content) {
7205        Ok(output) => Ok(Cow::Owned(output)),
7206        Err(err) => {
7207            if driver.required {
7208                Err(err)
7209            } else {
7210                // Non-required filter failure: fall back to the unfiltered
7211                // content, matching git's behaviour.
7212                Ok(content)
7213            }
7214        }
7215    }
7216}
7217
7218/// Compute the attributes relevant to content filtering (`text`, `eol`,
7219/// `filter`) for `path` from the worktree `.gitattributes` chain.
7220fn filter_attribute_checks(worktree_root: &Path, path: &[u8]) -> Result<Vec<AttributeCheck>> {
7221    let requested = filter_attribute_names();
7222    let mut matcher = AttributeMatcher::default();
7223    if !matcher.read_configured_attributes(worktree_root) {
7224        matcher.read_default_global_attributes();
7225    }
7226    read_dir_attribute_patterns_for_base(worktree_root, &[], &mut matcher)?;
7227    let mut prefix = Vec::new();
7228    let mut parts = path.split(|byte| *byte == b'/').peekable();
7229    while let Some(part) = parts.next() {
7230        if parts.peek().is_none() {
7231            break;
7232        }
7233        if !prefix.is_empty() {
7234            prefix.push(b'/');
7235        }
7236        prefix.extend_from_slice(part);
7237        let dir = worktree_root.join(repo_path_to_os_path(&prefix)?);
7238        read_dir_attribute_patterns_for_base(&dir, &prefix, &mut matcher)?;
7239    }
7240    read_attribute_patterns(
7241        worktree_root.join(".git").join("info").join("attributes"),
7242        &mut matcher,
7243        &[],
7244        b".git/info/attributes",
7245    );
7246    Ok(matcher.attributes_for_path(path, &requested, false))
7247}
7248
7249/// Compute filtering attributes for a checkout (blob -> worktree).
7250///
7251/// `git checkout -- <pathspec>` / `git restore` materialize through git's
7252/// **default** attr direction, which is `GIT_ATTR_CHECKIN` (attr.c: the static
7253/// `direction` is zero-initialized and `builtin/checkout.c` never overrides it
7254/// for the pathspec path). Under that direction `read_attr` reads each
7255/// `.gitattributes` frame from the **worktree file first**, falling back to the
7256/// staged blob only when no worktree file exists at that directory level
7257/// (sparse-checkout). This is the precedence the smudge filter must use:
7258/// t0027 commits an *empty* root `.gitattributes`, then overwrites the worktree
7259/// copy with `*.txt text eol=crlf` *without re-staging* — and git's checkout
7260/// still honours the worktree copy. Reading the index alone (or index-first)
7261/// made checkout under-convert line endings, because the staged blob was empty.
7262fn smudge_attribute_checks_from_index(
7263    worktree_root: &Path,
7264    git_dir: &Path,
7265    format: ObjectFormat,
7266    path: &[u8],
7267) -> Result<Vec<AttributeCheck>> {
7268    let requested = filter_attribute_names();
7269    let mut matcher = AttributeMatcher::default();
7270    if !matcher.read_configured_attributes(worktree_root) {
7271        matcher.read_default_global_attributes();
7272    }
7273
7274    // Build the set of `.gitattributes` blobs the index carries, keyed by the
7275    // directory they govern, so each ancestry frame can prefer the staged copy.
7276    let index_attributes = index_gitattributes_by_base(git_dir, format)?;
7277
7278    // Walk root -> ... -> the file's parent directory, folding each frame's
7279    // `.gitattributes` in shallow-to-deep order so deeper directories win.
7280    fold_checkout_attribute_frame(
7281        worktree_root,
7282        &[],
7283        &index_attributes,
7284        &mut matcher,
7285    )?;
7286    let mut prefix = Vec::new();
7287    let mut parts = path.split(|byte| *byte == b'/').peekable();
7288    while let Some(part) = parts.next() {
7289        if parts.peek().is_none() {
7290            break;
7291        }
7292        if !prefix.is_empty() {
7293            prefix.push(b'/');
7294        }
7295        prefix.extend_from_slice(part);
7296        let dir = worktree_root.join(repo_path_to_os_path(&prefix)?);
7297        fold_checkout_attribute_frame(&dir, &prefix, &index_attributes, &mut matcher)?;
7298    }
7299
7300    read_attribute_patterns(
7301        worktree_root.join(".git").join("info").join("attributes"),
7302        &mut matcher,
7303        &[],
7304        b".git/info/attributes",
7305    );
7306    Ok(matcher.attributes_for_path(path, &requested, false))
7307}
7308
7309/// Fold the `.gitattributes` governing directory `base` (whose on-disk location
7310/// is `dir`) into `matcher`, preferring the worktree file and falling back to
7311/// the staged blob. Mirrors one attr-stack frame under `GIT_ATTR_CHECKIN`
7312/// (git's default direction, used by `checkout -- <pathspec>` / `restore`).
7313fn fold_checkout_attribute_frame(
7314    dir: &Path,
7315    base: &[u8],
7316    index_attributes: &BTreeMap<Vec<u8>, Vec<u8>>,
7317    matcher: &mut AttributeMatcher,
7318) -> Result<()> {
7319    let worktree_file = dir.join(".gitattributes");
7320    if let Ok(contents) = fs::read(&worktree_file) {
7321        // A worktree `.gitattributes` exists at this level: it wins outright
7322        // (git only consults the index when the worktree file is absent).
7323        read_attribute_patterns_from_bytes(&contents, matcher, base);
7324    } else if let Some(contents) = index_attributes.get(base) {
7325        read_attribute_patterns_from_bytes(contents, matcher, base);
7326    }
7327    Ok(())
7328}
7329
7330/// Read every staged `.gitattributes` blob, keyed by the repo-relative directory
7331/// it governs (`""` for the worktree root). Stage-0 blob entries only.
7332fn index_gitattributes_by_base(
7333    git_dir: &Path,
7334    format: ObjectFormat,
7335) -> Result<BTreeMap<Vec<u8>, Vec<u8>>> {
7336    let mut map = BTreeMap::new();
7337    let index_path = repository_index_path(git_dir);
7338    if !index_path.exists() {
7339        return Ok(map);
7340    }
7341    let db = FileObjectDatabase::from_git_dir(git_dir, format);
7342    let entries = Index::parse(&fs::read(index_path)?, format)?.entries;
7343    for entry in entries {
7344        let is_attributes_file =
7345            entry.path == b".gitattributes" || entry.path.as_bytes().ends_with(b"/.gitattributes");
7346        if index_entry_stage(&entry) != 0
7347            || tree_entry_object_type(entry.mode) != ObjectType::Blob
7348            || !is_attributes_file
7349        {
7350            continue;
7351        }
7352        let base = match entry.path.as_bytes().strip_suffix(b".gitattributes") {
7353            Some(b"") => Vec::new(),
7354            Some(parent) => parent.strip_suffix(b"/").unwrap_or(parent).to_vec(),
7355            None => continue,
7356        };
7357        let object = db
7358            .read_object(&entry.oid)
7359            .map_err(|err| expect_missing_object_kind(err, entry.oid, MissingObjectKind::Blob))?;
7360        if object.object_type == ObjectType::Blob {
7361            map.insert(base, object.body.clone());
7362        }
7363    }
7364    Ok(map)
7365}
7366
/// Names of the attributes that drive content filtering, in lookup order:
/// `text`, `crlf`, `eol`, `filter`.
///
/// `crlf` is git's legacy alias for `text` (convert.c registers both) and is
/// consulted as a fallback when `text` is unspecified, so it must be resolved
/// alongside the others.
fn filter_attribute_names() -> Vec<Vec<u8>> {
    const NAMES: [&[u8]; 4] = [b"text", b"crlf", b"eol", b"filter"];
    NAMES.iter().map(|name| name.to_vec()).collect()
}
7377
7378// ---------------------------------------------------------------------------
7379// `ls-files --eol` line-ending information
7380//
7381// Git's `git ls-files --eol` prints, for each path, three fields:
7382//   i/<stat>  — line-ending statistics of the *index* blob content
7383//   w/<stat>  — line-ending statistics of the *worktree* file content
7384//   attr/<a>  — the resolved crlf/eol attribute action (attributes only, no
7385//               config) — `get_convert_attr_ascii` in convert.c
7386// The two stat fields mirror `gather_convert_stats_ascii`; the attr field
7387// mirrors `convert_attrs` up to `ca->attr_action` (i.e. *before* the config
7388// derived `text` -> input/crlf substitution and the `core.autocrlf` fallback).
7389// ---------------------------------------------------------------------------
7390
/// Byte-class tallies for a buffer, mirroring what convert.c `gather_stats`
/// collects.
#[derive(Clone)]
struct ConvertStats {
    /// NUL bytes (each one is also counted in `nonprintable`).
    nul: u32,
    /// CR bytes that are not immediately followed by LF.
    lonecr: u32,
    /// LF bytes that are not the second half of a CRLF pair.
    lonelf: u32,
    /// CRLF pairs.
    crlf: u32,
    /// Bytes >= 32 other than DEL, plus BS, HT, FF and ESC.
    printable: u32,
    /// DEL and the remaining control bytes below 32 (CR and LF excluded).
    nonprintable: u32,
}

/// Classify every byte of `buf` and return the resulting tallies.
///
/// A CR directly followed by LF is consumed as one CRLF pair; any other CR is
/// a lone CR. If the buffer ends in `0x1a` (^Z, the DOS EOF marker) that final
/// byte is not held against the content as non-printable.
fn gather_convert_stats(buf: &[u8]) -> ConvertStats {
    let mut tally = ConvertStats {
        nul: 0,
        lonecr: 0,
        lonelf: 0,
        crlf: 0,
        printable: 0,
        nonprintable: 0,
    };
    let mut bytes = buf.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        match byte {
            b'\r' => {
                // Swallow the LF of a CRLF pair so it is not counted twice.
                if bytes.next_if_eq(&b'\n').is_some() {
                    tally.crlf += 1;
                } else {
                    tally.lonecr += 1;
                }
            }
            b'\n' => tally.lonelf += 1,
            0 => {
                tally.nul += 1;
                tally.nonprintable += 1;
            }
            // BS, HT, FF and ESC are printable.
            0x08 | 0x09 | 0x0c | 0x1b => tally.printable += 1,
            // DEL
            0x7f => tally.nonprintable += 1,
            control if control < 0x20 => tally.nonprintable += 1,
            _ => tally.printable += 1,
        }
    }
    // A trailing EOF (^Z, 0x1a) is not counted as non-printable.
    if buf.ends_with(&[0x1a]) {
        tally.nonprintable = tally.nonprintable.saturating_sub(1);
    }
    tally
}
7453
7454/// Mirror of convert.c `has_crlf_in_index`: whether the blob currently recorded
7455/// in the index for this path is non-binary text containing a CRLF. Used only by
7456/// the auto-crlf safecrlf decision to keep an already-CRLF index blob from being
7457/// silently collapsed. A missing/unreadable blob (or a non-blob entry) counts as
7458/// "no CRLF", matching git's `read_blob_data_from_index` returning NULL.
7459fn has_crlf_in_index(odb: &FileObjectDatabase, oid: &ObjectId) -> bool {
7460    let Ok(object) = odb.read_object(oid) else {
7461        return false;
7462    };
7463    if object.object_type != ObjectType::Blob {
7464        return false;
7465    }
7466    let data = &object.body;
7467    // git short-circuits on the first '\r' via memchr before gathering stats.
7468    if !data.contains(&b'\r') {
7469        return false;
7470    }
7471    let stats = gather_convert_stats(data);
7472    !convert_is_binary(&stats) && stats.crlf > 0
7473}
7474
7475/// Mirror of convert.c `convert_is_binary`: a lone CR or NUL, or a high
7476/// non-printable ratio, marks the content as binary.
7477fn convert_is_binary(stats: &ConvertStats) -> bool {
7478    if stats.lonecr > 0 {
7479        return true;
7480    }
7481    if stats.nul > 0 {
7482        return true;
7483    }
7484    (stats.printable >> 7) < stats.nonprintable
7485}
7486
/// The `core.safecrlf` round-trip-warning mode, mirroring git's
/// `global_conv_flags_eol` (environment.c). git's *default* — when
/// `core.safecrlf` is unset — is [`ConvFlags::Warn`], so the warning fires even
/// without any explicit config.
///
/// Use [`ConvFlags::from_config`] to resolve the mode from a repository
/// config.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConvFlags {
    /// `core.safecrlf=false`: never warn.
    Off,
    /// `core.safecrlf=warn` (and the unset default): emit a warning when a
    /// CRLF<->LF round-trip would not be reversible.
    Warn,
    /// `core.safecrlf=true`: die instead of warn.
    Die,
}
7501
7502impl ConvFlags {
7503    /// Resolve `core.safecrlf` from config, mirroring environment.c
7504    /// `git_default_core_config`: `warn` -> [`ConvFlags::Warn`], a boolean-true
7505    /// value -> [`ConvFlags::Die`], a boolean-false value -> [`ConvFlags::Off`].
7506    /// When the key is absent git leaves `global_conv_flags_eol` at its initial
7507    /// [`ConvFlags::Warn`], so unset also resolves to [`ConvFlags::Warn`].
7508    pub fn from_config(config: &GitConfig) -> Self {
7509        match config.get("core", None, "safecrlf") {
7510            Some(value) if value.eq_ignore_ascii_case("warn") => ConvFlags::Warn,
7511            Some(_) => {
7512                if config.get_bool("core", None, "safecrlf") == Some(true) {
7513                    ConvFlags::Die
7514                } else {
7515                    ConvFlags::Off
7516                }
7517            }
7518            None => ConvFlags::Warn,
7519        }
7520    }
7521}
7522
7523/// Mirror of convert.c `check_global_conv_flags_eol`: compare the pre-conversion
7524/// `old_stats` against the simulated round-trip `new_stats` and, when the
7525/// CRLF/LF content would not survive a clean+smudge cycle, warn (or die under
7526/// `core.safecrlf=true`).
7527///
7528/// Returns `Err(GitError::Exit(128))` when `flags` is [`ConvFlags::Die`] and the
7529/// round-trip is irreversible (git `die`s with exit 128 here); otherwise prints
7530/// the warning to stderr and returns `Ok(())`. This is a pure stderr-side
7531/// effect: it never changes the bytes written to the object store.
7532fn check_safe_crlf(
7533    old_stats: &ConvertStats,
7534    new_stats: &ConvertStats,
7535    flags: ConvFlags,
7536    path: &[u8],
7537) -> Result<()> {
7538    if flags == ConvFlags::Off {
7539        return Ok(());
7540    }
7541    let display = String::from_utf8_lossy(path);
7542    if old_stats.crlf > 0 && new_stats.crlf == 0 {
7543        // CRLFs would not be restored by checkout.
7544        match flags {
7545            ConvFlags::Die => {
7546                eprintln!("fatal: CRLF would be replaced by LF in {display}");
7547                return Err(GitError::Exit(128));
7548            }
7549            ConvFlags::Warn => {
7550                eprintln!(
7551                    "warning: in the working copy of '{display}', CRLF will be replaced by LF the next time Git touches it"
7552                );
7553            }
7554            ConvFlags::Off => unreachable!("handled above"),
7555        }
7556    } else if old_stats.lonelf > 0 && new_stats.lonelf == 0 {
7557        // CRLFs would be added by checkout.
7558        match flags {
7559            ConvFlags::Die => {
7560                eprintln!("fatal: LF would be replaced by CRLF in {display}");
7561                return Err(GitError::Exit(128));
7562            }
7563            ConvFlags::Warn => {
7564                eprintln!(
7565                    "warning: in the working copy of '{display}', LF will be replaced by CRLF the next time Git touches it"
7566                );
7567            }
7568            ConvFlags::Off => unreachable!("handled above"),
7569        }
7570    }
7571    Ok(())
7572}
7573
7574/// Compute the `i/` or `w/` stat string for `content`, mirroring
7575/// convert.c `gather_convert_stats_ascii`.
7576fn convert_stats_ascii(content: &[u8]) -> &'static str {
7577    if content.is_empty() {
7578        return "none";
7579    }
7580    let stats = gather_convert_stats(content);
7581    if convert_is_binary(&stats) {
7582        return "-text";
7583    }
7584    match (stats.lonelf > 0, stats.crlf > 0) {
7585        (true, false) => "lf",
7586        (false, true) => "crlf",
7587        (true, true) => "mixed",
7588        (false, false) => "none",
7589    }
7590}
7591
7592/// The resolved crlf/eol attribute action for a path, mirroring convert.c
7593/// `convert_attrs` up to `ca->attr_action` (attributes only, no config), and
7594/// `get_convert_attr_ascii` for the ascii spelling.
7595fn convert_attr_ascii(checks: &[AttributeCheck]) -> &'static str {
7596    fn state_of<'a>(checks: &'a [AttributeCheck], name: &[u8]) -> Option<&'a AttributeState> {
7597        checks
7598            .iter()
7599            .find(|check| check.attribute == name)
7600            .and_then(|check| check.state.as_ref())
7601    }
7602
7603    // git_path_check_crlf: ATTR_TRUE -> TEXT, ATTR_FALSE -> BINARY,
7604    // ATTR_UNSET -> (fall through), "input" -> TEXT_INPUT, "auto" -> AUTO,
7605    // anything else -> UNDEFINED.
7606    #[derive(Clone, Copy, PartialEq)]
7607    enum Action {
7608        Undefined,
7609        Binary,
7610        Text,
7611        TextInput,
7612        TextCrlf,
7613        Auto,
7614        AutoCrlf,
7615        AutoInput,
7616    }
7617    fn check_crlf(state: Option<&AttributeState>) -> Action {
7618        match state {
7619            Some(AttributeState::Set) => Action::Text,
7620            Some(AttributeState::Unset) => Action::Binary,
7621            Some(AttributeState::Value(value)) if value == b"input" => Action::TextInput,
7622            Some(AttributeState::Value(value)) if value == b"auto" => Action::Auto,
7623            // ATTR_UNSET / any other value -> CRLF_UNDEFINED.
7624            _ => Action::Undefined,
7625        }
7626    }
7627
7628    // Resolve from the `text` attribute, then fall back to the legacy `crlf`
7629    // alias only when `text` left the action undefined.
7630    let mut action = check_crlf(state_of(checks, b"text"));
7631    if action == Action::Undefined {
7632        action = check_crlf(state_of(checks, b"crlf"));
7633    }
7634
7635    if action != Action::Binary {
7636        // git_path_check_eol: only "lf"/"crlf" values matter.
7637        let eol = match state_of(checks, b"eol") {
7638            Some(AttributeState::Value(value)) if value == b"lf" => Some(false),
7639            Some(AttributeState::Value(value)) if value == b"crlf" => Some(true),
7640            _ => None,
7641        };
7642        action = match (action, eol) {
7643            (Action::Auto, Some(false)) => Action::AutoInput,
7644            (Action::Auto, Some(true)) => Action::AutoCrlf,
7645            (_, Some(false)) if action != Action::Auto => Action::TextInput,
7646            (_, Some(true)) if action != Action::Auto => Action::TextCrlf,
7647            _ => action,
7648        };
7649    }
7650
7651    match action {
7652        Action::Undefined => "",
7653        Action::Binary => "-text",
7654        Action::Text => "text",
7655        Action::TextInput => "text eol=lf",
7656        Action::TextCrlf => "text eol=crlf",
7657        Action::Auto => "text=auto",
7658        Action::AutoCrlf => "text=auto eol=crlf",
7659        Action::AutoInput => "text=auto eol=lf",
7660    }
7661}
7662
/// The three `ls-files --eol` fields for a single path.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` like the other public value
/// types in this file, so callers can log and compare results.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EolInfo {
    /// Stat of the index blob (`i/...`); empty when there is no index blob.
    pub index: &'static str,
    /// Stat of the worktree file (`w/...`); empty when the file is absent.
    pub worktree: &'static str,
    /// Resolved crlf/eol attribute action (`attr/...`).
    pub attr: &'static str,
}

impl EolInfo {
    /// Format as git's `ls-files --eol` prefix: `i/%-5s w/%-5s attr/%-17s\t`.
    ///
    /// Each field is left-aligned and space-padded to its minimum width; a
    /// longer value is emitted in full (never truncated). The result ends with
    /// a tab so the path can be appended directly.
    pub fn format_prefix(&self) -> String {
        let Self {
            index,
            worktree,
            attr,
        } = self;
        format!("i/{:<5} w/{:<5} attr/{:<17}\t", index, worktree, attr)
    }
}
7682
7683/// Compute the `ls-files --eol` info for `path`.
7684///
7685/// `index_content` is the raw index blob bytes (None when the path has no
7686/// index entry or is not a regular file). The worktree file is read from
7687/// `worktree_root/path`; if it is absent or not a regular file the `w/` field
7688/// is empty. Attributes are resolved from the worktree `.gitattributes` chain
7689/// via `attr_checks`.
7690pub fn eol_info_for_path(
7691    worktree_root: impl AsRef<Path>,
7692    path: &[u8],
7693    index_content: Option<&[u8]>,
7694    attr_checks: &[AttributeCheck],
7695) -> EolInfo {
7696    let index = index_content.map(convert_stats_ascii).unwrap_or("");
7697
7698    let worktree_root = worktree_root.as_ref();
7699    let worktree = match repo_path_to_os_path(path) {
7700        Ok(rel) => {
7701            let absolute = worktree_root.join(rel);
7702            match fs::symlink_metadata(&absolute) {
7703                // git: only regular files get a `w/` stat (lstat + S_ISREG).
7704                Ok(meta) if meta.file_type().is_file() => match fs::read(&absolute) {
7705                    Ok(content) => convert_stats_ascii_owned(&content),
7706                    Err(_) => "",
7707                },
7708                _ => "",
7709            }
7710        }
7711        Err(_) => "",
7712    };
7713
7714    let attr = convert_attr_ascii(attr_checks);
7715
7716    EolInfo {
7717        index,
7718        worktree,
7719        attr,
7720    }
7721}
7722
/// `convert_stats_ascii` over an owned buffer; the result is a `'static` str so
/// the buffer can be dropped.
///
/// This is a plain forwarder: it adds no behavior of its own. The returned
/// label does not borrow from `content`, so a caller may compute it from a
/// temporary buffer and let that buffer go out of scope immediately.
fn convert_stats_ascii_owned(content: &[u8]) -> &'static str {
    convert_stats_ascii(content)
}
7728
7729/// Resolve the crlf/eol/text/filter attributes for `path` from the worktree
7730/// `.gitattributes` chain (the set `ls-files --eol` needs for its `attr/`
7731/// field).
7732pub fn eol_attribute_checks(
7733    worktree_root: impl AsRef<Path>,
7734    path: &[u8],
7735) -> Result<Vec<AttributeCheck>> {
7736    filter_attribute_checks(worktree_root.as_ref(), path)
7737}
7738
7739pub fn deleted_index_entries(
7740    worktree_root: impl AsRef<Path>,
7741    git_dir: impl AsRef<Path>,
7742    format: ObjectFormat,
7743) -> Result<Vec<IndexEntry>> {
7744    let worktree_root = worktree_root.as_ref();
7745    let git_dir = git_dir.as_ref();
7746    let index_path = repository_index_path(git_dir);
7747    if !index_path.exists() {
7748        return Ok(Vec::new());
7749    }
7750    let index = Index::parse(&fs::read(index_path)?, format)?;
7751    let mut deleted = Vec::new();
7752    for entry in index.entries {
7753        if !worktree_path(worktree_root, entry.path.as_bytes())?.exists() {
7754            deleted.push(entry);
7755        }
7756    }
7757    Ok(deleted)
7758}
7759
7760pub fn modified_index_entries(
7761    worktree_root: impl AsRef<Path>,
7762    git_dir: impl AsRef<Path>,
7763    format: ObjectFormat,
7764) -> Result<Vec<IndexEntry>> {
7765    let worktree_root = worktree_root.as_ref();
7766    let git_dir = git_dir.as_ref();
7767    let index_path = repository_index_path(git_dir);
7768    if !index_path.exists() {
7769        return Ok(Vec::new());
7770    }
7771    let index = Index::parse(&fs::read(&index_path)?, format)?;
7772    // Reuse the same racy-git stat shortcut here: build the cache from the index
7773    // we just parsed (no second parse) so the worktree walk can skip re-hashing
7774    // unchanged files. A cached oid is only trusted on a non-racy stat match, so
7775    // genuinely modified files still fall through to a hash and are reported.
7776    let stat_cache = IndexStatCache::from_index(&index, &index_path);
7777    let worktree = worktree_entries_with_stat_cache(
7778        worktree_root,
7779        git_dir,
7780        format,
7781        Some(&stat_cache),
7782        None,
7783        None,
7784    )?;
7785    let mut modified = Vec::new();
7786    for entry in index.entries {
7787        let Some(worktree_entry) = worktree.get(entry.path.as_bytes()) else {
7788            modified.push(entry);
7789            continue;
7790        };
7791        if worktree_entry.mode != entry.mode || worktree_entry.oid != entry.oid {
7792            modified.push(entry);
7793        }
7794    }
7795    Ok(modified)
7796}
7797
7798pub fn checkout_branch(
7799    worktree_root: impl AsRef<Path>,
7800    git_dir: impl AsRef<Path>,
7801    format: ObjectFormat,
7802    branch: &str,
7803    committer: Vec<u8>,
7804) -> Result<CheckoutResult> {
7805    let worktree_root = worktree_root.as_ref();
7806    let git_dir = git_dir.as_ref();
7807    let branch_ref = branch_ref_name(branch)?;
7808    let refs = FileRefStore::new(git_dir, format);
7809    let target = match sley_refs::resolve_ref_peeled(&refs, &branch_ref)? {
7810        Some(oid) => oid,
7811        None => {
7812            checkout_switch_head_symbolic(&refs, branch_ref, committer, branch, None, None)?;
7813            return Ok(CheckoutResult {
7814                branch: branch.into(),
7815                oid: ObjectId::null(format),
7816                files: 0,
7817            });
7818        }
7819    };
7820    let current_head = resolve_head_commit_oid(git_dir, format)?;
7821    let files = if current_head == Some(target) {
7822        0
7823    } else {
7824        checkout_commit_to_index_and_worktree(worktree_root, git_dir, format, &target)?
7825    };
7826    checkout_switch_head_symbolic(
7827        &refs,
7828        branch_ref,
7829        committer,
7830        branch,
7831        Some(target),
7832        Some(target),
7833    )?;
7834    Ok(CheckoutResult {
7835        branch: branch.into(),
7836        oid: target,
7837        files,
7838    })
7839}
7840
7841pub fn checkout_detached(
7842    worktree_root: impl AsRef<Path>,
7843    git_dir: impl AsRef<Path>,
7844    format: ObjectFormat,
7845    target: &ObjectId,
7846    committer: Vec<u8>,
7847    message: Vec<u8>,
7848) -> Result<CheckoutResult> {
7849    let worktree_root = worktree_root.as_ref();
7850    let git_dir = git_dir.as_ref();
7851    let files = checkout_commit_to_index_and_worktree(worktree_root, git_dir, format, target)?;
7852    let refs = FileRefStore::new(git_dir, format);
7853    let zero = ObjectId::null(format);
7854    let mut tx = refs.transaction();
7855    tx.update(RefUpdate {
7856        name: "HEAD".into(),
7857        expected: None,
7858        new: RefTarget::Direct(*target),
7859        reflog: Some(ReflogEntry {
7860            old_oid: zero,
7861            new_oid: *target,
7862            committer,
7863            message,
7864        }),
7865    });
7866    tx.commit()?;
7867    Ok(CheckoutResult {
7868        branch: target.to_string(),
7869        oid: *target,
7870        files,
7871    })
7872}
7873
7874/// Like [`checkout_branch`], but runs the smudge-side content filters
7875/// (`core.autocrlf`/`text`/`eol` EOL conversion and `filter.<name>.smudge`
7876/// drivers) on each blob as it is written to the worktree. `config` is the
7877/// repository config used to resolve the filters.
7878pub fn checkout_branch_filtered(
7879    worktree_root: impl AsRef<Path>,
7880    git_dir: impl AsRef<Path>,
7881    format: ObjectFormat,
7882    branch: &str,
7883    committer: Vec<u8>,
7884    config: &GitConfig,
7885) -> Result<CheckoutResult> {
7886    let worktree_root = worktree_root.as_ref();
7887    let git_dir = git_dir.as_ref();
7888    let branch_ref = branch_ref_name(branch)?;
7889    let refs = FileRefStore::new(git_dir, format);
7890    let target = match sley_refs::resolve_ref_peeled(&refs, &branch_ref)? {
7891        Some(oid) => oid,
7892        None => {
7893            checkout_switch_head_symbolic(&refs, branch_ref, committer, branch, None, None)?;
7894            return Ok(CheckoutResult {
7895                branch: branch.into(),
7896                oid: ObjectId::null(format),
7897                files: 0,
7898            });
7899        }
7900    };
7901    let current_head = resolve_head_commit_oid(git_dir, format)?;
7902    let files = if current_head == Some(target) {
7903        0
7904    } else {
7905        checkout_commit_to_index_and_worktree_filtered(
7906            worktree_root,
7907            git_dir,
7908            format,
7909            &target,
7910            Some(config),
7911        )?
7912    };
7913    checkout_switch_head_symbolic(
7914        &refs,
7915        branch_ref,
7916        committer,
7917        branch,
7918        Some(target),
7919        Some(target),
7920    )?;
7921    Ok(CheckoutResult {
7922        branch: branch.into(),
7923        oid: target,
7924        files,
7925    })
7926}
7927
7928/// Like [`checkout_detached`], but runs the smudge-side content filters (see
7929/// [`checkout_branch_filtered`]).
7930pub fn checkout_detached_filtered(
7931    worktree_root: impl AsRef<Path>,
7932    git_dir: impl AsRef<Path>,
7933    format: ObjectFormat,
7934    target: &ObjectId,
7935    committer: Vec<u8>,
7936    message: Vec<u8>,
7937    config: &GitConfig,
7938) -> Result<CheckoutResult> {
7939    let worktree_root = worktree_root.as_ref();
7940    let git_dir = git_dir.as_ref();
7941    let files = checkout_commit_to_index_and_worktree_filtered(
7942        worktree_root,
7943        git_dir,
7944        format,
7945        target,
7946        Some(config),
7947    )?;
7948    let refs = FileRefStore::new(git_dir, format);
7949    let zero = ObjectId::null(format);
7950    let mut tx = refs.transaction();
7951    tx.update(RefUpdate {
7952        name: "HEAD".into(),
7953        expected: None,
7954        new: RefTarget::Direct(*target),
7955        reflog: Some(ReflogEntry {
7956            old_oid: zero,
7957            new_oid: *target,
7958            committer,
7959            message,
7960        }),
7961    });
7962    tx.commit()?;
7963    Ok(CheckoutResult {
7964        branch: target.to_string(),
7965        oid: *target,
7966        files,
7967    })
7968}
7969
/// Rewrite the index and worktree from commit `target` without any content
/// filtering.
///
/// Forwards to [`checkout_commit_to_index_and_worktree_filtered`] with no
/// smudge config, so blob bodies are written exactly as stored. Returns that
/// function's result unchanged.
fn checkout_commit_to_index_and_worktree(
    worktree_root: &Path,
    git_dir: &Path,
    format: ObjectFormat,
    target: &ObjectId,
) -> Result<usize> {
    checkout_commit_to_index_and_worktree_filtered(worktree_root, git_dir, format, target, None)
}
7978
/// Like [`checkout_commit_to_index_and_worktree`] but optionally runs the
/// smudge-side content filters (see [`apply_smudge_filter`]) on each blob before
/// it is written to the worktree. Attribute lookups use the `.gitattributes`
/// recorded in the *target tree* so the rules of the checked-out commit apply.
///
/// Steps, in order:
/// 1. Refuse with `GitError::Transaction` if `short_status` reports any entry
///    for which `status_entry_is_untracked_or_ignored` is false.
/// 2. Flatten the target commit's tree into `target_entries`.
/// 3. Remove the worktree file of every current index path that the target
///    does not contain.
/// 4. Write every target entry to the worktree and build its index entry.
/// 5. Replace the index file with a version-2 index holding exactly those
///    entries (no extensions).
///
/// Returns the number of entries in the target tree.
fn checkout_commit_to_index_and_worktree_filtered(
    worktree_root: &Path,
    git_dir: &Path,
    format: ObjectFormat,
    target: &ObjectId,
    smudge_config: Option<&GitConfig>,
) -> Result<usize> {
    let status = short_status(worktree_root, git_dir, format)?;
    if status
        .iter()
        .any(|entry| !status_entry_is_untracked_or_ignored(entry))
    {
        return Err(GitError::Transaction(
            "checkout requires a clean working tree".into(),
        ));
    }
    let db = FileObjectDatabase::from_git_dir(git_dir, format);
    let commit = read_commit(&db, format, target)?;
    let mut target_entries = BTreeMap::new();
    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;

    // The attribute matcher is only built when filtering was requested.
    let attributes = smudge_config
        .map(|_| build_tree_attribute_matcher(worktree_root, &db, format, &commit.tree))
        .transpose()?;

    // Drop worktree files for paths that are tracked now but absent from the
    // target tree.
    for path in read_index_entries(git_dir, format)?.keys() {
        if !target_entries.contains_key(path) {
            remove_worktree_file(worktree_root, path)?;
        }
    }

    let mut index_entries = Vec::new();
    for (path, entry) in &target_entries {
        // Gitlinks go through the shared materialization step (mkdir + zeroed
        // stat); smudge filters never apply to a submodule directory.
        if entry.mode == 0o160000 {
            index_entries.push(materialize_tree_entry(&db, worktree_root, path, entry)?);
            continue;
        }
        let object = read_expected_object(&db, &entry.oid, ObjectType::Blob)?;
        // Borrow the stored bytes unless both a config and a matcher are
        // available, in which case the smudge pipeline decides the bytes.
        let body: Cow<'_, [u8]> = match (smudge_config, &attributes) {
            (Some(config), Some(matcher)) => {
                let checks = matcher.attributes_for_path(path, &filter_attribute_names(), false);
                apply_smudge_filter_with_attributes_cow(config, &checks, path, &object.body)?
            }
            _ => Cow::Borrowed(&object.body),
        };
        let file_path = worktree_path(worktree_root, path)?;
        if let Some(parent) = file_path.parent() {
            fs::create_dir_all(parent)?;
        }
        // NOTE(review): every non-gitlink entry is written with `fs::write`
        // here; nothing in this function creates symlinks or sets the
        // executable bit — confirm that is handled elsewhere or intended.
        fs::write(&file_path, &body)?;
        let metadata = fs::metadata(&file_path)?;
        let mut index_entry = index_entry_from_metadata(path.clone(), entry.oid, &metadata);
        // The index records the tree's mode rather than whatever mode the
        // freshly written file's metadata would imply.
        index_entry.mode = entry.mode;
        index_entries.push(index_entry);
    }
    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
    fs::write(
        repository_index_path(git_dir),
        Index {
            version: 2,
            entries: index_entries,
            extensions: Vec::new(),
            checksum: None,
        }
        .write(format)?,
    )?;
    Ok(target_entries.len())
}
8053
8054/// Build an [`AttributeMatcher`] from the `.gitattributes` files contained in a
8055/// tree, plus the repo-level (`core.attributesFile`, `.git/info/attributes`)
8056/// sources, mirroring [`standard_attributes_for_path_from_tree`].
8057fn build_tree_attribute_matcher(
8058    worktree_root: &Path,
8059    db: &FileObjectDatabase,
8060    format: ObjectFormat,
8061    tree_oid: &ObjectId,
8062) -> Result<AttributeMatcher> {
8063    let mut matcher = AttributeMatcher::default();
8064    if !matcher.read_configured_attributes(worktree_root) {
8065        matcher.read_default_global_attributes();
8066    }
8067    collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
8068    read_attribute_patterns(
8069        worktree_root.join(".git").join("info").join("attributes"),
8070        &mut matcher,
8071        &[],
8072        b".git/info/attributes",
8073    );
8074    Ok(matcher)
8075}
8076
/// Sparse- and skip-worktree-aware variant of
/// [`checkout_commit_to_index_and_worktree`].
///
/// When `sparse` is `None` this behaves like the plain checkout except that it
/// preserves any pre-existing skip-worktree bits (so an already-sparse worktree
/// is not silently re-expanded). When `sparse` is `Some`, every target path is
/// additionally classified against the patterns: in-cone paths are written and
/// have their skip-worktree bit cleared, while out-of-cone paths are left out
/// of the worktree, get their skip-worktree bit set, and have any stale file
/// removed.
///
/// NOTE(review): in the code below the set of previously skipped paths is only
/// consulted by the cleanliness check and by the stale-file removal loop. With
/// `sparse` = `None` every target path takes the `in_cone` branch and goes
/// through `materialize_tree_entry`, which (per the inline comment there)
/// leaves the skip-worktree bit clear. Confirm whether the "preserves
/// skip-worktree bits" statement above still matches the intended behavior.
///
/// Returns the number of entries in the target tree. Fails with
/// `GitError::Transaction` when the status check finds a blocking change.
fn checkout_commit_to_index_and_worktree_sparse(
    worktree_root: &Path,
    git_dir: &Path,
    format: ObjectFormat,
    target: &ObjectId,
    sparse: Option<(&SparseCheckout, SparseCheckoutMode)>,
) -> Result<usize> {
    // Snapshot the skip-worktree paths before the index is rewritten below.
    let previously_skipped = skip_worktree_paths(git_dir, format)?;
    let db = FileObjectDatabase::from_git_dir(git_dir, format);
    let commit = read_commit(&db, format, target)?;
    let mut target_entries = BTreeMap::new();
    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;

    // Honor skip-worktree: a path whose worktree file is intentionally absent
    // must not be treated as a dirty (deleted) change blocking the checkout.
    let status = short_status(worktree_root, git_dir, format)?;
    if status.iter().any(|entry| {
        if previously_skipped.contains(entry.path.as_slice()) {
            return false;
        }
        // Submodule state never blocks a checkout: upstream unpack-trees
        // treats gitlinks as always up-to-date (ie_match_stat refuses to pay
        // for a submodule dirtiness probe), so new commits / dirty content in
        // a submodule must not fail the branch switch.
        if entry.index_mode == Some(0o160000) || entry.worktree_mode == Some(0o160000) {
            return false;
        }
        // An untracked embedded repository where the target tree records a
        // gitlink is reused as-is (upstream entry.c write_entry: mkdir with
        // EEXIST is success), so it does not block the checkout either.
        if entry.index == b'?' && entry.worktree == b'?' {
            // The status path may carry a trailing `/`; the tree map keys do not.
            let path = entry
                .path
                .strip_suffix(b"/")
                .unwrap_or(entry.path.as_slice());
            if target_entries
                .get(path)
                .is_some_and(|target| target.mode == 0o160000)
            {
                return false;
            }
        }
        // Every other status entry blocks the checkout.
        true
    }) {
        return Err(GitError::Transaction(
            "checkout requires a clean working tree".into(),
        ));
    }

    let matcher = sparse.map(|(spec, mode)| SparseMatcher::new(spec, mode));

    // Remove worktree files for tracked paths that the target tree drops.
    for path in read_index_entries(git_dir, format)?.keys() {
        if target_entries.contains_key(path) {
            continue;
        }
        // Do not disturb the worktree state of an intentionally skipped path.
        if previously_skipped.contains(path) {
            continue;
        }
        remove_worktree_file(worktree_root, path)?;
    }

    let mut index_entries = Vec::new();
    for (path, entry) in &target_entries {
        // Without a matcher every path is in cone; with one, the matcher alone
        // decides (the previous skip-worktree state is not consulted here).
        let in_cone = matcher.as_ref().is_none_or(|matcher| {
            // A path already marked skip-worktree stays out unless it now
            // matches the sparse cone, mirroring upstream "honor skip-worktree".
            matcher.includes_file(path)
        });
        let index_entry = if in_cone {
            // `materialize_tree_entry` leaves flags_extended at 0, so the
            // skip-worktree bit is already clear for in-cone paths.
            materialize_tree_entry(&db, worktree_root, path, entry)?
        } else {
            // Out of cone: ensure no stale worktree file remains and synthesize
            // an index entry straight from the tree (no worktree metadata),
            // then mark it skip-worktree.
            remove_worktree_file(worktree_root, path)?;
            let mut index_entry = restored_head_index_entry(worktree_root, &db, path, entry)?;
            set_skip_worktree(&mut index_entry);
            index_entry
        };
        index_entries.push(index_entry);
    }
    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
    let mut index = Index {
        version: 2,
        entries: index_entries,
        extensions: Vec::new(),
        checksum: None,
    };
    // Starts at version 2; the helper adjusts the version when entries carry
    // extended flags (such as skip-worktree) — presumably bumping it; verify
    // against the helper.
    normalize_index_version_for_extended_flags(&mut index);
    fs::write(repository_index_path(git_dir), index.write(format)?)?;
    Ok(target_entries.len())
}
8182
8183fn skip_worktree_paths(git_dir: &Path, format: ObjectFormat) -> Result<BTreeSet<Vec<u8>>> {
8184    let index_path = repository_index_path(git_dir);
8185    if !index_path.exists() {
8186        return Ok(BTreeSet::new());
8187    }
8188    let index = Index::parse(&fs::read(index_path)?, format)?;
8189    Ok(index
8190        .entries
8191        .into_iter()
8192        .filter(index_entry_skip_worktree)
8193        .map(|entry| entry.path.into_bytes())
8194        .collect())
8195}
8196
8197pub fn restore_worktree_paths(
8198    worktree_root: impl AsRef<Path>,
8199    git_dir: impl AsRef<Path>,
8200    format: ObjectFormat,
8201    paths: &[PathBuf],
8202) -> Result<RestoreResult> {
8203    restore_worktree_paths_inner(
8204        worktree_root.as_ref(),
8205        git_dir.as_ref(),
8206        format,
8207        paths,
8208        None,
8209    )
8210}
8211
8212/// Like [`restore_worktree_paths`], applying the smudge-side content filters
8213/// (CRLF / ident / filter drivers) the way a checkout writes blobs.
8214pub fn restore_worktree_paths_filtered(
8215    worktree_root: impl AsRef<Path>,
8216    git_dir: impl AsRef<Path>,
8217    format: ObjectFormat,
8218    paths: &[PathBuf],
8219    config: &GitConfig,
8220) -> Result<RestoreResult> {
8221    restore_worktree_paths_inner(
8222        worktree_root.as_ref(),
8223        git_dir.as_ref(),
8224        format,
8225        paths,
8226        Some(config),
8227    )
8228}
8229
8230fn restore_worktree_paths_inner(
8231    worktree_root: &Path,
8232    git_dir: &Path,
8233    format: ObjectFormat,
8234    paths: &[PathBuf],
8235    smudge_config: Option<&GitConfig>,
8236) -> Result<RestoreResult> {
8237    let index_path = repository_index_path(git_dir);
8238    if !index_path.exists() {
8239        return Err(GitError::Exit(1));
8240    }
8241    let index = Index::parse(&fs::read(index_path)?, format)?;
8242    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8243    let mut restored = BTreeSet::new();
8244    for path in paths {
8245        let absolute = if path.is_absolute() {
8246            path.clone()
8247        } else {
8248            worktree_root.join(path)
8249        };
8250        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
8251            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
8252        })?;
8253        let git_path = git_path_bytes(relative)?;
8254        let recursive = path == Path::new(".")
8255            || path.to_string_lossy().ends_with('/')
8256            || absolute.is_dir()
8257            || index_has_entry_under(&index.entries, &git_path);
8258        let mut matched = false;
8259        for entry in &index.entries {
8260            if entry.path.as_bytes() == git_path.as_slice()
8261                || (recursive && index_entry_is_under_path(entry.path.as_bytes(), &git_path))
8262            {
8263                restore_index_entry(worktree_root, git_dir, format, &db, entry, smudge_config)?;
8264                restored.insert(entry.path.clone());
8265                matched = true;
8266            }
8267        }
8268        if !matched {
8269            eprintln!(
8270                "error: pathspec '{}' did not match any file(s) known to git",
8271                path.display()
8272            );
8273            return Err(GitError::Exit(1));
8274        }
8275    }
8276    Ok(RestoreResult {
8277        restored: restored.len(),
8278    })
8279}
8280
8281pub fn restore_index_paths_from_head(
8282    worktree_root: impl AsRef<Path>,
8283    git_dir: impl AsRef<Path>,
8284    format: ObjectFormat,
8285    paths: &[PathBuf],
8286) -> Result<RestoreResult> {
8287    let worktree_root = worktree_root.as_ref();
8288    let git_dir = git_dir.as_ref();
8289    let index_path = repository_index_path(git_dir);
8290    let index = if index_path.exists() {
8291        Index::parse(&fs::read(&index_path)?, format)?
8292    } else {
8293        Index {
8294            version: 2,
8295            entries: Vec::new(),
8296            extensions: Vec::new(),
8297            checksum: None,
8298        }
8299    };
8300    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8301    let head_entries = head_tree_entries(git_dir, format, &db)?;
8302    restore_index_paths_from_entries(
8303        worktree_root,
8304        git_dir,
8305        format,
8306        &db,
8307        index,
8308        &head_entries,
8309        paths,
8310    )
8311}
8312
8313pub fn restore_index_paths_from_tree(
8314    worktree_root: impl AsRef<Path>,
8315    git_dir: impl AsRef<Path>,
8316    format: ObjectFormat,
8317    tree_oid: &ObjectId,
8318    paths: &[PathBuf],
8319) -> Result<RestoreResult> {
8320    let worktree_root = worktree_root.as_ref();
8321    let git_dir = git_dir.as_ref();
8322    let index_path = repository_index_path(git_dir);
8323    let index = if index_path.exists() {
8324        Index::parse(&fs::read(&index_path)?, format)?
8325    } else {
8326        Index {
8327            version: 2,
8328            entries: Vec::new(),
8329            extensions: Vec::new(),
8330            checksum: None,
8331        }
8332    };
8333    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8334    let source_entries = tree_entries(&db, format, tree_oid)?;
8335    restore_index_paths_from_entries(
8336        worktree_root,
8337        git_dir,
8338        format,
8339        &db,
8340        index,
8341        &source_entries,
8342        paths,
8343    )
8344}
8345
8346fn restore_index_paths_from_entries(
8347    worktree_root: &Path,
8348    git_dir: &Path,
8349    format: ObjectFormat,
8350    db: &FileObjectDatabase,
8351    index: Index,
8352    source_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
8353    paths: &[PathBuf],
8354) -> Result<RestoreResult> {
8355    let mut index_entries = index
8356        .entries
8357        .into_iter()
8358        .map(|entry| (entry.path.as_bytes().to_vec(), entry))
8359        .collect::<BTreeMap<_, _>>();
8360    let mut restored = BTreeSet::new();
8361    for path in paths {
8362        let absolute = if path.is_absolute() {
8363            path.clone()
8364        } else {
8365            worktree_root.join(path)
8366        };
8367        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
8368            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
8369        })?;
8370        let git_path = git_path_bytes(relative)?;
8371        let recursive = path == Path::new(".")
8372            || path.to_string_lossy().ends_with('/')
8373            || absolute.is_dir()
8374            || index_entries
8375                .keys()
8376                .any(|entry| index_entry_is_under_path(entry, &git_path))
8377            || source_entries
8378                .keys()
8379                .any(|entry| index_entry_is_under_path(entry, &git_path));
8380        let mut matched_paths = BTreeSet::new();
8381        for path in index_entries.keys().chain(source_entries.keys()) {
8382            if path.as_slice() == git_path.as_slice()
8383                || (recursive && index_entry_is_under_path(path, &git_path))
8384            {
8385                matched_paths.insert(path.clone());
8386            }
8387        }
8388        if matched_paths.is_empty() {
8389            eprintln!(
8390                "error: pathspec '{}' did not match any file(s) known to git",
8391                path.display()
8392            );
8393            return Err(GitError::Exit(1));
8394        }
8395        for path in matched_paths {
8396            if let Some(entry) = source_entries.get(&path) {
8397                // git's pathspec reset (`reset_index` → diff against the source
8398                // tree) only rewrites entries that actually CHANGE: an entry whose
8399                // oid and mode already equal the source is left untouched, so its
8400                // cached stat is preserved and `git diff-files` stays clean (t7102
8401                // "resetting an unmodified path is a no-op"). Only when the entry
8402                // genuinely changes does git write a fresh, stat-zeroed entry.
8403                let unchanged = index_entries
8404                    .get(&path)
8405                    .is_some_and(|existing| existing.oid == entry.oid && existing.mode == entry.mode);
8406                if !unchanged {
8407                    index_entries.insert(
8408                        path.clone(),
8409                        restored_head_index_entry(worktree_root, db, &path, entry)?,
8410                    );
8411                }
8412            } else {
8413                index_entries.remove(&path);
8414            }
8415            restored.insert(path);
8416        }
8417    }
8418    let mut entries = index_entries.into_values().collect::<Vec<_>>();
8419    entries.sort_by(|left, right| left.path.cmp(&right.path));
8420    fs::write(
8421        repository_index_path(git_dir),
8422        Index {
8423            version: 2,
8424            entries,
8425            extensions: Vec::new(),
8426            checksum: None,
8427        }
8428        .write(format)?,
8429    )?;
8430    Ok(RestoreResult {
8431        restored: restored.len(),
8432    })
8433}
8434
8435pub fn restore_index_and_worktree_paths_from_head(
8436    worktree_root: impl AsRef<Path>,
8437    git_dir: impl AsRef<Path>,
8438    format: ObjectFormat,
8439    paths: &[PathBuf],
8440) -> Result<RestoreResult> {
8441    let worktree_root = worktree_root.as_ref();
8442    let git_dir = git_dir.as_ref();
8443    let index_path = repository_index_path(git_dir);
8444    let index = if index_path.exists() {
8445        Index::parse(&fs::read(&index_path)?, format)?
8446    } else {
8447        Index {
8448            version: 2,
8449            entries: Vec::new(),
8450            extensions: Vec::new(),
8451            checksum: None,
8452        }
8453    };
8454    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8455    let head_entries = head_tree_entries(git_dir, format, &db)?;
8456    restore_index_and_worktree_paths_from_entries(
8457        worktree_root,
8458        git_dir,
8459        format,
8460        &db,
8461        index,
8462        &head_entries,
8463        paths,
8464    )
8465}
8466
8467pub fn restore_index_and_worktree_paths_from_tree(
8468    worktree_root: impl AsRef<Path>,
8469    git_dir: impl AsRef<Path>,
8470    format: ObjectFormat,
8471    tree_oid: &ObjectId,
8472    paths: &[PathBuf],
8473) -> Result<RestoreResult> {
8474    let worktree_root = worktree_root.as_ref();
8475    let git_dir = git_dir.as_ref();
8476    let index_path = repository_index_path(git_dir);
8477    let index = if index_path.exists() {
8478        Index::parse(&fs::read(&index_path)?, format)?
8479    } else {
8480        Index {
8481            version: 2,
8482            entries: Vec::new(),
8483            extensions: Vec::new(),
8484            checksum: None,
8485        }
8486    };
8487    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8488    let source_entries = tree_entries(&db, format, tree_oid)?;
8489    restore_index_and_worktree_paths_from_entries(
8490        worktree_root,
8491        git_dir,
8492        format,
8493        &db,
8494        index,
8495        &source_entries,
8496        paths,
8497    )
8498}
8499
8500fn restore_index_and_worktree_paths_from_entries(
8501    worktree_root: &Path,
8502    git_dir: &Path,
8503    format: ObjectFormat,
8504    db: &FileObjectDatabase,
8505    index: Index,
8506    source_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
8507    paths: &[PathBuf],
8508) -> Result<RestoreResult> {
8509    let mut index_entries = index
8510        .entries
8511        .into_iter()
8512        .map(|entry| (entry.path.as_bytes().to_vec(), entry))
8513        .collect::<BTreeMap<_, _>>();
8514    let mut restored = BTreeSet::new();
8515    for path in paths {
8516        let absolute = if path.is_absolute() {
8517            path.clone()
8518        } else {
8519            worktree_root.join(path)
8520        };
8521        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
8522            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
8523        })?;
8524        let git_path = git_path_bytes(relative)?;
8525        let recursive = path == Path::new(".")
8526            || path.to_string_lossy().ends_with('/')
8527            || absolute.is_dir()
8528            || index_entries
8529                .keys()
8530                .any(|entry| index_entry_is_under_path(entry, &git_path))
8531            || source_entries
8532                .keys()
8533                .any(|entry| index_entry_is_under_path(entry, &git_path));
8534        let mut matched_paths = BTreeSet::new();
8535        for path in index_entries.keys().chain(source_entries.keys()) {
8536            if path.as_slice() == git_path.as_slice()
8537                || (recursive && index_entry_is_under_path(path, &git_path))
8538            {
8539                matched_paths.insert(path.clone());
8540            }
8541        }
8542        if matched_paths.is_empty() {
8543            eprintln!(
8544                "error: pathspec '{}' did not match any file(s) known to git",
8545                path.display()
8546            );
8547            return Err(GitError::Exit(1));
8548        }
8549        for path in matched_paths {
8550            if let Some(entry) = source_entries.get(&path) {
8551                index_entries.insert(
8552                    path.clone(),
8553                    restore_head_entry_to_worktree_and_index(worktree_root, db, &path, entry)?,
8554                );
8555            } else {
8556                index_entries.remove(&path);
8557                remove_worktree_file(worktree_root, &path)?;
8558            }
8559            restored.insert(path);
8560        }
8561    }
8562    let mut entries = index_entries.into_values().collect::<Vec<_>>();
8563    entries.sort_by(|left, right| left.path.cmp(&right.path));
8564    fs::write(
8565        repository_index_path(git_dir),
8566        Index {
8567            version: 2,
8568            entries,
8569            extensions: Vec::new(),
8570            checksum: None,
8571        }
8572        .write(format)?,
8573    )?;
8574    Ok(RestoreResult {
8575        restored: restored.len(),
8576    })
8577}
8578
8579pub fn reset_index_and_worktree_to_commit(
8580    worktree_root: impl AsRef<Path>,
8581    git_dir: impl AsRef<Path>,
8582    format: ObjectFormat,
8583    commit_oid: &ObjectId,
8584) -> Result<RestoreResult> {
8585    let worktree_root = worktree_root.as_ref();
8586    let git_dir = git_dir.as_ref();
8587    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8588    let commit = read_commit(&db, format, commit_oid)?;
8589    let mut target_entries = BTreeMap::new();
8590    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
8591
8592    for path in read_index_entries(git_dir, format)?.keys() {
8593        if !target_entries.contains_key(path) {
8594            remove_worktree_file(worktree_root, path)?;
8595        }
8596    }
8597
8598    let mut index_entries = Vec::new();
8599    for (path, entry) in &target_entries {
8600        index_entries.push(materialize_tree_entry(&db, worktree_root, path, entry)?);
8601    }
8602    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
8603    fs::write(
8604        repository_index_path(git_dir),
8605        Index {
8606            version: 2,
8607            entries: index_entries,
8608            extensions: Vec::new(),
8609            checksum: None,
8610        }
8611        .write(format)?,
8612    )?;
8613    Ok(RestoreResult {
8614        restored: target_entries.len(),
8615    })
8616}
8617
8618/// Write one target tree entry into the worktree and return its index entry —
8619/// the shared materialization step for every checkout/reset worktree rebuild.
8620///
8621/// Gitlinks (mode 160000) never touch the object database: their oid names a
8622/// commit in the *submodule's* repository, not an object here. Upstream
8623/// (entry.c `write_entry` S_IFGITLINK) just mkdirs the path — an
8624/// already-populated submodule is left untouched (EEXIST is success) — and
8625/// records the oid in the index with a zeroed stat so status re-evaluates the
8626/// gitlink against the embedded repository's HEAD.
8627fn materialize_tree_entry(
8628    db: &FileObjectDatabase,
8629    worktree_root: &Path,
8630    path: &[u8],
8631    entry: &TrackedEntry,
8632) -> Result<IndexEntry> {
8633    if entry.mode == 0o160000 {
8634        let dir_path = worktree_path(worktree_root, path)?;
8635        fs::create_dir_all(&dir_path)?;
8636        return Ok(IndexEntry {
8637            ctime_seconds: 0,
8638            ctime_nanoseconds: 0,
8639            mtime_seconds: 0,
8640            mtime_nanoseconds: 0,
8641            dev: 0,
8642            ino: 0,
8643            mode: entry.mode,
8644            uid: 0,
8645            gid: 0,
8646            size: 0,
8647            oid: entry.oid,
8648            flags: path.len().min(0x0fff) as u16,
8649            flags_extended: 0,
8650            path: BString::from(path),
8651        });
8652    }
8653    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
8654    let file_path = worktree_path(worktree_root, path)?;
8655    if let Some(parent) = file_path.parent() {
8656        fs::create_dir_all(parent)?;
8657    }
8658    fs::write(&file_path, &object.body)?;
8659    let metadata = fs::metadata(&file_path)?;
8660    let mut index_entry = index_entry_from_metadata(path.to_vec(), entry.oid, &metadata);
8661    index_entry.mode = entry.mode;
8662    Ok(index_entry)
8663}
8664
8665/// Materialize a tree object into the index and worktree.
8666pub fn checkout_tree_to_index_and_worktree(
8667    worktree_root: impl AsRef<Path>,
8668    git_dir: impl AsRef<Path>,
8669    format: ObjectFormat,
8670    tree_oid: &ObjectId,
8671) -> Result<RestoreResult> {
8672    let worktree_root = worktree_root.as_ref();
8673    let git_dir = git_dir.as_ref();
8674    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8675    let mut target_entries = BTreeMap::new();
8676    collect_tree_entries(&db, format, tree_oid, &mut target_entries)?;
8677
8678    for path in read_index_entries(git_dir, format)?.keys() {
8679        if !target_entries.contains_key(path) {
8680            remove_worktree_file(worktree_root, path)?;
8681        }
8682    }
8683
8684    let mut index_entries = Vec::new();
8685    for (path, entry) in &target_entries {
8686        index_entries.push(materialize_tree_entry(&db, worktree_root, path, entry)?);
8687    }
8688    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
8689    fs::write(
8690        repository_index_path(git_dir),
8691        Index {
8692            version: 2,
8693            entries: index_entries,
8694            extensions: Vec::new(),
8695            checksum: None,
8696        }
8697        .write(format)?,
8698    )?;
8699    Ok(RestoreResult {
8700        restored: target_entries.len(),
8701    })
8702}
8703
8704pub fn reset_index_to_commit(
8705    worktree_root: impl AsRef<Path>,
8706    git_dir: impl AsRef<Path>,
8707    format: ObjectFormat,
8708    commit_oid: &ObjectId,
8709) -> Result<RestoreResult> {
8710    let worktree_root = worktree_root.as_ref();
8711    let git_dir = git_dir.as_ref();
8712    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8713    let commit = read_commit(&db, format, commit_oid)?;
8714    let mut target_entries = BTreeMap::new();
8715    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
8716    // git's `reset --mixed` preserves the skip-worktree bit on entries that survive
8717    // the reset (t7102 "--mixed preserves skip-worktree"): carry it forward from the
8718    // pre-reset index keyed by path, so reconstructed entries keep CE_SKIP_WORKTREE.
8719    let index_path = repository_index_path(git_dir);
8720    let prior_skip_worktree: BTreeSet<Vec<u8>> = match fs::read(&index_path) {
8721        Ok(bytes) => Index::parse(&bytes, format)?
8722            .entries
8723            .iter()
8724            .filter(|entry| entry.is_skip_worktree())
8725            .map(|entry| entry.path.as_bytes().to_vec())
8726            .collect(),
8727        Err(err) if err.kind() == std::io::ErrorKind::NotFound => BTreeSet::new(),
8728        Err(err) => return Err(err.into()),
8729    };
8730    let mut index_entries = Vec::new();
8731    for (path, entry) in &target_entries {
8732        let mut restored = restored_head_index_entry(worktree_root, &db, path, entry)?;
8733        if prior_skip_worktree.contains(path) {
8734            restored.set_skip_worktree(true);
8735        }
8736        index_entries.push(restored);
8737    }
8738    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
8739    let mut index = Index {
8740        version: 2,
8741        entries: index_entries,
8742        extensions: Vec::new(),
8743        checksum: None,
8744    };
8745    index.upgrade_version_for_flags();
8746    fs::write(&index_path, index.write(format)?)?;
8747    Ok(RestoreResult {
8748        restored: target_entries.len(),
8749    })
8750}
8751
8752/// Build a fresh in-memory index that mirrors the tree `tree_oid`, the way
8753/// `git read-tree <tree>` does: every blob, symlink, and gitlink leaf (found by
8754/// recursing subtrees) becomes a stage-0 entry carrying the tree mode and oid,
8755/// with a fully zeroed stat (so nothing is treated as stat-clean) and size 0.
8756/// Entries are sorted by path; the index is version 2 with no extensions.
8757///
8758/// This does not touch the worktree or write anything to disk — serialize the
8759/// result with [`Index::write`] (and persist it) when you want to replace
8760/// `.git/index`.
8761pub fn index_from_tree(
8762    db: &FileObjectDatabase,
8763    format: ObjectFormat,
8764    tree_oid: &ObjectId,
8765) -> Result<Index> {
8766    let mut entries: Vec<IndexEntry> = Vec::new();
8767    if *tree_oid != ObjectId::empty_tree(format) {
8768        let mut tree_entries = BTreeMap::new();
8769        collect_tree_entries(db, format, tree_oid, &mut tree_entries)?;
8770        entries.reserve(tree_entries.len());
8771        for (path, entry) in tree_entries {
8772            let name_len = (path.len().min(0x0fff)) as u16;
8773            entries.push(IndexEntry {
8774                ctime_seconds: 0,
8775                ctime_nanoseconds: 0,
8776                mtime_seconds: 0,
8777                mtime_nanoseconds: 0,
8778                dev: 0,
8779                ino: 0,
8780                mode: entry.mode,
8781                uid: 0,
8782                gid: 0,
8783                size: 0,
8784                oid: entry.oid,
8785                flags: name_len,
8786                flags_extended: 0,
8787                path: path.into(),
8788            });
8789        }
8790    }
8791    // git orders index entries by path bytes; BTreeMap already yields that, but
8792    // sort explicitly so the contract holds regardless of how entries arrive.
8793    entries.sort_by(|left, right| left.path.cmp(&right.path));
8794    Ok(Index {
8795        version: 2,
8796        entries,
8797        extensions: Vec::new(),
8798        checksum: None,
8799    })
8800}
8801
8802/// Enforces a [`SparseCheckout`] against the current index and worktree.
8803///
8804/// Every stage-0 index entry is classified with the sparse patterns (see
8805/// [`SparseCheckoutMode`] for the matching semantics):
8806///
8807/// * **In cone**: the skip-worktree bit is cleared and, if the worktree file is
8808///   missing, it is re-materialized from the entry's blob in the object
8809///   database. Existing worktree files are left untouched so local content is
8810///   preserved.
8811/// * **Out of cone**: the skip-worktree bit is set and any existing worktree
8812///   file is removed (empty parent directories are pruned).
8813///
8814/// Conflicted entries (stage != 0) are never given the skip-worktree bit and
8815/// are left alone, matching upstream Git. The index is rewritten in place.
8816pub fn apply_sparse_checkout(
8817    worktree_root: impl AsRef<Path>,
8818    git_dir: impl AsRef<Path>,
8819    format: ObjectFormat,
8820    sparse: &SparseCheckout,
8821) -> Result<ApplySparseResult> {
8822    apply_sparse_checkout_with_mode(
8823        worktree_root,
8824        git_dir,
8825        format,
8826        sparse,
8827        SparseCheckoutMode::Auto,
8828    )
8829}
8830
8831/// Like [`apply_sparse_checkout`] but lets the caller force the pattern
8832/// interpretation instead of auto-detecting it.
8833pub fn apply_sparse_checkout_with_mode(
8834    worktree_root: impl AsRef<Path>,
8835    git_dir: impl AsRef<Path>,
8836    format: ObjectFormat,
8837    sparse: &SparseCheckout,
8838    mode: SparseCheckoutMode,
8839) -> Result<ApplySparseResult> {
8840    let worktree_root = worktree_root.as_ref();
8841    let git_dir = git_dir.as_ref();
8842    let index_path = repository_index_path(git_dir);
8843    let mut index = if index_path.exists() {
8844        Index::parse(&fs::read(&index_path)?, format)?
8845    } else {
8846        return Ok(ApplySparseResult {
8847            materialized: Vec::new(),
8848            skipped: Vec::new(),
8849            not_up_to_date: Vec::new(),
8850        });
8851    };
8852    let matcher = SparseMatcher::new(sparse, mode);
8853    let db = FileObjectDatabase::from_git_dir(git_dir, format);
8854    let mut materialized = Vec::new();
8855    let mut skipped = Vec::new();
8856    let mut not_up_to_date = Vec::new();
8857    for entry in &mut index.entries {
8858        // Never touch conflicted entries.
8859        if index_entry_stage(entry) != 0 {
8860            continue;
8861        }
8862        if matcher.includes_file(entry.path.as_bytes()) {
8863            clear_skip_worktree(entry);
8864            let file_path = worktree_path(worktree_root, entry.path.as_bytes())?;
8865            if !file_path.exists() {
8866                materialize_index_entry_file(&db, &file_path, entry)?;
8867            }
8868            materialized.push(entry.path.as_bytes().to_vec());
8869        } else {
8870            // The path is out of cone, so its worktree file should be removed and
8871            // the entry marked skip-worktree. But git refuses to delete a file
8872            // that is *not up to date* with the index (e.g. one that reappeared in
8873            // the worktree after the path was already sparse): it leaves the file,
8874            // leaves the skip-worktree bit clear, and reports the path in its "not
8875            // up to date" warning. Mirror that to avoid silent data loss.
8876            let file_path = worktree_path(worktree_root, entry.path.as_bytes())?;
8877            match fs::symlink_metadata(&file_path) {
8878                Ok(metadata) if !worktree_entry_is_uptodate(entry, &metadata) => {
8879                    clear_skip_worktree(entry);
8880                    not_up_to_date.push(entry.path.as_bytes().to_vec());
8881                }
8882                _ => {
8883                    set_skip_worktree(entry);
8884                    remove_worktree_file(worktree_root, entry.path.as_bytes())?;
8885                    skipped.push(entry.path.as_bytes().to_vec());
8886                }
8887            }
8888        }
8889    }
8890    not_up_to_date.sort();
8891    normalize_index_version_for_extended_flags(&mut index);
8892    fs::write(index_path, index.write(format)?)?;
8893    Ok(ApplySparseResult {
8894        materialized,
8895        skipped,
8896        not_up_to_date,
8897    })
8898}
8899
8900/// Whether the worktree file described by `metadata` is up to date with `entry`'s
8901/// cached index stat, using the size + mtime heuristic at the core of git's
8902/// `ie_match_stat`. A freshly-checked-out (clean) file matches; a file that was
8903/// deleted and later recreated — as happens when an out-of-cone path reappears in
8904/// the worktree — gets a fresh mtime and so reads as modified, which is exactly
8905/// the state git declines to overwrite during a sparse update.
8906fn worktree_entry_is_uptodate(entry: &IndexEntry, metadata: &fs::Metadata) -> bool {
8907    if u64::from(entry.size) != metadata.len() {
8908        return false;
8909    }
8910    let Some((mtime_seconds, mtime_nanoseconds)) = file_mtime_parts(metadata) else {
8911        // Without a usable mtime we cannot prove the file is clean; treat it as
8912        // not up to date so a present file is never silently discarded.
8913        return false;
8914    };
8915    u64::from(entry.mtime_seconds) == mtime_seconds
8916        && u64::from(entry.mtime_nanoseconds) == mtime_nanoseconds
8917}
8918
8919fn worktree_entry_ref_is_uptodate(entry: &IndexEntryRef<'_>, metadata: &fs::Metadata) -> bool {
8920    if u64::from(entry.size) != metadata.len() {
8921        return false;
8922    }
8923    let Some((mtime_seconds, mtime_nanoseconds)) = file_mtime_parts(metadata) else {
8924        return false;
8925    };
8926    u64::from(entry.mtime_seconds) == mtime_seconds
8927        && u64::from(entry.mtime_nanoseconds) == mtime_nanoseconds
8928}
8929
/// Splits the file's modification time into whole seconds since the Unix
/// epoch and the sub-second nanosecond remainder, matching how git stores
/// `mtime` in the index.
///
/// Returns `None` when the modification time is unavailable or lies before
/// the epoch.
fn file_mtime_parts(metadata: &fs::Metadata) -> Option<(u64, u64)> {
    metadata
        .modified()
        .ok()
        .and_then(|stamp| stamp.duration_since(UNIX_EPOCH).ok())
        .map(|since_epoch| {
            (
                since_epoch.as_secs(),
                u64::from(since_epoch.subsec_nanos()),
            )
        })
}
8937
8938/// Write a git metadata file through a sibling `.lock` file and atomic rename.
8939///
8940/// This helper is intended for small repository/worktree metadata files such as
8941/// `HEAD`, `config.worktree`, or state files under `.git/`. It deliberately does
8942/// not try to replace object or pack writers, which have their own durability
8943/// and naming rules.
8944pub fn write_metadata_file_atomic(
8945    path: impl AsRef<Path>,
8946    bytes: &[u8],
8947    options: AtomicMetadataWriteOptions,
8948) -> Result<AtomicMetadataWriteResult> {
8949    let path = path.as_ref();
8950    let parent = path.parent().ok_or_else(|| {
8951        GitError::InvalidPath(format!("metadata path has no parent: {}", path.display()))
8952    })?;
8953    if !parent.as_os_str().is_empty() {
8954        fs::create_dir_all(parent)?;
8955    }
8956    let lock_path = metadata_lock_path(path)?;
8957    let mut lock = match fs::OpenOptions::new()
8958        .write(true)
8959        .create_new(true)
8960        .open(&lock_path)
8961    {
8962        Ok(lock) => lock,
8963        Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {
8964            return Err(GitError::Transaction(format!(
8965                "metadata lock already exists: {}",
8966                lock_path.display()
8967            )));
8968        }
8969        Err(err) => return Err(err.into()),
8970    };
8971    if let Err(err) = lock.write_all(bytes) {
8972        let _ = fs::remove_file(&lock_path);
8973        return Err(err.into());
8974    }
8975    if options.fsync_file
8976        && let Err(err) = lock.sync_all()
8977    {
8978        let _ = fs::remove_file(&lock_path);
8979        return Err(err.into());
8980    }
8981    drop(lock);
8982    if let Err(err) = fs::rename(&lock_path, path) {
8983        let _ = fs::remove_file(&lock_path);
8984        return Err(err.into());
8985    }
8986    if options.fsync_dir
8987        && let Ok(dir) = fs::File::open(parent)
8988    {
8989        dir.sync_all()?;
8990    }
8991    let metadata = fs::metadata(path)?;
8992    Ok(AtomicMetadataWriteResult {
8993        path: path.to_path_buf(),
8994        len: metadata.len(),
8995        mtime: file_mtime_parts(&metadata),
8996    })
8997}
8998
8999fn metadata_lock_path(path: &Path) -> Result<PathBuf> {
9000    let file_name = path.file_name().ok_or_else(|| {
9001        GitError::InvalidPath(format!("metadata path has no filename: {}", path.display()))
9002    })?;
9003    let mut lock_name = file_name.to_os_string();
9004    lock_name.push(".lock");
9005    Ok(path.with_file_name(lock_name))
9006}
9007
9008/// Checks out `target` like [`checkout_detached`], but materializes the
9009/// worktree through the supplied [`SparseCheckout`]: out-of-cone paths are not
9010/// written, get their skip-worktree bit set, and have any stale worktree file
9011/// removed. Existing public checkout entry points are unchanged; this is an
9012/// additive sparse-aware variant.
9013///
9014/// The pattern interpretation is auto-detected ([`SparseCheckoutMode::Auto`]);
9015/// to reconcile an existing checkout under an explicit mode use
9016/// [`apply_sparse_checkout_with_mode`].
9017pub fn checkout_detached_sparse(
9018    worktree_root: impl AsRef<Path>,
9019    git_dir: impl AsRef<Path>,
9020    format: ObjectFormat,
9021    target: &ObjectId,
9022    committer: Vec<u8>,
9023    message: Vec<u8>,
9024    sparse: &SparseCheckout,
9025) -> Result<CheckoutResult> {
9026    let worktree_root = worktree_root.as_ref();
9027    let git_dir = git_dir.as_ref();
9028    let files = checkout_commit_to_index_and_worktree_sparse(
9029        worktree_root,
9030        git_dir,
9031        format,
9032        target,
9033        Some((sparse, SparseCheckoutMode::Auto)),
9034    )?;
9035    let refs = FileRefStore::new(git_dir, format);
9036    let zero = ObjectId::null(format);
9037    let mut tx = refs.transaction();
9038    tx.update(RefUpdate {
9039        name: "HEAD".into(),
9040        expected: None,
9041        new: RefTarget::Direct(*target),
9042        reflog: Some(ReflogEntry {
9043            old_oid: zero,
9044            new_oid: *target,
9045            committer,
9046            message,
9047        }),
9048    });
9049    tx.commit()?;
9050    Ok(CheckoutResult {
9051        branch: target.to_string(),
9052        oid: *target,
9053        files,
9054    })
9055}
9056
9057fn materialize_index_entry_file(
9058    db: &FileObjectDatabase,
9059    file_path: &Path,
9060    entry: &IndexEntry,
9061) -> Result<()> {
9062    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
9063    if let Some(parent) = file_path.parent() {
9064        fs::create_dir_all(parent)?;
9065    }
9066    fs::write(file_path, &object.body)?;
9067    Ok(())
9068}
9069
9070fn set_skip_worktree(entry: &mut IndexEntry) {
9071    entry.flags |= INDEX_FLAG_EXTENDED;
9072    entry.flags_extended |= INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
9073}
9074
9075fn clear_skip_worktree(entry: &mut IndexEntry) {
9076    entry.flags_extended &= !INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
9077    if entry.flags_extended == 0 {
9078        entry.flags &= !INDEX_FLAG_EXTENDED;
9079    }
9080}
9081
9082pub fn restore_worktree_paths_from_head(
9083    worktree_root: impl AsRef<Path>,
9084    git_dir: impl AsRef<Path>,
9085    format: ObjectFormat,
9086    paths: &[PathBuf],
9087) -> Result<RestoreResult> {
9088    let worktree_root = worktree_root.as_ref();
9089    let git_dir = git_dir.as_ref();
9090    let index_path = repository_index_path(git_dir);
9091    let index = if index_path.exists() {
9092        Index::parse(&fs::read(&index_path)?, format)?
9093    } else {
9094        Index {
9095            version: 2,
9096            entries: Vec::new(),
9097            extensions: Vec::new(),
9098            checksum: None,
9099        }
9100    };
9101    let db = FileObjectDatabase::from_git_dir(git_dir, format);
9102    let head_entries = head_tree_entries(git_dir, format, &db)?;
9103    restore_worktree_paths_from_entries(worktree_root, &db, index, &head_entries, paths)
9104}
9105
9106pub fn restore_worktree_paths_from_tree(
9107    worktree_root: impl AsRef<Path>,
9108    git_dir: impl AsRef<Path>,
9109    format: ObjectFormat,
9110    tree_oid: &ObjectId,
9111    paths: &[PathBuf],
9112) -> Result<RestoreResult> {
9113    let worktree_root = worktree_root.as_ref();
9114    let git_dir = git_dir.as_ref();
9115    let index_path = repository_index_path(git_dir);
9116    let index = if index_path.exists() {
9117        Index::parse(&fs::read(&index_path)?, format)?
9118    } else {
9119        Index {
9120            version: 2,
9121            entries: Vec::new(),
9122            extensions: Vec::new(),
9123            checksum: None,
9124        }
9125    };
9126    let db = FileObjectDatabase::from_git_dir(git_dir, format);
9127    let source_entries = tree_entries(&db, format, tree_oid)?;
9128    restore_worktree_paths_from_entries(worktree_root, &db, index, &source_entries, paths)
9129}
9130
9131fn restore_worktree_paths_from_entries(
9132    worktree_root: &Path,
9133    db: &FileObjectDatabase,
9134    index: Index,
9135    source_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
9136    paths: &[PathBuf],
9137) -> Result<RestoreResult> {
9138    let index_entries = index
9139        .entries
9140        .into_iter()
9141        .map(|entry| entry.path.into_bytes())
9142        .collect::<BTreeSet<_>>();
9143    let mut restored = BTreeSet::new();
9144    for path in paths {
9145        let absolute = if path.is_absolute() {
9146            path.clone()
9147        } else {
9148            worktree_root.join(path)
9149        };
9150        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
9151            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
9152        })?;
9153        let git_path = git_path_bytes(relative)?;
9154        let recursive = path == Path::new(".")
9155            || path.to_string_lossy().ends_with('/')
9156            || absolute.is_dir()
9157            || index_entries
9158                .iter()
9159                .any(|entry| index_entry_is_under_path(entry, &git_path))
9160            || source_entries
9161                .keys()
9162                .any(|entry| index_entry_is_under_path(entry, &git_path));
9163        let mut matched_paths = BTreeSet::new();
9164        for path in index_entries.iter().chain(source_entries.keys()) {
9165            if path.as_slice() == git_path.as_slice()
9166                || (recursive && index_entry_is_under_path(path, &git_path))
9167            {
9168                matched_paths.insert(path.clone());
9169            }
9170        }
9171        if matched_paths.is_empty() {
9172            eprintln!(
9173                "error: pathspec '{}' did not match any file(s) known to git",
9174                path.display()
9175            );
9176            return Err(GitError::Exit(1));
9177        }
9178        for path in matched_paths {
9179            if let Some(entry) = source_entries.get(&path) {
9180                restore_head_entry_to_worktree(worktree_root, db, &path, entry)?;
9181            } else {
9182                remove_worktree_file(worktree_root, &path)?;
9183            }
9184            restored.insert(path);
9185        }
9186    }
9187    Ok(RestoreResult {
9188        restored: restored.len(),
9189    })
9190}
9191
/// Removes the selected `paths` from the index and, unless `options.cached` is
/// set, from the working tree as well (the operation behind `git rm`).
///
/// Each pathspec is resolved against the index in this order: an exact path
/// match (skipped when the pathspec has a trailing separator), a glob match
/// when the pathspec contains wildcard characters, and finally every entry
/// under the path, which requires `options.recursive`. Selecting a path
/// removes all of its stage entries.
///
/// Unless `options.force` is set, a local-modification safety check runs over
/// the whole selection and reports every offending path on stderr before
/// failing. With `options.dry_run` the selection is returned and neither the
/// worktree nor the index is modified.
///
/// On success the index is rewritten without the selected entries and without
/// its cache-tree extension, and the removed paths are returned.
///
/// # Errors
///
/// * `GitError::InvalidPath` when a path lies outside `worktree_root`.
/// * `GitError::Exit(128)` when a pathspec matches nothing (and
///   `options.ignore_unmatch` is unset), when a directory match lacks
///   `options.recursive`, or when a worktree removal hits a directory before
///   any other path has been removed.
/// * `GitError::Exit(1)` when the safety check finds staged or local changes.
/// * I/O and parse errors from reading the index or worktree files.
9192pub fn remove_index_and_worktree_paths(
9193    worktree_root: impl AsRef<Path>,
9194    git_dir: impl AsRef<Path>,
9195    format: ObjectFormat,
9196    paths: &[PathBuf],
9197    options: RemoveOptions,
9198    config_parameters_env: Option<&str>,
9199) -> Result<RemoveResult> {
9200    let worktree_root = worktree_root.as_ref();
9201    let git_dir = git_dir.as_ref();
9202    let index_path = repository_index_path(git_dir);
9203    let index = if index_path.exists() {
9204        Index::parse(&fs::read(&index_path)?, format)?
9205    } else {
9206        Index {
9207            version: 2,
9208            entries: Vec::new(),
9209            extensions: Vec::new(),
9210            checksum: None,
9211        }
9212    };
9213    let db = FileObjectDatabase::from_git_dir(git_dir, format);
9214    let head_entries = head_tree_entries(git_dir, format, &db)?;
9215    // Stat cache for the local-modification check (git's `ie_match_stat`):
9216    // proves a path unchanged from the cached stat without reading its blob, so
9217    // a `git rm --cached` of an untouched path whose blob was removed still
9218    // succeeds (cf. t1450-fsck cell 90). (`sley_index::IndexStatCache` is a
9219    // distinct type from this crate's same-named probe helper above.)
9220    let rm_stat_cache = sley_index::IndexStatCache::from_index(&index, &index_path);
9221    let Index {
9222        version: index_version,
9223        entries: index_entry_list,
9224        extensions: index_extensions,
9225        ..
9226    } = index;
9227    // The set of distinct index paths (any stage) — used for membership tests.
9228    let index_paths: BTreeSet<Vec<u8>> = index_entry_list
9229        .iter()
9230        .map(|entry| entry.path.as_bytes().to_vec())
9231        .collect();
9232    // Paths selected for removal. A single selected path removes ALL of its
9233    // stage entries (so resolving an unmerged path by removal drops stages
9234    // 1/2/3 together), matching git's name-keyed removal.
9235    let mut selected = BTreeSet::new();
9236    for path in paths {
9237        let absolute = if path.is_absolute() {
9238            path.clone()
9239        } else {
9240            worktree_root.join(path)
9241        };
9242        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
9243            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
9244        })?;
9245        // A pathspec with a trailing slash (e.g. `git rm dir/`) only matches a
9246        // directory: it must never match a same-named tracked file. `Path`'s
9247        // component iterator drops the slash, so capture it before it is lost.
9248        let has_trailing_slash = path_has_trailing_separator(&absolute);
9249        let git_path = git_path_bytes(relative)?;
9250        if !has_trailing_slash && index_paths.contains(&git_path) {
9251            selected.insert(git_path);
9252            continue;
9253        }
9254        // A wildcard pathspec (e.g. `git rm "*"` or `git rm "dir/*.c"`) matches
9255        // index entries by git's pathspec matcher rather than by literal path or
9256        // directory prefix. Try the glob match first when the spec contains
9257        // wildcard metacharacters; a glob match removes the entries directly
9258        // (no `-r` needed — the pathspec already names the files).
9259        if pathspec_is_glob(&git_path) {
9260            let glob_matched = index_paths
9261                .iter()
9262                .filter(|entry| {
9263                    pathspec_item_matches(&git_path, entry, PathspecMatchMagic::default())
9264                })
9265                .cloned()
9266                .collect::<Vec<_>>();
9267            if !glob_matched.is_empty() {
9268                selected.extend(glob_matched);
9269                continue;
9270            }
9271            if options.ignore_unmatch {
9272                continue;
9273            }
9274            eprintln!(
9275                "fatal: pathspec '{}' did not match any files",
9276                String::from_utf8_lossy(&git_path)
9277            );
9278            return Err(GitError::Exit(128));
9279        }
9280        let matched = index_paths
9281            .iter()
9282            .filter(|entry| index_entry_is_under_path(entry, &git_path))
9283            .cloned()
9284            .collect::<Vec<_>>();
9285        if matched.is_empty() {
9286            if options.ignore_unmatch {
9287                continue;
9288            }
9289            eprintln!(
9290                "fatal: pathspec '{}' did not match any files",
9291                String::from_utf8_lossy(&git_path)
9292            );
9293            return Err(GitError::Exit(128));
9294        }
9295        if !options.recursive {
9296            eprintln!(
9297                "fatal: not removing '{}' recursively without -r",
9298                String::from_utf8_lossy(&git_path)
9299            );
9300            return Err(GitError::Exit(128));
9301        }
9302        selected.extend(matched);
9303    }
9304
9305    // `git rm` runs the local-modification safety check unless `-f` is given —
9306    // even for `--cached`. The check (a faithful port of builtin/rm.c's
9307    // `check_local_mod`) buckets each selected path into one of three error
9308    // classes and prints all of them at once (collected, not fail-fast), so a
9309    // single `git rm a b c` reports every offending path. See the message
9310    // assertions in t3600-rm.sh.
9311    if !options.force {
        // A config that cannot be read is not fatal for the safety check: fall
        // back to the default configuration.
9312        let config =
9313            sley_config::read_repo_config(git_dir, config_parameters_env).unwrap_or_default();
9314        // advice.rmhints (default true) gates the parenthetical "(use ...)" hints.
9315        let show_hints = config
9316            .get_bool("advice", None, "rmhints")
9317            .unwrap_or(true);
9318        // Map each selected path to its stage-0 index entry for the check; an
9319        // unmerged path (no stage 0) is skipped, exactly like git's loop
9320        // (index_name_pos fails, and a non-gitlink ours entry `continue`s).
9321        let stage0: BTreeMap<&[u8], &IndexEntry> = index_entry_list
9322            .iter()
9323            .filter(|entry| entry.stage() == Stage::Normal)
9324            .map(|entry| (entry.path.as_bytes(), entry))
9325            .collect();
9326        let mut files_staged: Vec<&[u8]> = Vec::new();
9327        let mut files_cached: Vec<&[u8]> = Vec::new();
9328        let mut files_local: Vec<&[u8]> = Vec::new();
9329        for path in &selected {
9330            let Some(index_entry) = stage0.get(path.as_slice()) else {
9331                // Unmerged path with no stage-0 entry: resolving by removal is
9332                // safe and not warning-worthy.
9333                continue;
9334            };
9335            let worktree_file = worktree_path(worktree_root, path)?;
9336            // Is the worktree path different from the index?
9337            //
9338            // Mirror builtin/rm.c's `check_local_mod`: when `lstat` fails with a
9339            // "missing file" error (ENOENT *or* ENOTDIR — the path vanished, or a
9340            // leading component became a file) the file has already gone from the
9341            // working tree, so git `continue`s and never buckets the path. Same
9342            // for a tracked plain path that is now a directory on disk: git
9343            // treats that as ENOENT and skips it (the later worktree-removal step
9344            // is what fails on a non-empty directory).
9345            let local_changes = match fs::symlink_metadata(&worktree_file) {
                // Raw errno 20 is ENOTDIR on Linux/macOS; it is checked next to
                // the `ErrorKind` match as a fallback.
9346                Err(err)
9347                    if matches!(
9348                        err.kind(),
9349                        std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
9350                    ) || err.raw_os_error() == Some(20) =>
9351                {
9352                    // ENOENT/ENOTDIR: already gone — not warning-worthy.
9353                    continue;
9354                }
9355                Err(err) => return Err(err.into()),
9356                Ok(meta) if meta.is_dir() => continue,
9357                Ok(meta) => {
9358                    // git refreshes the index before `check_local_mod`, so a path
9359                    // whose stat changed but whose content is unchanged is up to
9360                    // date. We mirror that: a clean cached stat short-circuits to
9361                    // "unchanged"; otherwise re-hash the (clean-filtered) worktree
9362                    // content and compare to the index entry's *cached oid* (git's
9363                    // refresh `hash_object`), NOT the stored blob. Comparing to the
9364                    // oid — not the blob bytes — means a removed object does not
9365                    // abort the check (the worktree may still hash to the cached
9366                    // oid), so `git rm --cached` of a path whose blob was deleted
9367                    // still succeeds.
9368                    match rm_stat_cache.index_entry_worktree_stat_verdict(index_entry, &meta) {
9369                        sley_index::StatVerdict::Clean => false,
9370                        sley_index::StatVerdict::Dirty
9371                        | sley_index::StatVerdict::RacyNeedsContentCheck => {
9372                            let worktree_bytes = apply_clean_filter(
9373                                worktree_root,
9374                                git_dir,
9375                                &config,
9376                                path,
9377                                &fs::read(&worktree_file)?,
9378                            )?;
9379                            let worktree_oid =
9380                                EncodedObject::new(ObjectType::Blob, worktree_bytes)
9381                                    .object_id(format)?;
9382                            worktree_oid != index_entry.oid
9383                        }
9384                    }
9385                }
9386            };
9387            // Is the index different from the HEAD commit? (Before the first
9388            // commit, anything staged is treated as changed from HEAD.)
9389            let staged_changes = match head_entries.get(path) {
9390                Some(head_entry) => {
9391                    head_entry.oid != index_entry.oid || head_entry.mode != index_entry.mode
9392                }
9393                None => true,
9394            };
9395            if local_changes && staged_changes {
9396                // `git rm --cached` of an intent-to-add entry is safe.
9397                if !options.cached || !index_entry.is_intent_to_add() {
9398                    files_staged.push(path);
9399                }
9400            } else if !options.cached {
9401                if staged_changes {
9402                    files_cached.push(path);
9403                }
9404                if local_changes {
9405                    files_local.push(path);
9406                }
9407            }
9408        }
9409        let mut errs = false;
9410        print_rm_error_files(
9411            &files_staged,
9412            "the following file has staged content different from both the\nfile and the HEAD:",
9413            "the following files have staged content different from both the\nfile and the HEAD:",
9414            "\n(use -f to force removal)",
9415            show_hints,
9416            &mut errs,
9417        );
9418        print_rm_error_files(
9419            &files_cached,
9420            "the following file has changes staged in the index:",
9421            "the following files have changes staged in the index:",
9422            "\n(use --cached to keep the file, or -f to force removal)",
9423            show_hints,
9424            &mut errs,
9425        );
9426        print_rm_error_files(
9427            &files_local,
9428            "the following file has local modifications:",
9429            "the following files have local modifications:",
9430            "\n(use --cached to keep the file, or -f to force removal)",
9431            show_hints,
9432            &mut errs,
9433        );
9434        if errs {
9435            return Err(GitError::Exit(1));
9436        }
9437    }
9438
9439    if options.dry_run {
9440        return Ok(RemoveResult {
9441            removed: selected.into_iter().collect(),
9442        });
9443    }
9444    // Mirror builtin/rm.c's ordering: remove the worktree files BEFORE writing
9445    // the new index. If the very first removal fails (and nothing has been
9446    // removed yet), abort without committing the index, so a `git rm d` where
9447    // `d` is now a non-empty directory fails AND leaves the index untouched.
9448    // Once any file has been removed we commit to finishing (git does the same).
9449    if !options.cached {
9450        let mut removed_any = false;
9451        for path in &selected {
9452            match remove_tracked_worktree_path(worktree_root, path)? {
9453                true => removed_any = true,
9454                false if !removed_any => {
9455                    eprintln!(
9456                        "fatal: git rm: '{}': Is a directory",
9457                        String::from_utf8_lossy(path)
9458                    );
9459                    return Err(GitError::Exit(128));
9460                }
9461                false => {}
9462            }
9463        }
9464    }
9465    // Keep every entry whose path was not selected, preserving original order
9466    // and all stages of unmerged paths that were not removed.
9467    let entries = index_entry_list
9468        .into_iter()
9469        .filter(|entry| !selected.contains(entry.path.as_bytes()))
9470        .collect::<Vec<_>>();
9471    // Removing entries invalidates the cache-tree (`TREE` extension): a stale
9472    // cached subtree id makes `git diff --cached`/`git status` short-circuit the
9473    // comparison of an affected directory against HEAD and miss the deletion
9474    // (observed: `git rm dir/nested.txt` left a valid `dir/` cache-tree, so the
9475    // deletion never showed in the cached diff). Git invalidates the cache-tree
9476    // on any index mutation; drop it so it is rebuilt on the next write, exactly
9477    // like the `add` path does above.
9478    let extensions = index_extensions_without_cache_tree(&index_extensions);
    // NOTE(review): the index is rewritten in place with `fs::write` rather than
    // through a lock file plus rename — confirm this is intentional.
9479    fs::write(
9480        index_path,
9481        Index {
9482            version: index_version,
9483            entries,
9484            extensions,
9485            checksum: None,
9486        }
9487        .write(format)?,
9488    )?;
9489    Ok(RemoveResult {
9490        removed: selected.into_iter().collect(),
9491    })
9492}
9493
9494/// Remove a tracked path from the working tree, mirroring builtin/rm.c's
9495/// `remove_path`: unlink the file and prune now-empty parent directories.
9496/// Returns `Ok(true)` when a file was removed, `Ok(false)` when the path could
9497/// not be unlinked because it is a directory (the caller decides whether that
9498/// aborts the run). A path that has already vanished is a no-op success.
9499fn remove_tracked_worktree_path(root: &Path, path: &[u8]) -> Result<bool> {
9500    let file = worktree_path(root, path)?;
9501    match fs::symlink_metadata(&file) {
9502        Err(err)
9503            if matches!(
9504                err.kind(),
9505                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
9506            ) =>
9507        {
9508            return Ok(true);
9509        }
9510        Err(err) if err.raw_os_error() == Some(20) => return Ok(true), // ENOTDIR
9511        Err(err) => return Err(err.into()),
9512        // A directory in the worktree where a plain file is tracked cannot be
9513        // unlinked (git's remove_path fails on EISDIR). Report it so the caller
9514        // can abort the removal without committing the index.
9515        Ok(meta) if meta.is_dir() => return Ok(false),
9516        Ok(_) => {}
9517    }
9518    fs::remove_file(&file)?;
9519    prune_empty_parents(root, file.parent())?;
9520    Ok(true)
9521}
9522
/// Emits one batched `git rm` safety error to stderr, in the shape of
/// builtin/rm.c's `print_error_files`: the headline (`singular` for exactly one
/// path, `plural` otherwise), each offending path on its own four-space
/// indented line, and the trailing `hint` when `show_hints` is set.
///
/// Does nothing for an empty `files` list; otherwise sets `*errs` to `true` so
/// the caller can fail once every error class has been reported.
fn print_rm_error_files(
    files: &[&[u8]],
    singular: &str,
    plural: &str,
    hint: &str,
    show_hints: bool,
    errs: &mut bool,
) {
    let headline = match files.len() {
        0 => return,
        1 => singular,
        _ => plural,
    };
    let listing: String = files
        .iter()
        .map(|path| format!("\n    {}", String::from_utf8_lossy(path)))
        .collect();
    let trailer = if show_hints { hint } else { "" };
    eprintln!("error: {headline}{listing}{trailer}");
    *errs = true;
}
9549
9550pub fn move_index_and_worktree_path(
9551    worktree_root: impl AsRef<Path>,
9552    git_dir: impl AsRef<Path>,
9553    format: ObjectFormat,
9554    source: &Path,
9555    destination: &Path,
9556    options: MoveOptions,
9557) -> Result<MoveResult> {
9558    let worktree_root = worktree_root.as_ref();
9559    let git_dir = git_dir.as_ref();
9560    let index_path = repository_index_path(git_dir);
9561    let mut index = if index_path.exists() {
9562        Index::parse(&fs::read(&index_path)?, format)?
9563    } else {
9564        Index {
9565            version: 2,
9566            entries: Vec::new(),
9567            extensions: Vec::new(),
9568            checksum: None,
9569        }
9570    };
9571    let source_absolute = if source.is_absolute() {
9572        source.to_path_buf()
9573    } else {
9574        worktree_root.join(source)
9575    };
9576    let destination_absolute = if destination.is_absolute() {
9577        destination.to_path_buf()
9578    } else {
9579        worktree_root.join(destination)
9580    };
9581    let destination_absolute = if destination_absolute.is_dir() {
9582        let Some(file_name) = source_absolute.file_name() else {
9583            return Err(GitError::InvalidPath(format!(
9584                "invalid source path {}",
9585                source.display()
9586            )));
9587        };
9588        destination_absolute.join(file_name)
9589    } else {
9590        destination_absolute
9591    };
9592    let source_relative = source_absolute.strip_prefix(worktree_root).map_err(|_| {
9593        GitError::InvalidPath(format!("path {} is outside worktree", source.display()))
9594    })?;
9595    let destination_relative = destination_absolute
9596        .strip_prefix(worktree_root)
9597        .map_err(|_| {
9598            GitError::InvalidPath(format!(
9599                "path {} is outside worktree",
9600                destination.display()
9601            ))
9602        })?;
9603    let source_path = git_path_bytes(source_relative)?;
9604    let destination_path = git_path_bytes(destination_relative)?;
9605    let destination_has_trailing_separator = path_has_trailing_separator(&destination_absolute);
9606    if destination_has_trailing_separator && !destination_absolute.is_dir() {
9607        if options.skip_errors {
9608            return Ok(MoveResult {
9609                source: source_path,
9610                destination: destination_path,
9611                skipped: true,
9612                fatal: None,
9613                details: Vec::new(),
9614            });
9615        }
9616        let mut destination = String::from_utf8_lossy(&destination_path).into_owned();
9617        destination.push('/');
9618        if options.dry_run {
9619            let fatal = format!(
9620                "fatal: destination directory does not exist, source={}, destination={destination}",
9621                String::from_utf8_lossy(&source_path),
9622            );
9623            return Ok(MoveResult {
9624                source: source_path,
9625                destination: destination.clone().into_bytes(),
9626                skipped: false,
9627                fatal: Some(fatal),
9628                details: Vec::new(),
9629            });
9630        }
9631        eprintln!(
9632            "fatal: destination directory does not exist, source={}, destination={destination}",
9633            String::from_utf8_lossy(&source_path),
9634        );
9635        return Err(GitError::Exit(128));
9636    }
9637    if destination_absolute.exists() {
9638        if !options.force {
9639            if options.skip_errors {
9640                return Ok(MoveResult {
9641                    source: source_path,
9642                    destination: destination_path,
9643                    skipped: true,
9644                    fatal: None,
9645                    details: Vec::new(),
9646                });
9647            }
9648            if options.dry_run {
9649                let fatal = format!(
9650                    "fatal: destination exists, source={}, destination={}",
9651                    String::from_utf8_lossy(&source_path),
9652                    String::from_utf8_lossy(&destination_path)
9653                );
9654                return Ok(MoveResult {
9655                    source: source_path,
9656                    destination: destination_path,
9657                    skipped: false,
9658                    fatal: Some(fatal),
9659                    details: Vec::new(),
9660                });
9661            }
9662            eprintln!(
9663                "fatal: destination exists, source={}, destination={}",
9664                String::from_utf8_lossy(&source_path),
9665                String::from_utf8_lossy(&destination_path)
9666            );
9667            return Err(GitError::Exit(128));
9668        }
9669        if !options.dry_run && destination_absolute.is_dir() {
9670            fs::remove_dir_all(&destination_absolute)?;
9671        } else if !options.dry_run {
9672            fs::remove_file(&destination_absolute)?;
9673        }
9674    }
9675    let directory_prefix = {
9676        let mut prefix = source_path.clone();
9677        prefix.push(b'/');
9678        prefix
9679    };
9680    let directory_entries: Vec<_> = index
9681        .entries
9682        .iter()
9683        .filter(|entry| entry.path.as_bytes().starts_with(&directory_prefix))
9684        .cloned()
9685        .collect();
9686    if !directory_entries.is_empty() {
9687        let details: Vec<_> = directory_entries
9688            .iter()
9689            .map(|entry| {
9690                let suffix = &entry.path.as_bytes()[source_path.len()..];
9691                let mut destination = destination_path.clone();
9692                destination.extend_from_slice(suffix);
9693                MoveDetail {
9694                    source: entry.path.as_bytes().to_vec(),
9695                    destination,
9696                    skipped: false,
9697                }
9698            })
9699            .collect();
9700        if options.dry_run {
9701            return Ok(MoveResult {
9702                source: source_path,
9703                destination: destination_path,
9704                skipped: false,
9705                fatal: None,
9706                details,
9707            });
9708        }
9709        fs::rename(&source_absolute, &destination_absolute)?;
9710        let moved_paths: Vec<_> = details
9711            .iter()
9712            .map(|detail| detail.destination.clone())
9713            .collect();
9714        index.entries.retain(|entry| {
9715            !entry.path.as_bytes().starts_with(&directory_prefix)
9716                && !moved_paths
9717                    .iter()
9718                    .any(|m| m.as_slice() == entry.path.as_bytes())
9719        });
9720        for (source_entry, detail) in directory_entries.into_iter().zip(details.iter()) {
9721            let relative_path = git_path_to_relative_path(&detail.destination)?;
9722            let metadata = fs::metadata(worktree_root.join(relative_path))?;
9723            let mut destination_entry =
9724                index_entry_from_metadata(detail.destination.clone(), source_entry.oid, &metadata);
9725            destination_entry.mode = source_entry.mode;
9726            index.entries.push(destination_entry);
9727        }
9728        index
9729            .entries
9730            .sort_by(|left, right| left.path.cmp(&right.path));
9731        index.extensions.clear();
9732        fs::write(index_path, index.write(format)?)?;
9733        return Ok(MoveResult {
9734            source: source_path,
9735            destination: destination_path,
9736            skipped: false,
9737            fatal: None,
9738            details,
9739        });
9740    }
9741
9742    let Some(position) = index
9743        .entries
9744        .iter()
9745        .position(|entry| entry.path == source_path)
9746    else {
9747        if options.skip_errors {
9748            return Ok(MoveResult {
9749                source: source_path,
9750                destination: destination_path,
9751                skipped: true,
9752                fatal: None,
9753                details: Vec::new(),
9754            });
9755        }
9756        let source_kind = if source_absolute.exists() {
9757            "not under version control"
9758        } else {
9759            "bad source"
9760        };
9761        if options.dry_run {
9762            let fatal = format!(
9763                "fatal: {source_kind}, source={}, destination={}",
9764                String::from_utf8_lossy(&source_path),
9765                String::from_utf8_lossy(&destination_path)
9766            );
9767            return Ok(MoveResult {
9768                source: source_path,
9769                destination: destination_path,
9770                skipped: false,
9771                fatal: Some(fatal),
9772                details: Vec::new(),
9773            });
9774        }
9775        eprintln!(
9776            "fatal: {source_kind}, source={}, destination={}",
9777            String::from_utf8_lossy(&source_path),
9778            String::from_utf8_lossy(&destination_path)
9779        );
9780        return Err(GitError::Exit(128));
9781    };
9782    if options.dry_run {
9783        return Ok(MoveResult {
9784            source: source_path,
9785            destination: destination_path,
9786            skipped: false,
9787            fatal: None,
9788            details: Vec::new(),
9789        });
9790    }
9791    if let Some(parent) = destination_absolute.parent()
9792        && !parent.exists()
9793    {
9794        if options.skip_errors {
9795            return Ok(MoveResult {
9796                source: source_path,
9797                destination: destination_path,
9798                skipped: true,
9799                fatal: None,
9800                details: Vec::new(),
9801            });
9802        }
9803        eprintln!(
9804            "fatal: renaming '{}' failed: No such file or directory",
9805            String::from_utf8_lossy(&source_path)
9806        );
9807        return Err(GitError::Exit(128));
9808    }
9809    fs::rename(&source_absolute, &destination_absolute)?;
9810    let metadata = fs::metadata(&destination_absolute)?;
9811    let source_entry = index.entries.remove(position);
9812    let mut destination_entry =
9813        index_entry_from_metadata(destination_path.clone(), source_entry.oid, &metadata);
9814    destination_entry.mode = source_entry.mode;
9815    index.entries.retain(|entry| entry.path != destination_path);
9816    index.entries.push(destination_entry);
9817    index
9818        .entries
9819        .sort_by(|left, right| left.path.cmp(&right.path));
9820    index.extensions.clear();
9821    fs::write(index_path, index.write(format)?)?;
9822    Ok(MoveResult {
9823        source: source_path,
9824        destination: destination_path,
9825        skipped: false,
9826        fatal: None,
9827        details: Vec::new(),
9828    })
9829}
9830
9831fn restore_index_entry(
9832    worktree_root: &Path,
9833    git_dir: &Path,
9834    format: ObjectFormat,
9835    db: &FileObjectDatabase,
9836    entry: &IndexEntry,
9837    smudge_config: Option<&GitConfig>,
9838) -> Result<()> {
9839    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
9840    let body: Cow<'_, [u8]> = match smudge_config {
9841        Some(config) => {
9842            let checks = smudge_attribute_checks_from_index(
9843                worktree_root,
9844                git_dir,
9845                format,
9846                entry.path.as_bytes(),
9847            )?;
9848            apply_smudge_filter_with_attributes_cow(
9849                config,
9850                &checks,
9851                entry.path.as_bytes(),
9852                &object.body,
9853            )?
9854        }
9855        None => Cow::Borrowed(&object.body),
9856    };
9857    let file_path = worktree_path(worktree_root, entry.path.as_bytes())?;
9858    if let Some(parent) = file_path.parent() {
9859        fs::create_dir_all(parent)?;
9860    }
9861    fs::write(file_path, &body)?;
9862    Ok(())
9863}
9864
9865fn restored_head_index_entry(
9866    worktree_root: &Path,
9867    db: &FileObjectDatabase,
9868    path: &[u8],
9869    entry: &TrackedEntry,
9870) -> Result<IndexEntry> {
9871    let file_path = worktree_path(worktree_root, path)?;
9872    // This restores the index from a tree (reset --mixed / stash / sparse) WITHOUT
9873    // rewriting the worktree file, so the file on disk may hold different content
9874    // than `entry.oid`. Crucially we must NOT copy the worktree file's stat onto
9875    // this entry: that would make the cached stat match a file whose real content
9876    // hashes to a DIFFERENT oid, breaking git's "stat-match implies oid-match"
9877    // invariant that the status stat-cache relies on. Leave the stat zeroed so
9878    // status always re-hashes this path and detects any modification -- exactly
9879    // git's behavior for tree-sourced entries until a later refresh validates them.
9880    let size = if entry.mode == 0o160000 {
9881        // A gitlink's oid names a commit in the submodule's repository — it is
9882        // not readable here, and a tree-sourced gitlink entry carries size 0.
9883        0
9884    } else {
9885        match fs::metadata(&file_path) {
9886            Ok(metadata) => metadata.len().min(u32::MAX as u64) as u32,
9887            Err(_) => {
9888                let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
9889                object.body.len().min(u32::MAX as usize) as u32
9890            }
9891        }
9892    };
9893    Ok(IndexEntry {
9894        ctime_seconds: 0,
9895        ctime_nanoseconds: 0,
9896        mtime_seconds: 0,
9897        mtime_nanoseconds: 0,
9898        dev: 0,
9899        ino: 0,
9900        mode: entry.mode,
9901        uid: 0,
9902        gid: 0,
9903        size,
9904        oid: entry.oid,
9905        flags: path.len().min(0x0fff) as u16,
9906        flags_extended: 0,
9907        path: BString::from(path),
9908    })
9909}
9910
9911fn restore_head_entry_to_worktree(
9912    worktree_root: &Path,
9913    db: &FileObjectDatabase,
9914    path: &[u8],
9915    entry: &TrackedEntry,
9916) -> Result<()> {
9917    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
9918    let file_path = worktree_path(worktree_root, path)?;
9919    if let Some(parent) = file_path.parent() {
9920        fs::create_dir_all(parent)?;
9921    }
9922    fs::write(file_path, &object.body)?;
9923    Ok(())
9924}
9925
9926fn restore_head_entry_to_worktree_and_index(
9927    worktree_root: &Path,
9928    db: &FileObjectDatabase,
9929    path: &[u8],
9930    entry: &TrackedEntry,
9931) -> Result<IndexEntry> {
9932    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
9933    let file_path = worktree_path(worktree_root, path)?;
9934    if let Some(parent) = file_path.parent() {
9935        fs::create_dir_all(parent)?;
9936    }
9937    fs::write(&file_path, &object.body)?;
9938    let metadata = fs::metadata(&file_path)?;
9939    let mut index_entry = index_entry_from_metadata(path.to_vec(), entry.oid, &metadata);
9940    index_entry.mode = entry.mode;
9941    Ok(index_entry)
9942}
9943
9944fn index_has_entry_under(entries: &[IndexEntry], directory: &[u8]) -> bool {
9945    entries
9946        .iter()
9947        .any(|entry| index_entry_is_under_path(entry.path.as_bytes(), directory))
9948}
9949
/// Reports whether `entry_path` lies beneath `directory`.
///
/// An empty `directory` contains every path. Otherwise `entry_path` must be
/// `directory` followed by a `/` separator, so `dir` itself and siblings such
/// as `dirx/file` are not considered to be under `dir`.
fn index_entry_is_under_path(entry_path: &[u8], directory: &[u8]) -> bool {
    if directory.is_empty() {
        return true;
    }
    matches!(entry_path.strip_prefix(directory), Some([b'/', ..]))
}
9959
9960fn index_entry_from_metadata(
9961    path: impl Into<BString>,
9962    oid: ObjectId,
9963    metadata: &fs::Metadata,
9964) -> IndexEntry {
9965    let modified = metadata.modified().ok();
9966    let duration = modified
9967        .and_then(|time| time.duration_since(UNIX_EPOCH).ok())
9968        .unwrap_or_default();
9969    let mode = file_mode(metadata);
9970    let path = path.into();
9971    let flags = path.len().min(0x0fff) as u16;
9972    IndexEntry {
9973        ctime_seconds: duration.as_secs().min(u32::MAX as u64) as u32,
9974        ctime_nanoseconds: duration.subsec_nanos(),
9975        mtime_seconds: duration.as_secs().min(u32::MAX as u64) as u32,
9976        mtime_nanoseconds: duration.subsec_nanos(),
9977        dev: 0,
9978        ino: 0,
9979        mode,
9980        uid: 0,
9981        gid: 0,
9982        size: metadata.len().min(u32::MAX as u64) as u32,
9983        oid,
9984        flags,
9985        flags_extended: 0,
9986        path,
9987    }
9988}
9989
9990fn read_expected_object(
9991    db: &FileObjectDatabase,
9992    oid: &ObjectId,
9993    expected: ObjectType,
9994) -> Result<std::sync::Arc<EncodedObject>> {
9995    let object = db
9996        .read_object(oid)
9997        .map_err(|err| expect_missing_object_kind(err, *oid, missing_kind_for_type(expected)))?;
9998    if object.object_type != expected {
9999        return Err(GitError::InvalidObject(format!(
10000            "expected {} {}, found {}",
10001            expected.as_str(),
10002            oid,
10003            object.object_type.as_str()
10004        )));
10005    }
10006    Ok(object)
10007}
10008
10009fn expect_missing_object_kind(
10010    err: GitError,
10011    oid: ObjectId,
10012    expected: MissingObjectKind,
10013) -> GitError {
10014    match err.not_found_kind() {
10015        Some(sley_core::NotFoundKind::Object { .. }) => GitError::object_kind_not_found_in(
10016            oid,
10017            expected,
10018            MissingObjectContext::WorktreeMaterialize,
10019        ),
10020        _ => err,
10021    }
10022}
10023
10024fn missing_kind_for_type(object_type: ObjectType) -> MissingObjectKind {
10025    match object_type {
10026        ObjectType::Blob => MissingObjectKind::Blob,
10027        ObjectType::Tree => MissingObjectKind::Tree,
10028        ObjectType::Commit => MissingObjectKind::Commit,
10029        ObjectType::Tag => MissingObjectKind::Tag,
10030    }
10031}
10032
10033fn read_commit(db: &FileObjectDatabase, format: ObjectFormat, oid: &ObjectId) -> Result<Commit> {
10034    let object = read_expected_object(db, oid, ObjectType::Commit)?;
10035    Commit::parse(format, &object.body)
10036}
10037
/// The `(mode, oid)` pair recorded for a single path, as taken from the index,
/// a tree, or the worktree walk. Two entries compare equal only when both the
/// mode and the object id match.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TrackedEntry {
    /// Git file mode of the entry (for example `0o160000` marks a gitlink).
    mode: u32,
    /// Object id the entry refers to.
    oid: ObjectId,
}
10043
/// Git's racy-git stat cache: the stage-0 index entries keyed by path (so the
/// worktree walk can reuse a cached oid when a file's stat shows it is unchanged
/// since it was staged) plus the index *file's* own mtime, which git uses as the
/// racy-clean reference timestamp.
///
/// SAFETY INVARIANT: trusting a cached oid by stat alone is only sound because
/// every code path that stamps a worktree stat onto an index entry also hashed
/// that exact file content (see `index_entry_from_metadata`), while tree-sourced
/// restores (reset --mixed / stash / sparse) leave the stat zeroed
/// (`restored_head_index_entry`). So a non-zero, non-racy stat match implies the
/// cached oid is the file's true content. When that does not hold we fall through
/// to a full read+filter+hash, so a modified file is never reported clean.
///
/// The `Default` value has no entries and an unknown index mtime, so every
/// entry checked against it is treated as racily clean.
#[derive(Debug, Clone, Default)]
struct IndexStatCache {
    /// Stage-0 index entries keyed by their repository path bytes. May be left
    /// empty when only the index mtime is needed (`from_index_mtime_only`).
    entries: HashMap<Vec<u8>, IndexEntry>,
    /// The index file's modification time as `(seconds, nanoseconds)`, or `None`
    /// when it could not be determined. Used as git's racy-clean reference.
    index_mtime: Option<(u64, u64)>,
}
10063
10064impl IndexStatCache {
10065    /// Builds the cache from an already-parsed index plus the path of the index
10066    /// file on disk (whose mtime becomes the racy-clean reference). Only stage-0
10067    /// entries are retained; higher merge stages never describe a worktree file.
10068    fn from_index(index: &Index, index_path: &Path) -> Self {
10069        let index_mtime = fs::metadata(index_path)
10070            .ok()
10071            .and_then(|metadata| file_mtime_parts(&metadata));
10072        Self::from_index_mtime(index, index_mtime)
10073    }
10074
10075    fn from_index_mtime(index: &Index, index_mtime: Option<(u64, u64)>) -> Self {
10076        IndexStatCache {
10077            entries: stage0_index_entries(index),
10078            index_mtime,
10079        }
10080    }
10081
10082    fn from_index_mtime_only(index_mtime: Option<(u64, u64)>) -> Self {
10083        IndexStatCache {
10084            entries: HashMap::new(),
10085            index_mtime,
10086        }
10087    }
10088
10089    /// Whether `entry` is "racily clean" in git's sense: its cached mtime is not
10090    /// strictly older than the index file's mtime, so a same-timestamp write
10091    /// could have changed the content without moving the stat. Such entries must
10092    /// always be re-hashed.
10093    ///
10094    /// Conservative by construction: if the index mtime is unknown, or either
10095    /// side's mtime is zero (e.g. a tree-sourced entry whose stat was left
10096    /// zeroed), this returns `true` so the caller re-hashes rather than trusting
10097    /// a stat we cannot prove safe.
10098    fn is_racily_clean(&self, entry: &IndexEntry) -> bool {
10099        let Some(index_mtime) = self.index_mtime else {
10100            return true;
10101        };
10102        if index_mtime == (0, 0) {
10103            return true;
10104        }
10105        let entry_mtime = (
10106            u64::from(entry.mtime_seconds),
10107            u64::from(entry.mtime_nanoseconds),
10108        );
10109        if entry_mtime == (0, 0) {
10110            return true;
10111        }
10112        // Racy unless the index was written strictly after the entry's mtime.
10113        index_mtime <= entry_mtime
10114    }
10115
10116    fn is_racily_clean_ref(&self, entry: &IndexEntryRef<'_>) -> bool {
10117        let Some(index_mtime) = self.index_mtime else {
10118            return true;
10119        };
10120        if index_mtime == (0, 0) {
10121            return true;
10122        }
10123        let entry_mtime = (
10124            u64::from(entry.mtime_seconds),
10125            u64::from(entry.mtime_nanoseconds),
10126        );
10127        if entry_mtime == (0, 0) {
10128            return true;
10129        }
10130        index_mtime <= entry_mtime
10131    }
10132
10133    /// Whether the index has a stage-0 entry for `git_path` (i.e. the path is
10134    /// tracked). Used to skip hashing untracked worktree files.
10135    fn contains(&self, git_path: &[u8]) -> bool {
10136        self.entries.contains_key(git_path)
10137    }
10138
10139    fn tracked_entry(&self, git_path: &[u8]) -> Option<TrackedEntry> {
10140        self.entries.get(git_path).map(|entry| TrackedEntry {
10141            mode: entry.mode,
10142            oid: entry.oid,
10143        })
10144    }
10145
10146    /// Returns the cached [`TrackedEntry`] for `git_path` (reusing its stored
10147    /// oid, so the caller can SKIP reading, filtering, and hashing the file) only
10148    /// when the worktree file is provably unchanged since it was staged: a
10149    /// stage-0 entry exists, its recorded mode matches the file's current mode
10150    /// (catching pure `chmod`s that do not move mtime), the size+mtime stat
10151    /// check passes, and the entry is not racily clean. Otherwise returns `None`
10152    /// and the caller hashes the file as usual.
10153    fn reuse_tracked_entry(
10154        &self,
10155        git_path: &[u8],
10156        worktree_metadata: &fs::Metadata,
10157    ) -> Option<TrackedEntry> {
10158        let entry = self.entries.get(git_path)?;
10159        self.reuse_index_entry(entry, worktree_metadata)
10160    }
10161
10162    fn reuse_index_entry(
10163        &self,
10164        entry: &IndexEntry,
10165        worktree_metadata: &fs::Metadata,
10166    ) -> Option<TrackedEntry> {
10167        if entry.mode != worktree_entry_mode(worktree_metadata) {
10168            return None;
10169        }
10170        if !worktree_entry_is_uptodate(entry, worktree_metadata) {
10171            return None;
10172        }
10173        if self.is_racily_clean(entry) {
10174            return None;
10175        }
10176        Some(TrackedEntry {
10177            mode: entry.mode,
10178            oid: entry.oid,
10179        })
10180    }
10181
10182    fn reuse_index_entry_ref(
10183        &self,
10184        entry: &IndexEntryRef<'_>,
10185        worktree_metadata: &fs::Metadata,
10186    ) -> Option<TrackedEntry> {
10187        if entry.mode != worktree_entry_mode(worktree_metadata) {
10188            return None;
10189        }
10190        if !worktree_entry_ref_is_uptodate(entry, worktree_metadata) {
10191            return None;
10192        }
10193        if self.is_racily_clean_ref(entry) {
10194            return None;
10195        }
10196        Some(TrackedEntry {
10197            mode: entry.mode,
10198            oid: entry.oid,
10199        })
10200    }
10201
10202    /// The stage-0 gitlink (mode 160000) index entry at `git_path`, if any.
10203    fn gitlink_entry(&self, git_path: &[u8]) -> Option<&IndexEntry> {
10204        self.entries
10205            .get(git_path)
10206            .filter(|entry| entry.mode == 0o160000)
10207    }
10208}
10209
10210fn read_index_entries(
10211    git_dir: &Path,
10212    format: ObjectFormat,
10213) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
10214    let db = FileObjectDatabase::from_git_dir(git_dir, format);
10215    Ok(read_index_entries_with_stat_cache(git_dir, format, &db)?.0)
10216}
10217
10218fn resolve_head_tree_oid(
10219    git_dir: &Path,
10220    format: ObjectFormat,
10221    db: &FileObjectDatabase,
10222) -> Result<Option<ObjectId>> {
10223    let Some(commit_oid) = resolve_head_commit_oid(git_dir, format)? else {
10224        return Ok(None);
10225    };
10226    let object = read_expected_object(db, &commit_oid, ObjectType::Commit)?;
10227    let commit = Commit::parse_ref(format, &object.body)?;
10228    Ok(Some(commit.tree))
10229}
10230
10231fn resolve_head_commit_oid(git_dir: &Path, format: ObjectFormat) -> Result<Option<ObjectId>> {
10232    let refs = FileRefStore::new(git_dir, format);
10233    sley_refs::resolve_ref_peeled(&refs, "HEAD")
10234}
10235
10236fn status_entry_is_untracked_or_ignored(entry: &ShortStatusEntry) -> bool {
10237    matches!((entry.index, entry.worktree), (b'?', b'?') | (b'!', b'!'))
10238}
10239
10240fn checkout_switch_head_symbolic(
10241    refs: &FileRefStore,
10242    branch_ref: String,
10243    committer: Vec<u8>,
10244    branch: &str,
10245    old_oid: Option<ObjectId>,
10246    new_oid: Option<ObjectId>,
10247) -> Result<()> {
10248    // Reflog "from" side: the previous branch's short name, or the commit id
10249    // when HEAD was detached (git's `checkout: moving from X to Y` shape,
10250    // which `@{-N}` resolution parses).
10251    let from = match refs.read_ref("HEAD") {
10252        Ok(Some(RefTarget::Symbolic(name))) => name
10253            .strip_prefix("refs/heads/")
10254            .unwrap_or(&name)
10255            .to_string(),
10256        Ok(Some(RefTarget::Direct(oid))) => oid.to_hex(),
10257        _ => "HEAD".to_string(),
10258    };
10259    let mut tx = refs.transaction();
10260    let reflog = match (old_oid, new_oid) {
10261        (Some(old_oid), Some(new_oid)) => Some(ReflogEntry {
10262            old_oid,
10263            new_oid,
10264            committer,
10265            message: format!("checkout: moving from {from} to {branch}").into_bytes(),
10266        }),
10267        _ => None,
10268    };
10269    tx.update(RefUpdate {
10270        name: "HEAD".into(),
10271        expected: None,
10272        new: RefTarget::Symbolic(branch_ref),
10273        reflog,
10274    });
10275    tx.commit()
10276}
10277
10278fn cache_tree_is_valid(tree: &CacheTree) -> bool {
10279    if tree.entry_count < 0 || tree.oid.is_none() {
10280        return false;
10281    }
10282    tree.subtrees
10283        .iter()
10284        .all(|child| cache_tree_is_valid(&child.tree))
10285}
10286
10287fn head_matches_index_from_cache_tree(
10288    index: &Index,
10289    format: ObjectFormat,
10290    head_tree_oid: &ObjectId,
10291    stage0_entry_count: usize,
10292) -> Result<bool> {
10293    let cache_tree = match index.cache_tree(format) {
10294        Ok(Some(cache_tree)) => cache_tree,
10295        Ok(None) | Err(_) => return Ok(false),
10296    };
10297    if !cache_tree_is_valid(&cache_tree) {
10298        return Ok(false);
10299    }
10300    let Some(root_oid) = cache_tree.oid.as_ref() else {
10301        return Ok(false);
10302    };
10303    if root_oid != head_tree_oid {
10304        return Ok(false);
10305    }
10306    Ok(cache_tree.entry_count as usize == stage0_entry_count)
10307}
10308
10309fn head_matches_borrowed_index_from_cache_tree(
10310    index: &BorrowedIndex<'_>,
10311    format: ObjectFormat,
10312    head_tree_oid: &ObjectId,
10313    stage0_entry_count: usize,
10314) -> Result<bool> {
10315    let cache_tree = match index.cache_tree(format) {
10316        Ok(Some(cache_tree)) => cache_tree,
10317        Ok(None) | Err(_) => return Ok(false),
10318    };
10319    if !cache_tree_is_valid(&cache_tree) {
10320        return Ok(false);
10321    }
10322    let Some(root_oid) = cache_tree.oid.as_ref() else {
10323        return Ok(false);
10324    };
10325    if root_oid != head_tree_oid {
10326        return Ok(false);
10327    }
10328    Ok(cache_tree.entry_count as usize == stage0_entry_count)
10329}
10330
10331/// Parses the index a single time and returns both the path -> [`TrackedEntry`]
10332/// map used for status comparisons AND the [`IndexStatCache`] used to short-cut
10333/// the worktree walk, avoiding a second parse of the same file.
10334fn read_index_entries_with_stat_cache(
10335    git_dir: &Path,
10336    format: ObjectFormat,
10337    db: &FileObjectDatabase,
10338) -> Result<(BTreeMap<Vec<u8>, TrackedEntry>, IndexStatCache, bool)> {
10339    let (index, stat_cache, head_matches_index) = read_index_with_stat_cache(git_dir, format, db)?;
10340    let tracked = index_entries_from_index(index);
10341    Ok((tracked, stat_cache, head_matches_index))
10342}
10343
10344fn index_entries_from_index(index: Index) -> BTreeMap<Vec<u8>, TrackedEntry> {
10345    index
10346        .entries
10347        .into_iter()
10348        .filter(|entry| entry.stage() == Stage::Normal)
10349        .map(|entry| {
10350            (
10351                entry.path.into_bytes(),
10352                TrackedEntry {
10353                    mode: entry.mode,
10354                    oid: entry.oid,
10355                },
10356            )
10357        })
10358        .collect()
10359}
10360
10361fn read_index_with_stat_cache(
10362    git_dir: &Path,
10363    format: ObjectFormat,
10364    db: &FileObjectDatabase,
10365) -> Result<(Index, IndexStatCache, bool)> {
10366    read_index_with_stat_cache_entries(git_dir, format, db, true)
10367}
10368
10369fn read_index_with_stat_cache_entries(
10370    git_dir: &Path,
10371    format: ObjectFormat,
10372    db: &FileObjectDatabase,
10373    include_entries: bool,
10374) -> Result<(Index, IndexStatCache, bool)> {
10375    let index_path = repository_index_path(git_dir);
10376    let index_metadata = match fs::metadata(&index_path) {
10377        Ok(metadata) => metadata,
10378        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
10379            return Ok((
10380                Index {
10381                    version: 2,
10382                    entries: Vec::new(),
10383                    extensions: Vec::new(),
10384                    checksum: None,
10385                },
10386                IndexStatCache::default(),
10387                false,
10388            ));
10389        }
10390        Err(err) => return Err(err.into()),
10391    };
10392    let index = Index::parse(&fs::read(&index_path)?, format)?;
10393    let index_mtime = file_mtime_parts(&index_metadata);
10394    let stage0_entry_count = index
10395        .entries
10396        .iter()
10397        .filter(|entry| index_entry_stage(entry) == 0)
10398        .count();
10399    let stat_cache = if include_entries {
10400        IndexStatCache::from_index_mtime(&index, index_mtime)
10401    } else {
10402        IndexStatCache::from_index_mtime_only(index_mtime)
10403    };
10404    let head_matches_index = match resolve_head_tree_oid(git_dir, format, db)? {
10405        Some(head_tree_oid) => {
10406            head_matches_index_from_cache_tree(&index, format, &head_tree_oid, stage0_entry_count)?
10407        }
10408        None => false,
10409    };
10410    Ok((index, stat_cache, head_matches_index))
10411}
10412
10413fn head_tree_entries(
10414    git_dir: &Path,
10415    format: ObjectFormat,
10416    db: &FileObjectDatabase,
10417) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
10418    let refs = FileRefStore::new(git_dir, format);
10419    let Some(head) = refs.read_ref("HEAD")? else {
10420        return Ok(BTreeMap::new());
10421    };
10422    let commit_oid = match head {
10423        RefTarget::Direct(oid) => Some(oid),
10424        RefTarget::Symbolic(name) => match refs.read_ref(&name)? {
10425            Some(RefTarget::Direct(oid)) => Some(oid),
10426            _ => None,
10427        },
10428    };
10429    let Some(commit_oid) = commit_oid else {
10430        return Ok(BTreeMap::new());
10431    };
10432    let object = read_expected_object(db, &commit_oid, ObjectType::Commit)?;
10433    let commit = Commit::parse_ref(format, &object.body)?;
10434    let mut entries = BTreeMap::new();
10435    collect_tree_entries(db, format, &commit.tree, &mut entries)?;
10436    Ok(entries)
10437}
10438
10439fn tree_entries(
10440    db: &FileObjectDatabase,
10441    format: ObjectFormat,
10442    tree_oid: &ObjectId,
10443) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
10444    let mut entries = BTreeMap::new();
10445    collect_tree_entries(db, format, tree_oid, &mut entries)?;
10446    Ok(entries)
10447}
10448
10449/// Flatten a tree's blob leaves into `entries`, keyed by full path.
10450///
10451/// Delegates to the canonical [`sley_diff_merge::flatten_tree`] (the local
10452/// recursive flattener was a byte-identical copy) and adapts its
10453/// `(mode, oid)` tuples into this module's [`TrackedEntry`]. Entries already
10454/// present in `entries` are overwritten, matching the previous insert-based
10455/// behaviour.
10456fn collect_tree_entries(
10457    db: &FileObjectDatabase,
10458    format: ObjectFormat,
10459    tree_oid: &ObjectId,
10460    entries: &mut BTreeMap<Vec<u8>, TrackedEntry>,
10461) -> Result<()> {
10462    for (path, (mode, oid)) in sley_diff_merge::flatten_tree(db, format, tree_oid)? {
10463        entries.insert(path, TrackedEntry { mode, oid });
10464    }
10465    Ok(())
10466}
10467
10468/// Like a full worktree walk, but accepts the index's [`IndexStatCache`] so the
10469/// walk can reuse a cached oid for files that are provably unchanged since they
10470/// were staged, skipping the read+filter+hash for those paths. Passing `None`
10471/// hashes every file when no stat cache is supplied.
10472fn worktree_entries_with_stat_cache(
10473    worktree_root: &Path,
10474    git_dir: &Path,
10475    format: ObjectFormat,
10476    stat_cache: Option<&IndexStatCache>,
10477    tracked_paths: Option<&BTreeSet<Vec<u8>>>,
10478    ignores: Option<&mut IgnoreMatcher>,
10479) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
10480    Ok(worktree_entries_with_submodule_dirt(
10481        worktree_root,
10482        git_dir,
10483        format,
10484        stat_cache,
10485        tracked_paths,
10486        ignores,
10487    )?
10488    .0)
10489}
10490
/// Result of a worktree walk that also inspects submodules, as a pair:
///
/// 1. the tracked worktree entries keyed by repo path, and
/// 2. the dirt mask ([`DIRTY_SUBMODULE_MODIFIED`] /
///    [`DIRTY_SUBMODULE_UNTRACKED`]) for every tracked gitlink path whose
///    submodule working tree is dirty.
type WorktreeEntriesWithDirt = (BTreeMap<Vec<u8>, TrackedEntry>, BTreeMap<Vec<u8>, u8>);
10495
/// Status worktree snapshot, as a triple:
///
/// 1. tracked/untracked entries keyed by repo path,
/// 2. gitlink dirt masks keyed by repo path, and
/// 3. the set of tracked paths observed in the worktree.
type StatusWorktreeSnapshot = (
    BTreeMap<Vec<u8>, TrackedEntry>,
    BTreeMap<Vec<u8>, u8>,
    HashSet<Vec<u8>>,
);
10503
10504/// Like [`worktree_entries_with_stat_cache`], but also reports, for every
10505/// tracked gitlink path whose submodule working tree is dirty, the dirt mask
10506/// ([`DIRTY_SUBMODULE_MODIFIED`] / [`DIRTY_SUBMODULE_UNTRACKED`]).
10507fn worktree_entries_with_submodule_dirt(
10508    worktree_root: &Path,
10509    git_dir: &Path,
10510    format: ObjectFormat,
10511    stat_cache: Option<&IndexStatCache>,
10512    tracked_paths: Option<&BTreeSet<Vec<u8>>>,
10513    ignores: Option<&mut IgnoreMatcher>,
10514) -> Result<WorktreeEntriesWithDirt> {
10515    let mut entries = BTreeMap::new();
10516    let mut submodule_dirt_map = BTreeMap::new();
10517    let mut tracked_presence = HashSet::new();
10518    // Worktree blobs are compared to the index by OID, so they must be passed
10519    // through the clean filter (core.autocrlf / .gitattributes) first -- exactly
10520    // as `git add` would store them. With no filter configured this is an exact
10521    // passthrough, so unfiltered repositories see identical OIDs.
10522    let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
10523    // Seed the matcher with the repo-wide sources only; each directory's
10524    // `.gitattributes` is folded in by `collect_worktree_entries` as it descends,
10525    // so the worktree is read exactly once (a separate full-tree attribute pass was
10526    // a second traversal of every directory).
10527    let mut attr_matcher = AttributeMatcher::from_worktree_base(worktree_root);
10528    let attr_requested = filter_attribute_names();
10529    let mut context = WorktreeEntriesWalk {
10530        git_dir,
10531        format,
10532        config: &config,
10533        matcher: &mut attr_matcher,
10534        requested: &attr_requested,
10535        stat_cache,
10536        tracked_paths,
10537        ignores,
10538        entries: &mut entries,
10539        submodule_dirt: &mut submodule_dirt_map,
10540        tracked_presence: &mut tracked_presence,
10541        record_clean_tracked: true,
10542    };
10543    collect_worktree_entries(&mut context, worktree_root, &[])?;
10544    Ok((entries, submodule_dirt_map))
10545}
10546
10547fn status_worktree_entries_with_submodule_dirt(
10548    worktree_root: &Path,
10549    git_dir: &Path,
10550    format: ObjectFormat,
10551    stat_cache: &IndexStatCache,
10552    tracked_paths: Option<&BTreeSet<Vec<u8>>>,
10553    ignores: Option<&mut IgnoreMatcher>,
10554) -> Result<StatusWorktreeSnapshot> {
10555    let mut entries = BTreeMap::new();
10556    let mut submodule_dirt_map = BTreeMap::new();
10557    let mut tracked_presence = HashSet::new();
10558    let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
10559    let mut attr_matcher = AttributeMatcher::from_worktree_base(worktree_root);
10560    let attr_requested = filter_attribute_names();
10561    let mut context = WorktreeEntriesWalk {
10562        git_dir,
10563        format,
10564        config: &config,
10565        matcher: &mut attr_matcher,
10566        requested: &attr_requested,
10567        stat_cache: Some(stat_cache),
10568        tracked_paths,
10569        ignores,
10570        entries: &mut entries,
10571        submodule_dirt: &mut submodule_dirt_map,
10572        tracked_presence: &mut tracked_presence,
10573        record_clean_tracked: false,
10574    };
10575    collect_worktree_entries(&mut context, worktree_root, &[])?;
10576    Ok((entries, submodule_dirt_map, tracked_presence))
10577}
10578
10579fn worktree_entry_for_git_path(
10580    worktree_root: &Path,
10581    git_dir: &Path,
10582    format: ObjectFormat,
10583    git_path: &[u8],
10584    expected_oid: &ObjectId,
10585    expected_mode: u32,
10586    stat_cache: Option<&IndexStatCache>,
10587) -> Result<Option<TrackedEntry>> {
10588    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
10589    let metadata = match fs::symlink_metadata(&absolute) {
10590        Ok(metadata) => metadata,
10591        Err(err)
10592            if matches!(
10593                err.kind(),
10594                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
10595            ) =>
10596        {
10597            return Ok(None);
10598        }
10599        Err(err) => return Err(err.into()),
10600    };
10601
10602    if expected_mode == 0o160000 {
10603        if !metadata.is_dir() {
10604            return Ok(Some(TrackedEntry {
10605                mode: worktree_entry_mode(&metadata),
10606                oid: ObjectId::null(format),
10607            }));
10608        }
10609        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(*expected_oid);
10610        return Ok(Some(TrackedEntry {
10611            mode: 0o160000,
10612            oid,
10613        }));
10614    }
10615
10616    if metadata.is_dir() {
10617        return Ok(Some(TrackedEntry {
10618            mode: worktree_entry_mode(&metadata),
10619            oid: ObjectId::null(format),
10620        }));
10621    }
10622
10623    if !(metadata.is_file() || metadata.file_type().is_symlink()) {
10624        return Ok(Some(TrackedEntry {
10625            mode: worktree_entry_mode(&metadata),
10626            oid: ObjectId::null(format),
10627        }));
10628    }
10629
10630    if let Some(tracked) =
10631        stat_cache.and_then(|cache| cache.reuse_tracked_entry(git_path, &metadata))
10632    {
10633        return Ok(Some(tracked));
10634    }
10635
10636    let mode = worktree_entry_mode(&metadata);
10637    let body = if metadata.file_type().is_symlink() {
10638        symlink_target_bytes(&absolute)?
10639    } else {
10640        let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
10641        let body = fs::read(&absolute)?;
10642        apply_clean_filter(worktree_root, git_dir, &config, git_path, &body)?
10643    };
10644    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
10645    Ok(Some(TrackedEntry { mode, oid }))
10646}
10647
10648fn worktree_entry_for_index_entry_with_attributes(
10649    worktree_root: &Path,
10650    git_dir: &Path,
10651    format: ObjectFormat,
10652    index_entry: &IndexEntry,
10653    stat_cache: &IndexStatCache,
10654    clean_filter: &mut Option<TrackedOnlyCleanFilter>,
10655) -> Result<Option<TrackedEntry>> {
10656    let git_path = index_entry.path.as_bytes();
10657    let expected_mode = index_entry.mode;
10658    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
10659    let metadata = match fs::symlink_metadata(&absolute) {
10660        Ok(metadata) => metadata,
10661        Err(err)
10662            if matches!(
10663                err.kind(),
10664                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
10665            ) =>
10666        {
10667            return Ok(None);
10668        }
10669        Err(err) => return Err(err.into()),
10670    };
10671    let file_type = metadata.file_type();
10672
10673    if expected_mode == 0o160000 {
10674        if !file_type.is_dir() {
10675            return Ok(Some(TrackedEntry {
10676                mode: worktree_entry_mode(&metadata),
10677                oid: ObjectId::null(format),
10678            }));
10679        }
10680        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(index_entry.oid);
10681        return Ok(Some(TrackedEntry {
10682            mode: 0o160000,
10683            oid,
10684        }));
10685    }
10686
10687    if file_type.is_dir() {
10688        return Ok(Some(TrackedEntry {
10689            mode: worktree_entry_mode(&metadata),
10690            oid: ObjectId::null(format),
10691        }));
10692    }
10693
10694    if !(file_type.is_file() || file_type.is_symlink()) {
10695        return Ok(Some(TrackedEntry {
10696            mode: worktree_entry_mode(&metadata),
10697            oid: ObjectId::null(format),
10698        }));
10699    }
10700
10701    if let Some(tracked) = stat_cache.reuse_index_entry(index_entry, &metadata) {
10702        return Ok(Some(tracked));
10703    }
10704
10705    let mode = worktree_entry_mode(&metadata);
10706    let body = if file_type.is_symlink() {
10707        symlink_target_bytes(&absolute)?
10708    } else {
10709        let body = fs::read(&absolute)?;
10710        let clean_filter = tracked_only_clean_filter(clean_filter, worktree_root, git_dir);
10711        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
10712        let checks =
10713            clean_filter
10714                .matcher
10715                .attributes_for_path(git_path, &clean_filter.requested, false);
10716        apply_clean_filter_with_attributes(&clean_filter.config, &checks, git_path, &body)?
10717    };
10718    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
10719    Ok(Some(TrackedEntry { mode, oid }))
10720}
10721
10722fn worktree_entry_for_index_entry_ref_with_attributes(
10723    worktree_root: &Path,
10724    git_dir: &Path,
10725    format: ObjectFormat,
10726    index_entry: &IndexEntryRef<'_>,
10727    stat_cache: &IndexStatCache,
10728    clean_filter: &mut Option<TrackedOnlyCleanFilter>,
10729) -> Result<Option<TrackedEntry>> {
10730    let git_path = index_entry.path;
10731    let expected_mode = index_entry.mode;
10732    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
10733    let metadata = match fs::symlink_metadata(&absolute) {
10734        Ok(metadata) => metadata,
10735        Err(err)
10736            if matches!(
10737                err.kind(),
10738                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
10739            ) =>
10740        {
10741            return Ok(None);
10742        }
10743        Err(err) => return Err(err.into()),
10744    };
10745    let file_type = metadata.file_type();
10746
10747    if expected_mode == 0o160000 {
10748        if !file_type.is_dir() {
10749            return Ok(Some(TrackedEntry {
10750                mode: worktree_entry_mode(&metadata),
10751                oid: ObjectId::null(format),
10752            }));
10753        }
10754        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(index_entry.oid);
10755        return Ok(Some(TrackedEntry {
10756            mode: 0o160000,
10757            oid,
10758        }));
10759    }
10760
10761    if file_type.is_dir() {
10762        return Ok(Some(TrackedEntry {
10763            mode: worktree_entry_mode(&metadata),
10764            oid: ObjectId::null(format),
10765        }));
10766    }
10767
10768    if !(file_type.is_file() || file_type.is_symlink()) {
10769        return Ok(Some(TrackedEntry {
10770            mode: worktree_entry_mode(&metadata),
10771            oid: ObjectId::null(format),
10772        }));
10773    }
10774
10775    if let Some(tracked) = stat_cache.reuse_index_entry_ref(index_entry, &metadata) {
10776        return Ok(Some(tracked));
10777    }
10778
10779    let mode = worktree_entry_mode(&metadata);
10780    let body = if file_type.is_symlink() {
10781        symlink_target_bytes(&absolute)?
10782    } else {
10783        let body = fs::read(&absolute)?;
10784        let clean_filter = tracked_only_clean_filter(clean_filter, worktree_root, git_dir);
10785        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
10786        let checks =
10787            clean_filter
10788                .matcher
10789                .attributes_for_path(git_path, &clean_filter.requested, false);
10790        apply_clean_filter_with_attributes(&clean_filter.config, &checks, git_path, &body)?
10791    };
10792    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
10793    Ok(Some(TrackedEntry { mode, oid }))
10794}
10795
/// Lazily built clean-filter state shared across per-entry worktree lookups
/// that only visit tracked paths (no directory walk feeds the matcher).
struct TrackedOnlyCleanFilter {
    /// Repository configuration handed to the clean filter.
    config: GitConfig,
    /// Attribute matcher; per-directory patterns are added to it on demand by
    /// `read_attribute_dir`.
    matcher: AttributeMatcher,
    /// Attribute names looked up for each path (from `filter_attribute_names`).
    requested: Vec<Vec<u8>>,
    /// Repository-relative directories already handed to
    /// `read_dir_attribute_patterns`, so each one is read at most once. The
    /// worktree root is recorded as the empty path.
    attribute_dirs: BTreeSet<Vec<u8>>,
}
10802
10803impl TrackedOnlyCleanFilter {
10804    fn read_attributes_for_path(&mut self, worktree_root: &Path, git_path: &[u8]) -> Result<()> {
10805        self.read_attribute_dir(worktree_root, &[])?;
10806        let mut prefix = Vec::new();
10807        let mut parts = git_path.split(|byte| *byte == b'/').peekable();
10808        while let Some(part) = parts.next() {
10809            if parts.peek().is_none() {
10810                break;
10811            }
10812            if !prefix.is_empty() {
10813                prefix.push(b'/');
10814            }
10815            prefix.extend_from_slice(part);
10816            self.read_attribute_dir(worktree_root, &prefix)?;
10817        }
10818        Ok(())
10819    }
10820
10821    fn read_attribute_dir(&mut self, worktree_root: &Path, git_path: &[u8]) -> Result<()> {
10822        if !self.attribute_dirs.insert(git_path.to_vec()) {
10823            return Ok(());
10824        }
10825        let dir = if git_path.is_empty() {
10826            worktree_root.to_path_buf()
10827        } else {
10828            worktree_root.join(repo_path_to_os_path(git_path)?)
10829        };
10830        read_dir_attribute_patterns(worktree_root, &dir, &mut self.matcher)
10831    }
10832}
10833
10834fn tracked_only_clean_filter<'a>(
10835    clean_filter: &'a mut Option<TrackedOnlyCleanFilter>,
10836    worktree_root: &Path,
10837    git_dir: &Path,
10838) -> &'a mut TrackedOnlyCleanFilter {
10839    if clean_filter.is_none() {
10840        *clean_filter = Some(TrackedOnlyCleanFilter {
10841            config: sley_config::read_repo_config(git_dir, None).unwrap_or_default(),
10842            matcher: AttributeMatcher::from_worktree_base(worktree_root),
10843            requested: filter_attribute_names(),
10844            attribute_dirs: BTreeSet::new(),
10845        });
10846    }
10847    clean_filter
10848        .as_mut()
10849        .expect("tracked-only clean filter initialized")
10850}
10851
10852fn tracked_only_clean_filter_with_config<'a>(
10853    clean_filter: &'a mut Option<TrackedOnlyCleanFilter>,
10854    worktree_root: &Path,
10855    config: &GitConfig,
10856) -> &'a mut TrackedOnlyCleanFilter {
10857    if clean_filter.is_none() {
10858        *clean_filter = Some(TrackedOnlyCleanFilter {
10859            config: config.clone(),
10860            matcher: AttributeMatcher::from_worktree_base(worktree_root),
10861            requested: filter_attribute_names(),
10862            attribute_dirs: BTreeSet::new(),
10863        });
10864    }
10865    clean_filter
10866        .as_mut()
10867        .expect("tracked-only clean filter initialized")
10868}
10869
/// Inputs, caches, and output collections threaded through the recursive
/// `collect_worktree_entries` walk.
struct WorktreeEntriesWalk<'a> {
    /// The repository's git directory; the walk never enters or lists it.
    git_dir: &'a Path,
    /// Object format used for hashing blobs and for null oids.
    format: ObjectFormat,
    /// Repository configuration handed to the clean filter.
    config: &'a GitConfig,
    /// Attribute matcher; each visited directory's patterns are folded in
    /// before its files are processed.
    matcher: &'a mut AttributeMatcher,
    /// Attribute names looked up for each hashed file.
    requested: &'a [Vec<u8>],
    /// Optional stat cache used for gitlink lookups, for reusing known entries
    /// without hashing, and for telling tracked paths from untracked ones.
    stat_cache: Option<&'a IndexStatCache>,
    /// When set, restricts the walk to these paths and to directories that may
    /// contain one of them.
    tracked_paths: Option<&'a BTreeSet<Vec<u8>>>,
    /// Optional ignore matcher; each visited directory's ignore patterns are
    /// folded in as the walk descends.
    ignores: Option<&'a mut IgnoreMatcher>,
    /// Output: worktree entries keyed by repository-relative path.
    entries: &'a mut BTreeMap<Vec<u8>, TrackedEntry>,
    /// Dirt masks for tracked gitlink paths whose submodule worktree is dirty.
    submodule_dirt: &'a mut BTreeMap<Vec<u8>, u8>,
    /// Output: tracked paths that were found on disk during the walk.
    tracked_presence: &'a mut HashSet<Vec<u8>>,
    /// When `false`, a tracked entry that equals what the stat cache reports
    /// for its path is left out of `entries`.
    record_clean_tracked: bool,
}
10885
10886impl WorktreeEntriesWalk<'_> {
10887    fn mark_tracked_present(&mut self, git_path: &[u8]) {
10888        self.tracked_presence.insert(git_path.to_vec());
10889    }
10890
10891    fn tracked_entry_for(&self, git_path: &[u8]) -> Option<TrackedEntry> {
10892        self.stat_cache
10893            .and_then(|cache| cache.tracked_entry(git_path))
10894    }
10895
10896    fn should_record_tracked_entry(&self, git_path: &[u8], entry: &TrackedEntry) -> bool {
10897        self.record_clean_tracked
10898            || self
10899                .tracked_entry_for(git_path)
10900                .is_none_or(|tracked| tracked != *entry)
10901    }
10902}
10903
10904fn git_path_append_component(parent: &[u8], component: &std::ffi::OsStr) -> Vec<u8> {
10905    let component = os_str_component_bytes(component);
10906    let separator = usize::from(!parent.is_empty());
10907    let mut path = Vec::with_capacity(parent.len() + separator + component.len());
10908    if !parent.is_empty() {
10909        path.extend_from_slice(parent);
10910        path.push(b'/');
10911    }
10912    path.extend_from_slice(component.as_ref());
10913    path
10914}
10915
10916fn git_path_push_component(path: &mut Vec<u8>, component: &std::ffi::OsStr) -> usize {
10917    let original_len = path.len();
10918    let component = os_str_component_bytes(component);
10919    if !path.is_empty() {
10920        path.push(b'/');
10921    }
10922    path.extend_from_slice(component.as_ref());
10923    original_len
10924}
10925
/// Unix: a path component's bytes are the `OsStr`'s own bytes, borrowed
/// without copying or re-encoding.
#[cfg(unix)]
fn os_str_component_bytes(component: &std::ffi::OsStr) -> Cow<'_, [u8]> {
    let raw = std::os::unix::ffi::OsStrExt::as_bytes(component);
    Cow::Borrowed(raw)
}
10932
/// Non-Unix: converts the component through `to_string_lossy`, so the result
/// is always UTF-8 and any part of the name that is not valid Unicode is
/// replaced with U+FFFD rather than preserved.
#[cfg(not(unix))]
fn os_str_component_bytes(component: &std::ffi::OsStr) -> Cow<'_, [u8]> {
    Cow::Owned(component.to_string_lossy().into_owned().into_bytes())
}
10937
/// Recursively walks `dir` (whose repository-relative path is `dir_git_path`)
/// and records what it finds into `context`.
///
/// * The git directory itself and any entry named `.git` are skipped.
/// * An ignored entry is skipped, except that an ignored directory which may
///   still contain tracked paths is descended into.
/// * A directory the stat cache knows as a gitlink is never descended into; it
///   is reported as a `0o160000` entry. Any other directory that is a nested
///   repository boundary is reported as a `0o040000` entry with a null oid.
///   All remaining directories are walked recursively.
/// * A regular file or symlink is reported from the stat cache when the cache
///   can vouch for it, by presence only (null oid) when the cache does not
///   contain it, and by hashing its (clean-filtered) content otherwise.
/// * Anything that is neither a directory, a regular file, nor a symlink is
///   skipped.
///
/// When `context.tracked_paths` is set, files outside the set and directories
/// that cannot contain a member of it are skipped.
///
/// # Errors
///
/// Propagates errors from reading directories, metadata, attribute/ignore
/// pattern files, file contents, the clean filter, and object hashing.
fn collect_worktree_entries(
    context: &mut WorktreeEntriesWalk<'_>,
    dir: &Path,
    dir_git_path: &[u8],
) -> Result<()> {
    if is_same_path(dir, context.git_dir) {
        return Ok(());
    }
    // Fold this directory's `.gitattributes` into the matcher before processing its
    // files, so lookups for files here (and below) see it. This is what lets the
    // walk read the tree once instead of doing a separate full-tree attribute pass.
    read_dir_attribute_patterns_for_base(dir, dir_git_path, context.matcher)?;
    if let Some(ignores) = context.ignores.as_deref_mut() {
        read_dir_ignore_patterns_for_base(dir, dir_git_path, ignores)?;
    }
    for entry in fs::read_dir(dir)? {
        let entry = entry?;
        let file_name = entry.file_name();
        let path = entry.path();
        if is_dot_git_entry(&path) {
            continue;
        }
        if is_same_path(&path, context.git_dir) {
            continue;
        }
        // `DirEntry::metadata` does not traverse symlinks, so a symlink is seen
        // as a symlink here rather than as whatever it points at.
        let metadata = entry.metadata()?;
        let git_path = git_path_append_component(dir_git_path, &file_name);
        if context
            .ignores
            .as_ref()
            .is_some_and(|ignores| ignores.is_ignored(&git_path, metadata.is_dir()))
        {
            // An ignored directory is still walked when tracked paths may live
            // beneath it; the ignored entry itself is never recorded.
            if metadata.is_dir()
                && context.tracked_paths.is_some_and(|tracked_paths| {
                    tracked_paths_may_contain(tracked_paths, &git_path)
                })
            {
                collect_worktree_entries(context, &path, &git_path)?;
            }
            continue;
        }
        if metadata.is_dir() {
            // A directory staged as a gitlink (mode 160000) is opaque: the walk
            // never descends into it. Its worktree "content" is the commit the
            // embedded repository has checked out (upstream ce_compare_gitlink):
            // a populated submodule reports its HEAD (plus a dirt mask when its
            // own tree has modified/untracked content); an unpopulated
            // directory — no repository, or no commit checked out — always
            // matches the staged oid.
            if let Some(index_entry) = context
                .stat_cache
                .and_then(|cache| cache.gitlink_entry(&git_path))
            {
                context.mark_tracked_present(&git_path);
                let oid = sley_diff_merge::gitlink_head_oid(&path, context.format)
                    .unwrap_or(index_entry.oid);
                let dirt = submodule_dirt(&path);
                if dirt != 0 {
                    context.submodule_dirt.insert(git_path.clone(), dirt);
                }
                let tracked = TrackedEntry {
                    mode: 0o160000,
                    oid,
                };
                if dirt != 0 || context.should_record_tracked_entry(&git_path, &tracked) {
                    context.entries.insert(git_path, tracked);
                }
                continue;
            }
            if is_nested_repository_boundary(&path) {
                if let Some(tracked_paths) = context.tracked_paths
                    && !tracked_paths_may_contain(tracked_paths, &git_path)
                {
                    continue;
                }
                context.entries.insert(
                    git_path,
                    TrackedEntry {
                        mode: 0o040000,
                        oid: ObjectId::null(context.format),
                    },
                );
                continue;
            }
            if let Some(tracked_paths) = context.tracked_paths
                && !tracked_paths_may_contain(tracked_paths, &git_path)
            {
                continue;
            }
            collect_worktree_entries(context, &path, &git_path)?;
        } else if metadata.is_file() || metadata.file_type().is_symlink() {
            if let Some(tracked_paths) = context.tracked_paths
                && !tracked_paths.contains(&git_path)
            {
                continue;
            }
            let entry_mode = worktree_entry_mode(&metadata);
            // git's racy-git stat shortcut: when the index's cached stat proves
            // this file is unchanged since it was staged, reuse the staged oid
            // and skip the read+filter+hash entirely. `reuse_tracked_entry`
            // returns `Some` ONLY for a non-racy size+mtime+mode match, so a
            // modified file always falls through to the full hash below and is
            // never silently reported clean.
            if let Some(tracked) = context
                .stat_cache
                .and_then(|cache| cache.reuse_tracked_entry(&git_path, &metadata))
            {
                context.mark_tracked_present(&git_path);
                if context.record_clean_tracked {
                    context.entries.insert(git_path, tracked);
                }
                continue;
            }
            // A file absent from the index is untracked: status and the
            // index-vs-worktree diff report it by *presence* (`??` / nothing), never
            // by content, so computing its oid is wasted work — git never hashes
            // untracked files. Record presence with a null oid and skip the
            // read+filter+hash. Without a stat cache we cannot tell tracked from
            // untracked, so fall through and hash as before.
            if context
                .stat_cache
                .is_some_and(|cache| !cache.contains(&git_path))
            {
                context.entries.insert(
                    git_path,
                    TrackedEntry {
                        mode: entry_mode,
                        oid: ObjectId::null(context.format),
                    },
                );
                continue;
            }
            let body = if metadata.file_type().is_symlink() {
                // The blob for a symlink is the raw link target; clean filters
                // never apply because git treats symlink content as opaque.
                symlink_target_bytes(&path)?
            } else {
                let body = fs::read(&path)?;
                // Resolve this path's attributes against the prebuilt matcher (a cheap
                // pattern match) and apply the clean filter -- no per-file matcher
                // rebuild. With no attributes/autocrlf configured this is an exact
                // passthrough, so the stored OID is unchanged.
                let checks =
                    context
                        .matcher
                        .attributes_for_path(&git_path, context.requested, false);
                apply_clean_filter_with_attributes(context.config, &checks, &git_path, &body)?
            };
            let oid = EncodedObject::new(ObjectType::Blob, body).object_id(context.format)?;
            let tracked = TrackedEntry {
                mode: entry_mode,
                oid,
            };
            // With a stat cache, reaching this point means the cache contains the
            // path (the not-contained case continued above), so the `else` arm is
            // the cache-less walk, which records every hashed entry.
            if context
                .stat_cache
                .is_some_and(|cache| cache.contains(&git_path))
            {
                context.mark_tracked_present(&git_path);
                if context.should_record_tracked_entry(&git_path, &tracked) {
                    context.entries.insert(git_path, tracked);
                }
            } else {
                context.entries.insert(git_path, tracked);
            }
        }
    }
    Ok(())
}
11106
/// Whether `directory` is itself a tracked path or at least one tracked path
/// lies beneath it (that is, starts with `directory/`).
fn tracked_paths_may_contain(tracked_paths: &BTreeSet<Vec<u8>>, directory: &[u8]) -> bool {
    use std::ops::Bound;

    if tracked_paths.contains(directory) {
        return true;
    }
    // The set is ordered bytewise, so every path starting with `directory/`
    // sorts at or after that prefix: inspecting the first candidate at or
    // after it is enough.
    let prefix = [directory, &b"/"[..]].concat();
    let lower = Bound::Included(prefix.as_slice());
    let first_candidate = tracked_paths
        .range::<[u8], _>((lower, Bound::Unbounded))
        .next();
    matches!(first_candidate, Some(path) if path.starts_with(&prefix))
}
11122
/// Whether `left` and `right` consist of the same path components. The
/// comparison is purely lexical: nothing is resolved against the filesystem.
fn is_same_path(left: &Path, right: &Path) -> bool {
    left.components().eq(right.components())
}
11126
/// Whether the last component of `path` is exactly `.git`. Git never reports a
/// `.git` entry as untracked content, whatever its depth: the repository's own
/// `.git`, a submodule's gitlink file, or the `.git` directory of an embedded
/// repository.
fn is_dot_git_entry(path: &Path) -> bool {
    matches!(path.file_name(), Some(name) if name == ".git")
}
11133
11134/// Whether `path` is a directory containing an embedded repository's `.git`
11135/// *directory*, or a `.git` file whose `gitdir:` pointer resolves to an
11136/// existing directory (a submodule worktree). Git treats both as a repository
11137/// boundary (listing the directory as `dir/`); an *invalid* `.git` file (no
11138/// resolvable `gitdir:` target) is not a boundary — Git descends into the
11139/// directory and lists its other untracked contents normally.
11140fn is_nested_repository_boundary(path: &Path) -> bool {
11141    if path.join(".git").is_dir() {
11142        return true;
11143    }
11144    sley_diff_merge::gitlink_git_dir(path).is_some()
11145}
11146
/// Whether `path` is, or lies inside, the `.git` directory of a repository
/// embedded somewhere below `root`. The top-level `root/.git` does not count,
/// and a `.git` component only counts when it exists on disk as a directory.
/// Paths outside `root` are never internals.
fn is_embedded_git_internals(root: &Path, path: &Path) -> bool {
    let Ok(relative) = path.strip_prefix(root) else {
        return false;
    };
    // Directory that contains the component currently being examined.
    let mut parent = PathBuf::from(root);
    for component in relative.components() {
        let names_dot_git = match component {
            std::path::Component::Normal(name) => name == ".git",
            _ => false,
        };
        if names_dot_git && parent != root && parent.join(".git").is_dir() {
            return true;
        }
        parent.push(component);
    }
    false
}
11164
11165fn worktree_entry_mode(metadata: &fs::Metadata) -> u32 {
11166    if metadata.file_type().is_symlink() {
11167        0o120000
11168    } else if metadata.is_dir() {
11169        0o040000
11170    } else {
11171        file_mode(metadata)
11172    }
11173}
11174
11175fn worktree_path(root: &Path, path: &[u8]) -> Result<PathBuf> {
11176    let text = std::str::from_utf8(path).map_err(|err| GitError::InvalidPath(err.to_string()))?;
11177    let relative = PathBuf::from(text);
11178    if relative.is_absolute()
11179        || relative.components().any(|component| {
11180            matches!(
11181                component,
11182                std::path::Component::ParentDir | std::path::Component::Prefix(_)
11183            )
11184        })
11185    {
11186        return Err(GitError::InvalidPath(format!(
11187            "invalid worktree path {text}"
11188        )));
11189    }
11190    Ok(root.join(relative))
11191}
11192
11193fn remove_worktree_file(root: &Path, path: &[u8]) -> Result<()> {
11194    let file = worktree_path(root, path)?;
11195    if !file.exists() {
11196        return Ok(());
11197    }
11198    if file.is_dir() {
11199        // A tracked path that is a directory on disk is a gitlink: upstream
11200        // checkout/reset never recurses into a submodule's working tree. It
11201        // rmdirs the path when empty (remove_scheduled_dirs) and leaves a
11202        // populated submodule in place.
11203        match fs::remove_dir(&file) {
11204            Ok(()) => prune_empty_parents(root, file.parent())?,
11205            Err(err) if err.kind() == std::io::ErrorKind::DirectoryNotEmpty => {}
11206            Err(err) => return Err(err.into()),
11207        }
11208        return Ok(());
11209    }
11210    fs::remove_file(&file)?;
11211    prune_empty_parents(root, file.parent())?;
11212    Ok(())
11213}
11214
11215fn prune_empty_parents(root: &Path, mut dir: Option<&Path>) -> Result<()> {
11216    while let Some(path) = dir {
11217        if path == root {
11218            break;
11219        }
11220        match fs::remove_dir(path) {
11221            Ok(()) => dir = path.parent(),
11222            Err(err) if err.kind() == std::io::ErrorKind::NotFound => dir = path.parent(),
11223            Err(err) if err.kind() == std::io::ErrorKind::DirectoryNotEmpty => break,
11224            Err(err) => return Err(err.into()),
11225        }
11226    }
11227    Ok(())
11228}
11229
11230fn git_tree_entry_cmp(
11231    left_name: &[u8],
11232    left_mode: u32,
11233    right_name: &[u8],
11234    right_mode: u32,
11235) -> Ordering {
11236    let shared = left_name.len().min(right_name.len());
11237    let name_order = left_name[..shared].cmp(&right_name[..shared]);
11238    if name_order != Ordering::Equal {
11239        return name_order;
11240    }
11241    let left_end = left_name.len() == shared;
11242    let right_end = right_name.len() == shared;
11243    match (left_end, right_end) {
11244        (true, true) => Ordering::Equal,
11245        (true, false) => tree_name_terminator(left_mode).cmp(&right_name[shared]),
11246        (false, true) => left_name[shared].cmp(&tree_name_terminator(right_mode)),
11247        (false, false) => Ordering::Equal,
11248    }
11249}
11250
/// The byte a tree-entry name is considered to end with when names are
/// ordered: `/` for a tree (mode `0o040000`), NUL for everything else.
fn tree_name_terminator(mode: u32) -> u8 {
    match mode {
        0o040000 => b'/',
        _ => 0,
    }
}
11254
/// Unix: the git file mode for a non-directory, non-symlink entry —
/// `0o100755` when any execute bit (user, group, or other) is set, `0o100644`
/// otherwise.
#[cfg(unix)]
fn file_mode(metadata: &fs::Metadata) -> u32 {
    use std::os::unix::fs::PermissionsExt;
    let execute_bits = metadata.permissions().mode() & 0o111;
    match execute_bits {
        0 => 0o100644,
        _ => 0o100755,
    }
}
11264
/// Non-Unix: every non-directory, non-symlink entry is reported as a regular,
/// non-executable file (`0o100644`); the metadata is not consulted.
#[cfg(not(unix))]
fn file_mode(_metadata: &fs::Metadata) -> u32 {
    0o100644
}
11269
11270/// The blob content git stores for a symlink: the raw bytes of the link target
11271/// exactly as `readlink(2)` returns them. On Unix the target is an opaque byte
11272/// string, so we take the `OsStr` bytes verbatim (no UTF-8 round-trip, no path
11273/// re-componentization that could rewrite separators).
11274#[cfg(unix)]
11275fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
11276    use std::os::unix::ffi::OsStrExt;
11277    let target = fs::read_link(path)?;
11278    Ok(target.as_os_str().as_bytes().to_vec())
11279}
11280
/// Non-Unix: returns the blob content for the symlink at `path` — its target
/// converted with `to_string_lossy` (so non-Unicode parts become U+FFFD) and
/// with every `\` rewritten to `/`.
#[cfg(not(unix))]
fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
    let target = fs::read_link(path)?;
    // git normalizes symlink targets to forward slashes on platforms whose
    // native separator is `\`.
    Ok(target.to_string_lossy().replace('\\', "/").into_bytes())
}
11288
11289fn git_path_bytes(path: &Path) -> Result<Vec<u8>> {
11290    if path.components().any(|component| {
11291        matches!(
11292            component,
11293            std::path::Component::ParentDir | std::path::Component::Prefix(_)
11294        )
11295    }) {
11296        return Err(GitError::InvalidPath(format!(
11297            "invalid index path {}",
11298            path.display()
11299        )));
11300    }
11301    Ok(path
11302        .components()
11303        .filter_map(|component| match component {
11304            std::path::Component::Normal(value) => Some(value.to_string_lossy().into_owned()),
11305            _ => None,
11306        })
11307        .collect::<Vec<_>>()
11308        .join("/")
11309        .into_bytes())
11310}
11311
11312fn repo_path_to_os_path(path: &[u8]) -> Result<PathBuf> {
11313    #[cfg(unix)]
11314    {
11315        use std::os::unix::ffi::OsStrExt;
11316
11317        Ok(PathBuf::from(std::ffi::OsStr::from_bytes(path)))
11318    }
11319
11320    #[cfg(not(unix))]
11321    {
11322        let path = std::str::from_utf8(path)
11323            .map_err(|_| GitError::InvalidPath("index path is not utf8".into()))?;
11324        Ok(path.split('/').collect())
11325    }
11326}
11327
11328fn git_path_to_relative_path(path: &[u8]) -> Result<PathBuf> {
11329    let path = std::str::from_utf8(path)
11330        .map_err(|err| GitError::InvalidPath(format!("invalid utf-8 index path: {err}")))?;
11331    Ok(path.split('/').collect())
11332}
11333
/// Whether `path`, as written, ends with the platform's main path separator.
/// The check is made on the lossy string form of the path.
fn path_has_trailing_separator(path: &Path) -> bool {
    let text = path.as_os_str().to_string_lossy();
    text.chars().next_back() == Some(std::path::MAIN_SEPARATOR)
}
11339
11340#[cfg(test)]
11341mod tests {
11342    use super::*;
11343    use sley_odb::ObjectReader;
11344    use std::sync::atomic::{AtomicU64, Ordering};
11345
11346    static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
11347
11348    #[test]
11349    fn atomic_metadata_writer_writes_and_reports_stat() {
11350        let root = temp_root();
11351        let path = root.join(".git").join("HEAD");
11352
11353        let result = write_metadata_file_atomic(
11354            &path,
11355            b"ref: refs/heads/main\n",
11356            AtomicMetadataWriteOptions::default(),
11357        )
11358        .expect("write metadata");
11359
11360        assert_eq!(
11361            fs::read(&path).expect("read metadata"),
11362            b"ref: refs/heads/main\n"
11363        );
11364        assert_eq!(result.path, path);
11365        assert_eq!(result.len, b"ref: refs/heads/main\n".len() as u64);
11366        assert!(result.mtime.is_some());
11367        assert!(!path.with_file_name("HEAD.lock").exists());
11368        fs::remove_dir_all(root).expect("test operation should succeed");
11369    }
11370
11371    #[test]
11372    fn atomic_metadata_writer_existing_lock_preserves_original() {
11373        let root = temp_root();
11374        let git_dir = root.join(".git");
11375        fs::create_dir_all(&git_dir).expect("create git dir");
11376        let path = git_dir.join("HEAD");
11377        let lock = git_dir.join("HEAD.lock");
11378        fs::write(&path, b"ref: refs/heads/main\n").expect("write original");
11379        fs::write(&lock, b"held\n").expect("write lock");
11380
11381        let err = write_metadata_file_atomic(
11382            &path,
11383            b"ref: refs/heads/other\n",
11384            AtomicMetadataWriteOptions::default(),
11385        )
11386        .expect_err("held lock must fail");
11387
11388        assert!(matches!(err, GitError::Transaction(_)));
11389        assert_eq!(
11390            fs::read(&path).expect("read original"),
11391            b"ref: refs/heads/main\n"
11392        );
11393        assert_eq!(fs::read(&lock).expect("read lock"), b"held\n");
11394        fs::remove_dir_all(root).expect("test operation should succeed");
11395    }
11396
11397    // --- `ls-files --eol` stat/attr helpers (mirror convert.c) ---------------
11398
11399    #[test]
11400    fn convert_stats_ascii_classifies_eol_content() {
11401        assert_eq!(convert_stats_ascii(b""), "none");
11402        assert_eq!(convert_stats_ascii(b"abc"), "none");
11403        assert_eq!(convert_stats_ascii(b"a\nb\n"), "lf");
11404        assert_eq!(convert_stats_ascii(b"a\r\nb\r\n"), "crlf");
11405        assert_eq!(convert_stats_ascii(b"a\r\nb\n"), "mixed");
11406        // A lone CR makes the content binary (-text), matching git.
11407        assert_eq!(convert_stats_ascii(b"a\rb"), "-text");
11408        // A NUL byte is binary.
11409        assert_eq!(convert_stats_ascii(b"a\0b\n"), "-text");
11410        // A trailing ^Z (EOF) is not counted as non-printable.
11411        assert_eq!(convert_stats_ascii(b"abc\n\x1a"), "lf");
11412    }
11413
11414    fn attr_check(name: &[u8], state: Option<AttributeState>) -> AttributeCheck {
11415        AttributeCheck {
11416            attribute: name.to_vec(),
11417            state,
11418        }
11419    }
11420
11421    #[test]
11422    fn convert_attr_ascii_matches_git_attr_action() {
11423        // No attributes at all: empty attr field.
11424        assert_eq!(convert_attr_ascii(&[]), "");
11425        // text (set) -> "text"; -text (unset) -> "-text".
11426        assert_eq!(
11427            convert_attr_ascii(&[attr_check(b"text", Some(AttributeState::Set))]),
11428            "text"
11429        );
11430        assert_eq!(
11431            convert_attr_ascii(&[attr_check(b"text", Some(AttributeState::Unset))]),
11432            "-text"
11433        );
11434        // text=auto -> "text=auto"; with eol=crlf/lf the AUTO variants.
11435        assert_eq!(
11436            convert_attr_ascii(&[attr_check(
11437                b"text",
11438                Some(AttributeState::Value(b"auto".to_vec()))
11439            )]),
11440            "text=auto"
11441        );
11442        assert_eq!(
11443            convert_attr_ascii(&[
11444                attr_check(b"text", Some(AttributeState::Value(b"auto".to_vec()))),
11445                attr_check(b"eol", Some(AttributeState::Value(b"crlf".to_vec()))),
11446            ]),
11447            "text=auto eol=crlf"
11448        );
11449        assert_eq!(
11450            convert_attr_ascii(&[
11451                attr_check(b"text", Some(AttributeState::Value(b"auto".to_vec()))),
11452                attr_check(b"eol", Some(AttributeState::Value(b"lf".to_vec()))),
11453            ]),
11454            "text=auto eol=lf"
11455        );
11456        // eol=crlf/lf alone (no text) forces text + the eol direction.
11457        assert_eq!(
11458            convert_attr_ascii(&[attr_check(
11459                b"eol",
11460                Some(AttributeState::Value(b"crlf".to_vec()))
11461            )]),
11462            "text eol=crlf"
11463        );
11464        assert_eq!(
11465            convert_attr_ascii(&[attr_check(
11466                b"eol",
11467                Some(AttributeState::Value(b"lf".to_vec()))
11468            )]),
11469            "text eol=lf"
11470        );
11471        // -text overrides any eol attribute (binary wins).
11472        assert_eq!(
11473            convert_attr_ascii(&[
11474                attr_check(b"text", Some(AttributeState::Unset)),
11475                attr_check(b"eol", Some(AttributeState::Value(b"crlf".to_vec()))),
11476            ]),
11477            "-text"
11478        );
11479    }
11480
11481    #[test]
11482    fn smudge_safety_guard_skips_irreversible_autocrlf() {
11483        // text=auto eol=crlf (AUTO_CRLF): convert pure-LF, but leave content
11484        // alone when it already has a CR or CRLF, or is binary.
11485        let auto = ContentFilterPlan {
11486            text: TextDecision::Auto,
11487            eol: EolConversion::Crlf,
11488            driver: None,
11489        };
11490        assert!(auto.will_convert_lf_to_crlf(b"a\nb\n"));
11491        assert!(!auto.will_convert_lf_to_crlf(b"a\r\nb\n")); // has CRLF
11492        assert!(!auto.will_convert_lf_to_crlf(b"a\nb\rc")); // lone CR (binary)
11493        assert!(!auto.will_convert_lf_to_crlf(b"abc")); // no naked LF
11494
11495        // text eol=crlf (TEXT_CRLF): no safety guard — always convert naked LF
11496        // even when a CR/CRLF is already present.
11497        let text = ContentFilterPlan {
11498            text: TextDecision::Text,
11499            eol: EolConversion::Crlf,
11500            driver: None,
11501        };
11502        assert!(text.will_convert_lf_to_crlf(b"a\r\nb\nc\n"));
11503        assert!(!text.will_convert_lf_to_crlf(b"a\r\nb\r\n")); // no naked LF
11504    }
11505
11506    /// Build an in-memory ignore matcher from raw `.gitignore` lines (no disk).
11507    fn ignore_matcher(patterns: &[&[u8]]) -> IgnoreMatcher {
11508        let mut matcher = IgnoreMatcher::default();
11509        let owned: Vec<Vec<u8>> = patterns.iter().map(|p| p.to_vec()).collect();
11510        matcher.extend_patterns(&owned);
11511        matcher
11512    }
11513
11514    #[test]
11515    fn ignore_match_kind_fast_paths_match_the_wildcard_engine() {
11516        // Literal: exact basename anywhere; not a superstring.
11517        let matcher = ignore_matcher(&[b"Pods"]);
11518        assert!(matcher.is_ignored(b"a/b/Pods", true));
11519        assert!(matcher.is_ignored(b"Pods", false));
11520        assert!(!matcher.is_ignored(b"Pods_not", false));
11521        assert!(matches!(
11522            classify_ignore_pattern(b"Pods"),
11523            MatchKind::Literal
11524        ));
11525
11526        // Suffix `*.log`: basename ending in `.log` at any depth.
11527        let matcher = ignore_matcher(&[b"*.log"]);
11528        assert!(matcher.is_ignored(b"x.log", false));
11529        assert!(matcher.is_ignored(b"a/b/x.log", false));
11530        assert!(matcher.is_ignored(b".log", false));
11531        assert!(!matcher.is_ignored(b"x.logx", false));
11532        assert!(matches!(
11533            classify_ignore_pattern(b"*.log"),
11534            MatchKind::Suffix
11535        ));
11536
11537        // Prefix `build*`: basename starting with `build`.
11538        let matcher = ignore_matcher(&[b"build*"]);
11539        assert!(matcher.is_ignored(b"buildfoo", false));
11540        assert!(matcher.is_ignored(b"a/build", false));
11541        assert!(!matcher.is_ignored(b"xbuild", false));
11542        assert!(matches!(
11543            classify_ignore_pattern(b"build*"),
11544            MatchKind::Prefix
11545        ));
11546    }
11547
11548    #[test]
11549    fn ignore_anchored_suffix_does_not_cross_slash() {
11550        // `/*.log` is anchored: matches `.log` files only at the matcher base,
11551        // never in a subdirectory — the slash guard in `match_segment`.
11552        let matcher = ignore_matcher(&[b"/*.log"]);
11553        assert!(matcher.is_ignored(b"x.log", false));
11554        assert!(!matcher.is_ignored(b"sub/x.log", false));
11555
11556        // Anchored literal likewise only matches at root.
11557        let matcher = ignore_matcher(&[b"/foo"]);
11558        assert!(matcher.is_ignored(b"foo", false));
11559        assert!(!matcher.is_ignored(b"a/foo", false));
11560    }
11561
11562    #[test]
11563    fn ignore_anchored_directory_glob_matches_root_directory() {
11564        let matcher = ignore_matcher(&[b"/tmp-*/"]);
11565        assert!(matcher.is_ignored(b"tmp-info-only", true));
11566        assert!(matcher.is_ignored(b"tmp-info-only/file.txt", false));
11567        assert!(!matcher.is_ignored(b"nested/tmp-info-only", true));
11568        assert!(!matcher.is_ignored(b"tmp-info-only", false));
11569    }
11570
11571    #[test]
11572    fn ignore_negated_directory_glob_does_not_reinclude_files() {
11573        // t0008-ignores "directories and ** matches": a negated directory-only
11574        // pattern re-includes *directories* but never the *files* inside them
11575        // (git: re-including a dir with `!dir/` still needs an explicit
11576        // `!dir/*` to reach its files). Verified against git 2.54 check-ignore:
11577        //   data/file              -> data/**           (ignored)
11578        //   data/data1/file1       -> data/**           (ignored, NOT !data/**/)
11579        //   data/data1/file1.txt   -> !data/**/*.txt    (re-included)
11580        //   data/data1   (dir)     -> !data/**/         (re-included)
11581        let matcher = ignore_matcher(&[b"data/**", b"!data/**/", b"!data/**/*.txt"]);
11582        // Files stay ignored: `!data/**/` must not win the file leaf scan.
11583        assert!(matcher.is_ignored(b"data/file", false));
11584        assert!(matcher.is_ignored(b"data/data1/file1", false));
11585        assert!(matcher.is_ignored(b"data/data2/file2", false));
11586        // `.txt` files are re-included by the explicit non-dir negation.
11587        assert!(!matcher.is_ignored(b"data/data1/file1.txt", false));
11588        assert!(!matcher.is_ignored(b"data/data2/file2.txt", false));
11589        // Directories ARE re-included by `!data/**/` (the directory-glob gain
11590        // from `fix: match git status ignored directory globs`).
11591        assert!(!matcher.is_ignored(b"data/data1", true));
11592        assert!(!matcher.is_ignored(b"data/data2", true));
11593    }
11594
11595    #[test]
11596    fn ignore_double_star_prefix_collapses_to_basename() {
11597        // `**/X` ≡ `X` for slash-free X (verified against `git check-ignore`).
11598        let matcher = ignore_matcher(&[b"**/Pods"]);
11599        assert!(matcher.is_ignored(b"a/b/Pods", true));
11600        assert!(matcher.is_ignored(b"Pods", true));
11601        assert!(!matcher.is_ignored(b"Pods_not", false));
11602
11603        let matcher = ignore_matcher(&[b"**/*.jks"]);
11604        assert!(matcher.is_ignored(b"x.jks", false));
11605        assert!(matcher.is_ignored(b"a/deep/y.jks", false));
11606        assert!(!matcher.is_ignored(b"x.jksx", false));
11607
11608        // `**/A/B` keeps a slash in the tail, so it stays a real glob and must
11609        // match the trailing path at any depth.
11610        let matcher = ignore_matcher(&[b"**/Flutter/ephemeral"]);
11611        assert!(matcher.is_ignored(b"Flutter/ephemeral", true));
11612        assert!(matcher.is_ignored(b"a/Flutter/ephemeral", true));
11613        assert!(!matcher.is_ignored(b"Flutter/other", true));
11614    }
11615
11616    #[test]
11617    fn ignore_complex_globs_still_use_the_engine() {
11618        let matcher = ignore_matcher(&[b"*.[Cc]ache"]);
11619        assert!(matcher.is_ignored(b"x.cache", false));
11620        assert!(matcher.is_ignored(b"x.Cache", false));
11621        assert!(!matcher.is_ignored(b"x.xache", false));
11622        assert!(matches!(
11623            classify_ignore_pattern(b"*.[Cc]ache"),
11624            MatchKind::Glob
11625        ));
11626
11627        let matcher = ignore_matcher(&[b"Icon?"]);
11628        assert!(matcher.is_ignored(b"IconA", false));
11629        assert!(!matcher.is_ignored(b"Icon", false));
11630        assert!(!matcher.is_ignored(b"IconAB", false));
11631
11632        // Multi-star is not a simple prefix/suffix.
11633        assert!(matches!(
11634            classify_ignore_pattern(b"app.*.symbols"),
11635            MatchKind::Glob
11636        ));
11637        assert!(matches!(classify_ignore_pattern(b"a*b*c"), MatchKind::Glob));
11638    }
11639
11640    #[test]
11641    fn ignore_negation_still_applies_after_fast_paths() {
11642        // Last match wins: a negated literal un-ignores a suffix-matched file.
11643        let matcher = ignore_matcher(&[b"*.log", b"!keep.log"]);
11644        assert!(matcher.is_ignored(b"a/x.log", false));
11645        assert!(!matcher.is_ignored(b"a/keep.log", false));
11646    }
11647
11648    #[test]
11649    fn read_expected_object_missing_blob_exposes_oid_and_kind() {
11650        let root = temp_root();
11651        let git_dir = root.join(".git");
11652        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
11653        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
11654        let missing = ObjectId::empty_blob(ObjectFormat::Sha1);
11655
11656        let err = read_expected_object(&db, &missing, ObjectType::Blob)
11657            .expect_err("missing blob should error");
11658        let kind = err.not_found_kind().expect("typed not found");
11659        assert_eq!(kind.object_id(), Some(missing));
11660        assert_eq!(kind.missing_object_kind(), Some(MissingObjectKind::Blob));
11661        assert_eq!(
11662            kind.missing_object_context(),
11663            Some(MissingObjectContext::WorktreeMaterialize)
11664        );
11665        fs::remove_dir_all(root).expect("test operation should succeed");
11666    }
11667
11668    #[test]
11669    fn update_index_adds_file_entry_and_blob() {
11670        let root = temp_root();
11671        let git_dir = root.join(".git");
11672        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
11673        fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
11674        let result = add_paths_to_index(
11675            &root,
11676            &git_dir,
11677            ObjectFormat::Sha1,
11678            &[PathBuf::from("hello.txt")],
11679        )
11680        .expect("test operation should succeed");
11681        assert_eq!(result.entries, 1);
11682        let index = Index::parse_v2_sha1(
11683            &fs::read(repository_index_path(git_dir)).expect("test operation should succeed"),
11684        )
11685        .expect("test operation should succeed");
11686        assert_eq!(index.entries[0].path, b"hello.txt");
11687        fs::remove_dir_all(root).expect("test operation should succeed");
11688    }
11689
11690    #[test]
11691    fn update_index_and_write_tree_support_sha256() {
11692        let root = temp_root();
11693        let git_dir = root.join(".git");
11694        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
11695        fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
11696        let result = add_paths_to_index(
11697            &root,
11698            &git_dir,
11699            ObjectFormat::Sha256,
11700            &[PathBuf::from("hello.txt")],
11701        )
11702        .expect("test operation should succeed");
11703        assert_eq!(result.entries, 1);
11704
11705        let index = Index::parse(
11706            &fs::read(repository_index_path(&git_dir)).expect("test operation should succeed"),
11707            ObjectFormat::Sha256,
11708        )
11709        .expect("test operation should succeed");
11710        assert_eq!(index.entries[0].path, b"hello.txt");
11711        assert_eq!(index.entries[0].oid.format(), ObjectFormat::Sha256);
11712
11713        let tree_oid = write_tree_from_index(&git_dir, ObjectFormat::Sha256)
11714            .expect("test operation should succeed");
11715        assert_eq!(tree_oid.format(), ObjectFormat::Sha256);
11716        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
11717        let tree = odb
11718            .read_object(&tree_oid)
11719            .expect("test operation should succeed");
11720        assert_eq!(tree.object_type, ObjectType::Tree);
11721        fs::remove_dir_all(root).expect("test operation should succeed");
11722    }
11723
11724    #[test]
11725    fn write_tree_from_index_writes_nested_tree_objects() {
11726        let root = temp_root();
11727        let git_dir = root.join(".git");
11728        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
11729        fs::create_dir_all(root.join("src")).expect("test operation should succeed");
11730        fs::write(root.join("README.md"), b"readme\n").expect("test operation should succeed");
11731        fs::write(root.join("src").join("lib.rs"), b"pub fn demo() {}\n")
11732            .expect("test operation should succeed");
11733        let result = add_paths_to_index(
11734            &root,
11735            &git_dir,
11736            ObjectFormat::Sha1,
11737            &[PathBuf::from("README.md"), PathBuf::from("src/lib.rs")],
11738        )
11739        .expect("test operation should succeed");
11740        assert_eq!(result.entries, 2);
11741        let tree_oid = write_tree_from_index(&git_dir, ObjectFormat::Sha1)
11742            .expect("test operation should succeed");
11743        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
11744        let tree = odb
11745            .read_object(&tree_oid)
11746            .expect("test operation should succeed");
11747        assert_eq!(tree.object_type, ObjectType::Tree);
11748        fs::remove_dir_all(root).expect("test operation should succeed");
11749    }
11750
11751    #[test]
11752    fn short_status_reports_added_and_untracked_paths() {
11753        let root = temp_root();
11754        let git_dir = root.join(".git");
11755        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
11756        fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
11757        fs::write(root.join("extra.txt"), b"extra\n").expect("test operation should succeed");
11758        add_paths_to_index(
11759            &root,
11760            &git_dir,
11761            ObjectFormat::Sha1,
11762            &[PathBuf::from("hello.txt")],
11763        )
11764        .expect("test operation should succeed");
11765        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
11766            .expect("test operation should succeed");
11767        assert_eq!(
11768            status
11769                .iter()
11770                .map(ShortStatusEntry::line)
11771                .collect::<Vec<_>>(),
11772            vec!["A  hello.txt", "?? extra.txt"]
11773        );
11774        fs::remove_dir_all(root).expect("test operation should succeed");
11775    }
11776
11777    #[test]
11778    fn worktree_root_is_none_for_bare_repository() {
11779        // A bare git_dir (basename `.git`) with `core.bare = true` must resolve to
11780        // `Ok(None)` rather than falling through to the "parent of .git" case.
11781        let root = temp_root();
11782        let git_dir = root.join(".git");
11783        fs::create_dir_all(&git_dir).expect("create bare git dir");
11784        // Hermetic minimal config — do not depend on host gitconfig.
11785        fs::write(git_dir.join("config"), b"[core]\n\tbare = true\n").expect("write bare config");
11786
11787        assert_eq!(
11788            worktree_root_for_git_dir(&git_dir).expect("resolve bare worktree root"),
11789            None,
11790            "a bare repository has no working tree"
11791        );
11792
11793        fs::remove_dir_all(root).expect("test operation should succeed");
11794    }
11795
11796    #[test]
11797    fn worktree_root_is_parent_for_non_bare_dot_git() {
11798        // A non-bare `.git` directory (no core.bare / core.bare = false) still
11799        // resolves to its parent — the ordinary non-bare layout.
11800        let root = temp_root();
11801        let work = root.join("work");
11802        let git_dir = work.join(".git");
11803        fs::create_dir_all(&git_dir).expect("create non-bare git dir");
11804        fs::write(git_dir.join("config"), b"[core]\n\tbare = false\n")
11805            .expect("write non-bare config");
11806
11807        assert_eq!(
11808            worktree_root_for_git_dir(&git_dir).expect("resolve non-bare worktree root"),
11809            Some(work.clone()),
11810            "a non-bare .git dir resolves to its parent"
11811        );
11812
11813        fs::remove_dir_all(root).expect("test operation should succeed");
11814    }
11815
11816    fn temp_root() -> PathBuf {
11817        let path = std::env::temp_dir().join(format!(
11818            "sley-worktree-{}-{}",
11819            std::process::id(),
11820            TEMP_COUNTER.fetch_add(1, Ordering::Relaxed)
11821        ));
11822        fs::create_dir_all(&path).expect("test operation should succeed");
11823        path
11824    }
11825
11826    fn index_entry_for<'a>(index: &'a Index, path: &[u8]) -> &'a IndexEntry {
11827        index
11828            .entries
11829            .iter()
11830            .find(|entry| entry.path == path)
11831            .unwrap_or_else(|| panic!("missing index entry for {}", String::from_utf8_lossy(path)))
11832    }
11833
11834    fn read_index(git_dir: &Path) -> Index {
11835        Index::parse(
11836            &fs::read(repository_index_path(git_dir)).expect("test operation should succeed"),
11837            ObjectFormat::Sha1,
11838        )
11839        .expect("test operation should succeed")
11840    }
11841
11842    /// Stages `paths` from the worktree, writes their tree, wraps it in a commit
11843    /// object, and points `refs/heads/main` + `HEAD` at it. Returns the commit
11844    /// id. After this call the index reflects the committed tree.
11845    fn build_commit(root: &Path, git_dir: &Path, paths: &[&str]) -> ObjectId {
11846        let path_bufs = paths.iter().map(PathBuf::from).collect::<Vec<_>>();
11847        add_paths_to_index(root, git_dir, ObjectFormat::Sha1, &path_bufs)
11848            .expect("test operation should succeed");
11849        let tree = write_tree_from_index(git_dir, ObjectFormat::Sha1)
11850            .expect("test operation should succeed");
11851        let mut body = Vec::new();
11852        body.extend_from_slice(format!("tree {tree}\n").as_bytes());
11853        body.extend_from_slice(b"author Test <test@example.com> 0 +0000\n");
11854        body.extend_from_slice(b"committer Test <test@example.com> 0 +0000\n");
11855        body.extend_from_slice(b"\n");
11856        body.extend_from_slice(b"sparse fixture\n");
11857        let odb = FileObjectDatabase::from_git_dir(git_dir, ObjectFormat::Sha1);
11858        let commit = odb
11859            .write_object(EncodedObject::new(ObjectType::Commit, body))
11860            .expect("test operation should succeed");
11861        let refs = FileRefStore::new(git_dir, ObjectFormat::Sha1);
11862        let mut tx = refs.transaction();
11863        tx.update(RefUpdate {
11864            name: "refs/heads/main".into(),
11865            expected: None,
11866            new: RefTarget::Direct(commit),
11867            reflog: None,
11868        });
11869        tx.update(RefUpdate {
11870            name: "HEAD".into(),
11871            expected: None,
11872            new: RefTarget::Symbolic("refs/heads/main".into()),
11873            reflog: None,
11874        });
11875        tx.commit().expect("test operation should succeed");
11876        commit
11877    }
11878
11879    fn full_sparse(patterns: &[&[u8]]) -> SparseCheckout {
11880        SparseCheckout {
11881            patterns: patterns.iter().map(|pattern| pattern.to_vec()).collect(),
11882            sparse_index: false,
11883        }
11884    }
11885
11886    #[test]
11887    fn apply_sparse_checkout_full_mode_skips_out_of_cone_paths() {
11888        let root = temp_root();
11889        let git_dir = root.join(".git");
11890        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
11891        fs::create_dir_all(root.join("in")).expect("test operation should succeed");
11892        fs::create_dir_all(root.join("out")).expect("test operation should succeed");
11893        fs::write(root.join("in").join("keep.txt"), b"keep\n")
11894            .expect("test operation should succeed");
11895        fs::write(root.join("out").join("drop.txt"), b"drop\n")
11896            .expect("test operation should succeed");
11897        fs::write(root.join("top.txt"), b"top\n").expect("test operation should succeed");
11898        build_commit(&root, &git_dir, &["in/keep.txt", "out/drop.txt", "top.txt"]);
11899
11900        // Full (non-cone) pattern: keep only the `in/` subtree.
11901        let sparse = full_sparse(&[b"/in/"]);
11902        let result = apply_sparse_checkout_with_mode(
11903            &root,
11904            &git_dir,
11905            ObjectFormat::Sha1,
11906            &sparse,
11907            SparseCheckoutMode::Full,
11908        )
11909        .expect("test operation should succeed");
11910
11911        assert!(root.join("in").join("keep.txt").exists());
11912        assert!(!root.join("out").join("drop.txt").exists());
11913        assert!(!root.join("top.txt").exists());
11914        assert!(result.materialized.contains(&b"in/keep.txt".to_vec()));
11915        assert!(result.skipped.contains(&b"out/drop.txt".to_vec()));
11916        assert!(result.skipped.contains(&b"top.txt".to_vec()));
11917
11918        let index = read_index(&git_dir);
11919        assert!(!index_entry_skip_worktree(index_entry_for(
11920            &index,
11921            b"in/keep.txt"
11922        )));
11923        assert!(index_entry_skip_worktree(index_entry_for(
11924            &index,
11925            b"out/drop.txt"
11926        )));
11927        assert!(index_entry_skip_worktree(index_entry_for(
11928            &index, b"top.txt"
11929        )));
11930        // Out-of-cone entries are preserved in the index, just not on disk.
11931        assert_eq!(index.entries.len(), 3);
11932        fs::remove_dir_all(root).expect("test operation should succeed");
11933    }
11934
11935    #[test]
11936    fn apply_sparse_checkout_toggle_rematerializes() {
11937        let root = temp_root();
11938        let git_dir = root.join(".git");
11939        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
11940        fs::create_dir_all(root.join("a")).expect("test operation should succeed");
11941        fs::create_dir_all(root.join("b")).expect("test operation should succeed");
11942        fs::write(root.join("a").join("file.txt"), b"a\n").expect("test operation should succeed");
11943        fs::write(root.join("b").join("file.txt"), b"b\n").expect("test operation should succeed");
11944        build_commit(&root, &git_dir, &["a/file.txt", "b/file.txt"]);
11945
11946        // First narrow to `a/`.
11947        apply_sparse_checkout_with_mode(
11948            &root,
11949            &git_dir,
11950            ObjectFormat::Sha1,
11951            &full_sparse(&[b"/a/"]),
11952            SparseCheckoutMode::Full,
11953        )
11954        .expect("test operation should succeed");
11955        assert!(root.join("a").join("file.txt").exists());
11956        assert!(!root.join("b").join("file.txt").exists());
11957        let index = read_index(&git_dir);
11958        assert!(index_entry_skip_worktree(index_entry_for(
11959            &index,
11960            b"b/file.txt"
11961        )));
11962
11963        // Now switch the cone to `b/`: `a/` must leave, `b/` must come back with
11964        // the correct content, and the skip-worktree bits must flip.
11965        apply_sparse_checkout_with_mode(
11966            &root,
11967            &git_dir,
11968            ObjectFormat::Sha1,
11969            &full_sparse(&[b"/b/"]),
11970            SparseCheckoutMode::Full,
11971        )
11972        .expect("test operation should succeed");
11973        assert!(!root.join("a").join("file.txt").exists());
11974        assert!(root.join("b").join("file.txt").exists());
11975        assert_eq!(
11976            fs::read(root.join("b").join("file.txt")).expect("test operation should succeed"),
11977            b"b\n"
11978        );
11979        let index = read_index(&git_dir);
11980        assert!(index_entry_skip_worktree(index_entry_for(
11981            &index,
11982            b"a/file.txt"
11983        )));
11984        assert!(!index_entry_skip_worktree(index_entry_for(
11985            &index,
11986            b"b/file.txt"
11987        )));
11988        fs::remove_dir_all(root).expect("test operation should succeed");
11989    }
11990
11991    #[test]
11992    fn apply_sparse_checkout_cone_mode_matches_directory_prefixes() {
11993        let root = temp_root();
11994        let git_dir = root.join(".git");
11995        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
11996        fs::create_dir_all(root.join("kept").join("nested"))
11997            .expect("test operation should succeed");
11998        fs::create_dir_all(root.join("other")).expect("test operation should succeed");
11999        fs::write(root.join("kept").join("a.txt"), b"a\n").expect("test operation should succeed");
12000        fs::write(root.join("kept").join("nested").join("b.txt"), b"b\n")
12001            .expect("test operation should succeed");
12002        fs::write(root.join("other").join("c.txt"), b"c\n").expect("test operation should succeed");
12003        fs::write(root.join("root.txt"), b"r\n").expect("test operation should succeed");
12004        build_commit(
12005            &root,
12006            &git_dir,
12007            &["kept/a.txt", "kept/nested/b.txt", "other/c.txt", "root.txt"],
12008        );
12009
12010        // Standard cone patterns: top-level files plus the whole `kept/` tree.
12011        let sparse = SparseCheckout {
12012            patterns: vec![b"/*".to_vec(), b"!/*/".to_vec(), b"/kept/".to_vec()],
12013            sparse_index: false,
12014        };
12015        // Auto mode should detect cone shape on its own.
12016        assert!(patterns_are_cone(&sparse.patterns));
12017        apply_sparse_checkout(&root, &git_dir, ObjectFormat::Sha1, &sparse)
12018            .expect("test operation should succeed");
12019
12020        assert!(root.join("root.txt").exists());
12021        assert!(root.join("kept").join("a.txt").exists());
12022        assert!(root.join("kept").join("nested").join("b.txt").exists());
12023        assert!(!root.join("other").join("c.txt").exists());
12024
12025        let index = read_index(&git_dir);
12026        assert!(!index_entry_skip_worktree(index_entry_for(
12027            &index,
12028            b"root.txt"
12029        )));
12030        assert!(!index_entry_skip_worktree(index_entry_for(
12031            &index,
12032            b"kept/a.txt"
12033        )));
12034        assert!(!index_entry_skip_worktree(index_entry_for(
12035            &index,
12036            b"kept/nested/b.txt"
12037        )));
12038        assert!(index_entry_skip_worktree(index_entry_for(
12039            &index,
12040            b"other/c.txt"
12041        )));
12042        fs::remove_dir_all(root).expect("test operation should succeed");
12043    }
12044
12045    #[test]
12046    fn apply_sparse_checkout_honors_preexisting_skip_worktree_via_idempotence() {
12047        let root = temp_root();
12048        let git_dir = root.join(".git");
12049        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
12050        fs::create_dir_all(root.join("in")).expect("test operation should succeed");
12051        fs::create_dir_all(root.join("out")).expect("test operation should succeed");
12052        fs::write(root.join("in").join("keep.txt"), b"keep\n")
12053            .expect("test operation should succeed");
12054        fs::write(root.join("out").join("drop.txt"), b"drop\n")
12055            .expect("test operation should succeed");
12056        build_commit(&root, &git_dir, &["in/keep.txt", "out/drop.txt"]);
12057
12058        let sparse = full_sparse(&[b"/in/"]);
12059        apply_sparse_checkout_with_mode(
12060            &root,
12061            &git_dir,
12062            ObjectFormat::Sha1,
12063            &sparse,
12064            SparseCheckoutMode::Full,
12065        )
12066        .expect("test operation should succeed");
12067        assert!(!root.join("out").join("drop.txt").exists());
12068
12069        // Re-applying the same spec is a no-op: the already-skipped file stays
12070        // absent and the bit stays set (we do not resurrect it).
12071        let result = apply_sparse_checkout_with_mode(
12072            &root,
12073            &git_dir,
12074            ObjectFormat::Sha1,
12075            &sparse,
12076            SparseCheckoutMode::Full,
12077        )
12078        .expect("test operation should succeed");
12079        assert!(!root.join("out").join("drop.txt").exists());
12080        assert!(root.join("in").join("keep.txt").exists());
12081        assert!(result.skipped.contains(&b"out/drop.txt".to_vec()));
12082        let index = read_index(&git_dir);
12083        assert!(index_entry_skip_worktree(index_entry_for(
12084            &index,
12085            b"out/drop.txt"
12086        )));
12087        fs::remove_dir_all(root).expect("test operation should succeed");
12088    }
12089
12090    #[test]
12091    fn checkout_detached_sparse_only_writes_in_cone_paths() {
12092        let root = temp_root();
12093        let git_dir = root.join(".git");
12094        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
12095        fs::create_dir_all(root.join("keep")).expect("test operation should succeed");
12096        fs::create_dir_all(root.join("skip")).expect("test operation should succeed");
12097        fs::write(root.join("keep").join("a.txt"), b"a\n").expect("test operation should succeed");
12098        fs::write(root.join("skip").join("b.txt"), b"b\n").expect("test operation should succeed");
12099        let commit = build_commit(&root, &git_dir, &["keep/a.txt", "skip/b.txt"]);
12100
12101        // The worktree is clean and matches the commit. A sparse checkout must
12102        // keep the in-cone file and evict the out-of-cone one.
12103        let sparse = full_sparse(&[b"/keep/"]);
12104        let result = checkout_detached_sparse(
12105            &root,
12106            &git_dir,
12107            ObjectFormat::Sha1,
12108            &commit,
12109            b"Test <test@example.com> 0 +0000".to_vec(),
12110            b"checkout".to_vec(),
12111            &sparse,
12112        )
12113        .expect("test operation should succeed");
12114        assert_eq!(result.files, 2);
12115
12116        assert!(root.join("keep").join("a.txt").exists());
12117        assert_eq!(
12118            fs::read(root.join("keep").join("a.txt")).expect("test operation should succeed"),
12119            b"a\n"
12120        );
12121        assert!(!root.join("skip").join("b.txt").exists());
12122
12123        let index = read_index(&git_dir);
12124        assert_eq!(index.entries.len(), 2);
12125        assert!(!index_entry_skip_worktree(index_entry_for(
12126            &index,
12127            b"keep/a.txt"
12128        )));
12129        let skipped = index_entry_for(&index, b"skip/b.txt");
12130        assert!(index_entry_skip_worktree(skipped));
12131        // The skipped entry still carries the committed blob id and mode.
12132        assert_eq!(skipped.mode, 0o100644);
12133        fs::remove_dir_all(root).expect("test operation should succeed");
12134    }
12135
12136    // ----- content filtering: EOL / autocrlf + clean/smudge drivers -----
12137
12138    /// Build a [`GitConfig`] from raw config text.
12139    fn config_from(text: &str) -> GitConfig {
12140        GitConfig::parse(text.as_bytes()).expect("test operation should succeed")
12141    }
12142
12143    /// Conformance grid for git's `output_eol(crlf_action)` decision table
12144    /// (convert.c) on the smudge side, exercised across the same
12145    /// attr × autocrlf × eol × content matrix as upstream t0027/t0026.
12146    ///
12147    /// Each row asserts the smudge output for a representative content shape.
12148    /// The cases that historically under-converted are the non-`auto` `text`
12149    /// paths (the auto-only safety guard must NOT fire) and the
12150    /// `autocrlf=true overrides core.eol` precedence rows.
12151    #[test]
12152    fn smudge_output_eol_decision_table() {
12153        // Naked-LF-only blob (the canonical "should gain CRLF" case).
12154        const LF: &[u8] = b"a\nb\nc\n";
12155        // Mixed CRLF + naked LF: a non-auto crlf action converts the naked LFs
12156        // to CRLF (whole file becomes CRLF); an auto action leaves it untouched.
12157        const CRLF_MIX_LF: &[u8] = b"a\r\nb\nc\r\n";
12158        // Naked LF plus a lone CR: non-auto converts LFs, keeping the lone CR.
12159        const LF_MIX_CR: &[u8] = b"a\nb\rc\n";
12160
12161        let smudge = |cfg: &str, attrline: Option<&[u8]>, input: &[u8]| -> Vec<u8> {
12162            let config = config_from(cfg);
12163            let checks = match attrline {
12164                Some(line) => {
12165                    let mut matcher = AttributeMatcher::default();
12166                    read_attribute_patterns_from_bytes(line, &mut matcher, &[]);
12167                    matcher.attributes_for_path(b"f.txt", &filter_attribute_names(), false)
12168                }
12169                None => Vec::new(),
12170            };
12171            apply_smudge_filter_with_attributes(&config, &checks, b"f.txt", input)
12172                .expect("smudge must succeed")
12173        };
12174
12175        // --- attr=text (CRLF_TEXT_*): non-auto, the safety guard must not fire.
12176        // text + eol=crlf => CRLF_TEXT_CRLF: every naked LF gains CR.
12177        let attr_text_crlf: &[u8] = b"*.txt text eol=crlf";
12178        for cfg in [
12179            "[core]\n\tautocrlf = false\n\teol = lf\n",
12180            "[core]\n\tautocrlf = false\n\teol = crlf\n",
12181            "[core]\n\tautocrlf = true\n\teol = lf\n",
12182            "[core]\n\tautocrlf = input\n",
12183        ] {
12184            assert_eq!(
12185                smudge(cfg, Some(attr_text_crlf), LF),
12186                b"a\r\nb\r\nc\r\n",
12187                "text eol=crlf must add CR to naked LF (cfg={cfg:?})"
12188            );
12189            assert_eq!(
12190                smudge(cfg, Some(attr_text_crlf), CRLF_MIX_LF),
12191                b"a\r\nb\r\nc\r\n",
12192                "text eol=crlf must convert mixed content fully (cfg={cfg:?})"
12193            );
12194            assert_eq!(
12195                smudge(cfg, Some(attr_text_crlf), LF_MIX_CR),
12196                b"a\r\nb\rc\r\n",
12197                "text eol=crlf keeps the lone CR but adds CR to naked LF (cfg={cfg:?})"
12198            );
12199        }
12200
12201        // --- attr=text, no eol attr: CRLF_TEXT, resolved by text_eol_is_crlf().
12202        // autocrlf=true wins over core.eol=lf (the precedence fix).
12203        assert_eq!(
12204            smudge("[core]\n\tautocrlf = true\n\teol = lf\n", Some(b"*.txt text"), LF),
12205            b"a\r\nb\r\nc\r\n",
12206            "autocrlf=true must override core.eol=lf for plain text attr"
12207        );
12208        // autocrlf unset, core.eol=crlf => CRLF.
12209        assert_eq!(
12210            smudge("[core]\n\teol = crlf\n", Some(b"*.txt text"), LF),
12211            b"a\r\nb\r\nc\r\n",
12212            "core.eol=crlf adds CR to naked LF for plain text attr"
12213        );
12214        // autocrlf unset, core.eol=lf (and native LF on this host) => no CR.
12215        assert_eq!(
12216            smudge("[core]\n\teol = lf\n", Some(b"*.txt text"), LF),
12217            LF,
12218            "core.eol=lf leaves naked LF untouched on smudge"
12219        );
12220        // text + autocrlf=input => CRLF_TEXT_INPUT: no CR on smudge.
12221        assert_eq!(
12222            smudge("[core]\n\tautocrlf = input\n", Some(b"*.txt text"), LF),
12223            LF,
12224            "autocrlf=input overrides core.eol; no CR on smudge"
12225        );
12226
12227        // --- attr=text=auto (CRLF_AUTO_*): the safety guard DOES fire.
12228        // auto + autocrlf=true + naked-LF-only => convert.
12229        assert_eq!(
12230            smudge("[core]\n\tautocrlf = true\n", Some(b"*.txt text=auto"), LF),
12231            b"a\r\nb\r\nc\r\n",
12232            "text=auto converts a clean naked-LF file"
12233        );
12234        // auto + already has a CR/CRLF => leave untouched (irreversible guard).
12235        assert_eq!(
12236            smudge("[core]\n\tautocrlf = true\n", Some(b"*.txt text=auto"), CRLF_MIX_LF),
12237            CRLF_MIX_LF,
12238            "text=auto must not touch content that already has CRLF"
12239        );
12240        assert_eq!(
12241            smudge("[core]\n\tautocrlf = true\n", Some(b"*.txt text=auto"), LF_MIX_CR),
12242            LF_MIX_CR,
12243            "text=auto must not touch content that already has a lone CR"
12244        );
12245
12246        // --- no attr, autocrlf=true => CRLF_AUTO_CRLF (auto guard applies).
12247        assert_eq!(
12248            smudge("[core]\n\tautocrlf = true\n\teol = lf\n", None, LF),
12249            b"a\r\nb\r\nc\r\n",
12250            "autocrlf=true (no attr) converts clean naked-LF and overrides core.eol=lf"
12251        );
12252        // --- no attr, autocrlf=false => CRLF_BINARY: never convert.
12253        assert_eq!(
12254            smudge("[core]\n\teol = crlf\n", None, LF),
12255            LF,
12256            "no attr + autocrlf=false leaves content untouched even with core.eol=crlf"
12257        );
12258        // --- -text (CRLF_BINARY): never convert regardless of config.
12259        assert_eq!(
12260            smudge("[core]\n\tautocrlf = true\n", Some(b"*.txt -text"), LF),
12261            LF,
12262            "-text is binary: never convert"
12263        );
12264    }
12265
12266    /// Resolve attribute checks against an on-disk `.gitattributes` in `root`.
12267    fn attrs(root: &Path, path: &[u8]) -> Vec<AttributeCheck> {
12268        filter_attribute_checks(root, path).expect("test operation should succeed")
12269    }
12270
12271    #[test]
12272    fn standard_attribute_matcher_matches_per_path_lookup() {
12273        let root = temp_root();
12274        fs::create_dir_all(root.join(".git").join("info")).expect("test operation should succeed");
12275        fs::create_dir_all(root.join("src").join("nested")).expect("test operation should succeed");
12276        fs::write(root.join(".gitattributes"), b"*.rs diff=rust\n")
12277            .expect("test operation should succeed");
12278        fs::write(
12279            root.join("src").join(".gitattributes"),
12280            b"*.rs diff=python\n",
12281        )
12282        .expect("test operation should succeed");
12283        fs::write(
12284            root.join(".git").join("info").join("attributes"),
12285            b"src/nested/*.rs diff=java\n",
12286        )
12287        .expect("test operation should succeed");
12288
12289        let requested = vec![b"diff".to_vec()];
12290        let path = b"src/nested/file.rs";
12291        let per_path = standard_attributes_for_path(&root, path, &requested, false)
12292            .expect("test operation should succeed");
12293        let matcher = StandardAttributeMatcher::from_worktree_root(&root)
12294            .expect("test operation should succeed");
12295        assert_eq!(
12296            matcher.attributes_for_path(path, &requested, false),
12297            per_path
12298        );
12299
12300        fs::remove_dir_all(root).expect("test operation should succeed");
12301    }
12302
12303    #[test]
12304    fn filter_attribute_lookup_reads_only_path_chain() {
12305        let root = temp_root();
12306        fs::create_dir_all(root.join(".git").join("info")).expect("test operation should succeed");
12307        fs::create_dir_all(root.join("src").join("nested")).expect("test operation should succeed");
12308        fs::create_dir_all(root.join("sibling")).expect("test operation should succeed");
12309        fs::write(root.join(".gitattributes"), b"*.txt text\n")
12310            .expect("test operation should succeed");
12311        fs::write(root.join("src").join(".gitattributes"), b"*.txt -text\n")
12312            .expect("test operation should succeed");
12313        fs::write(
12314            root.join("sibling").join(".gitattributes"),
12315            b"*.txt eol=crlf\n",
12316        )
12317        .expect("test operation should succeed");
12318        fs::write(
12319            root.join(".git").join("info").join("attributes"),
12320            b"src/nested/*.txt eol=lf\n",
12321        )
12322        .expect("test operation should succeed");
12323
12324        let path = b"src/nested/file.txt";
12325        let full = standard_attributes_for_path(&root, path, &filter_attribute_names(), false)
12326            .expect("test operation should succeed");
12327        assert_eq!(filter_attribute_checks(&root, path).unwrap(), full);
12328
12329        fs::remove_dir_all(root).expect("test operation should succeed");
12330    }
12331
12332    #[test]
12333    fn crlf_to_lf_collapses_only_pairs() {
12334        assert_eq!(
12335            convert_crlf_to_lf_cow(Cow::Borrowed(b"a\r\nb\r\n")).as_ref(),
12336            b"a\nb\n"
12337        );
12338        // A lone CR (no following LF) is preserved.
12339        assert_eq!(
12340            convert_crlf_to_lf_cow(Cow::Borrowed(b"a\rb")).as_ref(),
12341            b"a\rb"
12342        );
12343        // An already-LF stream is unchanged.
12344        assert!(matches!(
12345            convert_crlf_to_lf_cow(Cow::Borrowed(b"a\nb\n")),
12346            Cow::Borrowed(_)
12347        ));
12348    }
12349
12350    #[test]
12351    fn lf_to_crlf_does_not_double_convert() {
12352        assert_eq!(convert_lf_to_crlf(b"a\nb\n"), b"a\r\nb\r\n");
12353        // Existing CRLF is left intact (no extra CR added).
12354        assert_eq!(convert_lf_to_crlf(b"a\r\nb\r\n"), b"a\r\nb\r\n");
12355    }
12356
12357    #[test]
12358    fn autocrlf_round_trip_clean_then_smudge() {
12359        // autocrlf=true: worktree CRLF -> blob LF on clean, blob LF -> worktree
12360        // CRLF on smudge.
12361        let config = config_from("[core]\n\tautocrlf = true\n");
12362        let checks: Vec<AttributeCheck> = Vec::new();
12363        let worktree = b"line1\r\nline2\r\n";
12364        let blob = apply_clean_filter_with_attributes(&config, &checks, b"file.txt", worktree)
12365            .expect("test operation should succeed");
12366        assert_eq!(blob, b"line1\nline2\n", "clean must normalize CRLF to LF");
12367        let restored = apply_smudge_filter_with_attributes(&config, &checks, b"file.txt", &blob)
12368            .expect("test operation should succeed");
12369        assert_eq!(
12370            restored, worktree,
12371            "smudge must restore CRLF from the LF blob"
12372        );
12373    }
12374
12375    #[test]
12376    fn conv_flags_from_config_matches_git_defaults() {
12377        // Unset core.safecrlf defaults to WARN (git's global_conv_flags_eol).
12378        assert_eq!(ConvFlags::from_config(&config_from("")), ConvFlags::Warn);
12379        assert_eq!(
12380            ConvFlags::from_config(&config_from("[core]\n\tsafecrlf = warn\n")),
12381            ConvFlags::Warn
12382        );
12383        assert_eq!(
12384            ConvFlags::from_config(&config_from("[core]\n\tsafecrlf = WARN\n")),
12385            ConvFlags::Warn
12386        );
12387        assert_eq!(
12388            ConvFlags::from_config(&config_from("[core]\n\tsafecrlf = true\n")),
12389            ConvFlags::Die
12390        );
12391        assert_eq!(
12392            ConvFlags::from_config(&config_from("[core]\n\tsafecrlf = false\n")),
12393            ConvFlags::Off
12394        );
12395    }
12396
12397    #[test]
12398    fn safecrlf_warn_does_not_change_clean_bytes() {
12399        // The warning is purely additive: byte output is identical whether
12400        // safecrlf is off or warn.
12401        let config = config_from("[core]\n\tautocrlf = true\n");
12402        let checks: Vec<AttributeCheck> = Vec::new();
12403        let worktree = b"a\nb\nc\n";
12404        let plain = apply_clean_filter_with_attributes(&config, &checks, b"f.txt", worktree)
12405            .expect("clean");
12406        let warned = apply_clean_filter_with_attributes_cow_safecrlf(
12407            &config,
12408            &checks,
12409            b"f.txt",
12410            worktree,
12411            ConvFlags::Warn,
12412            SafeCrlfIndexBlob::None,
12413        )
12414        .expect("clean with safecrlf")
12415        .into_owned();
12416        assert_eq!(plain, warned, "safecrlf must not alter the cleaned bytes");
12417    }
12418
12419    #[test]
12420    fn safecrlf_die_errors_on_lf_to_crlf_round_trip() {
12421        // autocrlf=true on a pure-LF file: checkout would add CRLF, so the
12422        // round-trip is irreversible and safecrlf=true dies (exit 128).
12423        let config = config_from("[core]\n\tautocrlf = true\n");
12424        let checks: Vec<AttributeCheck> = Vec::new();
12425        let err = apply_clean_filter_with_attributes_cow_safecrlf(
12426            &config,
12427            &checks,
12428            b"f.txt",
12429            b"a\nb\n",
12430            ConvFlags::Die,
12431            SafeCrlfIndexBlob::None,
12432        )
12433        .expect_err("die must error");
12434        assert!(matches!(err, GitError::Exit(128)));
12435    }
12436
12437    #[test]
12438    fn safecrlf_die_errors_on_crlf_to_lf_round_trip() {
12439        // autocrlf=input on a CRLF file: clean strips CRLF and checkout never
12440        // restores it, so safecrlf=true dies.
12441        let config = config_from("[core]\n\tautocrlf = input\n");
12442        let checks: Vec<AttributeCheck> = Vec::new();
12443        let err = apply_clean_filter_with_attributes_cow_safecrlf(
12444            &config,
12445            &checks,
12446            b"f.txt",
12447            b"a\r\nb\r\n",
12448            ConvFlags::Die,
12449            SafeCrlfIndexBlob::None,
12450        )
12451        .expect_err("die must error");
12452        assert!(matches!(err, GitError::Exit(128)));
12453    }
12454
12455    #[test]
12456    fn safecrlf_reversible_round_trip_does_not_warn_or_die() {
12457        // A CRLF file under autocrlf=true survives the round trip (clean to LF,
12458        // smudge back to CRLF), so even safecrlf=true is silent.
12459        let config = config_from("[core]\n\tautocrlf = true\n");
12460        let checks: Vec<AttributeCheck> = Vec::new();
12461        let out = apply_clean_filter_with_attributes_cow_safecrlf(
12462            &config,
12463            &checks,
12464            b"f.txt",
12465            b"a\r\nb\r\n",
12466            ConvFlags::Die,
12467            SafeCrlfIndexBlob::None,
12468        )
12469        .expect("reversible round trip must not die");
12470        assert_eq!(out.as_ref(), b"a\nb\n");
12471    }
12472
12473    #[test]
12474    fn safecrlf_binary_content_is_silent() {
12475        // autocrlf=true with NUL-containing (binary) content: no conversion and
12476        // no warning/die, mirroring git's early-return in crlf_to_git.
12477        let config = config_from("[core]\n\tautocrlf = true\n");
12478        let checks: Vec<AttributeCheck> = Vec::new();
12479        let body: &[u8] = b"a\nb\0c\n";
12480        let out = apply_clean_filter_with_attributes_cow_safecrlf(
12481            &config,
12482            &checks,
12483            b"f.bin",
12484            body,
12485            ConvFlags::Die,
12486            SafeCrlfIndexBlob::None,
12487        )
12488        .expect("binary content must not die");
12489        assert_eq!(out.as_ref(), body, "binary content is never converted");
12490    }
12491
12492    #[test]
12493    fn safecrlf_off_is_silent_even_on_irreversible_round_trip() {
12494        let config = config_from("[core]\n\tautocrlf = true\n");
12495        let checks: Vec<AttributeCheck> = Vec::new();
12496        let out = apply_clean_filter_with_attributes_cow_safecrlf(
12497            &config,
12498            &checks,
12499            b"f.txt",
12500            b"a\nb\n",
12501            ConvFlags::Off,
12502            SafeCrlfIndexBlob::None,
12503        )
12504        .expect("safecrlf=off never errors");
12505        // autocrlf=true does not convert on clean (only smudge), so bytes pass through.
12506        assert_eq!(out.as_ref(), b"a\nb\n");
12507    }
12508
12509    #[test]
12510    fn autocrlf_input_normalizes_on_clean_but_not_smudge() {
12511        // autocrlf=input: clean normalizes to LF, smudge leaves LF as-is.
12512        let config = config_from("[core]\n\tautocrlf = input\n");
12513        let checks: Vec<AttributeCheck> = Vec::new();
12514        let blob = apply_clean_filter_with_attributes(&config, &checks, b"file.txt", b"a\r\nb\r\n")
12515            .expect("test operation should succeed");
12516        assert_eq!(blob, b"a\nb\n");
12517        let smudged = apply_smudge_filter_with_attributes(&config, &checks, b"file.txt", &blob)
12518            .expect("test operation should succeed");
12519        assert_eq!(
12520            smudged, b"a\nb\n",
12521            "input mode must not add carriage returns"
12522        );
12523    }
12524
12525    #[test]
12526    fn eol_crlf_attribute_drives_conversion_without_config() {
12527        // No core.autocrlf; the `eol=crlf` attribute alone forces conversion.
12528        let config = config_from("");
12529        let checks = vec![AttributeCheck {
12530            attribute: b"eol".to_vec(),
12531            state: Some(AttributeState::Value(b"crlf".to_vec())),
12532        }];
12533        let blob = apply_clean_filter_with_attributes(&config, &checks, b"a.txt", b"x\r\ny\r\n")
12534            .expect("test operation should succeed");
12535        assert_eq!(blob, b"x\ny\n");
12536        let smudged = apply_smudge_filter_with_attributes(&config, &checks, b"a.txt", &blob)
12537            .expect("test operation should succeed");
12538        assert_eq!(smudged, b"x\r\ny\r\n");
12539    }
12540
12541    #[test]
12542    fn binary_attribute_disables_eol_conversion() {
12543        // `-text` (binary) must leave CRLF/NUL content untouched in both
12544        // directions even when autocrlf=true.
12545        let config = config_from("[core]\n\tautocrlf = true\n");
12546        let checks = vec![AttributeCheck {
12547            attribute: b"text".to_vec(),
12548            state: Some(AttributeState::Unset),
12549        }];
12550        let content = b"\x00\x01\r\n\x02\r\n".to_vec();
12551        let blob = apply_clean_filter_with_attributes(&config, &checks, b"data.bin", &content)
12552            .expect("test operation should succeed");
12553        assert_eq!(blob, content, "binary file must not be CRLF-normalized");
12554        let smudged = apply_smudge_filter_with_attributes(&config, &checks, b"data.bin", &blob)
12555            .expect("test operation should succeed");
12556        assert_eq!(
12557            smudged, content,
12558            "binary file must not gain carriage returns"
12559        );
12560    }
12561
12562    #[test]
12563    fn autocrlf_auto_skips_binary_looking_content() {
12564        // text=auto (via autocrlf) must not convert content that contains NUL.
12565        let config = config_from("[core]\n\tautocrlf = true\n");
12566        let checks: Vec<AttributeCheck> = Vec::new();
12567        let content = b"a\r\n\x00b\r\n".to_vec();
12568        let blob = apply_clean_filter_with_attributes(&config, &checks, b"f", &content)
12569            .expect("test operation should succeed");
12570        assert_eq!(blob, content, "binary-looking content stays untouched");
12571    }
12572
12573    #[test]
12574    fn autocrlf_via_add_and_checkout_round_trips() {
12575        // End-to-end: a CRLF worktree file is stored as an LF blob by the
12576        // filtered add path, and restored as CRLF by the filtered checkout.
12577        let root = temp_root();
12578        let git_dir = root.join(".git");
12579        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
12580        let config = config_from("[core]\n\tautocrlf = true\n");
12581
12582        fs::write(root.join("crlf.txt"), b"alpha\r\nbeta\r\n")
12583            .expect("test operation should succeed");
12584        add_paths_to_index_filtered(
12585            &root,
12586            &git_dir,
12587            ObjectFormat::Sha1,
12588            &[PathBuf::from("crlf.txt")],
12589            &config,
12590        )
12591        .expect("test operation should succeed");
12592
12593        // The stored blob must be LF-normalized.
12594        let index = read_index(&git_dir);
12595        let entry = index_entry_for(&index, b"crlf.txt");
12596        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
12597        let blob = odb
12598            .read_object(&entry.oid)
12599            .expect("test operation should succeed");
12600        assert_eq!(blob.body, b"alpha\nbeta\n");
12601
12602        // Commit and point HEAD at it, then re-checkout with smudge filtering.
12603        let tree = write_tree_from_index(&git_dir, ObjectFormat::Sha1)
12604            .expect("test operation should succeed");
12605        let mut body = Vec::new();
12606        body.extend_from_slice(format!("tree {tree}\n").as_bytes());
12607        body.extend_from_slice(b"author T <t@e> 0 +0000\ncommitter T <t@e> 0 +0000\n\nm\n");
12608        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
12609        let commit = odb
12610            .write_object(EncodedObject::new(ObjectType::Commit, body))
12611            .expect("test operation should succeed");
12612        let refs = FileRefStore::new(&git_dir, ObjectFormat::Sha1);
12613        let mut tx = refs.transaction();
12614        tx.update(RefUpdate {
12615            name: "HEAD".into(),
12616            expected: None,
12617            new: RefTarget::Direct(commit),
12618            reflog: None,
12619        });
12620        tx.commit().expect("test operation should succeed");
12621
12622        // Make the worktree match the committed (LF) blob so the tree is clean
12623        // for checkout; `short_status`/`worktree_entries` compare by content
12624        // hash and are not filter-aware. Checkout will then smudge it to CRLF.
12625        fs::write(root.join("crlf.txt"), b"alpha\nbeta\n").expect("test operation should succeed");
12626        checkout_detached_filtered(
12627            &root,
12628            &git_dir,
12629            ObjectFormat::Sha1,
12630            &commit,
12631            b"T <t@e> 0 +0000".to_vec(),
12632            b"co".to_vec(),
12633            &config,
12634        )
12635        .expect("test operation should succeed");
12636        assert_eq!(
12637            fs::read(root.join("crlf.txt")).expect("test operation should succeed"),
12638            b"alpha\r\nbeta\r\n",
12639            "checkout must restore CRLF line endings"
12640        );
12641        fs::remove_dir_all(root).expect("test operation should succeed");
12642    }
12643
12644    #[test]
12645    fn driver_filter_clean_and_smudge_transform_both_directions() {
12646        // filter=case: clean upper-cases (worktree -> blob), smudge lower-cases
12647        // (blob -> worktree).
12648        let config =
12649            config_from("[filter \"case\"]\n\tclean = tr a-z A-Z\n\tsmudge = tr A-Z a-z\n");
12650        let checks = vec![AttributeCheck {
12651            attribute: b"filter".to_vec(),
12652            state: Some(AttributeState::Value(b"case".to_vec())),
12653        }];
12654        let blob = apply_clean_filter_with_attributes(&config, &checks, b"f.txt", b"Hello World")
12655            .expect("test operation should succeed");
12656        assert_eq!(blob, b"HELLO WORLD", "clean driver must upper-case");
12657        let worktree =
12658            apply_smudge_filter_with_attributes(&config, &checks, b"f.txt", b"HELLO WORLD")
12659                .expect("test operation should succeed");
12660        assert_eq!(worktree, b"hello world", "smudge driver must lower-case");
12661    }
12662
12663    #[test]
12664    fn driver_filter_resolved_from_gitattributes_file() {
12665        // The filter name is read from a real `.gitattributes`, the commands from
12666        // config; exercises the public worktree-rooted entry points.
12667        let root = temp_root();
12668        let git_dir = root.join(".git");
12669        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
12670        fs::write(root.join(".gitattributes"), b"*.dat filter=rot\n")
12671            .expect("test operation should succeed");
12672        let config =
12673            config_from("[filter \"rot\"]\n\tclean = sed s/a/b/g\n\tsmudge = sed s/b/a/g\n");
12674        // Clean reads attributes from the live worktree `.gitattributes`.
12675        let blob = apply_clean_filter(&root, &git_dir, &config, b"x.dat", b"banana")
12676            .expect("test operation should succeed");
12677        assert_eq!(blob, b"bbnbnb");
12678        // Smudge reads attributes from the index (the worktree file may not
12679        // exist yet during checkout), so stage `.gitattributes` first.
12680        add_paths_to_index(
12681            &root,
12682            &git_dir,
12683            ObjectFormat::Sha1,
12684            &[PathBuf::from(".gitattributes")],
12685        )
12686        .expect("test operation should succeed");
12687        let smudged = apply_smudge_filter(
12688            &root,
12689            &git_dir,
12690            ObjectFormat::Sha1,
12691            &config,
12692            b"x.dat",
12693            &blob,
12694        )
12695        .expect("test operation should succeed");
12696        // sed s/b/a/g is not a perfect inverse, but verifies the smudge command
12697        // ran on the blob bytes.
12698        assert_eq!(smudged, b"aanana");
12699        fs::remove_dir_all(root).expect("test operation should succeed");
12700    }
12701
12702    #[test]
12703    fn required_filter_failure_is_fatal() {
12704        // A required filter whose command fails must surface an error.
12705        let config = config_from("[filter \"boom\"]\n\tclean = false\n\trequired = true\n");
12706        let checks = vec![AttributeCheck {
12707            attribute: b"filter".to_vec(),
12708            state: Some(AttributeState::Value(b"boom".to_vec())),
12709        }];
12710        let err = apply_clean_filter_with_attributes(&config, &checks, b"f", b"data")
12711            .expect_err("required filter failure must error");
12712        assert!(matches!(err, GitError::Command(_)), "got {err:?}");
12713    }
12714
/// A `required = true` filter that only configures `smudge` has no command
/// for the clean direction; cleaning through it must fail with
/// `GitError::Command`.
#[test]
fn required_filter_missing_command_is_fatal() {
    let config = config_from("[filter \"need\"]\n\tsmudge = cat\n\trequired = true\n");
    let filter_attribute = AttributeCheck {
        attribute: b"filter".to_vec(),
        state: Some(AttributeState::Value(b"need".to_vec())),
    };
    let checks = vec![filter_attribute];

    let outcome = apply_clean_filter_with_attributes(&config, &checks, b"f", b"data");
    let err = outcome.expect_err("required filter without a clean command must error");
    assert!(matches!(err, GitError::Command(_)), "got {err:?}");
}
12727
/// When a filter is not marked required, a failing clean command must not
/// produce an error: the input bytes come back untouched.
#[test]
fn non_required_filter_failure_passes_through() {
    // No `required` key here, so the failing `false` command is optional.
    let config = config_from("[filter \"opt\"]\n\tclean = false\n");
    let filter_attribute = AttributeCheck {
        attribute: b"filter".to_vec(),
        state: Some(AttributeState::Value(b"opt".to_vec())),
    };
    let checks = vec![filter_attribute];

    let outcome = apply_clean_filter_with_attributes(&config, &checks, b"f", b"keepme");
    let out = outcome.expect("test operation should succeed");
    assert_eq!(
        out, b"keepme",
        "optional filter failure passes content through"
    );
}
12744
/// A `filter=<name>` attribute that names a driver absent from the config
/// (and therefore not required) leaves the content unchanged.
#[test]
fn filter_with_no_command_is_noop() {
    // Empty config: the "ghost" driver has neither clean nor smudge commands.
    let config = config_from("");
    let filter_attribute = AttributeCheck {
        attribute: b"filter".to_vec(),
        state: Some(AttributeState::Value(b"ghost".to_vec())),
    };
    let checks = vec![filter_attribute];

    let outcome = apply_clean_filter_with_attributes(&config, &checks, b"f", b"unchanged");
    let out = outcome.expect("test operation should succeed");
    assert_eq!(out, b"unchanged");
}
12757
/// With `filter=case` and `core.autocrlf = true` on a `text` path, both the
/// filter driver and end-of-line conversion must apply in each direction:
/// cleaning yields upper-case LF content, smudging restores lower-case CRLF.
#[test]
fn driver_and_eol_compose_on_clean_and_smudge() {
    let config = config_from(
        "[core]\n\tautocrlf = true\n[filter \"case\"]\n\tclean = tr a-z A-Z\n\tsmudge = tr A-Z a-z\n",
    );
    let filter_attribute = AttributeCheck {
        attribute: b"filter".to_vec(),
        state: Some(AttributeState::Value(b"case".to_vec())),
    };
    let text_attribute = AttributeCheck {
        attribute: b"text".to_vec(),
        state: Some(AttributeState::Set),
    };
    let checks = vec![filter_attribute, text_attribute];

    // Worktree -> blob direction.
    let cleaned = apply_clean_filter_with_attributes(&config, &checks, b"f.txt", b"ab\r\ncd\r\n");
    let blob = cleaned.expect("test operation should succeed");
    assert_eq!(blob, b"AB\nCD\n", "clean: upper-case then CRLF->LF");

    // Blob -> worktree direction, fed with the blob produced above.
    let smudged = apply_smudge_filter_with_attributes(&config, &checks, b"f.txt", &blob);
    let worktree = smudged.expect("test operation should succeed");
    assert_eq!(
        worktree, b"ab\r\ncd\r\n",
        "smudge: LF->CRLF then lower-case"
    );
}
12785
/// The `attrs` test helper must pick up rules from the `.gitattributes` file
/// on disk: `*.txt text` marks `text` as set, `*.bin -text` marks it unset.
#[test]
fn attrs_helper_reads_filter_from_disk() {
    let root = temp_root();
    let attributes_file = root.join(".gitattributes");
    fs::write(&attributes_file, b"*.txt text\n*.bin -text\n")
        .expect("test operation should succeed");

    let txt_checks = attrs(&root, b"a.txt");
    let txt_has_text_set = txt_checks
        .iter()
        .any(|check| check.attribute == b"text" && check.state == Some(AttributeState::Set));
    assert!(txt_has_text_set);

    let bin_checks = attrs(&root, b"a.bin");
    let bin_has_text_unset = bin_checks
        .iter()
        .any(|check| check.attribute == b"text" && check.state == Some(AttributeState::Unset));
    assert!(bin_has_text_unset);

    fs::remove_dir_all(&root).expect("test operation should succeed");
}
12803
12804    /// Builds a stat cache holding a single stage-0 entry whose size+mtime match
12805    /// `file`'s real metadata, with the index-file mtime placed strictly after
12806    /// the entry mtime so the entry reads as non-racy by default. The entry's oid
12807    /// is `oid` and its mode is `mode`.
12808    fn stat_cache_for(file: &Path, oid: ObjectId, mode: u32) -> (IndexStatCache, IndexEntry) {
12809        let metadata = fs::metadata(file).expect("test operation should succeed");
12810        let mut entry = index_entry_from_metadata(b"f.txt".to_vec(), oid, &metadata);
12811        entry.mode = mode;
12812        let index_mtime = Some((u64::from(entry.mtime_seconds) + 10, 0));
12813        let mut entries = HashMap::new();
12814        entries.insert(entry.path.as_bytes().to_vec(), entry.clone());
12815        (
12816            IndexStatCache {
12817                entries,
12818                index_mtime,
12819            },
12820            entry,
12821        )
12822    }
12823
/// Walks `IndexStatCache::reuse_tracked_entry` through every branch the
/// fixture can reach: the cached oid is handed back only for a matching,
/// non-racy entry; a missing path, a size mismatch, a mode mismatch, a racy
/// index mtime and an unknown index mtime must all yield `None`.
#[test]
fn reuse_tracked_entry_only_reuses_clean_non_racy_match() {
    let root = temp_root();
    let file = root.join("f.txt");
    fs::write(&file, b"hello\n").expect("test operation should succeed");
    let metadata = fs::metadata(&file).expect("test operation should succeed");
    let real_mode = file_mode(&metadata);
    let oid = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec())
        .object_id(ObjectFormat::Sha1)
        .expect("test operation should succeed");

    // Happy path: stat and mode agree and the index is newer than the entry.
    let (clean_cache, _) = stat_cache_for(&file, oid, real_mode);
    let expected_reuse = Some(TrackedEntry {
        mode: real_mode,
        oid,
    });
    assert_eq!(
        clean_cache.reuse_tracked_entry(b"f.txt", &metadata),
        expected_reuse,
        "a clean non-racy stat+mode match must reuse the staged oid"
    );

    // A path that was never cached cannot be reused.
    let uncached = clean_cache.reuse_tracked_entry(b"other.txt", &metadata);
    assert_eq!(
        uncached, None,
        "a path with no cached entry must fall through to hashing"
    );

    // Cached size is one byte smaller than the file on disk.
    let (mut size_cache, mut shrunk) = stat_cache_for(&file, oid, real_mode);
    shrunk.size = shrunk.size.saturating_sub(1);
    let shrunk_key = shrunk.path.to_vec();
    size_cache.entries.insert(shrunk_key, shrunk);
    let size_mismatch = size_cache.reuse_tracked_entry(b"f.txt", &metadata);
    assert_eq!(
        size_mismatch, None,
        "a size mismatch must fall through to hashing"
    );

    // Cached mode is executable (e.g. after a chmod that left mtime alone).
    let (mode_cache, _) = stat_cache_for(&file, oid, 0o100755);
    let mode_mismatch = mode_cache.reuse_tracked_entry(b"f.txt", &metadata);
    assert_eq!(
        mode_mismatch, None,
        "a mode mismatch must fall through to hashing"
    );

    // Index mtime equal to the entry mtime: not strictly newer, hence racy.
    let (mut racy_cache, racy_entry) = stat_cache_for(&file, oid, real_mode);
    let entry_mtime = (
        u64::from(racy_entry.mtime_seconds),
        u64::from(racy_entry.mtime_nanoseconds),
    );
    racy_cache.index_mtime = Some(entry_mtime);
    let racy = racy_cache.reuse_tracked_entry(b"f.txt", &metadata);
    assert_eq!(
        racy, None,
        "a racily-clean entry must always be re-hashed"
    );

    // Without any index mtime the cache cannot rule out a race.
    let (mut unknown_cache, _) = stat_cache_for(&file, oid, real_mode);
    unknown_cache.index_mtime = None;
    let unknown = unknown_cache.reuse_tracked_entry(b"f.txt", &metadata);
    assert_eq!(
        unknown, None,
        "an unknown index mtime must be treated conservatively as racy"
    );

    fs::remove_dir_all(&root).expect("test operation should succeed");
}
12901
/// Builds one `IndexStatProbeCache` from a two-file repository and checks
/// that it answers lookups for both paths, reports the same index mtime on
/// every probe, returns nothing for an unknown path, and agrees with the
/// single-path `IndexStatProbe::from_repository_index` constructor.
#[test]
fn index_stat_probe_cache_serves_many_paths_from_one_index_parse() {
    let root = temp_root();
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    for (name, contents) in [("a.txt", &b"alpha\n"[..]), ("b.txt", &b"bravo\n"[..])] {
        fs::write(root.join(name), contents).expect("test operation should succeed");
    }
    build_commit(&root, &git_dir, &["a.txt", "b.txt"]);

    let cache = IndexStatProbeCache::from_repository_index(&git_dir, ObjectFormat::Sha1)
        .expect("probe cache");
    let shared_mtime = cache.index_mtime();
    assert_eq!(cache.len(), 2);
    assert!(cache.contains_git_path(b"a.txt"));
    assert!(cache.contains_git_path(b"b.txt"));

    let probe_a = cache.probe_for_git_path(b"a.txt").expect("a probe");
    let probe_b = cache.probe_for_git_path(b"b.txt").expect("b probe");
    assert_eq!(probe_a.entry().path, b"a.txt");
    assert_eq!(probe_b.entry().path, b"b.txt");
    assert_eq!(probe_a.index_mtime(), shared_mtime);
    assert_eq!(probe_b.index_mtime(), shared_mtime);

    let absent = cache.probe_for_git_path(b"missing.txt");
    assert!(
        absent.is_none(),
        "missing paths should not allocate probes"
    );

    // The per-path constructor must describe the same entry and index mtime.
    let one_shot = IndexStatProbe::from_repository_index(&git_dir, ObjectFormat::Sha1, b"a.txt")
        .expect("legacy one-shot probe")
        .expect("a probe");
    assert_eq!(one_shot.entry().path, b"a.txt");
    assert_eq!(one_shot.index_mtime(), cache.index_mtime());

    fs::remove_dir_all(&root).expect("test operation should succeed");
}
12936
/// Replacing a committed file with different bytes of the same length must
/// still be reported as ` M`, even though the size gives no hint of the edit.
#[test]
fn short_status_detects_same_length_content_change() {
    let root = temp_root();
    let git_dir = root.join(".git");
    let tracked_file = root.join("f.txt");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::write(&tracked_file, b"aaaa\n").expect("test operation should succeed");
    build_commit(&root, &git_dir, &["f.txt"]);

    // The rewrite happens immediately after staging, so the index file is not
    // guaranteed to be strictly newer than the file; a stat-only comparison
    // cannot be relied on here and the content has to be examined.
    fs::write(&tracked_file, b"bbbb\n").expect("test operation should succeed");

    let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let lines: Vec<_> = status.iter().map(ShortStatusEntry::line).collect();
    assert_eq!(
        lines,
        vec![" M f.txt"],
        "a same-length content change must be reported modified"
    );

    fs::remove_dir_all(&root).expect("test operation should succeed");
}
12960
/// Rewriting a committed file with exactly the same bytes (after a short
/// pause so the write happens at a later time) must leave the status empty.
#[test]
fn short_status_clean_after_byte_identical_rewrite() {
    let root = temp_root();
    let git_dir = root.join(".git");
    let tracked_file = root.join("f.txt");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::write(&tracked_file, b"hello\n").expect("test operation should succeed");
    build_commit(&root, &git_dir, &["f.txt"]);

    // Pause, then rewrite identical content: the intent is for the mtime to
    // move so the stat shortcut cannot apply and only hashing proves the file
    // is unchanged.
    let pause = std::time::Duration::from_millis(20);
    std::thread::sleep(pause);
    fs::write(&tracked_file, b"hello\n").expect("test operation should succeed");

    let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    assert!(
        status.is_empty(),
        "a byte-identical rewrite must be clean via the fallback hash, got {status:?}"
    );

    fs::remove_dir_all(&root).expect("test operation should succeed");
}
12980
/// Proves that `short_status` relies on the stat cache for a non-racy entry.
///
/// The index entry for `f.txt` is rewritten to carry an all-zero oid while
/// the worktree file stays untouched. If status hashed the file, the real oid
/// would disagree with the planted one and the worktree column would read
/// `M`; a blank worktree column therefore shows no re-hash took place.
#[test]
fn short_status_trusts_stat_cache_and_skips_rehash() {
    let root = temp_root();
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
    build_commit(&root, &git_dir, &["f.txt"]);

    let index_path = repository_index_path(&git_dir);
    let mut index = read_index(&git_dir);
    let bogus = ObjectId::from_hex(ObjectFormat::Sha1, &"0".repeat(40))
        .expect("test operation should succeed");
    let real_oid = index_entry_for(&index, b"f.txt").oid;
    assert_ne!(
        real_oid, bogus,
        "fixture oid should differ from the bogus oid"
    );

    // Swap in the bogus oid; the entry's stat fields are left as staged.
    let staged = index
        .entries
        .iter_mut()
        .find(|candidate| candidate.path == b"f.txt")
        .expect("test operation should succeed");
    staged.oid = bogus;
    let serialized = index
        .write(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    fs::write(&index_path, serialized).expect("test operation should succeed");

    // Wait longer than one second and rewrite the index with its own bytes so
    // that its mtime lands strictly after the entry mtime; otherwise the
    // racy-clean guard would force hashing and defeat the purpose of the test.
    std::thread::sleep(std::time::Duration::from_millis(1100));
    let index_bytes = fs::read(&index_path).expect("test operation should succeed");
    fs::write(&index_path, index_bytes).expect("test operation should succeed");

    // The index-vs-HEAD column is expected to differ (the index oid no longer
    // matches HEAD), which is why the row exists at all; only the worktree
    // column and the reported index oid are asserted below.
    let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let row = status
        .iter()
        .find(|candidate| candidate.path == b"f.txt")
        .expect("f.txt should appear (its index oid now differs from HEAD)");
    assert_eq!(
        row.worktree, b' ',
        "non-racy stat match must trust the cached oid (no re-hash); worktree column was {}",
        row.worktree as char
    );
    assert_eq!(
        row.index_oid.as_ref(),
        Some(&bogus),
        "the worktree entry must have reused the planted bogus index oid, not the real hash"
    );

    fs::remove_dir_all(&root).expect("test operation should succeed");
}
13049
/// With a probe built from the freshly staged entry, overwriting the file
/// with different bytes of equal length must be classified as `Modified`.
#[test]
fn worktree_entry_state_detects_same_size_content_change() {
    let root = temp_root();
    let git_dir = root.join(".git");
    let tracked_file = root.join("f.txt");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::write(&tracked_file, b"aaaa\n").expect("test operation should succeed");
    build_commit(&root, &git_dir, &["f.txt"]);

    // Capture the staged entry and a probe for it before touching the file.
    let index = read_index(&git_dir);
    let entry = index_entry_for(&index, b"f.txt").clone();
    let index_path = repository_index_path(&git_dir);
    let probe = IndexStatProbe::from_index_entry_and_index_path(entry.clone(), index_path);

    fs::write(&tracked_file, b"bbbb\n").expect("test operation should succeed");

    let relative_path = Path::new("f.txt");
    let state = worktree_entry_state(
        &root,
        &git_dir,
        ObjectFormat::Sha1,
        relative_path,
        &entry.oid,
        entry.mode,
        Some(&probe),
    )
    .expect("test operation should succeed");
    assert_eq!(state, WorktreeEntryState::Modified);

    fs::remove_dir_all(&root).expect("test operation should succeed");
}
13079
/// A tracked path counts as `Deleted` both when the file itself is gone and
/// when its parent directory has been replaced by a regular file.
#[test]
fn worktree_entry_state_reports_deleted_for_missing_and_parent_not_directory() {
    let root = temp_root();
    let git_dir = root.join(".git");
    let dir = root.join("dir");
    let tracked_file = dir.join("f.txt");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(&dir).expect("test operation should succeed");
    fs::write(&tracked_file, b"hello\n").expect("test operation should succeed");
    build_commit(&root, &git_dir, &["dir/f.txt"]);

    let index = read_index(&git_dir);
    let entry = index_entry_for(&index, b"dir/f.txt").clone();

    // Both scenarios issue the same query; only the worktree differs.
    let current_state = || {
        worktree_entry_state_by_git_path(
            &root,
            &git_dir,
            ObjectFormat::Sha1,
            b"dir/f.txt",
            &entry.oid,
            entry.mode,
            None,
        )
        .expect("test operation should succeed")
    };

    // Scenario 1: the file is removed but its directory remains.
    fs::remove_file(&tracked_file).expect("test operation should succeed");
    let missing = current_state();
    assert_eq!(missing, WorktreeEntryState::Deleted);

    // Scenario 2: `dir` now names a regular file, so `dir/f.txt` cannot exist.
    fs::remove_dir(&dir).expect("test operation should succeed");
    fs::write(&dir, b"not a directory").expect("test operation should succeed");
    let parent_not_directory = current_state();
    assert_eq!(parent_not_directory, WorktreeEntryState::Deleted);

    fs::remove_dir_all(&root).expect("test operation should succeed");
}
13121
/// When the probe shows a stat match and the index file is newer than the
/// entry, `worktree_entry_state` must answer `Clean` without hashing. The
/// index entry carries a planted oid that the real file content cannot hash
/// to, so any re-hash would surface as a different state.
#[test]
fn worktree_entry_state_trusts_clean_non_racy_probe() {
    let root = temp_root();
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
    build_commit(&root, &git_dir, &["f.txt"]);

    // Replace the staged oid with an arbitrary one and persist the index.
    let index_path = repository_index_path(&git_dir);
    let mut index = read_index(&git_dir);
    let bogus = ObjectId::from_hex(ObjectFormat::Sha1, &"1".repeat(40))
        .expect("test operation should succeed");
    let staged = index
        .entries
        .iter_mut()
        .find(|candidate| candidate.path == b"f.txt")
        .expect("test operation should succeed");
    staged.oid = bogus;
    let serialized = index
        .write(ObjectFormat::Sha1)
        .expect("test operation should succeed");
    fs::write(&index_path, serialized).expect("test operation should succeed");

    // Sleep past one second, then rewrite the index with its own bytes so its
    // mtime ends up later than the entry's mtime.
    std::thread::sleep(std::time::Duration::from_millis(1100));
    let index_bytes = fs::read(&index_path).expect("test operation should succeed");
    fs::write(&index_path, index_bytes).expect("test operation should succeed");

    // Re-read so the probe is built from what is actually on disk.
    let index = read_index(&git_dir);
    let entry = index_entry_for(&index, b"f.txt").clone();
    let probe = IndexStatProbe::from_index_entry_and_index_path(
        entry.clone(),
        repository_index_path(&git_dir),
    );

    let relative_path = Path::new("f.txt");
    let state = worktree_entry_state(
        &root,
        &git_dir,
        ObjectFormat::Sha1,
        relative_path,
        &entry.oid,
        entry.mode,
        Some(&probe),
    )
    .expect("test operation should succeed");
    assert_eq!(
        state,
        WorktreeEntryState::Clean,
        "a non-racy stat match must be enough to prove this path clean"
    );

    fs::remove_dir_all(&root).expect("test operation should succeed");
}
13177
/// A probe whose index mtime equals the entry mtime is racy, so the stat
/// match must not be trusted: the file is hashed, the hash disagrees with the
/// planted oid, and the result is `Modified`.
#[test]
fn worktree_entry_state_rehashes_racy_probe() {
    let root = temp_root();
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
    build_commit(&root, &git_dir, &["f.txt"]);

    // Take the staged entry and give the in-memory copy an arbitrary oid.
    let index = read_index(&git_dir);
    let mut entry = index_entry_for(&index, b"f.txt").clone();
    entry.oid = ObjectId::from_hex(ObjectFormat::Sha1, &"2".repeat(40))
        .expect("test operation should succeed");

    // Pretend the index was written at exactly the entry's own mtime.
    let racy_index_mtime = (
        u64::from(entry.mtime_seconds),
        u64::from(entry.mtime_nanoseconds),
    );
    let probe = IndexStatProbe::from_index_entry(entry.clone(), Some(racy_index_mtime));

    let relative_path = Path::new("f.txt");
    let state = worktree_entry_state(
        &root,
        &git_dir,
        ObjectFormat::Sha1,
        relative_path,
        &entry.oid,
        entry.mode,
        Some(&probe),
    )
    .expect("test operation should succeed");
    assert_eq!(
        state,
        WorktreeEntryState::Modified,
        "a racily-clean stat match must fall through to hashing"
    );

    fs::remove_dir_all(&root).expect("test operation should succeed");
}
13215
/// Adding the executable bits to a committed file, with its content left
/// alone, must be classified as `Modified` (no probe is supplied).
#[cfg(unix)]
#[test]
fn worktree_entry_state_detects_chmod_only_change() {
    use std::os::unix::fs::PermissionsExt;

    let root = temp_root();
    let git_dir = root.join(".git");
    let file = root.join("f.txt");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::write(&file, b"hello\n").expect("test operation should succeed");
    build_commit(&root, &git_dir, &["f.txt"]);

    let index = read_index(&git_dir);
    let entry = index_entry_for(&index, b"f.txt").clone();

    // Turn on execute for user, group and other on top of the current mode.
    let mut permissions = fs::metadata(&file)
        .expect("test operation should succeed")
        .permissions();
    let with_execute_bits = permissions.mode() | 0o111;
    permissions.set_mode(with_execute_bits);
    fs::set_permissions(&file, permissions).expect("test operation should succeed");

    let relative_path = Path::new("f.txt");
    let state = worktree_entry_state(
        &root,
        &git_dir,
        ObjectFormat::Sha1,
        relative_path,
        &entry.oid,
        entry.mode,
        None,
    )
    .expect("test operation should succeed");
    assert_eq!(state, WorktreeEntryState::Modified);

    fs::remove_dir_all(&root).expect("test operation should succeed");
}
13249
/// Re-pointing a committed symlink from `one` to `two` must be classified as
/// `Modified` (no probe is supplied).
#[cfg(unix)]
#[test]
fn worktree_entry_state_detects_symlink_target_change() {
    use std::os::unix::fs::symlink;

    let root = temp_root();
    let git_dir = root.join(".git");
    let link = root.join("link");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    symlink("one", &link).expect("test operation should succeed");
    build_commit(&root, &git_dir, &["link"]);

    let index = read_index(&git_dir);
    let entry = index_entry_for(&index, b"link").clone();

    // Recreate the link with a different target of the same length.
    fs::remove_file(&link).expect("test operation should succeed");
    symlink("two", &link).expect("test operation should succeed");

    let relative_path = Path::new("link");
    let state = worktree_entry_state(
        &root,
        &git_dir,
        ObjectFormat::Sha1,
        relative_path,
        &entry.oid,
        entry.mode,
        None,
    )
    .expect("test operation should succeed");
    assert_eq!(state, WorktreeEntryState::Modified);

    fs::remove_dir_all(&root).expect("test operation should succeed");
}
13279
/// An entry with mode `0o160000` whose worktree path is an existing, empty
/// directory must be classified as `Clean`.
#[test]
fn worktree_entry_state_treats_present_unpopulated_gitlink_directory_as_clean() {
    let root = temp_root();
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::create_dir_all(root.join("submodule")).expect("test operation should succeed");

    // Arbitrary oid: no object with this id is written anywhere in the test.
    let oid = ObjectId::from_hex(ObjectFormat::Sha1, &"3".repeat(40))
        .expect("test operation should succeed");
    let gitlink_mode = 0o160000;
    let relative_path = Path::new("submodule");

    let state = worktree_entry_state(
        &root,
        &git_dir,
        ObjectFormat::Sha1,
        relative_path,
        &oid,
        gitlink_mode,
        None,
    )
    .expect("test operation should succeed");
    assert_eq!(state, WorktreeEntryState::Clean);

    fs::remove_dir_all(&root).expect("test operation should succeed");
}
13303
/// A repository whose `HEAD` points at a branch that has no commit yet, with
/// nothing in the worktree, must report an empty status.
#[test]
fn short_status_empty_on_unborn_repository() {
    let root = temp_root();
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    // Only a symbolic HEAD is written; the test never creates the branch ref.
    let head_file = git_dir.join("HEAD");
    fs::write(&head_file, "ref: refs/heads/main\n").expect("test operation should succeed");

    let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    assert!(
        status.is_empty(),
        "an unborn repository with an empty worktree must be clean, got {status:?}"
    );

    fs::remove_dir_all(&root).expect("test operation should succeed");
}
13319
/// A directory that contains its own `.git` must be listed as a single
/// untracked directory (`not-a-submodule/`), and nothing beneath its `.git`
/// may appear in the listing.
#[test]
fn untracked_paths_skips_embedded_git_internals() {
    let root = temp_root();
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n")
        .expect("test operation should succeed");

    // Lay out an embedded repository: its own `.git/HEAD` plus one file.
    let nested = root.join("not-a-submodule");
    let nested_git_dir = nested.join(".git");
    fs::create_dir_all(&nested_git_dir).expect("test operation should succeed");
    fs::write(nested.join(".git/HEAD"), "ref: refs/heads/main\n")
        .expect("test operation should succeed");
    fs::write(nested.join("file.txt"), b"inside\n").expect("test operation should succeed");

    let paths = untracked_paths(&root, &git_dir, ObjectFormat::Sha1)
        .expect("test operation should succeed");

    let lists_embedded_directory = paths.iter().any(|path| path == b"not-a-submodule/");
    assert!(
        lists_embedded_directory,
        "embedded repository directory should be listed, got {paths:?}"
    );

    let hides_git_internals = paths
        .iter()
        .all(|path| !path.starts_with(b"not-a-submodule/.git"));
    assert!(
        hides_git_internals,
        "embedded .git internals must not be listed, got {paths:?}"
    );

    fs::remove_dir_all(&root).expect("test operation should succeed");
}
13346
/// An untracked symlink in the worktree root must show up in the untracked
/// listing under its own name.
#[cfg(unix)]
#[test]
fn untracked_paths_lists_symlink() {
    use std::os::unix::fs::symlink;

    let root = temp_root();
    let git_dir = root.join(".git");
    fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
    fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n")
        .expect("test operation should succeed");

    // `path1` is a symlink to a regular file that lives next to it.
    let target = root.join("target.txt");
    let link = root.join("path1");
    fs::write(&target, b"target\n").expect("test operation should succeed");
    symlink(&target, &link).expect("create symlink");

    let paths = untracked_paths(&root, &git_dir, ObjectFormat::Sha1)
        .expect("test operation should succeed");
    let lists_symlink = paths.iter().any(|path| path == b"path1");
    assert!(
        lists_symlink,
        "untracked symlink must be listed, got {paths:?}"
    );

    fs::remove_dir_all(&root).expect("test operation should succeed");
}
13367}