Skip to main content

sley_worktree/
lib.rs

1use sley_config::GitConfig;
2use sley_core::{
3    BString, GitError, MissingObjectContext, MissingObjectKind, ObjectFormat, ObjectId, RepoPath,
4    Result,
5};
6use sley_index::{CacheTree, Index, IndexEntry, Stage};
7use sley_object::{Commit, EncodedObject, ObjectType, Tree, TreeEntry, tree_entry_object_type};
8use sley_odb::{FileObjectDatabase, ObjectReader, ObjectWriter};
9use sley_refs::{FileRefStore, RefTarget, RefUpdate, ReflogEntry, branch_ref_name};
10use std::borrow::Cow;
11use std::cell::RefCell;
12use std::cmp::Ordering;
13use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
14use std::io::Write;
15use std::path::{Path, PathBuf};
16use std::process::{Command, Stdio};
17use std::sync::{Mutex, OnceLock};
18use std::time::UNIX_EPOCH;
19use std::{env, fs};
20
/// Status classification for a worktree path, as reported by
/// [`WorktreeScanner::status`].
///
/// NOTE(review): the per-variant meanings are inferred from the variant names
/// only; confirm against the scanner implementations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum WorktreeStatus {
    /// Carries no path.
    Clean,
    /// The given path is reported as modified.
    Modified(RepoPath),
    /// The given path is reported as added.
    Added(RepoPath),
    /// The given path is reported as deleted.
    Deleted(RepoPath),
    /// The given path is reported as untracked.
    Untracked(RepoPath),
}
29
/// A source of worktree status information.
pub trait WorktreeScanner {
    /// Produce the list of [`WorktreeStatus`] values for this scanner, or an
    /// error if the status could not be computed.
    fn status(&self) -> Result<Vec<WorktreeStatus>>;
}
33
/// A sparse-checkout definition: the raw pattern list plus the sparse-index
/// flag. How the patterns are interpreted is selected by
/// [`SparseCheckoutMode`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SparseCheckout {
    /// The patterns, one byte string per pattern.
    pub patterns: Vec<Vec<u8>>,
    /// NOTE(review): presumably mirrors git's `index.sparse` setting — confirm.
    pub sparse_index: bool,
}
39
/// Selects how the patterns in a [`SparseCheckout`] are interpreted when
/// deciding which index paths are "in cone" (kept in the worktree).
///
/// * [`SparseCheckoutMode::Full`] interprets the patterns exactly like
///   `.gitignore` lines (full pattern matching, including `*`, `?`, `**`,
///   character classes, anchoring with a leading `/`, directory-only `/`
///   suffixes, and `!` negation). A path is *included* when the last pattern
///   that matches it is not negated. This mirrors upstream Git's non-cone
///   `core.sparseCheckout` behaviour.
/// * [`SparseCheckoutMode::Cone`] interprets the patterns as the restricted
///   directory-prefix form Git emits for `core.sparseCheckoutCone`: a literal
///   `/*` (top-level files), the recursive-parent guard `!/*/`, and anchored
///   directory patterns such as `/dir/` (everything under `dir/`) plus the
///   parent guards `/dir/*` and `!/dir/*/`. Matching is purely prefix based,
///   so glob metacharacters are treated literally.
/// * [`SparseCheckoutMode::Auto`] inspects the patterns and uses cone matching
///   when every pattern fits the cone grammar above, otherwise full matching.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SparseCheckoutMode {
    /// Pick cone or full matching by inspecting the patterns. This is the
    /// `Default` value.
    #[default]
    Auto,
    /// Full `.gitignore`-style pattern matching.
    Full,
    /// Prefix-only cone matching.
    Cone,
}
64
/// Outcome of applying a sparse-checkout definition, split into the paths that
/// were materialized, skipped, or left alone because they were not up to date.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ApplySparseResult {
    /// Paths whose worktree file was (re)materialized because they are in cone.
    pub materialized: Vec<Vec<u8>>,
    /// Paths that were taken out of the worktree because they are out of cone;
    /// their index entry now has the skip-worktree bit set.
    pub skipped: Vec<Vec<u8>>,
    /// Out-of-cone paths whose worktree file was *not* up to date with the index
    /// and was therefore left in place (and its skip-worktree bit left clear),
    /// matching git's data-loss-avoiding behavior. The caller surfaces these as
    /// git's "The following paths are not up to date …" warning. Sorted by path.
    pub not_up_to_date: Vec<Vec<u8>>,
}
78
/// Result returned by the `update_index_*` / `add_paths_to_index*` entry
/// points.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UpdateIndexResult {
    /// NOTE(review): presumably the number of entries in the index after the
    /// update — confirm against `update_index_paths_impl`.
    pub entries: usize,
    /// Object ids recorded for the updated paths.
    /// NOTE(review): ordering and exact contents are not visible here — confirm.
    pub updated: Vec<ObjectId>,
}
84
/// An explicit index entry description: mode, object id, path and stage.
///
/// NOTE(review): the name suggests this models `update-index --cacheinfo`
/// input — confirm against the callers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CacheInfoEntry {
    /// File mode for the entry.
    pub mode: u32,
    /// Object id the entry points at.
    pub oid: ObjectId,
    /// Path of the entry, as raw bytes.
    pub path: Vec<u8>,
    /// Stage number for the entry.
    pub stage: u16,
}
92
/// One index-manipulation record: either add an explicit entry or remove a
/// path.
///
/// NOTE(review): presumably one parsed `update-index --index-info` line —
/// confirm against the parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexInfoRecord {
    /// Add the described entry.
    Add(CacheInfoEntry),
    /// Remove the entry at `path` (raw bytes).
    Remove { path: Vec<u8> },
}
98
/// Batch-wide switches for the `update_index_*` entry points.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct UpdateIndexOptions {
    /// Allow paths that are not yet in the index. Without it, such a path is
    /// rejected with "cannot add to the index - missing --add option?".
    pub add: bool,
    /// Drop the index entry of a path that is missing from the worktree.
    /// Without it, a missing path fails with "does not exist and --remove not
    /// passed".
    pub remove: bool,
    /// Remove each path's index entry without inspecting the worktree.
    pub force_remove: bool,
    /// Batch-wide `--chmod` state; the plain-path entry points copy it onto
    /// every path (see [`UpdateIndexPath::chmod`]).
    pub chmod: Option<bool>,
    /// NOTE(review): presumably git's `--info-only` (record the entry without
    /// writing the object) — its use is outside this view; confirm.
    pub info_only: bool,
    /// When a path's existing entry has the skip-worktree bit set and `remove`
    /// is requested, the entry is removed only if this flag is `false`.
    pub ignore_skip_worktree_entries: bool,
}
108
/// A single positional path passed to `update-index`, together with the
/// `--chmod` state that was active at the point the path was seen on the
/// command line. git applies `--chmod=(+|-)x` as a stateful flag that affects
/// every *subsequent* path until overridden, so `--chmod=+x A --chmod=-x B`
/// flips A executable and B non-executable. Each path also reports its action
/// (`add '<p>'`, `remove '<p>'`, `chmod (+|-)x '<p>'`) inline under `--verbose`,
/// interleaved in command-line order — which is why the chmod state must travel
/// with the path rather than as a single batch-wide flag.
#[derive(Debug, Clone)]
pub struct UpdateIndexPath {
    /// The path as given; absolute, or relative to the worktree root.
    pub path: PathBuf,
    /// The `--chmod` state in effect for this path; `None` when no `--chmod`
    /// applies.
    /// NOTE(review): presumably `Some(true)` is `+x` and `Some(false)` is `-x` —
    /// confirm.
    pub chmod: Option<bool>,
}
122
/// Options for writing a tree from the index. `Default` is `missing_ok: false`
/// with no prefix.
///
/// NOTE(review): the fields presumably mirror `git write-tree --missing-ok` and
/// `--prefix=<dir>` — confirm against the write-tree implementation.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct WriteTreeOptions {
    /// See the type-level note.
    pub missing_ok: bool,
    /// Optional path prefix, as raw bytes.
    pub prefix: Option<Vec<u8>>,
}
128
/// One entry of short-format status output: a two-byte status code, the path,
/// and the modes/object ids that were compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ShortStatusEntry {
    /// First status byte (the index column); rendered as a character by
    /// [`ShortStatusEntry::line`]. `b'?'` in both columns marks an untracked path.
    pub index: u8,
    /// Second status byte (the worktree column).
    pub worktree: u8,
    /// Path of the entry, as raw bytes.
    pub path: Vec<u8>,
    /// Mode of the path in HEAD, when known.
    /// NOTE(review): `None` presumably means "absent on that side" for all the
    /// mode/oid fields below — confirm.
    pub head_mode: Option<u32>,
    /// Mode of the path in the index, when known.
    pub index_mode: Option<u32>,
    /// Mode of the path in the worktree, when known.
    pub worktree_mode: Option<u32>,
    /// Object id of the path in HEAD, when known.
    pub head_oid: Option<ObjectId>,
    /// Object id of the path in the index, when known.
    pub index_oid: Option<ObjectId>,
    /// For a tracked gitlink (submodule) path: how the submodule's working
    /// state differs from the staged gitlink. `None` for ordinary paths.
    pub submodule: Option<SubmoduleStatus>,
}
143
/// Per-submodule change detail attached to a status entry. It corresponds to
/// upstream's `wt_status_change_data` trio: `new_submodule_commits` and the
/// `DIRTY_SUBMODULE_MODIFIED` / `DIRTY_SUBMODULE_UNTRACKED` dirty bits.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct SubmoduleStatus {
    /// The submodule's checked-out HEAD differs from the staged gitlink oid.
    pub new_commits: bool,
    /// The submodule has staged or unstaged changes to tracked files.
    pub modified_content: bool,
    /// The submodule has untracked files.
    pub untracked_content: bool,
}

impl SubmoduleStatus {
    /// Returns `true` when at least one of the three change flags is set.
    pub fn any(&self) -> bool {
        let Self {
            new_commits,
            modified_content,
            untracked_content,
        } = *self;
        [new_commits, modified_content, untracked_content].contains(&true)
    }
}
162
/// Submodule dirt-mask bit: the submodule has staged or unstaged changes to
/// tracked files (upstream `DIRTY_SUBMODULE_MODIFIED`).
pub const DIRTY_SUBMODULE_MODIFIED: u8 = 0b01;
/// Submodule dirt-mask bit: the submodule has untracked files (upstream
/// `DIRTY_SUBMODULE_UNTRACKED`).
pub const DIRTY_SUBMODULE_UNTRACKED: u8 = 0b10;
169
170/// Inspect the working state of the submodule whose worktree is at `sub_root`
171/// and report its dirt mask: [`DIRTY_SUBMODULE_MODIFIED`] for staged/unstaged
172/// changes to tracked files, [`DIRTY_SUBMODULE_UNTRACKED`] for untracked
173/// files. Returns 0 for a clean submodule — and for a directory that is not a
174/// populated repository at all (upstream treats an unpopulated gitlink as
175/// always unchanged). The native equivalent of upstream's
176/// `is_submodule_modified()` (which runs `git status --porcelain=2` inside the
177/// submodule and classifies `?` lines as untracked, everything else as
178/// modified).
179pub fn submodule_dirt(sub_root: &Path) -> u8 {
180    let Some(git_dir) = sley_diff_merge::gitlink_git_dir(sub_root) else {
181        return 0;
182    };
183    let Ok(config) = sley_config::read_repo_config(&git_dir, None) else {
184        return 0;
185    };
186    let Ok(format) = config.repository_object_format() else {
187        return 0;
188    };
189    let Ok(entries) = short_status_with_options(
190        sub_root,
191        &git_dir,
192        format,
193        ShortStatusOptions {
194            include_ignored: false,
195            untracked_mode: StatusUntrackedMode::Normal,
196        },
197    ) else {
198        return 0;
199    };
200    let mut dirt = 0;
201    for entry in entries {
202        if entry.index == b'?' && entry.worktree == b'?' {
203            dirt |= DIRTY_SUBMODULE_UNTRACKED;
204        } else {
205            dirt |= DIRTY_SUBMODULE_MODIFIED;
206        }
207    }
208    dirt
209}
210
/// How untracked files are reported by the status machinery. The `Default`
/// value is [`StatusUntrackedMode::All`].
///
/// NOTE(review): the variants presumably correspond to git's
/// `--untracked-files=all|normal|no` — confirm against the status walker.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StatusUntrackedMode {
    /// The default mode.
    #[default]
    All,
    /// The mode [`submodule_dirt`] uses when inspecting a submodule.
    Normal,
    /// See the type-level note.
    None,
}
218
/// Options accepted by `short_status_with_options`. `Default` is
/// `include_ignored: false` with [`StatusUntrackedMode::All`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct ShortStatusOptions {
    /// NOTE(review): presumably whether ignored paths are reported too — confirm.
    pub include_ignored: bool,
    /// How untracked files are reported.
    pub untracked_mode: StatusUntrackedMode,
}
224
/// The worktree state of one tracked path relative to an expected index/tree
/// entry.
///
/// NOTE(review): presumably the result type of `worktree_entry_state` and
/// `worktree_entry_state_by_git_path` (both outside this view) — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WorktreeEntryState {
    /// The path exists in the worktree and matches the expected mode/object id.
    Clean,
    /// The path exists, but its type, mode, filtered content, symlink target, or
    /// gitlink HEAD differs from the expected entry.
    Modified,
    /// The path, or one of its parents, is missing from the worktree.
    Deleted,
}
237
/// Durability switches for an atomic metadata write. `Default` leaves both
/// flags off.
///
/// NOTE(review): the writer that consumes these is outside this view; the field
/// meanings are inferred from their names — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct AtomicMetadataWriteOptions {
    /// Presumably: fsync the written file.
    pub fsync_file: bool,
    /// Presumably: fsync the containing directory.
    pub fsync_dir: bool,
}
243
/// Result of an atomic metadata write.
///
/// NOTE(review): the writer that produces this is outside this view; the field
/// meanings are inferred from names and types — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AtomicMetadataWriteResult {
    /// Path of the written file.
    pub path: PathBuf,
    /// Length in bytes.
    pub len: u64,
    /// File mtime, when available. Presumably `(seconds, nanoseconds)` like the
    /// index mtime used elsewhere in this file — confirm.
    pub mtime: Option<(u64, u64)>,
}
250
/// Stage-0 index stat data that can prove a worktree path clean without
/// re-reading and re-hashing it.
///
/// This is the public carrier for sley's racy-git shortcut. Callers that already
/// parsed `.git/index` can build a probe from the matching [`IndexEntry`] and
/// the index file's mtime, then pass it to [`worktree_entry_state`] or
/// [`worktree_entry_state_by_git_path`]. The probe is trusted only when its path,
/// mode, and object id match the expected entry and the cached stat is not
/// racily clean; otherwise the helper falls back to the same content hashing
/// path used by [`short_status_with_options`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexStatProbe {
    /// The index entry the probe was built from; exposed via `entry()`.
    entry: IndexEntry,
    /// Index file mtime as `(seconds, nanoseconds)`, used as the racy-clean
    /// reference timestamp; `None` when it could not be determined.
    index_mtime: Option<(u64, u64)>,
}
266
/// Reusable stage-0 index stat probes for many worktree paths.
///
/// Prefer this over repeated [`IndexStatProbe::from_repository_index`] calls
/// when an embedder needs to verify many paths. It parses `.git/index` once,
/// records the index file mtime used for racy-git checks, and serves cheap
/// per-path probes from memory.
///
/// The `Default` value is an empty cache with no recorded mtime.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct IndexStatProbeCache {
    /// Stage-0 entries keyed by their git path bytes.
    entries: HashMap<Vec<u8>, IndexEntry>,
    /// Index file mtime handed to every probe served from this cache.
    index_mtime: Option<(u64, u64)>,
}
278
279impl IndexStatProbe {
280    /// Build a probe from a parsed stage-0 index entry and the index file's mtime
281    /// split as `(seconds, nanoseconds)`.
282    pub fn from_index_entry(entry: IndexEntry, index_mtime: Option<(u64, u64)>) -> Self {
283        Self { entry, index_mtime }
284    }
285
286    /// Build a probe from a parsed index entry and the path of the index file on
287    /// disk, using that file's mtime as the racy-clean reference timestamp.
288    pub fn from_index_entry_and_index_path(
289        entry: IndexEntry,
290        index_path: impl AsRef<Path>,
291    ) -> Self {
292        let index_mtime = fs::metadata(index_path.as_ref())
293            .ok()
294            .and_then(|metadata| file_mtime_parts(&metadata));
295        Self { entry, index_mtime }
296    }
297
298    /// Read this repository's index and return a probe for `git_path` when a
299    /// stage-0 entry exists.
300    ///
301    /// For repeated lookups prefer [`IndexStatProbeCache::from_repository_index`]
302    /// and [`IndexStatProbeCache::probe_for_git_path`]. This one-shot helper
303    /// keeps a small process-local cache for back-to-back calls against an
304    /// unchanged index, but the explicit cache makes ownership and invalidation
305    /// clearer for high-volume embedders.
306    pub fn from_repository_index(
307        git_dir: impl AsRef<Path>,
308        format: ObjectFormat,
309        git_path: &[u8],
310    ) -> Result<Option<Self>> {
311        let index_path = repository_index_path(git_dir);
312        cached_repository_index_stat_probe(&index_path, format, git_path)
313    }
314
315    /// The parsed index entry this probe was built from.
316    pub fn entry(&self) -> &IndexEntry {
317        &self.entry
318    }
319
320    /// The index file mtime used as the racy-clean reference timestamp.
321    pub fn index_mtime(&self) -> Option<(u64, u64)> {
322        self.index_mtime
323    }
324
325    fn stat_cache_for(
326        &self,
327        git_path: &[u8],
328        expected_oid: &ObjectId,
329        expected_mode: u32,
330    ) -> Option<IndexStatCache> {
331        if index_entry_stage(&self.entry) != 0
332            || self.entry.path.as_bytes() != git_path
333            || self.entry.oid != *expected_oid
334            || self.entry.mode != expected_mode
335        {
336            return None;
337        }
338        let mut entries = HashMap::new();
339        entries.insert(git_path.to_vec(), self.entry.clone());
340        Some(IndexStatCache {
341            entries,
342            index_mtime: self.index_mtime,
343        })
344    }
345}
346
347impl IndexStatProbeCache {
348    /// Build a reusable probe cache from an already parsed index and index-file
349    /// mtime.
350    pub fn from_index(index: &Index, index_mtime: Option<(u64, u64)>) -> Self {
351        Self {
352            entries: stage0_index_entries(index),
353            index_mtime,
354        }
355    }
356
357    /// Read this repository's index once and build reusable stat probes.
358    ///
359    /// A missing index returns an empty cache, matching the one-shot helper's
360    /// `Ok(None)` result for every path.
361    pub fn from_repository_index(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Result<Self> {
362        let index_path = repository_index_path(git_dir);
363        read_index_stat_probe_cache(&index_path, format)
364    }
365
366    /// Return a per-path probe for a stage-0 entry, if present.
367    pub fn probe_for_git_path(&self, git_path: &[u8]) -> Option<IndexStatProbe> {
368        self.entries
369            .get(git_path)
370            .cloned()
371            .map(|entry| IndexStatProbe {
372                entry,
373                index_mtime: self.index_mtime,
374            })
375    }
376
377    /// Whether this cache has a stage-0 entry for `git_path`.
378    pub fn contains_git_path(&self, git_path: &[u8]) -> bool {
379        self.entries.contains_key(git_path)
380    }
381
382    /// Number of stage-0 entries in the cache.
383    pub fn len(&self) -> usize {
384        self.entries.len()
385    }
386
387    /// Whether the cache has no stage-0 entries.
388    pub fn is_empty(&self) -> bool {
389        self.entries.is_empty()
390    }
391
392    /// The index file mtime used as the racy-clean reference timestamp.
393    pub fn index_mtime(&self) -> Option<(u64, u64)> {
394        self.index_mtime
395    }
396}
397
/// The single slot of the process-local probe cache: a parsed probe cache plus
/// the key it was built under. `cached_repository_index_stat_probe` reuses the
/// slot only when path, object format, file length and mtime all still match.
#[derive(Clone)]
struct CachedRepositoryIndexStatProbes {
    /// Index file the probes were read from.
    index_path: PathBuf,
    /// Object format the index was parsed with.
    format: ObjectFormat,
    /// Index file length observed just before parsing.
    len: u64,
    /// Index file mtime observed just before parsing.
    mtime: Option<(u64, u64)>,
    /// The parsed stage-0 probes.
    probes: IndexStatProbeCache,
}
406
/// Process-wide, lazily initialised single-slot cache behind
/// `cached_repository_index_stat_probe`. `None` inside the mutex means nothing
/// is cached.
static REPOSITORY_INDEX_STAT_PROBES: OnceLock<Mutex<Option<CachedRepositoryIndexStatProbes>>> =
    OnceLock::new();
409
410fn cached_repository_index_stat_probe(
411    index_path: &Path,
412    format: ObjectFormat,
413    git_path: &[u8],
414) -> Result<Option<IndexStatProbe>> {
415    let metadata = match fs::metadata(index_path) {
416        Ok(metadata) => metadata,
417        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
418            if let Some(cache) = REPOSITORY_INDEX_STAT_PROBES.get()
419                && let Ok(mut guard) = cache.lock()
420            {
421                *guard = None;
422            }
423            return Ok(None);
424        }
425        Err(err) => return Err(err.into()),
426    };
427    let len = metadata.len();
428    let mtime = file_mtime_parts(&metadata);
429    let cache = REPOSITORY_INDEX_STAT_PROBES.get_or_init(|| Mutex::new(None));
430    if let Ok(guard) = cache.lock()
431        && let Some(cached) = guard.as_ref()
432        && cached.index_path == index_path
433        && cached.format == format
434        && cached.len == len
435        && cached.mtime == mtime
436    {
437        return Ok(cached.probes.probe_for_git_path(git_path));
438    }
439
440    let probes = read_index_stat_probe_cache_with_metadata(index_path, format, mtime)?;
441    let probe = probes.probe_for_git_path(git_path);
442    if let Ok(mut guard) = cache.lock() {
443        *guard = Some(CachedRepositoryIndexStatProbes {
444            index_path: index_path.to_path_buf(),
445            format,
446            len,
447            mtime,
448            probes: probes.clone(),
449        });
450    }
451    Ok(probe)
452}
453
454fn read_index_stat_probe_cache(
455    index_path: &Path,
456    format: ObjectFormat,
457) -> Result<IndexStatProbeCache> {
458    let metadata = match fs::metadata(index_path) {
459        Ok(metadata) => metadata,
460        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
461            return Ok(IndexStatProbeCache::default());
462        }
463        Err(err) => return Err(err.into()),
464    };
465    read_index_stat_probe_cache_with_metadata(index_path, format, file_mtime_parts(&metadata))
466}
467
468fn read_index_stat_probe_cache_with_metadata(
469    index_path: &Path,
470    format: ObjectFormat,
471    index_mtime: Option<(u64, u64)>,
472) -> Result<IndexStatProbeCache> {
473    let bytes = fs::read(index_path)?;
474    let index = Index::parse(&bytes, format)?;
475    Ok(IndexStatProbeCache::from_index(&index, index_mtime))
476}
477
478fn stage0_index_entries(index: &Index) -> HashMap<Vec<u8>, IndexEntry> {
479    let mut entries = HashMap::new();
480    for entry in &index.entries {
481        if index_entry_stage(entry) == 0 {
482            entries.insert(entry.path.as_bytes().to_vec(), entry.clone());
483        }
484    }
485    entries
486}
487
/// Summary of a checkout.
///
/// NOTE(review): the producing function is outside this view; the field
/// meanings are inferred from names and types — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CheckoutResult {
    /// Name of the branch involved.
    pub branch: String,
    /// Object id involved (presumably the checked-out commit).
    pub oid: ObjectId,
    /// A file count (presumably files written to the worktree).
    pub files: usize,
}
494
/// Summary of a restore operation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RestoreResult {
    /// NOTE(review): presumably the number of paths restored — confirm.
    pub restored: usize,
}
499
/// Summary of a remove operation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RemoveResult {
    /// Paths reported as removed, as raw bytes.
    /// NOTE(review): whether dry runs populate this is not visible here — confirm.
    pub removed: Vec<Vec<u8>>,
}
504
/// Outcome of moving one source to one destination, with optional per-path
/// details.
///
/// NOTE(review): the producing function is outside this view; the field
/// meanings are inferred from names and types — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MoveResult {
    /// Source path, as raw bytes.
    pub source: Vec<u8>,
    /// Destination path, as raw bytes.
    pub destination: Vec<u8>,
    /// Whether this move was skipped.
    pub skipped: bool,
    /// A fatal error message, if any.
    pub fatal: Option<String>,
    /// Per-path breakdown (see [`MoveDetail`]).
    pub details: Vec<MoveDetail>,
}
513
/// One source/destination pair inside a [`MoveResult`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MoveDetail {
    /// Source path, as raw bytes.
    pub source: Vec<u8>,
    /// Destination path, as raw bytes.
    pub destination: Vec<u8>,
    /// Whether this pair was skipped.
    pub skipped: bool,
}
520
/// Location of the repository's index file: the value of the `GIT_INDEX_FILE`
/// environment variable when it is set (used verbatim), otherwise
/// `<git_dir>/index`.
pub fn repository_index_path(git_dir: impl AsRef<Path>) -> PathBuf {
    match env::var_os("GIT_INDEX_FILE") {
        Some(overridden) => PathBuf::from(overridden),
        None => git_dir.as_ref().join("index"),
    }
}
526
527pub fn read_repository_index(
528    git_dir: impl AsRef<Path>,
529    format: ObjectFormat,
530) -> Result<Option<Index>> {
531    let index_path = repository_index_path(git_dir);
532    if !index_path.exists() {
533        return Ok(None);
534    }
535    Ok(Some(Index::parse(&fs::read(index_path)?, format)?))
536}
537
538/// Resolve the working-tree root for a repository identified by its git
539/// directory, returning `Ok(None)` for a bare repository.
540///
541/// This is the repository-intrinsic worktree resolution (it does *not* consult
542/// `GIT_WORK_TREE`/`GIT_DIR` or CLI overrides — those are the caller's job):
543///
544/// 0. if `core.bare` is true the repository is bare and `Ok(None)` is returned
545///    immediately — `core.bare` takes precedence, so a bare repo ignores
546///    `core.worktree` and the `.git`-parent fallback;
547/// 1. otherwise, a `core.worktree` setting in `<git_dir>/config` (absolute, or
548///    relative to the git directory), canonicalised;
549/// 2. otherwise, for a linked worktree (a git directory that has both a
550///    `commondir` and a `gitdir` administrative file), the directory containing
551///    the worktree's `.git` link, canonicalised;
552/// 3. otherwise, when the git directory is a `.git` directory, its parent (the
553///    ordinary non-bare layout) — returned verbatim, not canonicalised;
554/// 4. otherwise the repository is bare and `Ok(None)` is returned.
555///
556/// `Ok(None)` means specifically "bare" (case 0 or case 4). A [`GitError::Io`] is
557/// returned if a path that should exist cannot be canonicalised, and a
558/// [`GitError::InvalidPath`] if a `.git` directory has no parent (a malformed
559/// layout).
560pub fn worktree_root_for_git_dir(git_dir: &Path) -> Result<Option<PathBuf>> {
561    if let Ok(config) = sley_config::read_repo_config(git_dir, None) {
562        // A bare repository has no working tree, and `core.bare` takes precedence:
563        // a bare repo ignores `core.worktree`. Check it before any worktree
564        // resolution so a bare `.git`-named directory does not fall through to the
565        // "parent of .git" case below.
566        if config.get_bool("core", None, "bare") == Some(true) {
567            return Ok(None);
568        }
569        if let Some(worktree) = config.get("core", None, "worktree") {
570            let worktree = PathBuf::from(worktree);
571            let worktree = if worktree.is_absolute() {
572                worktree
573            } else {
574                git_dir.join(worktree)
575            };
576            return fs::canonicalize(worktree)
577                .map(Some)
578                .map_err(|err| GitError::Io(err.to_string()));
579        }
580    }
581    if git_dir.join("commondir").is_file() {
582        let gitdir_file = git_dir.join("gitdir");
583        if gitdir_file.is_file() {
584            let value = fs::read_to_string(&gitdir_file)?;
585            let worktree_git_file = resolve_worktree_admin_path(git_dir, value.trim());
586            if let Some(worktree) = worktree_git_file.parent() {
587                return fs::canonicalize(worktree)
588                    .map(Some)
589                    .map_err(|err| GitError::Io(err.to_string()));
590            }
591        }
592    }
593    if git_dir.file_name().and_then(|name| name.to_str()) != Some(".git") {
594        return Ok(None);
595    }
596    git_dir
597        .parent()
598        .map(Path::to_path_buf)
599        .map(Some)
600        .ok_or_else(|| GitError::InvalidPath("git dir has no parent worktree".into()))
601}
602
/// Turn a path read from an administrative file inside a git directory (for
/// example a linked worktree's `gitdir` link) into a usable path. An absolute
/// `value` is returned unchanged; a relative one is joined onto `admin_dir`.
fn resolve_worktree_admin_path(admin_dir: &Path, value: &str) -> PathBuf {
    let candidate = Path::new(value);
    if !candidate.is_absolute() {
        return admin_dir.join(candidate);
    }
    candidate.to_path_buf()
}
614
/// Report whether the repository at `git_dir` is shallow, meaning it contains
/// a `shallow` file recording grafted commit boundaries (as left behind by
/// `git clone --depth`). A `shallow` path that cannot be stat'ed counts as
/// absent.
pub fn is_shallow_repository(git_dir: &Path) -> bool {
    let shallow_file = git_dir.join("shallow");
    fs::metadata(shallow_file).is_ok()
}
620
/// Switches for the remove operation.
///
/// NOTE(review): the consumer is outside this view; the fields presumably
/// mirror `git rm`'s `-r`, `--cached`, `--force`, `--dry-run` and
/// `--ignore-unmatch` — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RemoveOptions {
    /// Presumably `-r`.
    pub recursive: bool,
    /// Presumably `--cached`.
    pub cached: bool,
    /// Presumably `--force`.
    pub force: bool,
    /// Presumably `--dry-run`.
    pub dry_run: bool,
    /// Presumably `--ignore-unmatch`.
    pub ignore_unmatch: bool,
}
629
/// Switches for the move operation.
///
/// NOTE(review): the consumer is outside this view; the fields presumably
/// mirror `git mv`'s `--force`, `--dry-run` and `-k` — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MoveOptions {
    /// Presumably `--force`.
    pub force: bool,
    /// Presumably `--dry-run`.
    pub dry_run: bool,
    /// Presumably `-k` (skip actions that would error).
    pub skip_errors: bool,
}
636
637impl ShortStatusEntry {
638    pub fn line(&self) -> String {
639        format!(
640            "{}{} {}",
641            self.index as char,
642            self.worktree as char,
643            String::from_utf8_lossy(&self.path)
644        )
645    }
646}
647
648pub fn add_paths_to_index(
649    worktree_root: impl AsRef<Path>,
650    git_dir: impl AsRef<Path>,
651    format: ObjectFormat,
652    paths: &[PathBuf],
653) -> Result<UpdateIndexResult> {
654    update_index_paths(
655        worktree_root,
656        git_dir,
657        format,
658        paths,
659        UpdateIndexOptions {
660            add: true,
661            remove: false,
662            force_remove: false,
663            chmod: None,
664            info_only: false,
665            ignore_skip_worktree_entries: false,
666        },
667    )
668}
669
670pub fn update_index_paths(
671    worktree_root: impl AsRef<Path>,
672    git_dir: impl AsRef<Path>,
673    format: ObjectFormat,
674    paths: &[PathBuf],
675    options: UpdateIndexOptions,
676) -> Result<UpdateIndexResult> {
677    let ordered = ordered_paths_from_plain(paths, options.chmod);
678    update_index_paths_impl(
679        worktree_root.as_ref(),
680        git_dir.as_ref(),
681        format,
682        &ordered,
683        options,
684        None,
685        false,
686    )
687}
688
689fn ordered_paths_from_plain(paths: &[PathBuf], chmod: Option<bool>) -> Vec<UpdateIndexPath> {
690    paths
691        .iter()
692        .map(|path| UpdateIndexPath {
693            path: path.clone(),
694            chmod,
695        })
696        .collect()
697}
698
699/// Stage an ordered list of paths, each carrying its own `--chmod` state, and
700/// (under `verbose`) print the `add`/`remove`/`chmod` action lines inline in
701/// command-line order. This is the entry point `git update-index <path>...`
702/// uses so that `--chmod=+x A --chmod=-x B --verbose` produces the interleaved
703/// `add 'A'` / `chmod +x 'A'` / `add 'B'` / `chmod -x 'B'` output git emits.
704pub fn update_index_ordered_paths_filtered(
705    worktree_root: impl AsRef<Path>,
706    git_dir: impl AsRef<Path>,
707    format: ObjectFormat,
708    paths: &[UpdateIndexPath],
709    options: UpdateIndexOptions,
710    config: &GitConfig,
711    verbose: bool,
712) -> Result<UpdateIndexResult> {
713    update_index_paths_impl(
714        worktree_root.as_ref(),
715        git_dir.as_ref(),
716        format,
717        paths,
718        options,
719        Some(config),
720        verbose,
721    )
722}
723
724/// Like [`add_paths_to_index`], but runs the configured content filters
725/// (`core.autocrlf`/`text`/`eol` EOL conversion and `filter.<name>.clean`
726/// drivers) on each file's contents before hashing it into the object store.
727///
728/// `config` is the repository config used to resolve the filters; pass the
729/// parsed `<git_dir>/config` (the orchestrator typically already has this).
730pub fn add_paths_to_index_filtered(
731    worktree_root: impl AsRef<Path>,
732    git_dir: impl AsRef<Path>,
733    format: ObjectFormat,
734    paths: &[PathBuf],
735    config: &GitConfig,
736) -> Result<UpdateIndexResult> {
737    update_index_paths_filtered(
738        worktree_root,
739        git_dir,
740        format,
741        paths,
742        UpdateIndexOptions {
743            add: true,
744            remove: false,
745            force_remove: false,
746            chmod: None,
747            info_only: false,
748            ignore_skip_worktree_entries: false,
749        },
750        config,
751    )
752}
753
754/// Like [`update_index_paths`], but applies the clean-side content filters (see
755/// [`apply_clean_filter`]) to file contents before they are hashed/written.
756pub fn update_index_paths_filtered(
757    worktree_root: impl AsRef<Path>,
758    git_dir: impl AsRef<Path>,
759    format: ObjectFormat,
760    paths: &[PathBuf],
761    options: UpdateIndexOptions,
762    config: &GitConfig,
763) -> Result<UpdateIndexResult> {
764    let ordered = ordered_paths_from_plain(paths, options.chmod);
765    update_index_paths_impl(
766        worktree_root.as_ref(),
767        git_dir.as_ref(),
768        format,
769        &ordered,
770        options,
771        Some(config),
772        false,
773    )
774}
775
776fn update_index_paths_impl(
777    worktree_root: &Path,
778    git_dir: &Path,
779    format: ObjectFormat,
780    paths: &[UpdateIndexPath],
781    options: UpdateIndexOptions,
782    clean_config: Option<&GitConfig>,
783    verbose: bool,
784) -> Result<UpdateIndexResult> {
785    let index_path = repository_index_path(git_dir);
786    let mut index = if index_path.exists() {
787        Index::parse(&fs::read(&index_path)?, format)?
788    } else {
789        Index {
790            version: 2,
791            entries: Vec::new(),
792            extensions: Vec::new(),
793            checksum: None,
794        }
795    };
796    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
797    // Build the `.gitattributes` matcher ONCE for the whole batch when clean
798    // filters are in play. `apply_clean_filter` rebuilds it from scratch on every
799    // call — and `AttributeMatcher::from_worktree_root` walks the entire worktree
800    // (a stat per file) to collect `.gitattributes`. Calling it per staged path
801    // made `add -u` of D dirty files in an N-file tree cost D*N stats (sley#27's
802    // dominant remaining term after the fsync fix: 10 dirty x 1000 files ~ 11k
803    // statx vs git's ~1k). Resolving attributes per path against the shared
804    // matcher is byte-identical to the per-call rebuild, just without the
805    // redundant tree walks.
806    let attribute_matcher = match clean_config {
807        Some(_) => Some(AttributeMatcher::from_worktree_root(worktree_root)?),
808        None => None,
809    };
810    let requested_filter_attrs = filter_attribute_names();
811    let mut updated = Vec::new();
812    let mut reports: Vec<String> = Vec::new();
813    for update_path in paths {
814        let path = &update_path.path;
815        let path_chmod = update_path.chmod;
816        let absolute = if path.is_absolute() {
817            path.clone()
818        } else {
819            worktree_root.join(path)
820        };
821        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
822            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
823        })?;
824        let git_path = git_path_bytes(relative)?;
825        if options.force_remove {
826            index.entries.retain(|existing| existing.path != git_path);
827            // git's update_one() reports `remove` for a --force-remove path.
828            reports.push(format!("remove '{}'", String::from_utf8_lossy(&git_path)));
829            continue;
830        }
831        if let Some(existing) = index
832            .entries
833            .iter()
834            .find(|existing| existing.path == git_path)
835            && index_entry_skip_worktree(existing)
836        {
837            if options.remove && !options.ignore_skip_worktree_entries {
838                index.entries.retain(|existing| existing.path != git_path);
839            }
840            continue;
841        }
842        // lstat (not stat): a symlink must be inspected as the link itself, never
843        // followed to its target. `Path::exists`/`fs::metadata` both stat through
844        // the link, which makes a symlink-to-directory look like a directory
845        // (fs::read then fails with "Is a directory") and a symlink-to-file get
846        // staged with the target's content + a regular-file mode. git stages a
847        // symlink as mode 120000 whose blob is the link target string, regardless
848        // of what (if anything) the target resolves to.
849        let symlink_metadata = match fs::symlink_metadata(&absolute) {
850            Ok(metadata) => Some(metadata),
851            Err(err) if err.kind() == std::io::ErrorKind::NotFound => None,
852            Err(err) => return Err(err.into()),
853        };
854        let Some(metadata) = symlink_metadata else {
855            if options.remove {
856                index.entries.retain(|existing| existing.path != git_path);
857                // git's update_one() unconditionally reports `add '<path>'`
858                // after process_path(), even when the missing file was removed
859                // from the index via the `--remove` (not --force-remove) path.
860                reports.push(format!("add '{}'", String::from_utf8_lossy(&git_path)));
861                continue;
862            }
863            print_update_index_path_error(&git_path, "does not exist and --remove not passed");
864            return Err(GitError::Exit(128));
865        };
866        if !options.add
867            && !index
868                .entries
869                .iter()
870                .any(|existing| existing.path == git_path)
871        {
872            print_update_index_path_error(
873                &git_path,
874                "cannot add to the index - missing --add option?",
875            );
876            return Err(GitError::Exit(128));
877        }
878        if metadata.is_dir() {
879            // A directory is stageable only as a gitlink: when it is an
880            // embedded repository with a commit checked out, git records a
881            // mode-160000 entry whose oid is that commit (no object is
882            // written). Otherwise it errors — with upstream's exact messages
883            // for the embedded-repo-without-commit and plain-directory cases
884            // (object-file.c index_path / builtin/update-index.c
885            // process_directory).
886            let display = String::from_utf8_lossy(&git_path).into_owned();
887            let has_dot_git = absolute.join(".git").exists();
888            let Some(head_oid) = sley_diff_merge::gitlink_head_oid(&absolute, format) else {
889                if has_dot_git {
890                    eprintln!("error: '{display}' does not have a commit checked out");
891                } else {
892                    eprintln!("error: {display}: is a directory - add files inside instead");
893                }
894                eprintln!("fatal: Unable to process path {display}");
895                return Err(GitError::Exit(128));
896            };
897            if path_chmod.is_some() {
898                eprintln!(
899                    "fatal: git update-index: cannot chmod {}x '{display}'",
900                    if path_chmod == Some(true) { '+' } else { '-' },
901                );
902                return Err(GitError::Exit(128));
903            }
904            let mut entry = index_entry_from_metadata(git_path.clone(), head_oid, &metadata);
905            entry.mode = 0o160000;
906            reports.push(format!("add '{display}'"));
907            index.entries.retain(|existing| existing.path != git_path);
908            index.entries.push(entry);
909            updated.push(head_oid);
910            continue;
911        }
912        let is_symlink = metadata.file_type().is_symlink();
913        let body = if is_symlink {
914            // The blob is the raw link target bytes; clean filters never apply to
915            // a symlink (git treats it as binary content, not a text path).
916            symlink_target_bytes(&absolute)?
917        } else {
918            let body = fs::read(&absolute)?;
919            match (clean_config, &attribute_matcher) {
920                (Some(config), Some(matcher)) => {
921                    // Identical to `apply_clean_filter`, but reuses the batch's
922                    // matcher instead of rebuilding it (and re-walking the tree)
923                    // for this path.
924                    let checks =
925                        matcher.attributes_for_path(&git_path, &requested_filter_attrs, false);
926                    apply_clean_filter_with_attributes(config, &checks, &git_path, &body)?
927                }
928                _ => body,
929            }
930        };
931        let object = EncodedObject::new(ObjectType::Blob, body);
932        let oid = if options.info_only {
933            object.object_id(format)?
934        } else {
935            odb.write_object(object)?
936        };
937        let mut entry = index_entry_from_metadata(git_path.clone(), oid, &metadata);
938        if is_symlink {
939            entry.mode = 0o120000;
940        }
941        // git's update_one() reports `add` for every staged path (whether the
942        // entry is new or an update), then chmod_path() reports the chmod after.
943        reports.push(format!("add '{}'", String::from_utf8_lossy(&git_path)));
944        if let Some(executable) = path_chmod {
945            // git's chmod_path() refuses to flip the executable bit on anything
946            // that is not a regular file (a symlink/gitlink has no such bit). It
947            // writes the blob first, then errors with this exact message and
948            // leaves the index untouched.
949            if is_symlink {
950                eprintln!(
951                    "fatal: git update-index: cannot chmod {}x '{}'",
952                    if executable { '+' } else { '-' },
953                    String::from_utf8_lossy(&git_path)
954                );
955                return Err(GitError::Exit(128));
956            }
957            entry.mode = if executable { 0o100755 } else { 0o100644 };
958            reports.push(format!(
959                "chmod {}x '{}'",
960                if executable { '+' } else { '-' },
961                String::from_utf8_lossy(&git_path)
962            ));
963        }
964        index.entries.retain(|existing| existing.path != git_path);
965        index.entries.push(entry);
966        updated.push(oid);
967    }
968    index
969        .entries
970        .sort_by(|left, right| left.path.cmp(&right.path));
971    normalize_index_version_for_extended_flags(&mut index);
972    index.extensions = index_extensions_without_cache_tree(&index.extensions);
973    fs::write(index_path, index.write(format)?)?;
974    if verbose {
975        let mut stdout = std::io::stdout().lock();
976        for line in &reports {
977            writeln!(stdout, "{line}")?;
978        }
979        stdout.flush()?;
980    }
981    Ok(UpdateIndexResult {
982        entries: index.entries.len(),
983        updated,
984    })
985}
986
987pub fn refresh_index_paths(
988    worktree_root: impl AsRef<Path>,
989    git_dir: impl AsRef<Path>,
990    format: ObjectFormat,
991    paths: &[PathBuf],
992    quiet: bool,
993    ignore_missing: bool,
994    really_refresh: bool,
995) -> Result<UpdateIndexResult> {
996    let worktree_root = worktree_root.as_ref();
997    let git_dir = git_dir.as_ref();
998    let index_path = repository_index_path(git_dir);
999    if !index_path.exists() {
1000        return Ok(UpdateIndexResult {
1001            entries: 0,
1002            updated: Vec::new(),
1003        });
1004    }
1005    let mut index = Index::parse(&fs::read(&index_path)?, format)?;
1006    // git's `update-index --refresh` trusts the cached stat: a stage-0 entry
1007    // whose size+mtime still match the worktree file (and is not racily clean) is
1008    // known unchanged, so its content is NOT re-read or re-hashed
1009    // (read-cache.c `refresh_cache_ent` → `ie_match_stat`). Without this shortcut
1010    // sley re-hashed every tracked file on every refresh — the 3.2x slowdown in
1011    // sley#27. We build the cache from the same parsed index + the index file's
1012    // own mtime (the racy-clean reference) so no extra parse is needed.
1013    let stat_cache = IndexStatCache::from_index(&index, &index_path);
1014    let selected_paths = paths
1015        .iter()
1016        .map(|path| {
1017            let absolute = if path.is_absolute() {
1018                path.clone()
1019            } else {
1020                worktree_root.join(path)
1021            };
1022            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
1023                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
1024            })?;
1025            git_path_bytes(relative)
1026        })
1027        .collect::<Result<Vec<_>>>()?;
1028    let selected_paths = selected_paths.into_iter().collect::<BTreeSet<_>>();
1029    let mut needs_update = false;
1030    for entry in &mut index.entries {
1031        if index_entry_stage(entry) != 0 {
1032            continue;
1033        }
1034        let selected_for_update =
1035            !selected_paths.is_empty() && selected_paths.contains(entry.path.as_bytes());
1036        if entry.flags & INDEX_FLAG_ASSUME_UNCHANGED != 0 {
1037            if !really_refresh {
1038                continue;
1039            }
1040            entry.flags &= !INDEX_FLAG_ASSUME_UNCHANGED;
1041        }
1042        let absolute = worktree_root.join(repo_path_to_os_path(entry.path.as_bytes())?);
1043        let Ok(metadata) = fs::metadata(&absolute) else {
1044            if ignore_missing {
1045                continue;
1046            }
1047            if !quiet {
1048                print_update_index_needs_update(entry.path.as_bytes());
1049            }
1050            needs_update = true;
1051            continue;
1052        };
1053        if !metadata.is_file() {
1054            if !quiet {
1055                print_update_index_needs_update(entry.path.as_bytes());
1056            }
1057            needs_update = true;
1058            continue;
1059        }
1060        // Stat shortcut: when the cached stat proves the file is unchanged since
1061        // it was staged, its content hashes to the cached oid by construction
1062        // (see `IndexStatCache`'s safety invariant). Skip the read+hash and just
1063        // refresh the stat fields from current metadata — byte-identical to the
1064        // clean arm below, since the oid stamped is the cached one and the
1065        // metadata is the same one that re-stamp would read.
1066        if let Some(tracked) = stat_cache.reuse_tracked_entry(entry.path.as_bytes(), &metadata) {
1067            *entry = index_entry_from_metadata(entry.path.clone(), tracked.oid, &metadata);
1068            continue;
1069        }
1070        let body = fs::read(&absolute)?;
1071        let object = EncodedObject::new(ObjectType::Blob, body);
1072        let oid = object.object_id(format)?;
1073        if oid != entry.oid || file_mode(&metadata) != entry.mode {
1074            if !quiet {
1075                print_update_index_needs_update(entry.path.as_bytes());
1076            }
1077            needs_update = true;
1078            if selected_for_update {
1079                *entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
1080            }
1081            continue;
1082        }
1083        *entry = index_entry_from_metadata(entry.path.clone(), oid, &metadata);
1084    }
1085    fs::write(&index_path, index.write(format)?)?;
1086    if needs_update && !quiet {
1087        return Err(GitError::Exit(1));
1088    }
1089    Ok(UpdateIndexResult {
1090        entries: index.entries.len(),
1091        updated: Vec::new(),
1092    })
1093}
1094
1095pub fn update_index_again(
1096    worktree_root: impl AsRef<Path>,
1097    git_dir: impl AsRef<Path>,
1098    format: ObjectFormat,
1099    paths: &[PathBuf],
1100    options: UpdateIndexOptions,
1101) -> Result<UpdateIndexResult> {
1102    let worktree_root = worktree_root.as_ref();
1103    let git_dir = git_dir.as_ref();
1104    let index_path = repository_index_path(git_dir);
1105    if !index_path.exists() {
1106        return Ok(UpdateIndexResult {
1107            entries: 0,
1108            updated: Vec::new(),
1109        });
1110    }
1111    let index = Index::parse(&fs::read(&index_path)?, format)?;
1112    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1113    let head_entries = head_tree_entries(git_dir, format, &db)?;
1114    let selected_paths = selected_git_paths(worktree_root, paths)?;
1115    let mut again_paths = Vec::new();
1116    for entry in &index.entries {
1117        if index_entry_stage(entry) != 0 {
1118            continue;
1119        }
1120        if !selected_paths.is_empty() && !git_path_selected(entry.path.as_bytes(), &selected_paths)
1121        {
1122            continue;
1123        }
1124        let differs_from_head = match head_entries.get(entry.path.as_bytes()) {
1125            Some(head_entry) => head_entry.oid != entry.oid || head_entry.mode != entry.mode,
1126            None => true,
1127        };
1128        if differs_from_head {
1129            again_paths.push(worktree_root.join(repo_path_to_os_path(entry.path.as_bytes())?));
1130        }
1131    }
1132    if again_paths.is_empty() {
1133        return Ok(UpdateIndexResult {
1134            entries: index.entries.len(),
1135            updated: Vec::new(),
1136        });
1137    }
1138    update_index_paths(worktree_root, git_dir, format, &again_paths, options)
1139}
1140
1141pub fn set_index_assume_unchanged_paths(
1142    worktree_root: impl AsRef<Path>,
1143    git_dir: impl AsRef<Path>,
1144    format: ObjectFormat,
1145    paths: &[PathBuf],
1146    assume_unchanged: bool,
1147) -> Result<UpdateIndexResult> {
1148    let worktree_root = worktree_root.as_ref();
1149    let git_dir = git_dir.as_ref();
1150    let index_path = repository_index_path(git_dir);
1151    let mut index = if index_path.exists() {
1152        Index::parse(&fs::read(&index_path)?, format)?
1153    } else {
1154        Index {
1155            version: 2,
1156            entries: Vec::new(),
1157            extensions: Vec::new(),
1158            checksum: None,
1159        }
1160    };
1161    let selected_paths = paths
1162        .iter()
1163        .map(|path| {
1164            let absolute = if path.is_absolute() {
1165                path.clone()
1166            } else {
1167                worktree_root.join(path)
1168            };
1169            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
1170                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
1171            })?;
1172            git_path_bytes(relative)
1173        })
1174        .collect::<Result<Vec<_>>>()?;
1175    for path in selected_paths {
1176        if let Some(entry) = index.entries.iter_mut().find(|entry| entry.path == path) {
1177            if assume_unchanged {
1178                entry.flags |= INDEX_FLAG_ASSUME_UNCHANGED;
1179            } else {
1180                entry.flags &= !INDEX_FLAG_ASSUME_UNCHANGED;
1181            }
1182        }
1183    }
1184    normalize_index_version_for_extended_flags(&mut index);
1185    fs::write(index_path, index.write(format)?)?;
1186    Ok(UpdateIndexResult {
1187        entries: index.entries.len(),
1188        updated: Vec::new(),
1189    })
1190}
1191
1192fn selected_git_paths(worktree_root: &Path, paths: &[PathBuf]) -> Result<BTreeSet<Vec<u8>>> {
1193    paths
1194        .iter()
1195        .map(|path| {
1196            let absolute = if path.is_absolute() {
1197                path.clone()
1198            } else {
1199                worktree_root.join(path)
1200            };
1201            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
1202                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
1203            })?;
1204            git_path_bytes(relative)
1205        })
1206        .collect()
1207}
1208
1209fn git_path_selected(path: &[u8], selected_paths: &BTreeSet<Vec<u8>>) -> bool {
1210    selected_paths
1211        .iter()
1212        .any(|selected| path == selected || index_entry_is_under_path(path, selected))
1213}
1214
1215pub fn set_index_skip_worktree_paths(
1216    worktree_root: impl AsRef<Path>,
1217    git_dir: impl AsRef<Path>,
1218    format: ObjectFormat,
1219    paths: &[PathBuf],
1220    skip_worktree: bool,
1221) -> Result<UpdateIndexResult> {
1222    let worktree_root = worktree_root.as_ref();
1223    let git_dir = git_dir.as_ref();
1224    let index_path = repository_index_path(git_dir);
1225    let mut index = if index_path.exists() {
1226        Index::parse(&fs::read(&index_path)?, format)?
1227    } else {
1228        Index {
1229            version: 2,
1230            entries: Vec::new(),
1231            extensions: Vec::new(),
1232            checksum: None,
1233        }
1234    };
1235    let selected_paths = paths
1236        .iter()
1237        .map(|path| {
1238            let absolute = if path.is_absolute() {
1239                path.clone()
1240            } else {
1241                worktree_root.join(path)
1242            };
1243            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
1244                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
1245            })?;
1246            git_path_bytes(relative)
1247        })
1248        .collect::<Result<Vec<_>>>()?;
1249    for path in selected_paths {
1250        if let Some(entry) = index.entries.iter_mut().find(|entry| entry.path == path) {
1251            if skip_worktree {
1252                entry.flags |= INDEX_FLAG_EXTENDED;
1253                entry.flags_extended |= INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
1254            } else {
1255                entry.flags_extended &= !INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
1256                if entry.flags_extended == 0 {
1257                    entry.flags &= !INDEX_FLAG_EXTENDED;
1258                }
1259            }
1260        }
1261    }
1262    normalize_index_version_for_extended_flags(&mut index);
1263    fs::write(index_path, index.write(format)?)?;
1264    Ok(UpdateIndexResult {
1265        entries: index.entries.len(),
1266        updated: Vec::new(),
1267    })
1268}
1269
1270pub fn set_index_fsmonitor_valid_paths(
1271    worktree_root: impl AsRef<Path>,
1272    git_dir: impl AsRef<Path>,
1273    format: ObjectFormat,
1274    paths: &[PathBuf],
1275    _fsmonitor_valid: bool,
1276) -> Result<UpdateIndexResult> {
1277    let worktree_root = worktree_root.as_ref();
1278    let git_dir = git_dir.as_ref();
1279    let index_path = repository_index_path(git_dir);
1280    let index = if index_path.exists() {
1281        Index::parse(&fs::read(&index_path)?, format)?
1282    } else {
1283        Index {
1284            version: 2,
1285            entries: Vec::new(),
1286            extensions: Vec::new(),
1287            checksum: None,
1288        }
1289    };
1290    let selected_paths = paths
1291        .iter()
1292        .map(|path| {
1293            let absolute = if path.is_absolute() {
1294                path.clone()
1295            } else {
1296                worktree_root.join(path)
1297            };
1298            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
1299                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
1300            })?;
1301            git_path_bytes(relative)
1302        })
1303        .collect::<Result<Vec<_>>>()?;
1304    for path in selected_paths {
1305        if !index.entries.iter().any(|entry| entry.path == path) {
1306            eprintln!(
1307                "fatal: Unable to mark file {}",
1308                String::from_utf8_lossy(&path)
1309            );
1310            return Err(GitError::Exit(128));
1311        }
1312    }
1313    Ok(UpdateIndexResult {
1314        entries: index.entries.len(),
1315        updated: Vec::new(),
1316    })
1317}
1318
1319pub fn set_index_version(
1320    git_dir: impl AsRef<Path>,
1321    format: ObjectFormat,
1322    version: u32,
1323    verbose: bool,
1324) -> Result<UpdateIndexResult> {
1325    if !matches!(version, 2..=4) {
1326        return Err(GitError::Unsupported(format!(
1327            "update-index currently supports --index-version 2, 3, or 4, got {version}"
1328        )));
1329    }
1330    let git_dir = git_dir.as_ref();
1331    let index_path = repository_index_path(git_dir);
1332    let mut index = if index_path.exists() {
1333        Index::parse(&fs::read(&index_path)?, format)?
1334    } else {
1335        Index {
1336            version: 2,
1337            entries: Vec::new(),
1338            extensions: Vec::new(),
1339            checksum: None,
1340        }
1341    };
1342    // git reports the transition unconditionally under --verbose, even when the
1343    // requested version equals the current one ("was 4, set to 4").
1344    let previous = index.version;
1345    if verbose {
1346        println!("index-version: was {previous}, set to {version}");
1347    }
1348    index.version = version;
1349    normalize_index_version_for_extended_flags(&mut index);
1350    fs::write(index_path, index.write(format)?)?;
1351    Ok(UpdateIndexResult {
1352        entries: index.entries.len(),
1353        updated: Vec::new(),
1354    })
1355}
1356
1357pub fn force_write_index(
1358    git_dir: impl AsRef<Path>,
1359    format: ObjectFormat,
1360) -> Result<UpdateIndexResult> {
1361    let git_dir = git_dir.as_ref();
1362    let index_path = repository_index_path(git_dir);
1363    let mut index = if index_path.exists() {
1364        Index::parse(&fs::read(&index_path)?, format)?
1365    } else {
1366        Index {
1367            version: 2,
1368            entries: Vec::new(),
1369            extensions: Vec::new(),
1370            checksum: None,
1371        }
1372    };
1373    normalize_index_version_for_extended_flags(&mut index);
1374    fs::write(index_path, index.write(format)?)?;
1375    Ok(UpdateIndexResult {
1376        entries: index.entries.len(),
1377        updated: Vec::new(),
1378    })
1379}
1380
/// Returns a copy of the raw index extension area with every `TREE`
/// (cache-tree) extension removed.
///
/// `extensions` is read as a sequence of records, each made of a 4-byte
/// signature, a 4-byte big-endian payload size and the payload itself. Records
/// with any other signature are copied through verbatim, in order.
///
/// If the area is malformed — a truncated header, or a declared size that runs
/// past the end of the buffer — an empty vector is returned, i.e. all
/// extensions are dropped rather than a partially parsed area being kept.
fn index_extensions_without_cache_tree(extensions: &[u8]) -> Vec<u8> {
    const HEADER_LEN: usize = 8;
    let mut filtered = Vec::new();
    let mut offset = 0usize;
    while offset < extensions.len() {
        let Some(header) = extensions.get(offset..offset + HEADER_LEN) else {
            return Vec::new();
        };
        let size = u32::from_be_bytes([header[4], header[5], header[6], header[7]]);
        // Checked arithmetic: on targets where `usize` is 32 bits a hostile size
        // field could otherwise wrap `offset + 8 + size` and defeat the bounds
        // check below.
        let end = usize::try_from(size)
            .ok()
            .and_then(|size| (offset + HEADER_LEN).checked_add(size));
        let Some(record) = end.and_then(|end| extensions.get(offset..end)) else {
            return Vec::new();
        };
        if &header[..4] != b"TREE" {
            filtered.extend_from_slice(record);
        }
        offset += record.len();
    }
    filtered
}
1406
1407pub fn update_index_cacheinfo(
1408    git_dir: impl AsRef<Path>,
1409    format: ObjectFormat,
1410    entries: &[CacheInfoEntry],
1411    add: bool,
1412    verbose: bool,
1413) -> Result<UpdateIndexResult> {
1414    let git_dir = git_dir.as_ref();
1415    let index_path = repository_index_path(git_dir);
1416    let mut index = if index_path.exists() {
1417        Index::parse(&fs::read(&index_path)?, format)?
1418    } else {
1419        Index {
1420            version: 2,
1421            entries: Vec::new(),
1422            extensions: Vec::new(),
1423            checksum: None,
1424        }
1425    };
1426    let mut updated = Vec::new();
1427    let mut reports: Vec<String> = Vec::new();
1428    for cacheinfo in entries {
1429        if !add
1430            && !index
1431                .entries
1432                .iter()
1433                .any(|existing| existing.path == cacheinfo.path)
1434        {
1435            let path = String::from_utf8_lossy(&cacheinfo.path);
1436            eprintln!("error: {path}: cannot add to the index - missing --add option?");
1437            eprintln!("fatal: git update-index: --cacheinfo cannot add {path}");
1438            return Err(GitError::Exit(128));
1439        }
1440        let flags = index_flags(cacheinfo.path.len(), cacheinfo.stage);
1441        let entry = IndexEntry {
1442            ctime_seconds: 0,
1443            ctime_nanoseconds: 0,
1444            mtime_seconds: 0,
1445            mtime_nanoseconds: 0,
1446            dev: 0,
1447            ino: 0,
1448            mode: cacheinfo.mode,
1449            uid: 0,
1450            gid: 0,
1451            size: 0,
1452            oid: cacheinfo.oid,
1453            flags,
1454            flags_extended: 0,
1455            path: BString::from(cacheinfo.path.as_slice()),
1456        };
1457        index.entries.retain(|existing| {
1458            existing.path != cacheinfo.path || index_entry_stage(existing) != cacheinfo.stage
1459        });
1460        index.entries.push(entry);
1461        updated.push(cacheinfo.oid);
1462        // git's add_cacheinfo() calls report("add '%s'") *after* the entry is
1463        // staged, regardless of whether the subsequent index write succeeds.
1464        reports.push(format!(
1465            "add '{}'",
1466            String::from_utf8_lossy(&cacheinfo.path)
1467        ));
1468    }
1469    index
1470        .entries
1471        .sort_by(|left, right| left.path.cmp(&right.path));
1472    // git refuses to write an index entry whose object id is the null oid:
1473    // do_write_index() emits `error: cache entry has null sha1: <path>` and
1474    // returns nonzero, leaving the on-disk index untouched. The verbose `add`
1475    // line has already been printed by then.
1476    let null_entry = index.entries.iter().find(|entry| entry.oid.is_null());
1477    if let Some(entry) = null_entry {
1478        if verbose {
1479            flush_update_index_reports(&reports)?;
1480        }
1481        eprintln!(
1482            "error: cache entry has null sha1: {}",
1483            String::from_utf8_lossy(&entry.path)
1484        );
1485        return Err(GitError::Exit(128));
1486    }
1487    fs::write(index_path, index.write(format)?)?;
1488    if verbose {
1489        flush_update_index_reports(&reports)?;
1490    }
1491    Ok(UpdateIndexResult {
1492        entries: index.entries.len(),
1493        updated,
1494    })
1495}
1496
1497fn flush_update_index_reports(reports: &[String]) -> Result<()> {
1498    let mut stdout = std::io::stdout().lock();
1499    for line in reports {
1500        writeln!(stdout, "{line}")?;
1501    }
1502    stdout.flush()?;
1503    Ok(())
1504}
1505
1506pub fn update_index_index_info(
1507    git_dir: impl AsRef<Path>,
1508    format: ObjectFormat,
1509    records: &[IndexInfoRecord],
1510) -> Result<UpdateIndexResult> {
1511    let git_dir = git_dir.as_ref();
1512    let index_path = repository_index_path(git_dir);
1513    let mut index = if index_path.exists() {
1514        Index::parse(&fs::read(&index_path)?, format)?
1515    } else {
1516        Index {
1517            version: 2,
1518            entries: Vec::new(),
1519            extensions: Vec::new(),
1520            checksum: None,
1521        }
1522    };
1523    let mut updated = Vec::new();
1524    for record in records {
1525        match record {
1526            IndexInfoRecord::Remove { path } => {
1527                index.entries.retain(|existing| existing.path != *path);
1528            }
1529            IndexInfoRecord::Add(cacheinfo) => {
1530                let flags = index_flags(cacheinfo.path.len(), cacheinfo.stage);
1531                let entry = IndexEntry {
1532                    ctime_seconds: 0,
1533                    ctime_nanoseconds: 0,
1534                    mtime_seconds: 0,
1535                    mtime_nanoseconds: 0,
1536                    dev: 0,
1537                    ino: 0,
1538                    mode: cacheinfo.mode,
1539                    uid: 0,
1540                    gid: 0,
1541                    size: 0,
1542                    oid: cacheinfo.oid,
1543                    flags,
1544                    flags_extended: 0,
1545                    path: BString::from(cacheinfo.path.as_slice()),
1546                };
1547                if cacheinfo.stage == 0 {
1548                    index
1549                        .entries
1550                        .retain(|existing| existing.path != cacheinfo.path);
1551                } else {
1552                    index.entries.retain(|existing| {
1553                        existing.path != cacheinfo.path
1554                            || index_entry_stage(existing) != cacheinfo.stage
1555                    });
1556                }
1557                index.entries.push(entry);
1558                updated.push(cacheinfo.oid);
1559            }
1560        }
1561    }
1562    index.entries.sort_by(|left, right| {
1563        left.path
1564            .cmp(&right.path)
1565            .then_with(|| index_entry_stage(left).cmp(&index_entry_stage(right)))
1566    });
1567    fs::write(index_path, index.write(format)?)?;
1568    Ok(UpdateIndexResult {
1569        entries: index.entries.len(),
1570        updated,
1571    })
1572}
1573
/// Packs an entry's stage and path length into the 16-bit index `flags` word.
///
/// Bits 12-13 hold the stage (only its two low bits are used); the low 12 bits
/// hold the path length, saturated at `0xfff`.
fn index_flags(path_len: usize, stage: u16) -> u16 {
    const NAME_LEN_MAX: u16 = 0x0fff;
    let stage_bits = (stage & 0x3) << 12;
    // Lengths that do not even fit in a u16 saturate to the same maximum.
    let name_bits = u16::try_from(path_len).map_or(NAME_LEN_MAX, |len| len.min(NAME_LEN_MAX));
    stage_bits | name_bits
}
1577
/// Bit in `IndexEntry::flags` marking an entry as "assume unchanged".
const INDEX_FLAG_ASSUME_UNCHANGED: u16 = 0x8000;
/// Bit in `IndexEntry::flags` signalling that `flags_extended` carries data.
const INDEX_FLAG_EXTENDED: u16 = 0x4000;
/// Bit in `IndexEntry::flags_extended` marking an entry as skip-worktree.
const INDEX_EXTENDED_FLAG_SKIP_WORKTREE: u16 = 0x4000;
1581
1582fn normalize_index_version_for_extended_flags(index: &mut Index) {
1583    let has_extended_flags = index
1584        .entries
1585        .iter()
1586        .any(|entry| entry.flags & INDEX_FLAG_EXTENDED != 0 || entry.flags_extended != 0);
1587    if has_extended_flags && index.version < 3 {
1588        index.version = 3;
1589    } else if !has_extended_flags && index.version == 3 {
1590        index.version = 2;
1591    }
1592}
1593
1594fn index_entry_stage(entry: &IndexEntry) -> u16 {
1595    (entry.flags >> 12) & 0x3
1596}
1597
1598fn index_entry_skip_worktree(entry: &IndexEntry) -> bool {
1599    entry.flags & INDEX_FLAG_EXTENDED != 0
1600        && entry.flags_extended & INDEX_EXTENDED_FLAG_SKIP_WORKTREE != 0
1601}
1602
/// Prints the two-line stderr diagnostic for a path that could not be
/// processed: an `error:` line carrying `message`, then the `fatal:` line.
/// Bytes of `path` that are not valid UTF-8 are rendered lossily.
fn print_update_index_path_error(path: &[u8], message: &str) {
    let shown = String::from_utf8_lossy(path);
    eprintln!("error: {}: {}", shown, message);
    eprintln!("fatal: Unable to process path {}", shown);
}
1608
/// Prints `<path>: needs update` to stdout, rendering bytes of `path` that are
/// not valid UTF-8 lossily.
fn print_update_index_needs_update(path: &[u8]) {
    let shown = String::from_utf8_lossy(path);
    println!("{}: needs update", shown);
}
1613
1614pub fn write_tree_from_index(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Result<ObjectId> {
1615    write_tree_from_index_with_options(git_dir, format, WriteTreeOptions::default())
1616}
1617
1618pub fn write_tree_from_index_with_options(
1619    git_dir: impl AsRef<Path>,
1620    format: ObjectFormat,
1621    options: WriteTreeOptions,
1622) -> Result<ObjectId> {
1623    let git_dir = git_dir.as_ref();
1624    let index_path = repository_index_path(git_dir);
1625    // A repository with no index file yet (fresh init, nothing staged) is an
1626    // empty index: `git write-tree` / `git commit --allow-empty` produce the
1627    // empty tree rather than erroring.
1628    let index = match fs::read(&index_path) {
1629        Ok(bytes) => Index::parse(&bytes, format)?,
1630        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Index {
1631            version: 2,
1632            entries: Vec::new(),
1633            extensions: Vec::new(),
1634            checksum: None,
1635        },
1636        Err(err) => return Err(err.into()),
1637    };
1638    let entries = write_tree_entries_for_prefix(&index.entries, options.prefix.as_deref())?;
1639    let mut root = TreeNode::default();
1640    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
1641    if !options.missing_ok {
1642        let mut missing = false;
1643        for entry in &entries {
1644            // A gitlink's oid names a commit in the *submodule's* repository;
1645            // it is never expected to exist in this odb (upstream
1646            // update_one_entry: REF_OBJ check skips S_IFGITLINK entries).
1647            if entry.mode == 0o160000 {
1648                continue;
1649            }
1650            if !odb.contains(&entry.oid)? {
1651                eprintln!(
1652                    "error: invalid object {:o} {} for '{}'",
1653                    entry.mode,
1654                    entry.oid,
1655                    String::from_utf8_lossy(entry.path.as_bytes())
1656                );
1657                missing = true;
1658            }
1659        }
1660        if missing {
1661            eprintln!("fatal: git-write-tree: error building trees");
1662            return Err(GitError::Exit(128));
1663        }
1664    }
1665    for entry in &entries {
1666        root.insert(entry)?;
1667    }
1668    let mut odb = FileObjectDatabase::from_git_dir(git_dir, format);
1669    write_tree_node(&root, &mut odb)
1670}
1671
1672fn write_tree_entries_for_prefix(
1673    entries: &[IndexEntry],
1674    prefix: Option<&[u8]>,
1675) -> Result<Vec<IndexEntry>> {
1676    let Some(prefix) = prefix else {
1677        return Ok(entries.to_vec());
1678    };
1679    let trimmed_len = prefix
1680        .iter()
1681        .rposition(|byte| *byte != b'/')
1682        .map(|idx| idx + 1)
1683        .unwrap_or(0);
1684    let trimmed = &prefix[..trimmed_len];
1685    if trimmed.is_empty() {
1686        return Ok(entries.to_vec());
1687    }
1688    let mut prefixed = Vec::new();
1689    for entry in entries {
1690        let Some(remainder) = entry.path.as_bytes().strip_prefix(trimmed) else {
1691            continue;
1692        };
1693        let Some(stripped) = remainder.strip_prefix(b"/") else {
1694            continue;
1695        };
1696        if stripped.is_empty() {
1697            continue;
1698        }
1699        let mut entry = entry.clone();
1700        entry.path = BString::from(stripped);
1701        prefixed.push(entry);
1702    }
1703    if prefixed.is_empty() {
1704        eprintln!(
1705            "fatal: git-write-tree: prefix {} not found",
1706            String::from_utf8_lossy(prefix)
1707        );
1708        return Err(GitError::Exit(128));
1709    }
1710    Ok(prefixed)
1711}
1712
1713pub fn short_status(
1714    worktree_root: impl AsRef<Path>,
1715    git_dir: impl AsRef<Path>,
1716    format: ObjectFormat,
1717) -> Result<Vec<ShortStatusEntry>> {
1718    short_status_with_options(
1719        worktree_root,
1720        git_dir,
1721        format,
1722        ShortStatusOptions::default(),
1723    )
1724}
1725
1726/// Compare one expected tracked entry to the worktree path named by `path`.
1727///
1728/// `path` is repository-relative and uses the platform path representation. For
1729/// callers that already carry git's byte path form, use
1730/// [`worktree_entry_state_by_git_path`].
1731pub fn worktree_entry_state(
1732    worktree_root: impl AsRef<Path>,
1733    git_dir: impl AsRef<Path>,
1734    format: ObjectFormat,
1735    path: impl AsRef<Path>,
1736    expected_oid: &ObjectId,
1737    expected_mode: u32,
1738    index_probe: Option<&IndexStatProbe>,
1739) -> Result<WorktreeEntryState> {
1740    let path = path.as_ref();
1741    if path.is_absolute() {
1742        return Err(GitError::InvalidPath(format!(
1743            "worktree entry path {} is absolute",
1744            path.display()
1745        )));
1746    }
1747    let git_path = git_path_bytes(path)?;
1748    worktree_entry_state_by_git_path(
1749        worktree_root,
1750        git_dir,
1751        format,
1752        &git_path,
1753        expected_oid,
1754        expected_mode,
1755        index_probe,
1756    )
1757}
1758
1759/// Compare one expected tracked entry to the worktree path named by a
1760/// repository-relative git path (`/` separators, raw bytes).
1761///
1762/// The comparison uses the same clean-filter, symlink-target, gitlink, and
1763/// racy-clean stat shortcut rules as [`short_status_with_options`].
1764pub fn worktree_entry_state_by_git_path(
1765    worktree_root: impl AsRef<Path>,
1766    git_dir: impl AsRef<Path>,
1767    format: ObjectFormat,
1768    git_path: &[u8],
1769    expected_oid: &ObjectId,
1770    expected_mode: u32,
1771    index_probe: Option<&IndexStatProbe>,
1772) -> Result<WorktreeEntryState> {
1773    let worktree_root = worktree_root.as_ref();
1774    let git_dir = git_dir.as_ref();
1775    let stat_cache =
1776        index_probe.and_then(|probe| probe.stat_cache_for(git_path, expected_oid, expected_mode));
1777    let Some(worktree_entry) = worktree_entry_for_git_path(
1778        worktree_root,
1779        git_dir,
1780        format,
1781        git_path,
1782        expected_oid,
1783        expected_mode,
1784        stat_cache.as_ref(),
1785    )?
1786    else {
1787        return Ok(WorktreeEntryState::Deleted);
1788    };
1789    if worktree_entry.mode == expected_mode && worktree_entry.oid == *expected_oid {
1790        Ok(WorktreeEntryState::Clean)
1791    } else {
1792        Ok(WorktreeEntryState::Modified)
1793    }
1794}
1795
1796pub fn short_status_with_options(
1797    worktree_root: impl AsRef<Path>,
1798    git_dir: impl AsRef<Path>,
1799    format: ObjectFormat,
1800    options: ShortStatusOptions,
1801) -> Result<Vec<ShortStatusEntry>> {
1802    let worktree_root = worktree_root.as_ref();
1803    let git_dir = git_dir.as_ref();
1804    let db = FileObjectDatabase::from_git_dir(git_dir, format);
1805    if options.untracked_mode == StatusUntrackedMode::None && !options.include_ignored {
1806        let (index, stat_cache, head_matches_index) =
1807            read_index_with_stat_cache(git_dir, format, &db)?;
1808        return short_status_tracked_only(
1809            worktree_root,
1810            git_dir,
1811            format,
1812            &db,
1813            &index,
1814            &stat_cache,
1815            head_matches_index,
1816        );
1817    }
1818    // Parse the index once: the stat cache lets the worktree walk skip
1819    // re-hashing files whose stat proves they are unchanged since staging
1820    // (git's racy-git shortcut). When HEAD matches the index, the status
1821    // comparison can stream directly from the parsed index and avoid building a
1822    // second path-sorted copy of every tracked entry.
1823    let (parsed_index, stat_cache, head_matches_index) =
1824        read_index_with_stat_cache(git_dir, format, &db)?;
1825    if head_matches_index && !options.include_ignored {
1826        let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
1827        let (worktree, submodule_dirt_map, tracked_presence) =
1828            status_worktree_entries_with_submodule_dirt(
1829                worktree_root,
1830                git_dir,
1831                format,
1832                &stat_cache,
1833                None,
1834                Some(&mut ignores),
1835            )?;
1836        let mut entries = Vec::new();
1837        collect_status_entries_parsed_index_head_matches(
1838            &parsed_index,
1839            &worktree,
1840            &tracked_presence,
1841            &submodule_dirt_map,
1842            options.untracked_mode,
1843            &mut entries,
1844        );
1845        let untracked_paths: Vec<Vec<u8>> = match options.untracked_mode {
1846            StatusUntrackedMode::All => worktree
1847                .keys()
1848                .filter(|path| !stat_cache.contains(path) && !ignores.is_ignored(path, false))
1849                .cloned()
1850                .collect(),
1851            StatusUntrackedMode::Normal => {
1852                normal_untracked_paths_from_worktree_stat_cache(&worktree, &stat_cache, &ignores)
1853            }
1854            StatusUntrackedMode::None => Vec::new(),
1855        };
1856        for path in untracked_paths {
1857            entries.push(ShortStatusEntry {
1858                index: b'?',
1859                worktree: b'?',
1860                path,
1861                head_mode: None,
1862                index_mode: None,
1863                worktree_mode: None,
1864                head_oid: None,
1865                index_oid: None,
1866                submodule: None,
1867            });
1868        }
1869        entries.sort_by(|left, right| {
1870            status_sort_category(left)
1871                .cmp(&status_sort_category(right))
1872                .then_with(|| left.path.cmp(&right.path))
1873        });
1874        return Ok(entries);
1875    }
1876    let index = index_entries_from_index(parsed_index);
1877    let head = if head_matches_index {
1878        None
1879    } else {
1880        Some(head_tree_entries(git_dir, format, &db)?)
1881    };
1882    let tracked_paths = if options.untracked_mode == StatusUntrackedMode::None {
1883        Some(index.keys().cloned().collect::<BTreeSet<_>>())
1884    } else {
1885        None
1886    };
1887    let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
1888    let (worktree, submodule_dirt_map, tracked_presence) =
1889        status_worktree_entries_with_submodule_dirt(
1890            worktree_root,
1891            git_dir,
1892            format,
1893            &stat_cache,
1894            tracked_paths.as_ref(),
1895            Some(&mut ignores),
1896        )?;
1897    let mut entries = Vec::new();
1898    if head_matches_index {
1899        collect_status_entries_head_matches_index(
1900            &index,
1901            &worktree,
1902            &tracked_presence,
1903            &submodule_dirt_map,
1904            options.untracked_mode,
1905            &mut entries,
1906        );
1907    } else if let Some(head) = head.as_ref() {
1908        collect_status_entries_with_head(
1909            StatusComparisonInputs {
1910                head,
1911                index: &index,
1912                worktree: &worktree,
1913                tracked_presence: &tracked_presence,
1914                submodule_dirt_map: &submodule_dirt_map,
1915                ignores: &ignores,
1916            },
1917            options.untracked_mode,
1918            &mut entries,
1919        );
1920    }
1921    if options.include_ignored {
1922        for path in ignored_untracked_paths(worktree_root, git_dir, &index, &ignores, true)? {
1923            entries.push(ShortStatusEntry {
1924                index: b'!',
1925                worktree: b'!',
1926                path,
1927                head_mode: None,
1928                index_mode: None,
1929                worktree_mode: None,
1930                head_oid: None,
1931                index_oid: None,
1932                submodule: None,
1933            });
1934        }
1935    }
1936    let untracked_paths: Vec<Vec<u8>> = match options.untracked_mode {
1937        StatusUntrackedMode::All => worktree
1938            .keys()
1939            .filter(|path| !index.contains_key(*path) && !ignores.is_ignored(path, false))
1940            .cloned()
1941            .collect(),
1942        StatusUntrackedMode::Normal => {
1943            normal_untracked_paths_from_worktree(&worktree, &index, &ignores)
1944        }
1945        StatusUntrackedMode::None => Vec::new(),
1946    };
1947    for path in untracked_paths {
1948        entries.push(ShortStatusEntry {
1949            index: b'?',
1950            worktree: b'?',
1951            path,
1952            head_mode: None,
1953            index_mode: None,
1954            worktree_mode: None,
1955            head_oid: None,
1956            index_oid: None,
1957            submodule: None,
1958        });
1959    }
1960    entries.sort_by(|left, right| {
1961        status_sort_category(left)
1962            .cmp(&status_sort_category(right))
1963            .then_with(|| left.path.cmp(&right.path))
1964    });
1965    Ok(entries)
1966}
1967
1968fn collect_status_entries_head_matches_index(
1969    index: &BTreeMap<Vec<u8>, TrackedEntry>,
1970    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
1971    tracked_presence: &HashSet<Vec<u8>>,
1972    submodule_dirt_map: &BTreeMap<Vec<u8>, u8>,
1973    untracked_mode: StatusUntrackedMode,
1974    entries: &mut Vec<ShortStatusEntry>,
1975) {
1976    for (path, index_entry) in index {
1977        let worktree_entry = worktree.get(path);
1978        let worktree_present =
1979            worktree_entry.is_some() || tracked_presence.contains(path.as_slice());
1980        let submodule = status_submodule_from_entries(
1981            path,
1982            index_entry,
1983            worktree_entry,
1984            submodule_dirt_map,
1985            untracked_mode,
1986        );
1987        let worktree_code = match worktree_entry {
1988            None if !worktree_present => b'D',
1989            Some(worktree_entry) if worktree_entry != index_entry => b'M',
1990            _ if submodule.is_some_and(|sub| sub.any()) => b'M',
1991            _ => b' ',
1992        };
1993        if worktree_code != b' ' {
1994            entries.push(ShortStatusEntry {
1995                index: b' ',
1996                worktree: worktree_code,
1997                path: path.clone(),
1998                head_mode: Some(index_entry.mode),
1999                index_mode: Some(index_entry.mode),
2000                worktree_mode: status_worktree_mode(Some(index_entry), worktree_entry, worktree_present),
2001                head_oid: Some(index_entry.oid),
2002                index_oid: Some(index_entry.oid),
2003                submodule: submodule.filter(|sub| sub.any()),
2004            });
2005        }
2006    }
2007}
2008
2009fn collect_status_entries_parsed_index_head_matches(
2010    index: &Index,
2011    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
2012    tracked_presence: &HashSet<Vec<u8>>,
2013    submodule_dirt_map: &BTreeMap<Vec<u8>, u8>,
2014    untracked_mode: StatusUntrackedMode,
2015    entries: &mut Vec<ShortStatusEntry>,
2016) {
2017    for entry in index
2018        .entries
2019        .iter()
2020        .filter(|entry| entry.stage() == Stage::Normal)
2021    {
2022        let path = entry.path.as_bytes();
2023        let index_entry = TrackedEntry {
2024            mode: entry.mode,
2025            oid: entry.oid,
2026        };
2027        let worktree_entry = worktree.get(path);
2028        let worktree_present = worktree_entry.is_some() || tracked_presence.contains(path);
2029        let submodule = status_submodule_from_entries(
2030            path,
2031            &index_entry,
2032            worktree_entry,
2033            submodule_dirt_map,
2034            untracked_mode,
2035        );
2036        let worktree_code = match worktree_entry {
2037            None if !worktree_present => b'D',
2038            Some(worktree_entry) if *worktree_entry != index_entry => b'M',
2039            _ if submodule.is_some_and(|sub| sub.any()) => b'M',
2040            _ => b' ',
2041        };
2042        if worktree_code != b' ' {
2043            entries.push(ShortStatusEntry {
2044                index: b' ',
2045                worktree: worktree_code,
2046                path: path.to_vec(),
2047                head_mode: Some(index_entry.mode),
2048                index_mode: Some(index_entry.mode),
2049                worktree_mode: status_worktree_mode(Some(&index_entry), worktree_entry, worktree_present),
2050                head_oid: Some(index_entry.oid),
2051                index_oid: Some(index_entry.oid),
2052                submodule: submodule.filter(|sub| sub.any()),
2053            });
2054        }
2055    }
2056}
2057
/// Borrowed inputs for the three-way (HEAD / index / worktree) status
/// comparison, bundled so they can be passed as a single argument.
struct StatusComparisonInputs<'a> {
    /// Tracked entries of the HEAD tree, keyed by path bytes.
    head: &'a BTreeMap<Vec<u8>, TrackedEntry>,
    /// Tracked entries of the index, keyed by path bytes.
    index: &'a BTreeMap<Vec<u8>, TrackedEntry>,
    /// Entries found in the worktree, keyed by path bytes.
    worktree: &'a BTreeMap<Vec<u8>, TrackedEntry>,
    /// Paths counted as present in the worktree even without a `worktree`
    /// entry; such a path is not reported as deleted.
    tracked_presence: &'a HashSet<Vec<u8>>,
    /// Per-path submodule dirt bits (tested against the
    /// `DIRTY_SUBMODULE_*` masks).
    submodule_dirt_map: &'a BTreeMap<Vec<u8>, u8>,
    /// Ignore rules consulted for paths known to neither HEAD nor the index.
    ignores: &'a IgnoreMatcher,
}
2066
2067fn collect_status_entries_with_head(
2068    inputs: StatusComparisonInputs<'_>,
2069    untracked_mode: StatusUntrackedMode,
2070    entries: &mut Vec<ShortStatusEntry>,
2071) {
2072    let mut paths = BTreeSet::new();
2073    paths.extend(inputs.head.keys().cloned());
2074    paths.extend(inputs.index.keys().cloned());
2075    paths.extend(
2076        inputs
2077            .worktree
2078            .keys()
2079            .filter(|path| inputs.index.contains_key(*path))
2080            .cloned(),
2081    );
2082
2083    for path in paths {
2084        let head_entry = inputs.head.get(&path);
2085        let index_entry = inputs.index.get(&path);
2086        let worktree_entry = inputs.worktree.get(&path);
2087        let worktree_present =
2088            worktree_entry.is_some() || inputs.tracked_presence.contains(path.as_slice());
2089        if head_entry.is_none()
2090            && index_entry.is_none()
2091            && worktree_entry.is_some()
2092            && inputs.ignores.is_ignored(&path, false)
2093        {
2094            continue;
2095        }
2096        let submodule = match index_entry {
2097            Some(index_entry) => status_submodule_from_entries(
2098                &path,
2099                index_entry,
2100                worktree_entry,
2101                inputs.submodule_dirt_map,
2102                untracked_mode,
2103            ),
2104            None => None,
2105        };
2106        let (index_code, worktree_code) =
2107            if head_entry.is_none() && index_entry.is_none() && worktree_entry.is_some() {
2108                (b'?', b'?')
2109            } else {
2110                let index_code = match (head_entry, index_entry) {
2111                    (None, Some(_)) => b'A',
2112                    (Some(_), None) => b'D',
2113                    (Some(left), Some(right)) if left != right => b'M',
2114                    _ => b' ',
2115                };
2116                let worktree_code = match (index_entry, worktree_entry) {
2117                    (None, Some(_)) => b'?',
2118                    (Some(_), None) if !worktree_present => b'D',
2119                    (Some(left), Some(right)) if left != right => b'M',
2120                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
2121                    _ => b' ',
2122                };
2123                (index_code, worktree_code)
2124            };
2125        if index_code != b' ' || worktree_code != b' ' {
2126            entries.push(ShortStatusEntry {
2127                index: index_code,
2128                worktree: worktree_code,
2129                path,
2130                head_mode: head_entry.map(|entry| entry.mode),
2131                index_mode: index_entry.map(|entry| entry.mode),
2132                worktree_mode: status_worktree_mode(index_entry, worktree_entry, worktree_present),
2133                head_oid: head_entry.map(|entry| entry.oid),
2134                index_oid: index_entry.map(|entry| entry.oid),
2135                submodule: submodule.filter(|sub| sub.any()),
2136            });
2137        }
2138    }
2139}
2140
2141fn status_worktree_mode(
2142    index_entry: Option<&TrackedEntry>,
2143    worktree_entry: Option<&TrackedEntry>,
2144    worktree_present: bool,
2145) -> Option<u32> {
2146    worktree_entry
2147        .map(|entry| entry.mode)
2148        .or_else(|| worktree_present.then(|| index_entry.map(|entry| entry.mode)).flatten())
2149}
2150
2151fn status_submodule_from_entries(
2152    path: &[u8],
2153    index_entry: &TrackedEntry,
2154    worktree_entry: Option<&TrackedEntry>,
2155    submodule_dirt_map: &BTreeMap<Vec<u8>, u8>,
2156    untracked_mode: StatusUntrackedMode,
2157) -> Option<SubmoduleStatus> {
2158    let worktree_entry = worktree_entry?;
2159    if index_entry.mode != 0o160000 || worktree_entry.mode != 0o160000 {
2160        return None;
2161    }
2162    let dirt = submodule_dirt_map.get(path).copied().unwrap_or(0);
2163    Some(SubmoduleStatus {
2164        new_commits: index_entry.oid != worktree_entry.oid,
2165        modified_content: dirt & DIRTY_SUBMODULE_MODIFIED != 0,
2166        untracked_content: dirt & DIRTY_SUBMODULE_UNTRACKED != 0
2167            && !matches!(untracked_mode, StatusUntrackedMode::None),
2168    })
2169}
2170
2171fn short_status_tracked_only(
2172    worktree_root: &Path,
2173    git_dir: &Path,
2174    format: ObjectFormat,
2175    db: &FileObjectDatabase,
2176    index: &Index,
2177    stat_cache: &IndexStatCache,
2178    head_matches_index: bool,
2179) -> Result<Vec<ShortStatusEntry>> {
2180    if head_matches_index && stat_cache.entries.len() >= 512 {
2181        return short_status_tracked_only_head_matches_index_parallel(
2182            worktree_root,
2183            git_dir,
2184            format,
2185            index,
2186            stat_cache,
2187        );
2188    }
2189    let head = if head_matches_index {
2190        None
2191    } else {
2192        Some(head_tree_entries(git_dir, format, db)?)
2193    };
2194    let mut clean_filter = None;
2195    let mut entries = Vec::new();
2196    for entry in index
2197        .entries
2198        .iter()
2199        .filter(|entry| entry.stage() == Stage::Normal)
2200    {
2201        let path = entry.path.as_bytes();
2202        let index_entry = TrackedEntry {
2203            mode: entry.mode,
2204            oid: entry.oid,
2205        };
2206        let head_entry = if head_matches_index {
2207            Some(&index_entry)
2208        } else {
2209            head.as_ref().and_then(|head| head.get(path))
2210        };
2211        let worktree_entry = worktree_entry_for_index_entry_with_attributes(
2212            worktree_root,
2213            git_dir,
2214            format,
2215            entry,
2216            stat_cache,
2217            &mut clean_filter,
2218        )?;
2219        let submodule = tracked_only_submodule_status(
2220            worktree_root,
2221            path,
2222            &index_entry,
2223            worktree_entry.as_ref(),
2224        )?;
2225        let index_code = match head_entry {
2226            None => b'A',
2227            Some(head_entry) if *head_entry != index_entry => b'M',
2228            _ => b' ',
2229        };
2230        let worktree_code = match worktree_entry.as_ref() {
2231            None => b'D',
2232            Some(worktree_entry) if *worktree_entry != index_entry => b'M',
2233            _ if submodule.is_some_and(|sub| sub.any()) => b'M',
2234            _ => b' ',
2235        };
2236        if index_code != b' ' || worktree_code != b' ' {
2237            entries.push(ShortStatusEntry {
2238                index: index_code,
2239                worktree: worktree_code,
2240                path: path.to_vec(),
2241                head_mode: head_entry.map(|entry| entry.mode),
2242                index_mode: Some(index_entry.mode),
2243                worktree_mode: worktree_entry.as_ref().map(|entry| entry.mode),
2244                head_oid: head_entry.map(|entry| entry.oid),
2245                index_oid: Some(index_entry.oid),
2246                submodule: submodule.filter(|sub| sub.any()),
2247            });
2248        }
2249    }
2250    if let Some(head) = head.as_ref() {
2251        let index_paths = index
2252            .entries
2253            .iter()
2254            .filter(|entry| entry.stage() == Stage::Normal)
2255            .map(|entry| entry.path.as_bytes().to_vec())
2256            .collect::<HashSet<_>>();
2257        for (path, head_entry) in head {
2258            if index_paths.contains(path.as_slice()) {
2259                continue;
2260            }
2261            entries.push(ShortStatusEntry {
2262                index: b'D',
2263                worktree: b' ',
2264                path: path.clone(),
2265                head_mode: Some(head_entry.mode),
2266                index_mode: None,
2267                worktree_mode: None,
2268                head_oid: Some(head_entry.oid),
2269                index_oid: None,
2270                submodule: None,
2271            });
2272        }
2273    }
2274    entries.sort_by(|left, right| {
2275        status_sort_category(left)
2276            .cmp(&status_sort_category(right))
2277            .then_with(|| left.path.cmp(&right.path))
2278    });
2279    Ok(entries)
2280}
2281
/// A stat-precheck result that still needs handling after the parallel scan.
///
/// Each variant carries the entry's position in `Index::entries`.
#[derive(Debug)]
enum TrackedOnlyPrecheck {
    /// The precheck found the path missing from the worktree.
    Deleted(usize),
    /// The precheck could not decide; the entry needs the full comparison.
    Slow(usize),
}
2287
/// Verdict of `tracked_only_stat_precheck` for a single index entry.
#[derive(Debug)]
enum TrackedOnlyPrecheckOutcome {
    /// The stat cache accepted the on-disk metadata; no further work needed.
    Clean,
    /// The path does not exist in the worktree.
    Deleted,
    /// Stat data alone is inconclusive; the full comparison must run.
    Slow,
}
2294
2295fn short_status_tracked_only_head_matches_index_parallel(
2296    worktree_root: &Path,
2297    git_dir: &Path,
2298    format: ObjectFormat,
2299    index: &Index,
2300    stat_cache: &IndexStatCache,
2301) -> Result<Vec<ShortStatusEntry>> {
2302    let normal_indices = index
2303        .entries
2304        .iter()
2305        .enumerate()
2306        .filter_map(|(idx, entry)| (entry.stage() == Stage::Normal).then_some(idx))
2307        .collect::<Vec<_>>();
2308    let max_workers = std::thread::available_parallelism()
2309        .map(|count| count.get())
2310        .unwrap_or(1)
2311        .min(8);
2312    let worker_count = max_workers.min(normal_indices.len().div_ceil(512)).max(1);
2313    let chunk_size = normal_indices.len().div_ceil(worker_count);
2314    let mut prechecks = std::thread::scope(|scope| -> Result<Vec<TrackedOnlyPrecheck>> {
2315        let mut handles = Vec::new();
2316        for chunk in normal_indices.chunks(chunk_size) {
2317            handles.push(scope.spawn(move || -> Result<Vec<TrackedOnlyPrecheck>> {
2318                let mut prechecks = Vec::new();
2319                for &idx in chunk {
2320                    let entry = &index.entries[idx];
2321                    match tracked_only_stat_precheck(worktree_root, entry, stat_cache)? {
2322                        TrackedOnlyPrecheckOutcome::Clean => {}
2323                        TrackedOnlyPrecheckOutcome::Deleted => {
2324                            prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
2325                        }
2326                        TrackedOnlyPrecheckOutcome::Slow => {
2327                            prechecks.push(TrackedOnlyPrecheck::Slow(idx));
2328                        }
2329                    }
2330                }
2331                Ok(prechecks)
2332            }));
2333        }
2334        let mut prechecks = Vec::new();
2335        for handle in handles {
2336            let mut chunk = handle
2337                .join()
2338                .map_err(|_| GitError::Command("status worker panicked".into()))??;
2339            prechecks.append(&mut chunk);
2340        }
2341        Ok(prechecks)
2342    })?;
2343    prechecks.sort_by_key(|precheck| match precheck {
2344        TrackedOnlyPrecheck::Deleted(idx) | TrackedOnlyPrecheck::Slow(idx) => *idx,
2345    });
2346
2347    let mut clean_filter = None;
2348    let mut entries = Vec::new();
2349    for precheck in prechecks {
2350        match precheck {
2351            TrackedOnlyPrecheck::Deleted(idx) => {
2352                let entry = &index.entries[idx];
2353                let path = entry.path.as_bytes();
2354                entries.push(ShortStatusEntry {
2355                    index: b' ',
2356                    worktree: b'D',
2357                    path: path.to_vec(),
2358                    head_mode: Some(entry.mode),
2359                    index_mode: Some(entry.mode),
2360                    worktree_mode: None,
2361                    head_oid: Some(entry.oid),
2362                    index_oid: Some(entry.oid),
2363                    submodule: None,
2364                });
2365            }
2366            TrackedOnlyPrecheck::Slow(idx) => {
2367                let entry = &index.entries[idx];
2368                let path = entry.path.as_bytes();
2369                let index_entry = TrackedEntry {
2370                    mode: entry.mode,
2371                    oid: entry.oid,
2372                };
2373                let worktree_entry = worktree_entry_for_index_entry_with_attributes(
2374                    worktree_root,
2375                    git_dir,
2376                    format,
2377                    entry,
2378                    stat_cache,
2379                    &mut clean_filter,
2380                )?;
2381                let submodule = tracked_only_submodule_status(
2382                    worktree_root,
2383                    path,
2384                    &index_entry,
2385                    worktree_entry.as_ref(),
2386                )?;
2387                let worktree_code = match worktree_entry.as_ref() {
2388                    None => b'D',
2389                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
2390                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
2391                    _ => b' ',
2392                };
2393                if worktree_code != b' ' {
2394                    entries.push(ShortStatusEntry {
2395                        index: b' ',
2396                        worktree: worktree_code,
2397                        path: path.to_vec(),
2398                        head_mode: Some(index_entry.mode),
2399                        index_mode: Some(index_entry.mode),
2400                        worktree_mode: worktree_entry.as_ref().map(|entry| entry.mode),
2401                        head_oid: Some(index_entry.oid),
2402                        index_oid: Some(index_entry.oid),
2403                        submodule: submodule.filter(|sub| sub.any()),
2404                    });
2405                }
2406            }
2407        }
2408    }
2409    entries.sort_by(|left, right| {
2410        status_sort_category(left)
2411            .cmp(&status_sort_category(right))
2412            .then_with(|| left.path.cmp(&right.path))
2413    });
2414    Ok(entries)
2415}
2416
2417fn tracked_only_stat_precheck(
2418    worktree_root: &Path,
2419    index_entry: &IndexEntry,
2420    stat_cache: &IndexStatCache,
2421) -> Result<TrackedOnlyPrecheckOutcome> {
2422    if index_entry.mode == 0o160000 {
2423        return Ok(TrackedOnlyPrecheckOutcome::Slow);
2424    }
2425    let git_path = index_entry.path.as_bytes();
2426    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
2427    let metadata = match fs::symlink_metadata(&absolute) {
2428        Ok(metadata) => metadata,
2429        Err(err)
2430            if matches!(
2431                err.kind(),
2432                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
2433            ) =>
2434        {
2435            return Ok(TrackedOnlyPrecheckOutcome::Deleted);
2436        }
2437        Err(err) => return Err(err.into()),
2438    };
2439    let file_type = metadata.file_type();
2440    if file_type.is_dir() || !(file_type.is_file() || file_type.is_symlink()) {
2441        return Ok(TrackedOnlyPrecheckOutcome::Slow);
2442    }
2443    if stat_cache
2444        .reuse_index_entry(index_entry, &metadata)
2445        .is_some()
2446    {
2447        Ok(TrackedOnlyPrecheckOutcome::Clean)
2448    } else {
2449        Ok(TrackedOnlyPrecheckOutcome::Slow)
2450    }
2451}
2452
2453fn tracked_only_submodule_status(
2454    worktree_root: &Path,
2455    path: &[u8],
2456    index_entry: &TrackedEntry,
2457    worktree_entry: Option<&TrackedEntry>,
2458) -> Result<Option<SubmoduleStatus>> {
2459    let Some(worktree_entry) = worktree_entry else {
2460        return Ok(None);
2461    };
2462    if index_entry.mode != 0o160000 || worktree_entry.mode != 0o160000 {
2463        return Ok(None);
2464    }
2465    let absolute = worktree_root.join(repo_path_to_os_path(path)?);
2466    let dirt = if absolute.is_dir() {
2467        submodule_dirt(&absolute)
2468    } else {
2469        0
2470    };
2471    Ok(Some(SubmoduleStatus {
2472        new_commits: index_entry.oid != worktree_entry.oid,
2473        modified_content: dirt & DIRTY_SUBMODULE_MODIFIED != 0,
2474        untracked_content: false,
2475    }))
2476}
2477
2478fn status_sort_category(entry: &ShortStatusEntry) -> u8 {
2479    match (entry.index, entry.worktree) {
2480        (b'?', b'?') => 1,
2481        (b'!', b'!') => 2,
2482        _ => 0,
2483    }
2484}
2485
2486pub fn untracked_paths(
2487    worktree_root: impl AsRef<Path>,
2488    git_dir: impl AsRef<Path>,
2489    format: ObjectFormat,
2490) -> Result<Vec<Vec<u8>>> {
2491    untracked_paths_with_options(
2492        worktree_root,
2493        git_dir,
2494        format,
2495        UntrackedPathOptions::default(),
2496    )
2497}
2498
/// One pathspec used to filter untracked collection. Mirrors git `ls-files`
/// pathspec semantics: a literal path, a recursive directory prefix, or an
/// fnmatch-style glob.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UntrackedPathspecFilter {
    /// Pathspec bytes; an empty path matches every candidate.
    pub path: Vec<u8>,
    /// When set, paths strictly below `path/` also match.
    pub recursive: bool,
    /// When set, `path` is additionally tried as a wildcard pattern.
    pub is_glob: bool,
}
2507
2508#[derive(Debug, Clone, PartialEq, Eq, Default)]
2509pub struct UntrackedPathOptions {
2510    pub directory: bool,
2511    pub no_empty_directory: bool,
2512    pub preserve_ignored_directories: bool,
2513    pub exclude_standard: bool,
2514    pub ignored_only: bool,
2515    pub exclude_patterns: Vec<Vec<u8>>,
2516    pub exclude_per_directory: Vec<String>,
2517    pub pathspecs: Vec<UntrackedPathspecFilter>,
2518}
2519
2520// The wildmatch engine and the single-item pathspec matcher now live in the
2521// shared `sley-pathspec` crate. Re-export them so existing `sley-worktree`
2522// callers (and the t3070 `ls-files` path) keep their public surface unchanged.
2523pub use sley_pathspec::{
2524    PathspecMatchMagic, WM_CASEFOLD, WM_PATHNAME, pathspec_is_glob, pathspec_item_matches,
2525    wildmatch,
2526};
2527
2528/// Whether `path` matches an `ls-files` pathspec (literal, directory prefix, or glob).
2529pub fn untracked_pathspec_matches(spec: &UntrackedPathspecFilter, path: &[u8]) -> bool {
2530    if spec.path.is_empty() {
2531        return true;
2532    }
2533    let path_no_slash = path.strip_suffix(b"/").unwrap_or(path);
2534    if path == spec.path.as_slice() || path_no_slash == spec.path.as_slice() {
2535        return true;
2536    }
2537    if spec.recursive
2538        && let Some(rest) = path
2539            .strip_prefix(spec.path.as_slice())
2540            .and_then(|rest| rest.strip_prefix(b"/"))
2541        && !rest.is_empty()
2542    {
2543        return true;
2544    }
2545    if spec.is_glob {
2546        return untracked_wildmatch(&spec.path, path)
2547            || untracked_wildmatch(&spec.path, path_no_slash);
2548    }
2549    false
2550}
2551
2552/// Whether a directory walk must descend into `parent` to satisfy active pathspecs.
2553pub fn untracked_pathspec_needs_descent(parent: &[u8], specs: &[UntrackedPathspecFilter]) -> bool {
2554    if specs.is_empty() {
2555        return false;
2556    }
2557    let parent_prefix = if parent.is_empty() {
2558        Vec::new()
2559    } else {
2560        let mut prefix = parent.to_vec();
2561        prefix.push(b'/');
2562        prefix
2563    };
2564    for spec in specs {
2565        if !parent.is_empty()
2566            && spec.path.starts_with(&parent_prefix)
2567            && spec.path.as_slice() != parent
2568        {
2569            return true;
2570        }
2571        if spec.is_glob && glob_pathspec_may_match_under(&spec.path, parent) {
2572            return true;
2573        }
2574        if spec.recursive
2575            && !parent.is_empty()
2576            && parent.starts_with(spec.path.as_slice())
2577            && parent != spec.path.as_slice()
2578        {
2579            return true;
2580        }
2581    }
2582    false
2583}
2584
2585/// Whether some pathspec selects the directory `git_path` *as a whole* (so an
2586/// untracked directory can roll up to `dir/` under `--directory`), as opposed to
2587/// only matching something strictly below it (which forces descent). A
2588/// directory-prefix pathspec covering the directory, an exact directory match, or
2589/// a glob matching the directory's own name all count; a deeper glob such as
2590/// `dir/*.c` or an exact file path inside the directory does not.
2591fn untracked_pathspec_selects_directory(
2592    specs: &[UntrackedPathspecFilter],
2593    git_path: &[u8],
2594) -> bool {
2595    specs
2596        .iter()
2597        .any(|spec| untracked_pathspec_matches(spec, git_path))
2598}
2599
2600fn glob_pathspec_may_match_under(pattern: &[u8], dir: &[u8]) -> bool {
2601    let literal_prefix = literal_prefix_before_glob(pattern);
2602    if literal_prefix.is_empty() {
2603        return true;
2604    }
2605    if dir.is_empty() {
2606        return true;
2607    }
2608    let mut dir_prefix = dir.to_vec();
2609    dir_prefix.push(b'/');
2610    if literal_prefix.starts_with(&dir_prefix) {
2611        return true;
2612    }
2613    if dir_prefix.starts_with(&literal_prefix) {
2614        return true;
2615    }
2616    literal_prefix
2617        .strip_suffix(b"/")
2618        .is_some_and(|prefix| prefix == dir)
2619}
2620
/// Returns the bytes of `pattern` that precede its first `*`, `?`, or `[`
/// (the whole pattern when it contains none of them).
fn literal_prefix_before_glob(pattern: &[u8]) -> Vec<u8> {
    pattern
        .iter()
        .copied()
        .take_while(|byte| !matches!(byte, b'*' | b'?' | b'['))
        .collect()
}
2631
/// Inserts `git_path` into `paths` in directory form, i.e. with exactly one
/// trailing `/` appended when it does not already end with one.
fn insert_untracked_directory(paths: &mut BTreeSet<Vec<u8>>, git_path: &[u8]) {
    let directory = if git_path.ends_with(b"/") {
        git_path.to_vec()
    } else {
        [git_path, b"/".as_slice()].concat()
    };
    paths.insert(directory);
}
2639
2640/// fnmatch-style glob where `*` and `?` match any byte including `/`.
2641fn untracked_wildmatch(pattern: &[u8], text: &[u8]) -> bool {
2642    // Untracked-walk pathspec globs match with PATHMATCH semantics (`*` crosses
2643    // `/`), matching git's default (non-GLOB-magic) pathspec behavior.
2644    wildmatch(pattern, text, 0)
2645}
2646
/// Describes the ignore pattern that decided a path, as returned by
/// [`standard_ignore_match`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IgnoreMatch {
    /// Name of the source the pattern came from (e.g. `.git/info/exclude`).
    pub source: Vec<u8>,
    /// Line of the pattern within `source` — presumably 1-based; confirm
    /// against the pattern reader.
    pub line_number: usize,
    /// Pattern text.
    pub pattern: Vec<u8>,
    /// Whether the pattern ignores the path — presumably `false` for a negated
    /// (`!`) pattern; confirm against `IgnorePattern::to_match`.
    pub ignored: bool,
}
2654
/// State of one attribute for a path.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AttributeState {
    /// The attribute is set (gitattributes `attr`).
    Set,
    /// The attribute is explicitly unset (gitattributes `-attr`).
    Unset,
    /// The attribute carries a value (gitattributes `attr=value`).
    Value(Vec<u8>),
}
2661
2662#[derive(Debug, Clone, PartialEq, Eq)]
2663pub struct AttributeCheck {
2664    pub attribute: Vec<u8>,
2665    pub state: Option<AttributeState>,
2666}
2667
2668pub fn untracked_paths_with_options(
2669    worktree_root: impl AsRef<Path>,
2670    git_dir: impl AsRef<Path>,
2671    format: ObjectFormat,
2672    options: UntrackedPathOptions,
2673) -> Result<Vec<Vec<u8>>> {
2674    let worktree_root = worktree_root.as_ref();
2675    let git_dir = git_dir.as_ref();
2676    let db = FileObjectDatabase::from_git_dir(git_dir, format);
2677    let (index, stat_cache, _) = read_index_entries_with_stat_cache(git_dir, format, &db)?;
2678    let ignores = IgnoreMatcher::from_sources(
2679        worktree_root,
2680        options.exclude_standard,
2681        &options.exclude_patterns,
2682        &options.exclude_per_directory,
2683    )?;
2684    if options.ignored_only {
2685        return ignored_untracked_paths(
2686            worktree_root,
2687            git_dir,
2688            &index,
2689            &ignores,
2690            options.directory,
2691        );
2692    }
2693    if options.directory {
2694        let mut paths = BTreeSet::new();
2695        collect_untracked_directory_paths(
2696            worktree_root,
2697            git_dir,
2698            worktree_root,
2699            &index,
2700            &ignores,
2701            &options,
2702            &mut paths,
2703        )?;
2704        return Ok(paths.into_iter().collect());
2705    }
2706    let worktree = worktree_entries_with_stat_cache(
2707        worktree_root,
2708        git_dir,
2709        format,
2710        Some(&stat_cache),
2711        None,
2712        None,
2713    )?;
2714    Ok(ls_files_untracked_paths_from_worktree(
2715        &worktree, &index, &ignores,
2716    ))
2717}
2718
2719/// Untracked paths for `ls-files --others` (without `--directory`): every
2720/// untracked file is listed individually, except embedded-repository boundaries
2721/// which are emitted as `dir/` to match git's non-submodule `.git` handling.
2722fn ls_files_untracked_paths_from_worktree(
2723    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
2724    index: &BTreeMap<Vec<u8>, TrackedEntry>,
2725    ignores: &IgnoreMatcher,
2726) -> Vec<Vec<u8>> {
2727    let mut paths = BTreeSet::new();
2728    for (path, entry) in worktree {
2729        if index.contains_key(path) || ignores.is_ignored(path, false) {
2730            continue;
2731        }
2732        if entry.mode == 0o040000 && entry.oid.is_null() {
2733            insert_untracked_directory(&mut paths, path);
2734            continue;
2735        }
2736        paths.insert(path.clone());
2737    }
2738    paths.into_iter().collect()
2739}
2740
2741pub fn path_matches_standard_ignore(
2742    worktree_root: impl AsRef<Path>,
2743    path: &[u8],
2744    is_dir: bool,
2745) -> Result<bool> {
2746    path_matches_ignore(worktree_root, path, is_dir, true, &[])
2747}
2748
2749pub fn standard_ignore_match(
2750    worktree_root: impl AsRef<Path>,
2751    path: &[u8],
2752    is_dir: bool,
2753) -> Result<Option<IgnoreMatch>> {
2754    let ignores = IgnoreMatcher::from_worktree_root(worktree_root.as_ref())?;
2755    Ok(ignores.match_for(path, is_dir).map(IgnorePattern::to_match))
2756}
2757
2758pub fn standard_attributes_for_path(
2759    worktree_root: impl AsRef<Path>,
2760    path: &[u8],
2761    requested: &[Vec<u8>],
2762    all: bool,
2763) -> Result<Vec<AttributeCheck>> {
2764    let matcher = AttributeMatcher::from_worktree_root(worktree_root.as_ref())?;
2765    Ok(matcher.attributes_for_path(path, requested, all))
2766}
2767
2768/// A reusable matcher for standard worktree attributes (global or
2769/// `core.attributesFile`, every in-tree `.gitattributes`, and
2770/// `$GIT_DIR/info/attributes`).
2771///
2772/// This is behaviourally identical to [`standard_attributes_for_path`] except
2773/// the attribute sources are read once and reused for each path.
2774pub struct StandardAttributeMatcher {
2775    matcher: AttributeMatcher,
2776}
2777
2778impl StandardAttributeMatcher {
2779    pub fn from_worktree_root(worktree_root: impl AsRef<Path>) -> Result<Self> {
2780        Ok(Self {
2781            matcher: AttributeMatcher::from_worktree_root(worktree_root.as_ref())?,
2782        })
2783    }
2784
2785    pub fn attributes_for_path(
2786        &self,
2787        path: &[u8],
2788        requested: &[Vec<u8>],
2789        all: bool,
2790    ) -> Vec<AttributeCheck> {
2791        self.matcher.attributes_for_path(path, requested, all)
2792    }
2793}
2794
2795pub fn standard_attributes_for_path_from_tree(
2796    worktree_root: impl AsRef<Path>,
2797    db: &FileObjectDatabase,
2798    format: ObjectFormat,
2799    tree_oid: &ObjectId,
2800    path: &[u8],
2801    requested: &[Vec<u8>],
2802    all: bool,
2803) -> Result<Vec<AttributeCheck>> {
2804    let mut matcher = AttributeMatcher::default();
2805    let worktree_root = worktree_root.as_ref();
2806    if !matcher.read_configured_attributes(worktree_root) {
2807        matcher.read_default_global_attributes();
2808    }
2809    collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
2810    read_attribute_patterns(
2811        worktree_root.join(".git").join("info").join("attributes"),
2812        &mut matcher,
2813        &[],
2814        b".git/info/attributes",
2815    );
2816    Ok(matcher.attributes_for_path(path, requested, all))
2817}
2818
2819pub fn standard_attributes_for_path_from_index(
2820    worktree_root: impl AsRef<Path>,
2821    git_dir: impl AsRef<Path>,
2822    format: ObjectFormat,
2823    path: &[u8],
2824    requested: &[Vec<u8>],
2825    all: bool,
2826) -> Result<Vec<AttributeCheck>> {
2827    let worktree_root = worktree_root.as_ref();
2828    let git_dir = git_dir.as_ref();
2829    let mut matcher = AttributeMatcher::default();
2830    if !matcher.read_configured_attributes(worktree_root) {
2831        matcher.read_default_global_attributes();
2832    }
2833    let db = FileObjectDatabase::from_git_dir(git_dir, format);
2834    collect_attribute_patterns_from_index(git_dir, format, &db, &mut matcher)?;
2835    read_attribute_patterns(
2836        worktree_root.join(".git").join("info").join("attributes"),
2837        &mut matcher,
2838        &[],
2839        b".git/info/attributes",
2840    );
2841    Ok(matcher.attributes_for_path(path, requested, all))
2842}
2843
2844pub fn path_matches_ignore(
2845    worktree_root: impl AsRef<Path>,
2846    path: &[u8],
2847    is_dir: bool,
2848    exclude_standard: bool,
2849    exclude_patterns: &[Vec<u8>],
2850) -> Result<bool> {
2851    path_matches_ignore_with_per_directory(
2852        worktree_root,
2853        path,
2854        is_dir,
2855        exclude_standard,
2856        exclude_patterns,
2857        &[],
2858    )
2859}
2860
2861pub fn path_matches_ignore_with_per_directory(
2862    worktree_root: impl AsRef<Path>,
2863    path: &[u8],
2864    is_dir: bool,
2865    exclude_standard: bool,
2866    exclude_patterns: &[Vec<u8>],
2867    exclude_per_directory: &[String],
2868) -> Result<bool> {
2869    let ignores = IgnoreMatcher::from_sources(
2870        worktree_root.as_ref(),
2871        exclude_standard,
2872        exclude_patterns,
2873        exclude_per_directory,
2874    )?;
2875    Ok(ignores.is_ignored(path, is_dir))
2876}
2877
2878pub fn ignored_index_entries<'a>(
2879    worktree_root: impl AsRef<Path>,
2880    entries: &'a [IndexEntry],
2881    exclude_standard: bool,
2882    exclude_patterns: &[Vec<u8>],
2883    exclude_per_directory: &[String],
2884) -> Result<Vec<&'a IndexEntry>> {
2885    let ignores = IgnoreMatcher::from_sources(
2886        worktree_root.as_ref(),
2887        exclude_standard,
2888        exclude_patterns,
2889        exclude_per_directory,
2890    )?;
2891    Ok(entries
2892        .iter()
2893        .filter(|entry| ignores.is_ignored(entry.path.as_bytes(), false))
2894        .collect())
2895}
2896
2897fn collect_untracked_directory_paths(
2898    root: &Path,
2899    git_dir: &Path,
2900    dir: &Path,
2901    index: &BTreeMap<Vec<u8>, TrackedEntry>,
2902    ignores: &IgnoreMatcher,
2903    options: &UntrackedPathOptions,
2904    paths: &mut BTreeSet<Vec<u8>>,
2905) -> Result<()> {
2906    if is_same_path(dir, git_dir) {
2907        return Ok(());
2908    }
2909    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
2910    entries.sort_by_key(|entry| entry.file_name());
2911    for entry in entries {
2912        let path = entry.path();
2913        if is_dot_git_entry(&path) {
2914            continue;
2915        }
2916        if is_embedded_git_internals(root, &path) {
2917            continue;
2918        }
2919        if is_same_path(&path, git_dir) {
2920            continue;
2921        }
2922        let metadata = entry.metadata()?;
2923        let relative = path.strip_prefix(root).map_err(|_| {
2924            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2925        })?;
2926        let git_path = git_path_bytes(relative)?;
2927        if ignores.is_ignored(&git_path, metadata.is_dir()) {
2928            continue;
2929        }
2930        if metadata.is_dir() {
2931            if is_nested_repository_boundary(&path) {
2932                insert_untracked_directory(paths, &git_path);
2933                continue;
2934            }
2935            let has_tracked_below = index_has_path_under(index, &git_path);
2936            let needs_descent = untracked_pathspec_needs_descent(&git_path, &options.pathspecs);
2937            if has_tracked_below {
2938                collect_untracked_directory_paths(
2939                    root, git_dir, &path, index, ignores, options, paths,
2940                )?;
2941            } else if needs_descent {
2942                // A pathspec reaches into this wholly-untracked directory. Git's
2943                // `--directory` still rolls it up to `dir/` when a pathspec selects
2944                // the directory *as a whole* (a directory-prefix that covers it, or
2945                // a glob matching its name). It descends only when a pathspec
2946                // targets something strictly below it that does not select the
2947                // directory itself (e.g. a deeper glob like `dir/*.c` or an exact
2948                // file path).
2949                if untracked_pathspec_selects_directory(&options.pathspecs, &git_path) {
2950                    insert_untracked_directory(paths, &git_path);
2951                    continue;
2952                }
2953                collect_untracked_directory_paths(
2954                    root, git_dir, &path, index, ignores, options, paths,
2955                )?;
2956            } else if options.preserve_ignored_directories
2957                && directory_has_ignored(&path, root, git_dir, ignores)?
2958            {
2959                collect_untracked_directory_paths(
2960                    root, git_dir, &path, index, ignores, options, paths,
2961                )?;
2962            } else if !options.no_empty_directory
2963                || directory_has_file(&path, root, git_dir, ignores)?
2964            {
2965                insert_untracked_directory(paths, &git_path);
2966            }
2967        } else if !index.contains_key(&git_path)
2968            && (metadata.is_file() || metadata.file_type().is_symlink())
2969            && (options.pathspecs.is_empty()
2970                || options
2971                    .pathspecs
2972                    .iter()
2973                    .any(|spec| untracked_pathspec_matches(spec, &git_path)))
2974        {
2975            // A file reached here was found by descending into its parent
2976            // directory, which happens only when that directory is not eligible
2977            // for rollup (it contains tracked content, has ignored entries `-d`
2978            // must preserve, or a pathspec selects something strictly below it).
2979            // Git's `--directory` rollup is a directory-level decision made when
2980            // the whole directory matches; an individually-reached file is always
2981            // listed individually.
2982            paths.insert(git_path);
2983        }
2984    }
2985    Ok(())
2986}
2987
2988fn index_has_path_under(index: &BTreeMap<Vec<u8>, TrackedEntry>, directory: &[u8]) -> bool {
2989    // The index map is sorted, so a single range query finds whether any tracked
2990    // path lives under `directory/` in O(log n) — scanning every key was O(n) per
2991    // untracked directory (quadratic over a deep untracked tree).
2992    let mut prefix = directory.to_vec();
2993    prefix.push(b'/');
2994    index
2995        .range::<[u8], _>((
2996            std::ops::Bound::Included(prefix.as_slice()),
2997            std::ops::Bound::Unbounded,
2998        ))
2999        .next()
3000        .is_some_and(|(path, _)| path.starts_with(&prefix))
3001}
3002
3003/// Derives normal-mode untracked paths (directory rollup) from the worktree map
3004/// produced by the single status walk, avoiding a third filesystem traversal.
3005fn normal_untracked_paths_from_worktree(
3006    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
3007    index: &BTreeMap<Vec<u8>, TrackedEntry>,
3008    ignores: &IgnoreMatcher,
3009) -> Vec<Vec<u8>> {
3010    let mut paths = BTreeSet::new();
3011    for (path, entry) in worktree {
3012        if index.contains_key(path) || ignores.is_ignored(path, false) {
3013            continue;
3014        }
3015        if entry.mode == 0o040000 && entry.oid.is_null() {
3016            insert_untracked_directory(&mut paths, path);
3017            continue;
3018        }
3019        paths.insert(untracked_normal_rollup_path(path, index, ignores));
3020    }
3021    paths.into_iter().collect()
3022}
3023
3024fn normal_untracked_paths_from_worktree_stat_cache(
3025    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
3026    stat_cache: &IndexStatCache,
3027    ignores: &IgnoreMatcher,
3028) -> Vec<Vec<u8>> {
3029    let mut paths = BTreeSet::new();
3030    let mut index_paths = None;
3031    for (path, entry) in worktree {
3032        if stat_cache.contains(path) || ignores.is_ignored(path, false) {
3033            continue;
3034        }
3035        if entry.mode == 0o040000 && entry.oid.is_null() {
3036            insert_untracked_directory(&mut paths, path);
3037            continue;
3038        }
3039        let index_paths = index_paths
3040            .get_or_insert_with(|| stat_cache.entries.keys().cloned().collect::<BTreeSet<_>>());
3041        paths.insert(untracked_normal_rollup_path_from_paths(
3042            path,
3043            index_paths,
3044            ignores,
3045        ));
3046    }
3047    paths.into_iter().collect()
3048}
3049
3050fn untracked_normal_rollup_path(
3051    file_path: &[u8],
3052    index: &BTreeMap<Vec<u8>, TrackedEntry>,
3053    ignores: &IgnoreMatcher,
3054) -> Vec<u8> {
3055    let segments = file_path
3056        .split(|byte| *byte == b'/')
3057        .filter(|segment| !segment.is_empty())
3058        .collect::<Vec<_>>();
3059    if segments.len() <= 1 {
3060        return file_path.to_vec();
3061    }
3062    let mut prefix = Vec::new();
3063    for segment in &segments[..segments.len() - 1] {
3064        if !prefix.is_empty() {
3065            prefix.push(b'/');
3066        }
3067        prefix.extend_from_slice(segment);
3068        if index_has_path_under(index, &prefix) {
3069            break;
3070        }
3071        if !ignores.is_ignored(&prefix, true) {
3072            let mut directory = prefix;
3073            directory.push(b'/');
3074            return directory;
3075        }
3076    }
3077    file_path.to_vec()
3078}
3079
3080fn untracked_normal_rollup_path_from_paths(
3081    file_path: &[u8],
3082    index_paths: &BTreeSet<Vec<u8>>,
3083    ignores: &IgnoreMatcher,
3084) -> Vec<u8> {
3085    let segments = file_path
3086        .split(|byte| *byte == b'/')
3087        .filter(|segment| !segment.is_empty())
3088        .collect::<Vec<_>>();
3089    if segments.len() <= 1 {
3090        return file_path.to_vec();
3091    }
3092    let mut prefix = Vec::new();
3093    for segment in &segments[..segments.len() - 1] {
3094        if !prefix.is_empty() {
3095            prefix.push(b'/');
3096        }
3097        prefix.extend_from_slice(segment);
3098        if tracked_paths_may_contain(index_paths, &prefix) {
3099            break;
3100        }
3101        if !ignores.is_ignored(&prefix, true) {
3102            let mut directory = prefix;
3103            directory.push(b'/');
3104            return directory;
3105        }
3106    }
3107    file_path.to_vec()
3108}
3109
3110fn directory_has_file(
3111    dir: &Path,
3112    root: &Path,
3113    git_dir: &Path,
3114    ignores: &IgnoreMatcher,
3115) -> Result<bool> {
3116    if is_same_path(dir, git_dir) {
3117        return Ok(false);
3118    }
3119    for entry in fs::read_dir(dir)? {
3120        let entry = entry?;
3121        let path = entry.path();
3122        if is_dot_git_entry(&path) {
3123            continue;
3124        }
3125        if is_embedded_git_internals(root, &path) {
3126            continue;
3127        }
3128        if is_same_path(&path, git_dir) {
3129            continue;
3130        }
3131        let metadata = entry.metadata()?;
3132        let relative = path.strip_prefix(root).map_err(|_| {
3133            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
3134        })?;
3135        let git_path = git_path_bytes(relative)?;
3136        if ignores.is_ignored(&git_path, metadata.is_dir()) {
3137            continue;
3138        }
3139        if metadata.is_file() || metadata.file_type().is_symlink() {
3140            return Ok(true);
3141        }
3142        if metadata.is_dir() {
3143            if is_nested_repository_boundary(&path) {
3144                continue;
3145            }
3146            if directory_has_file(&path, root, git_dir, ignores)? {
3147                return Ok(true);
3148            }
3149        }
3150    }
3151    Ok(false)
3152}
3153
3154fn directory_has_ignored(
3155    dir: &Path,
3156    root: &Path,
3157    git_dir: &Path,
3158    ignores: &IgnoreMatcher,
3159) -> Result<bool> {
3160    if is_same_path(dir, git_dir) {
3161        return Ok(false);
3162    }
3163    for entry in fs::read_dir(dir)? {
3164        let entry = entry?;
3165        let path = entry.path();
3166        if is_dot_git_entry(&path) {
3167            continue;
3168        }
3169        if is_same_path(&path, git_dir) {
3170            continue;
3171        }
3172        let metadata = entry.metadata()?;
3173        let relative = path.strip_prefix(root).map_err(|_| {
3174            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
3175        })?;
3176        let git_path = git_path_bytes(relative)?;
3177        if ignores.is_ignored(&git_path, metadata.is_dir()) {
3178            return Ok(true);
3179        }
3180        if metadata.is_dir() && directory_has_ignored(&path, root, git_dir, ignores)? {
3181            return Ok(true);
3182        }
3183    }
3184    Ok(false)
3185}
3186
3187fn ignored_untracked_paths(
3188    root: &Path,
3189    git_dir: &Path,
3190    index: &BTreeMap<Vec<u8>, TrackedEntry>,
3191    ignores: &IgnoreMatcher,
3192    directory: bool,
3193) -> Result<Vec<Vec<u8>>> {
3194    let mut paths = BTreeSet::new();
3195    let context = IgnoredUntrackedContext {
3196        root,
3197        git_dir,
3198        index,
3199        ignores,
3200        directory,
3201    };
3202    collect_ignored_untracked_paths(&context, root, false, &mut paths)?;
3203    Ok(paths.into_iter().collect())
3204}
3205
3206struct IgnoredUntrackedContext<'a> {
3207    root: &'a Path,
3208    git_dir: &'a Path,
3209    index: &'a BTreeMap<Vec<u8>, TrackedEntry>,
3210    ignores: &'a IgnoreMatcher,
3211    directory: bool,
3212}
3213
3214fn collect_ignored_untracked_paths(
3215    context: &IgnoredUntrackedContext<'_>,
3216    dir: &Path,
3217    parent_ignored: bool,
3218    paths: &mut BTreeSet<Vec<u8>>,
3219) -> Result<()> {
3220    if is_same_path(dir, context.git_dir) {
3221        return Ok(());
3222    }
3223    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
3224    entries.sort_by_key(|entry| entry.file_name());
3225    for entry in entries {
3226        let path = entry.path();
3227        if is_dot_git_entry(&path) {
3228            continue;
3229        }
3230        if is_same_path(&path, context.git_dir) {
3231            continue;
3232        }
3233        let metadata = entry.metadata()?;
3234        let relative = path.strip_prefix(context.root).map_err(|_| {
3235            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
3236        })?;
3237        let git_path = git_path_bytes(relative)?;
3238        if metadata.is_dir() {
3239            if is_nested_repository_boundary(&path) {
3240                continue;
3241            }
3242            let ignored = parent_ignored || context.ignores.is_ignored(&git_path, true);
3243            if ignored && !index_has_path_under(context.index, &git_path) {
3244                if context.directory {
3245                    let mut directory_path = git_path;
3246                    directory_path.push(b'/');
3247                    paths.insert(directory_path);
3248                } else {
3249                    collect_ignored_untracked_paths(context, &path, true, paths)?;
3250                }
3251            } else {
3252                collect_ignored_untracked_paths(context, &path, ignored, paths)?;
3253            }
3254        } else if !context.index.contains_key(&git_path)
3255            && (metadata.is_file() || metadata.file_type().is_symlink())
3256            && (parent_ignored || context.ignores.is_ignored(&git_path, false))
3257        {
3258            paths.insert(git_path);
3259        }
3260    }
3261    Ok(())
3262}
3263
3264#[derive(Debug, Default)]
3265struct IgnoreMatcher {
3266    patterns: Vec<IgnorePattern>,
3267}
3268
3269#[derive(Debug)]
3270struct IgnorePattern {
3271    base: Vec<u8>,
3272    pattern: Vec<u8>,
3273    original: Vec<u8>,
3274    source: Vec<u8>,
3275    line_number: usize,
3276    negated: bool,
3277    directory_only: bool,
3278    anchored: bool,
3279    has_slash: bool,
3280    /// How `pattern` should be matched against a slash-free segment. Most
3281    /// `.gitignore` entries are literals or simple `*.ext` / `prefix*` globs, all
3282    /// of which match without the allocating wildcard DP engine; only genuinely
3283    /// complex globs fall through to [`wildcard_path_matches`].
3284    match_kind: MatchKind,
3285}
3286
/// Shape of an [`IgnorePattern`], used to route common patterns around the
/// general wildcard matcher. Each fast-path variant matches a *slash-free*
/// segment (a basename or a single path component); any pattern containing `/`
/// is classified as [`MatchKind::Glob`] and only ever reaches the full engine.
#[derive(Debug)]
enum MatchKind {
    /// No metacharacters at all: compared by byte equality.
    Literal,
    /// `*X` where `X` is literal: matches a segment that ends with `X`.
    Suffix,
    /// `X*` where `X` is literal: matches a segment that starts with `X`.
    Prefix,
    /// Everything else: handled by [`wildcard_path_matches`].
    Glob,
}
3302
3303/// Classify `pattern` for [`MatchKind`]. `*X`/`X*` fast paths require the literal
3304/// part to be slash-free so that `ends_with`/`starts_with` on a single segment is
3305/// exactly equivalent to the glob (`*` never crosses `/`).
3306fn classify_ignore_pattern(pattern: &[u8]) -> MatchKind {
3307    let stars = pattern.iter().filter(|byte| **byte == b'*').count();
3308    let other_meta = pattern
3309        .iter()
3310        .any(|byte| matches!(byte, b'?' | b'[' | b'\\'));
3311    if stars == 0 && !other_meta {
3312        return MatchKind::Literal;
3313    }
3314    if stars == 1 && !other_meta {
3315        let literal = if pattern.first() == Some(&b'*') {
3316            Some((&pattern[1..], MatchKind::Suffix))
3317        } else if pattern.last() == Some(&b'*') {
3318            Some((&pattern[..pattern.len() - 1], MatchKind::Prefix))
3319        } else {
3320            None
3321        };
3322        if let Some((literal, kind)) = literal
3323            && !literal.is_empty()
3324            && !literal.contains(&b'/')
3325        {
3326            return kind;
3327        }
3328    }
3329    MatchKind::Glob
3330}
3331
3332impl IgnoreMatcher {
3333    fn from_sources(
3334        root: &Path,
3335        exclude_standard: bool,
3336        patterns: &[Vec<u8>],
3337        per_directory: &[String],
3338    ) -> Result<Self> {
3339        let mut matcher = if exclude_standard {
3340            Self::from_worktree_root(root)?
3341        } else {
3342            Self::default()
3343        };
3344        matcher.extend_patterns(patterns);
3345        matcher.extend_per_directory_patterns(root, per_directory)?;
3346        Ok(matcher)
3347    }
3348
3349    /// Builds only the repository-wide ignore sources — `core.excludesFile` (or the
3350    /// default global) and `$GIT_DIR/info/exclude` — *without* walking the worktree
3351    /// for `.gitignore`. The caller folds each directory's `.gitignore` into the
3352    /// matcher as it descends (see [`read_dir_ignore_patterns`]), so status reads
3353    /// the tree exactly once instead of doing a separate full-tree ignore pass.
3354    fn from_worktree_base(root: &Path) -> Result<Self> {
3355        let mut patterns = Vec::new();
3356        read_ignore_patterns(
3357            root.join(".git").join("info").join("exclude"),
3358            &mut patterns,
3359            &[],
3360            b".git/info/exclude",
3361        );
3362        if !read_core_excludes_file(root, &mut patterns) {
3363            read_default_global_excludes_file(&mut patterns);
3364        }
3365        Ok(Self { patterns })
3366    }
3367
3368    fn from_worktree_root(root: &Path) -> Result<Self> {
3369        let mut patterns = Vec::new();
3370        read_ignore_patterns(
3371            root.join(".git").join("info").join("exclude"),
3372            &mut patterns,
3373            &[],
3374            b".git/info/exclude",
3375        );
3376        if !read_core_excludes_file(root, &mut patterns) {
3377            read_default_global_excludes_file(&mut patterns);
3378        }
3379        collect_per_directory_patterns(root, root, &[String::from(".gitignore")], &mut patterns)?;
3380        Ok(Self { patterns })
3381    }
3382
3383    fn extend_patterns(&mut self, patterns: &[Vec<u8>]) {
3384        for pattern in patterns {
3385            push_ignore_pattern(&mut self.patterns, pattern, &[], &[], 0);
3386        }
3387    }
3388
3389    fn extend_per_directory_patterns(&mut self, root: &Path, names: &[String]) -> Result<()> {
3390        if names.is_empty() {
3391            return Ok(());
3392        }
3393        collect_per_directory_patterns(root, root, names, &mut self.patterns)
3394    }
3395
3396    fn is_ignored(&self, path: &[u8], is_dir: bool) -> bool {
3397        let mut ignored = false;
3398        for pattern in &self.patterns {
3399            if pattern.matches(path, is_dir) {
3400                ignored = !pattern.negated;
3401            }
3402        }
3403        ignored
3404    }
3405
3406    fn match_for(&self, path: &[u8], is_dir: bool) -> Option<&IgnorePattern> {
3407        let mut matched = None;
3408        for pattern in &self.patterns {
3409            if pattern.matches(path, is_dir) {
3410                matched = Some(pattern);
3411            }
3412        }
3413        matched
3414    }
3415}
3416
/// Decides whether a worktree path is included by a [`SparseCheckout`].
///
/// In [`SparseCheckoutMode::Full`] the sparse patterns are compiled with the
/// same `.gitignore` grammar used elsewhere in this crate ([`IgnorePattern`]);
/// a path is *in cone* when the last matching pattern is positive. In
/// [`SparseCheckoutMode::Cone`] the patterns are reduced to a set of recursive
/// directory prefixes plus a flag for whether top-level files are kept, and
/// inclusion is decided by literal prefix containment.
#[derive(Debug)]
enum SparseMatcher {
    /// Full pattern matching: the compiled patterns in file order; the last
    /// one matching a path decides whether it is included.
    Full { patterns: Vec<IgnorePattern> },
    /// Cone matching: inclusion is delegated to the compiled [`ConeMatcher`].
    Cone(ConeMatcher),
}
3430
/// Compiled form of a cone-mode sparse specification. The default value
/// includes nothing: no root files and no directories.
#[derive(Debug, Default)]
struct ConeMatcher {
    /// `true` when files directly at the repository root are in cone (`/*`).
    root_files: bool,
    /// Directory prefixes (without leading or trailing `/`) whose entire
    /// subtree is in cone, e.g. `dir1/dir2`.
    recursive_dirs: Vec<Vec<u8>>,
    /// Parent directories that are in cone only for their direct files, so
    /// intermediate directories keep their own files without pulling in
    /// sibling subtrees. Stored without leading or trailing `/`.
    parent_dirs: Vec<Vec<u8>>,
}
3443
3444impl SparseMatcher {
3445    fn new(sparse: &SparseCheckout, mode: SparseCheckoutMode) -> Self {
3446        let resolved = match mode {
3447            SparseCheckoutMode::Auto => {
3448                if patterns_are_cone(&sparse.patterns) {
3449                    SparseCheckoutMode::Cone
3450                } else {
3451                    SparseCheckoutMode::Full
3452                }
3453            }
3454            other => other,
3455        };
3456        match resolved {
3457            SparseCheckoutMode::Cone => SparseMatcher::Cone(ConeMatcher::compile(&sparse.patterns)),
3458            // `Auto` has been resolved above; everything else is full matching.
3459            _ => {
3460                let mut patterns = Vec::new();
3461                for pattern in &sparse.patterns {
3462                    push_ignore_pattern(&mut patterns, pattern, &[], b"sparse-checkout", 0);
3463                }
3464                SparseMatcher::Full { patterns }
3465            }
3466        }
3467    }
3468
3469    /// Returns `true` when the given file path should be present in the
3470    /// worktree under this sparse specification.
3471    fn includes_file(&self, path: &[u8]) -> bool {
3472        match self {
3473            SparseMatcher::Full { patterns } => {
3474                let mut included = false;
3475                for pattern in patterns {
3476                    if pattern.matches(path, false) {
3477                        included = !pattern.negated;
3478                    }
3479                }
3480                included
3481            }
3482            SparseMatcher::Cone(cone) => cone.includes_file(path),
3483        }
3484    }
3485}
3486
3487impl ConeMatcher {
3488    fn compile(patterns: &[Vec<u8>]) -> Self {
3489        let mut matcher = ConeMatcher::default();
3490        for raw in patterns {
3491            let line = sparse_clean_line(raw);
3492            if line.is_empty() || line.starts_with(b"#") {
3493                continue;
3494            }
3495            // Negated guards such as `!/*/` and `!/dir/*/` only exist to stop a
3496            // recursive match from pulling in nested directories; the positive
3497            // patterns already capture the cone, so we ignore the negations.
3498            if line.starts_with(b"!") {
3499                continue;
3500            }
3501            if line == b"/*" {
3502                matcher.root_files = true;
3503                continue;
3504            }
3505            // `/dir/` -> recursive subtree.
3506            if let Some(rest) = line.strip_prefix(b"/")
3507                && let Some(dir) = rest.strip_suffix(b"/")
3508                && !dir.is_empty()
3509            {
3510                matcher.recursive_dirs.push(dir.to_vec());
3511                continue;
3512            }
3513            // `/dir/*` -> direct files of `dir` only (parent guard).
3514            if let Some(rest) = line.strip_prefix(b"/")
3515                && let Some(dir) = rest.strip_suffix(b"/*")
3516                && !dir.is_empty()
3517            {
3518                matcher.parent_dirs.push(dir.to_vec());
3519                continue;
3520            }
3521        }
3522        matcher
3523    }
3524
3525    fn includes_file(&self, path: &[u8]) -> bool {
3526        let parent = match path.iter().rposition(|byte| *byte == b'/') {
3527            Some(index) => &path[..index],
3528            None => {
3529                // A path with no slash is a top-level file.
3530                return self.root_files;
3531            }
3532        };
3533        if self
3534            .recursive_dirs
3535            .iter()
3536            .any(|dir| path_is_under_dir(path, dir))
3537        {
3538            return true;
3539        }
3540        self.parent_dirs.iter().any(|dir| dir.as_slice() == parent)
3541    }
3542}
3543
3544/// Strips a CR, leading/trailing whitespace, and an optional trailing slash is
3545/// preserved (cone patterns are slash sensitive) from a raw sparse line.
3546fn sparse_clean_line(raw: &[u8]) -> &[u8] {
3547    let line = raw.strip_suffix(b"\r").unwrap_or(raw);
3548    trim_ascii_whitespace(line)
3549}
3550
/// Returns `true` when `path` lives strictly beneath the directory `dir`,
/// i.e. it is `dir` followed by `/` and anything else. A `path` equal to a
/// non-empty `dir` is *not* considered under it. An empty `dir` denotes the
/// root and contains every path.
fn path_is_under_dir(path: &[u8], dir: &[u8]) -> bool {
    dir.is_empty() || matches!(path.strip_prefix(dir), Some([b'/', ..]))
}
3560
3561/// Heuristic used by [`SparseCheckoutMode::Auto`]: the pattern set is cone
3562/// shaped when every (non-comment, non-blank) line is one of the restricted
3563/// cone forms Git emits.
3564fn patterns_are_cone(patterns: &[Vec<u8>]) -> bool {
3565    let mut saw_pattern = false;
3566    for raw in patterns {
3567        let line = sparse_clean_line(raw);
3568        if line.is_empty() || line.starts_with(b"#") {
3569            continue;
3570        }
3571        saw_pattern = true;
3572        let body = line.strip_prefix(b"!").unwrap_or(line);
3573        let is_cone_shaped = body == b"/*"
3574            || body == b"/*/"
3575            || (body.starts_with(b"/")
3576                && (body.ends_with(b"/") || body.ends_with(b"/*"))
3577                && !sparse_has_glob_meta(body));
3578        if !is_cone_shaped {
3579            return false;
3580        }
3581    }
3582    saw_pattern
3583}
3584
/// Reports whether `body` contains a glob metacharacter (`*`, `?`, `[`, `]`
/// or `\`) that rules out cone interpretation. One trailing `/*` is removed
/// before the scan, since the caller accepts that form explicitly.
fn sparse_has_glob_meta(body: &[u8]) -> bool {
    const META: &[u8] = b"*?[]\\";
    let stem = match body.strip_suffix(b"/*") {
        Some(stem) => stem,
        None => body,
    };
    stem.iter().any(|byte| META.contains(byte))
}
3593
3594fn read_core_excludes_file(root: &Path, patterns: &mut Vec<IgnorePattern>) -> bool {
3595    let Ok(config) = sley_config::read_repo_config(&root.join(".git"), None) else {
3596        return false;
3597    };
3598    let Some(value) = config.get("core", None, "excludesFile") else {
3599        return false;
3600    };
3601    let path = expand_core_excludes_file(root, value);
3602    read_ignore_patterns(path, patterns, &[], value.as_bytes());
3603    true
3604}
3605
/// Resolves a configured file path to a filesystem path.
///
/// * An absolute `value` is returned unchanged.
/// * A `value` starting with `~/` is resolved against `$HOME` when that
///   variable is set.
/// * Anything else (including `~/…` without `$HOME`) is joined onto `root`.
fn expand_core_excludes_file(root: &Path, value: &str) -> PathBuf {
    let configured = Path::new(value);
    if configured.is_absolute() {
        return configured.to_path_buf();
    }
    value
        .strip_prefix("~/")
        .and_then(|rest| std::env::var_os("HOME").map(|home| PathBuf::from(home).join(rest)))
        .unwrap_or_else(|| root.join(configured))
}
3618
3619fn read_default_global_excludes_file(patterns: &mut Vec<IgnorePattern>) {
3620    if let Some(config_home) = std::env::var_os("XDG_CONFIG_HOME")
3621        && !config_home.is_empty()
3622    {
3623        let path = PathBuf::from(config_home).join("git").join("ignore");
3624        let source = path.to_string_lossy().into_owned();
3625        read_ignore_patterns(path, patterns, &[], source.as_bytes());
3626        return;
3627    }
3628    if let Some(home) = std::env::var_os("HOME") {
3629        let path = PathBuf::from(home)
3630            .join(".config")
3631            .join("git")
3632            .join("ignore");
3633        let source = path.to_string_lossy().into_owned();
3634        read_ignore_patterns(path, patterns, &[], source.as_bytes());
3635    }
3636}
3637
3638fn collect_per_directory_patterns(
3639    root: &Path,
3640    dir: &Path,
3641    names: &[String],
3642    patterns: &mut Vec<IgnorePattern>,
3643) -> Result<()> {
3644    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
3645    entries.sort_by_key(|entry| entry.file_name());
3646    for entry in entries {
3647        let path = entry.path();
3648        if path.file_name().and_then(|name| name.to_str()) == Some(".git") {
3649            continue;
3650        }
3651        let metadata = entry.metadata()?;
3652        if metadata.is_dir() {
3653            collect_per_directory_patterns(root, &path, names, patterns)?;
3654            continue;
3655        }
3656        if !metadata.is_file() {
3657            continue;
3658        }
3659        let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
3660            continue;
3661        };
3662        if !names.iter().any(|name| name == file_name) {
3663            continue;
3664        }
3665        let parent = path.parent().unwrap_or(root);
3666        let relative = parent.strip_prefix(root).map_err(|_| {
3667            GitError::InvalidPath(format!("path {} is outside worktree", parent.display()))
3668        })?;
3669        let base = git_path_bytes(relative)?;
3670        let mut source = base.clone();
3671        if !source.is_empty() {
3672            source.push(b'/');
3673        }
3674        source.extend_from_slice(file_name.as_bytes());
3675        read_ignore_patterns(&path, patterns, &base, &source);
3676    }
3677    Ok(())
3678}
3679
3680fn read_ignore_patterns(
3681    path: impl AsRef<Path>,
3682    patterns: &mut Vec<IgnorePattern>,
3683    base: &[u8],
3684    source: &[u8],
3685) {
3686    let Ok(contents) = fs::read(path) else {
3687        return;
3688    };
3689    for (line, raw) in contents.split(|byte| *byte == b'\n').enumerate() {
3690        push_ignore_pattern(patterns, raw, base, source, line + 1);
3691    }
3692}
3693
3694fn push_ignore_pattern(
3695    patterns: &mut Vec<IgnorePattern>,
3696    raw: &[u8],
3697    base: &[u8],
3698    source: &[u8],
3699    line_number: usize,
3700) {
3701    let mut line = raw.strip_suffix(b"\r").unwrap_or(raw).to_vec();
3702    normalize_ignore_trailing_spaces(&mut line);
3703    let original = line.clone();
3704    let mut line = line.as_slice();
3705    if line.is_empty() || line.starts_with(b"#") {
3706        return;
3707    }
3708    let negated = if line.starts_with(b"\\#") || line.starts_with(b"\\!") {
3709        line = &line[1..];
3710        false
3711    } else if let Some(pattern) = line.strip_prefix(b"!") {
3712        line = pattern;
3713        true
3714    } else {
3715        false
3716    };
3717    let directory_only = line.ends_with(b"/");
3718    let pattern = if directory_only {
3719        line.strip_suffix(b"/").unwrap_or(line)
3720    } else {
3721        line
3722    };
3723    let (anchored, pattern) = if let Some(pattern) = pattern.strip_prefix(b"/") {
3724        (true, pattern)
3725    } else {
3726        (false, pattern)
3727    };
3728    // A leading `**/` followed by a slash-free segment is, per gitignore,
3729    // identical to the bare segment ("match in all directories"): `**/Pods` ≡
3730    // `Pods`, `**/*.jks` ≡ `*.jks`. Collapse it so the pattern matches the
3731    // basename directly (a literal/suffix compare) instead of paying for the
3732    // `**` wildcard engine on the full path — verified against `git check-ignore`.
3733    let pattern = match pattern.strip_prefix(b"**/") {
3734        Some(rest) if !rest.is_empty() && !rest.contains(&b'/') => rest,
3735        _ => pattern,
3736    };
3737    if pattern.is_empty() {
3738        return;
3739    }
3740    patterns.push(IgnorePattern {
3741        base: base.to_vec(),
3742        pattern: pattern.to_vec(),
3743        original,
3744        source: source.to_vec(),
3745        line_number,
3746        negated,
3747        directory_only,
3748        anchored,
3749        has_slash: pattern.contains(&b'/'),
3750        match_kind: classify_ignore_pattern(pattern),
3751    });
3752}
3753
/// Removes unescaped trailing spaces from an ignore line, in place.
///
/// Spaces are dropped from the end until the line no longer ends in a space
/// or the last space is escaped. A space preceded by an odd number of
/// backslashes is escaped: it is kept, the escaping backslash is removed, and
/// trimming stops.
fn normalize_ignore_trailing_spaces(line: &mut Vec<u8>) {
    loop {
        let escapes = match line.split_last() {
            Some((&b' ', head)) => head
                .iter()
                .rev()
                .take_while(|byte| **byte == b'\\')
                .count(),
            _ => return,
        };
        if escapes % 2 == 1 {
            // The byte just before the final space is its escaping backslash.
            let backslash = line.len() - 2;
            line.remove(backslash);
            return;
        }
        line.pop();
    }
}
3769
3770impl IgnorePattern {
3771    fn to_match(&self) -> IgnoreMatch {
3772        IgnoreMatch {
3773            source: self.source.clone(),
3774            line_number: self.line_number,
3775            pattern: self.original.clone(),
3776            ignored: !self.negated,
3777        }
3778    }
3779
3780    fn matches(&self, path: &[u8], is_dir: bool) -> bool {
3781        let path = if self.base.is_empty() {
3782            path
3783        } else {
3784            let Some(rest) = path
3785                .strip_prefix(self.base.as_slice())
3786                .and_then(|rest| rest.strip_prefix(b"/"))
3787            else {
3788                return false;
3789            };
3790            rest
3791        };
3792        if self.directory_only {
3793            return self.matches_directory(path, is_dir);
3794        }
3795        if self.anchored || self.has_slash {
3796            return self.match_segment(path);
3797        }
3798        path.rsplit(|byte| *byte == b'/')
3799            .next()
3800            .is_some_and(|basename| self.match_segment(basename))
3801    }
3802
3803    fn matches_directory(&self, path: &[u8], is_dir: bool) -> bool {
3804        if self.anchored || self.has_slash {
3805            return path == self.pattern
3806                || path
3807                    .strip_prefix(self.pattern.as_slice())
3808                    .and_then(|rest| rest.strip_prefix(b"/"))
3809                    .is_some();
3810        }
3811        path.split(|byte| *byte == b'/')
3812            .enumerate()
3813            .any(|(index, component)| {
3814                self.match_segment(component)
3815                    && (is_dir || index + 1 < path.split(|byte| *byte == b'/').count())
3816            })
3817    }
3818
3819    /// Match a slash-free `value` (a basename or path component) against this
3820    /// pattern. Literal and simple `*X`/`X*` patterns resolve with a direct
3821    /// comparison; only complex globs pay for the allocating wildcard engine.
3822    fn match_segment(&self, value: &[u8]) -> bool {
3823        match self.match_kind {
3824            MatchKind::Literal => self.pattern == value,
3825            // `*X` ≡ ends_with(X) and `X*` ≡ starts_with(X), but only on a
3826            // slash-free segment: `*` never crosses `/`, so an anchored `/*.log`
3827            // applied to a multi-segment path must not match (the slash guard
3828            // rejects it). Basename/component call sites are slash-free already.
3829            MatchKind::Suffix => !value.contains(&b'/') && value.ends_with(&self.pattern[1..]),
3830            MatchKind::Prefix => {
3831                !value.contains(&b'/') && value.starts_with(&self.pattern[..self.pattern.len() - 1])
3832            }
3833            MatchKind::Glob => wildcard_path_matches(&self.pattern, value),
3834        }
3835    }
3836}
3837
3838thread_local! {
3839    /// Reused dynamic-programming scratch for [`wildcard_path_matches`]. Flat
3840    /// `(pattern.len()+1) * (value.len()+1)` grid of memoised results, kept across
3841    /// calls so the hot ignore/attribute matching loop never reallocates.
3842    static WILDCARD_MEMO: RefCell<Vec<Option<bool>>> = const { RefCell::new(Vec::new()) };
3843}
3844
3845fn wildcard_path_matches(pattern: &[u8], value: &[u8]) -> bool {
3846    let stride = value.len() + 1;
3847    let cells = (pattern.len() + 1) * stride;
3848    WILDCARD_MEMO.with_borrow_mut(|memo| {
3849        // One reused allocation; clearing then resizing fills the grid with `None`.
3850        memo.clear();
3851        memo.resize(cells, None);
3852        wildcard_path_matches_from(pattern, value, 0, 0, memo, stride)
3853    })
3854}
3855
3856fn wildcard_path_matches_from(
3857    pattern: &[u8],
3858    value: &[u8],
3859    pattern_index: usize,
3860    value_index: usize,
3861    memo: &mut [Option<bool>],
3862    stride: usize,
3863) -> bool {
3864    let cell = pattern_index * stride + value_index;
3865    if let Some(cached) = memo[cell] {
3866        return cached;
3867    }
3868    let matched = if pattern_index == pattern.len() {
3869        value_index == value.len()
3870    } else {
3871        match pattern[pattern_index] {
3872            b'*' if pattern.get(pattern_index + 1) == Some(&b'*') => wildcard_double_star_matches(
3873                pattern,
3874                value,
3875                pattern_index,
3876                value_index,
3877                memo,
3878                stride,
3879            ),
3880            b'*' => {
3881                if wildcard_path_matches_from(
3882                    pattern,
3883                    value,
3884                    pattern_index + 1,
3885                    value_index,
3886                    memo,
3887                    stride,
3888                ) {
3889                    true
3890                } else {
3891                    let mut next = value_index;
3892                    while next < value.len() && value[next] != b'/' {
3893                        next += 1;
3894                        if wildcard_path_matches_from(
3895                            pattern,
3896                            value,
3897                            pattern_index + 1,
3898                            next,
3899                            memo,
3900                            stride,
3901                        ) {
3902                            return true;
3903                        }
3904                    }
3905                    false
3906                }
3907            }
3908            b'?' => {
3909                value_index < value.len()
3910                    && value[value_index] != b'/'
3911                    && wildcard_path_matches_from(
3912                        pattern,
3913                        value,
3914                        pattern_index + 1,
3915                        value_index + 1,
3916                        memo,
3917                        stride,
3918                    )
3919            }
3920            b'[' => {
3921                if value_index < value.len() && value[value_index] != b'/' {
3922                    if let Some((class_matches, next_pattern_index)) =
3923                        wildcard_class_matches(pattern, pattern_index, value[value_index])
3924                    {
3925                        class_matches
3926                            && wildcard_path_matches_from(
3927                                pattern,
3928                                value,
3929                                next_pattern_index,
3930                                value_index + 1,
3931                                memo,
3932                                stride,
3933                            )
3934                    } else {
3935                        value[value_index] == b'['
3936                            && wildcard_path_matches_from(
3937                                pattern,
3938                                value,
3939                                pattern_index + 1,
3940                                value_index + 1,
3941                                memo,
3942                                stride,
3943                            )
3944                    }
3945                } else {
3946                    false
3947                }
3948            }
3949            b'\\' if pattern_index + 1 < pattern.len() => {
3950                value_index < value.len()
3951                    && pattern[pattern_index + 1] == value[value_index]
3952                    && wildcard_path_matches_from(
3953                        pattern,
3954                        value,
3955                        pattern_index + 2,
3956                        value_index + 1,
3957                        memo,
3958                        stride,
3959                    )
3960            }
3961            literal => {
3962                value_index < value.len()
3963                    && literal == value[value_index]
3964                    && wildcard_path_matches_from(
3965                        pattern,
3966                        value,
3967                        pattern_index + 1,
3968                        value_index + 1,
3969                        memo,
3970                        stride,
3971                    )
3972            }
3973        }
3974    };
3975    memo[cell] = Some(matched);
3976    matched
3977}
3978
3979fn wildcard_double_star_matches(
3980    pattern: &[u8],
3981    value: &[u8],
3982    pattern_index: usize,
3983    value_index: usize,
3984    memo: &mut [Option<bool>],
3985    stride: usize,
3986) -> bool {
3987    let after_stars = pattern_index + 2;
3988    if pattern.get(after_stars) == Some(&b'/') {
3989        if wildcard_path_matches_from(pattern, value, after_stars + 1, value_index, memo, stride) {
3990            return true;
3991        }
3992        for next in value_index..value.len() {
3993            if value[next] == b'/'
3994                && wildcard_path_matches_from(
3995                    pattern,
3996                    value,
3997                    after_stars + 1,
3998                    next + 1,
3999                    memo,
4000                    stride,
4001                )
4002            {
4003                return true;
4004            }
4005        }
4006        return false;
4007    }
4008    for next in value_index..=value.len() {
4009        if wildcard_path_matches_from(pattern, value, after_stars, next, memo, stride) {
4010            return true;
4011        }
4012    }
4013    false
4014}
4015
/// Evaluates the bracket expression that starts at `pattern[start]` (the `[`)
/// against the single byte `value`.
///
/// Returns `Some((matched, next))`, where `next` is the pattern index just
/// past the closing `]`, or `None` when no well-formed class starts here (the
/// caller then treats `[` as a literal byte).
///
/// Supported syntax: a leading `!` or `^` negates the class; `a-z` is an
/// inclusive range (bounds may be given in either order); a `]` placed first
/// (right after `[`, `[!` or `[^`) is a literal member rather than the
/// terminator, so `[]a]` accepts `]` and `a`.
fn wildcard_class_matches(pattern: &[u8], start: usize, value: u8) -> Option<(bool, usize)> {
    let mut index = start + 1;
    let negated = matches!(pattern.get(index), Some(b'!' | b'^'));
    if negated {
        index += 1;
    }
    let class_start = index;
    // A `]` in first position belongs to the class, so the search for the
    // terminator starts after it. This also means a class is never empty.
    let search_from = if pattern.get(class_start) == Some(&b']') {
        class_start + 1
    } else {
        class_start
    };
    let end = pattern
        .get(search_from..)?
        .iter()
        .position(|byte| *byte == b']')
        .map(|position| search_from + position)?;
    let mut matched = false;
    while index < end {
        // A range needs its upper bound inside the class; otherwise `-` is an
        // ordinary member.
        if index + 2 < end && pattern[index + 1] == b'-' {
            let lower = pattern[index].min(pattern[index + 2]);
            let upper = pattern[index].max(pattern[index + 2]);
            matched |= lower <= value && value <= upper;
            index += 3;
        } else {
            matched |= pattern[index] == value;
            index += 1;
        }
    }
    Some((matched != negated, end + 1))
}
4044
/// Collected attribute rules, queried per path through
/// `attributes_for_path`. The default value holds no rules.
#[derive(Debug, Default)]
struct AttributeMatcher {
    /// Rules in the order they were read; when several matching rules assign
    /// the same attribute, the one read last wins.
    patterns: Vec<AttributePattern>,
    /// Maps an attribute name to the position at which it was first recorded
    /// via `push_attribute_order`; used to rank the "all attributes" listing.
    attribute_order: BTreeMap<Vec<u8>, usize>,
    /// Assignments keyed by name — presumably the expansions of attribute
    /// macros; NOTE(review): populated outside this section, confirm there.
    macros: BTreeMap<Vec<u8>, Vec<AttributeAssignment>>,
}
4051
/// One attribute rule: a path pattern plus the assignments it applies to
/// matching paths.
#[derive(Debug)]
struct AttributePattern {
    /// Presumably the directory (relative to the worktree root) this rule is
    /// scoped to, mirroring `IgnorePattern::base` — TODO confirm in the parser.
    base: Vec<u8>,
    /// The pattern text to match.
    pattern: Vec<u8>,
    /// NOTE(review): looks like the "written with a leading `/`" flag, as for
    /// ignore patterns — confirm in the parser.
    anchored: bool,
    /// NOTE(review): looks like "the pattern contains a `/`", as for ignore
    /// patterns — confirm in the parser.
    has_slash: bool,
    /// Attribute assignments applied, in order, to every matching path.
    assignments: Vec<AttributeAssignment>,
}
4060
/// A single attribute setting carried by an attribute rule.
#[derive(Debug, Clone, PartialEq, Eq)]
struct AttributeAssignment {
    /// Name of the attribute being assigned.
    attribute: Vec<u8>,
    /// The assigned state. A `None` state still overrides earlier assignments
    /// of the same attribute, but is omitted from the "all attributes"
    /// listing — presumably the "unspecified" form (`!attr`); confirm against
    /// the parser.
    state: Option<AttributeState>,
}
4066
4067impl AttributeMatcher {
4068    fn from_worktree_root(root: &Path) -> Result<Self> {
4069        let mut matcher = Self::default();
4070        if !matcher.read_configured_attributes(root) {
4071            matcher.read_default_global_attributes();
4072        }
4073        collect_attribute_patterns(root, root, &mut matcher)?;
4074        read_attribute_patterns(
4075            root.join(".git").join("info").join("attributes"),
4076            &mut matcher,
4077            &[],
4078            b".git/info/attributes",
4079        );
4080        Ok(matcher)
4081    }
4082
4083    /// Builds only the repository-wide attribute sources — `core.attributesFile`
4084    /// (or the default global) and `$GIT_DIR/info/attributes` — *without* walking
4085    /// the worktree for `.gitattributes`. The caller is expected to fold each
4086    /// directory's `.gitattributes` into the matcher as it descends (see
4087    /// [`read_dir_attribute_patterns`]), so status/diff read the tree exactly once
4088    /// instead of doing a separate full-tree attribute pass. Lower-priority sources
4089    /// are added first, so in-tree patterns added during the walk take precedence —
4090    /// matching git's lookup order.
4091    fn from_worktree_base(root: &Path) -> Self {
4092        let mut matcher = Self::default();
4093        if !matcher.read_configured_attributes(root) {
4094            matcher.read_default_global_attributes();
4095        }
4096        read_attribute_patterns(
4097            root.join(".git").join("info").join("attributes"),
4098            &mut matcher,
4099            &[],
4100            b".git/info/attributes",
4101        );
4102        matcher
4103    }
4104
4105    fn attributes_for_path(
4106        &self,
4107        path: &[u8],
4108        requested: &[Vec<u8>],
4109        all: bool,
4110    ) -> Vec<AttributeCheck> {
4111        let mut states = BTreeMap::<Vec<u8>, Option<AttributeState>>::new();
4112        for pattern in &self.patterns {
4113            if !pattern.matches(path) {
4114                continue;
4115            }
4116            for assignment in &pattern.assignments {
4117                states.insert(assignment.attribute.clone(), assignment.state.clone());
4118            }
4119        }
4120        if all {
4121            let mut checks = states
4122                .into_iter()
4123                .filter_map(|(attribute, state)| {
4124                    state.map(|state| AttributeCheck {
4125                        attribute,
4126                        state: Some(state),
4127                    })
4128                })
4129                .collect::<Vec<_>>();
4130            checks.sort_by(|left, right| {
4131                attribute_all_rank(&left.attribute, &self.attribute_order)
4132                    .cmp(&attribute_all_rank(&right.attribute, &self.attribute_order))
4133                    .then_with(|| left.attribute.cmp(&right.attribute))
4134            });
4135            return checks;
4136        }
4137        requested
4138            .iter()
4139            .map(|attribute| AttributeCheck {
4140                attribute: attribute.clone(),
4141                state: states.get(attribute).cloned().flatten(),
4142            })
4143            .collect()
4144    }
4145
4146    fn push_attribute_order(&mut self, attribute: &[u8]) {
4147        let next = self.attribute_order.len();
4148        self.attribute_order
4149            .entry(attribute.to_vec())
4150            .or_insert(next);
4151    }
4152
4153    fn read_configured_attributes(&mut self, root: &Path) -> bool {
4154        let Ok(config) = sley_config::read_repo_config(&root.join(".git"), None) else {
4155            return false;
4156        };
4157        let Some(value) = config.get("core", None, "attributesFile") else {
4158            return false;
4159        };
4160        let path = expand_core_excludes_file(root, value);
4161        read_attribute_patterns(path, self, &[], value.as_bytes());
4162        true
4163    }
4164
4165    fn read_default_global_attributes(&mut self) {
4166        if let Some(config_home) = std::env::var_os("XDG_CONFIG_HOME")
4167            && !config_home.is_empty()
4168        {
4169            let path = PathBuf::from(config_home).join("git").join("attributes");
4170            let source = path.to_string_lossy().into_owned();
4171            read_attribute_patterns(path, self, &[], source.as_bytes());
4172            return;
4173        }
4174        if let Some(home) = std::env::var_os("HOME") {
4175            let path = PathBuf::from(home)
4176                .join(".config")
4177                .join("git")
4178                .join("attributes");
4179            let source = path.to_string_lossy().into_owned();
4180            read_attribute_patterns(path, self, &[], source.as_bytes());
4181        }
4182    }
4183}
4184
4185fn read_dir_ignore_patterns_for_base(
4186    dir: &Path,
4187    base: &[u8],
4188    matcher: &mut IgnoreMatcher,
4189) -> Result<()> {
4190    let mut source = base.to_vec();
4191    if !source.is_empty() {
4192        source.push(b'/');
4193    }
4194    source.extend_from_slice(b".gitignore");
4195    read_ignore_patterns(dir.join(".gitignore"), &mut matcher.patterns, base, &source);
4196    Ok(())
4197}
4198
4199/// Fold `dir`'s `.gitattributes` (if any) into `matcher`, scoped to `dir`'s path
4200/// within `root`. Used both by the eager full-tree pass and by the status/diff
4201/// worktree walk as it descends, so the tree is read for attributes exactly once.
4202fn read_dir_attribute_patterns(
4203    root: &Path,
4204    dir: &Path,
4205    matcher: &mut AttributeMatcher,
4206) -> Result<()> {
4207    let relative = dir.strip_prefix(root).map_err(|_| {
4208        GitError::InvalidPath(format!("path {} is outside worktree", dir.display()))
4209    })?;
4210    let base = git_path_bytes(relative)?;
4211    read_dir_attribute_patterns_for_base(dir, &base, matcher)
4212}
4213
4214fn read_dir_attribute_patterns_for_base(
4215    dir: &Path,
4216    base: &[u8],
4217    matcher: &mut AttributeMatcher,
4218) -> Result<()> {
4219    let mut source = base.to_vec();
4220    if !source.is_empty() {
4221        source.push(b'/');
4222    }
4223    source.extend_from_slice(b".gitattributes");
4224    read_attribute_patterns(dir.join(".gitattributes"), matcher, base, &source);
4225    Ok(())
4226}
4227
4228fn collect_attribute_patterns(
4229    root: &Path,
4230    dir: &Path,
4231    matcher: &mut AttributeMatcher,
4232) -> Result<()> {
4233    read_dir_attribute_patterns(root, dir, matcher)?;
4234
4235    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
4236    entries.sort_by_key(|entry| entry.file_name());
4237    for entry in entries {
4238        let path = entry.path();
4239        if path.file_name().and_then(|name| name.to_str()) == Some(".git") {
4240            continue;
4241        }
4242        if entry.metadata()?.is_dir() {
4243            collect_attribute_patterns(root, &path, matcher)?;
4244        }
4245    }
4246    Ok(())
4247}
4248
4249fn read_attribute_patterns(
4250    path: impl AsRef<Path>,
4251    matcher: &mut AttributeMatcher,
4252    base: &[u8],
4253    _source: &[u8],
4254) {
4255    let Ok(contents) = fs::read(path) else {
4256        return;
4257    };
4258    read_attribute_patterns_from_bytes(&contents, matcher, base);
4259}
4260
4261fn read_attribute_patterns_from_bytes(
4262    contents: &[u8],
4263    matcher: &mut AttributeMatcher,
4264    base: &[u8],
4265) {
4266    for raw in contents.split(|byte| *byte == b'\n') {
4267        push_attribute_pattern(matcher, raw, base);
4268    }
4269}
4270
4271fn collect_attribute_patterns_from_tree(
4272    db: &FileObjectDatabase,
4273    format: ObjectFormat,
4274    tree_oid: &ObjectId,
4275    base: Vec<u8>,
4276    matcher: &mut AttributeMatcher,
4277) -> Result<()> {
4278    let object = read_expected_object(db, tree_oid, ObjectType::Tree)?;
4279    let mut entries = Tree::parse(format, &object.body)?.entries;
4280    entries.sort_by(|left, right| left.name.cmp(&right.name));
4281    for entry in &entries {
4282        if entry.name == b".gitattributes" && tree_entry_object_type(entry.mode) == ObjectType::Blob
4283        {
4284            let object = db.read_object(&entry.oid).map_err(|err| {
4285                expect_missing_object_kind(err, entry.oid, MissingObjectKind::Blob)
4286            })?;
4287            if object.object_type == ObjectType::Blob {
4288                read_attribute_patterns_from_bytes(&object.body, matcher, &base);
4289            }
4290        }
4291    }
4292    for entry in entries {
4293        if tree_entry_object_type(entry.mode) != ObjectType::Tree {
4294            continue;
4295        }
4296        let mut child_base = base.clone();
4297        if !child_base.is_empty() {
4298            child_base.push(b'/');
4299        }
4300        child_base.extend_from_slice(entry.name.as_bytes());
4301        collect_attribute_patterns_from_tree(db, format, &entry.oid, child_base, matcher)?;
4302    }
4303    Ok(())
4304}
4305
4306fn collect_attribute_patterns_from_index(
4307    git_dir: &Path,
4308    format: ObjectFormat,
4309    db: &FileObjectDatabase,
4310    matcher: &mut AttributeMatcher,
4311) -> Result<()> {
4312    let index_path = repository_index_path(git_dir);
4313    if !index_path.exists() {
4314        return Ok(());
4315    }
4316    let mut entries = Index::parse(&fs::read(index_path)?, format)?.entries;
4317    entries.sort_by(|left, right| left.path.cmp(&right.path));
4318    for entry in entries {
4319        let is_attributes_file =
4320            entry.path == b".gitattributes" || entry.path.as_bytes().ends_with(b"/.gitattributes");
4321        if index_entry_stage(&entry) != 0
4322            || tree_entry_object_type(entry.mode) != ObjectType::Blob
4323            || !is_attributes_file
4324        {
4325            continue;
4326        }
4327        let base = match entry.path.as_bytes().strip_suffix(b".gitattributes") {
4328            Some(b"") => Vec::new(),
4329            Some(parent) => parent.strip_suffix(b"/").unwrap_or(parent).to_vec(),
4330            None => continue,
4331        };
4332        let object = db
4333            .read_object(&entry.oid)
4334            .map_err(|err| expect_missing_object_kind(err, entry.oid, MissingObjectKind::Blob))?;
4335        if object.object_type == ObjectType::Blob {
4336            read_attribute_patterns_from_bytes(&object.body, matcher, &base);
4337        }
4338    }
4339    Ok(())
4340}
4341
4342fn push_attribute_pattern(matcher: &mut AttributeMatcher, raw: &[u8], base: &[u8]) {
4343    let line = raw.strip_suffix(b"\r").unwrap_or(raw);
4344    let line = trim_ascii_whitespace(line);
4345    if line.is_empty() || line.starts_with(b"#") {
4346        return;
4347    }
4348    let mut fields = line
4349        .split(|byte| byte.is_ascii_whitespace())
4350        .filter(|field| !field.is_empty());
4351    let Some(raw_pattern) = fields.next() else {
4352        return;
4353    };
4354    if let Some(macro_name) = raw_pattern.strip_prefix(b"[attr]") {
4355        if macro_name.is_empty() {
4356            return;
4357        }
4358        let mut assignments = vec![AttributeAssignment {
4359            attribute: macro_name.to_vec(),
4360            state: Some(AttributeState::Set),
4361        }];
4362        for field in fields {
4363            push_attribute_assignments(&mut assignments, field, &matcher.macros);
4364        }
4365        for assignment in &assignments {
4366            matcher.push_attribute_order(&assignment.attribute);
4367        }
4368        matcher.macros.insert(macro_name.to_vec(), assignments);
4369        return;
4370    }
4371    let mut assignments = Vec::new();
4372    for field in fields {
4373        push_attribute_assignments(&mut assignments, field, &matcher.macros);
4374    }
4375    if assignments.is_empty() {
4376        return;
4377    }
4378    for assignment in &assignments {
4379        matcher.push_attribute_order(&assignment.attribute);
4380    }
4381    let (anchored, pattern) = if let Some(pattern) = raw_pattern.strip_prefix(b"/") {
4382        (true, pattern)
4383    } else {
4384        (false, raw_pattern)
4385    };
4386    if pattern.is_empty() {
4387        return;
4388    }
4389    matcher.patterns.push(AttributePattern {
4390        base: base.to_vec(),
4391        pattern: pattern.to_vec(),
4392        anchored,
4393        has_slash: pattern.contains(&b'/'),
4394        assignments,
4395    });
4396}
4397
4398fn push_attribute_assignments(
4399    assignments: &mut Vec<AttributeAssignment>,
4400    field: &[u8],
4401    macros: &BTreeMap<Vec<u8>, Vec<AttributeAssignment>>,
4402) {
4403    if let Some(macro_assignments) = macros.get(field) {
4404        assignments.extend(macro_assignments.iter().cloned());
4405        return;
4406    }
4407    if field == b"binary" {
4408        assignments.push(AttributeAssignment {
4409            attribute: b"binary".to_vec(),
4410            state: Some(AttributeState::Set),
4411        });
4412        assignments.push(AttributeAssignment {
4413            attribute: b"diff".to_vec(),
4414            state: Some(AttributeState::Unset),
4415        });
4416        assignments.push(AttributeAssignment {
4417            attribute: b"merge".to_vec(),
4418            state: Some(AttributeState::Unset),
4419        });
4420        assignments.push(AttributeAssignment {
4421            attribute: b"text".to_vec(),
4422            state: Some(AttributeState::Unset),
4423        });
4424        return;
4425    }
4426    if let Some(attribute) = field.strip_prefix(b"-") {
4427        if !attribute.is_empty() {
4428            assignments.push(AttributeAssignment {
4429                attribute: attribute.to_vec(),
4430                state: Some(AttributeState::Unset),
4431            });
4432        }
4433        return;
4434    }
4435    if let Some(attribute) = field.strip_prefix(b"!") {
4436        if !attribute.is_empty() {
4437            assignments.push(AttributeAssignment {
4438                attribute: attribute.to_vec(),
4439                state: None,
4440            });
4441        }
4442        return;
4443    }
4444    if let Some(equal) = field.iter().position(|byte| *byte == b'=') {
4445        let attribute = &field[..equal];
4446        let value = &field[equal + 1..];
4447        if !attribute.is_empty() {
4448            assignments.push(AttributeAssignment {
4449                attribute: attribute.to_vec(),
4450                state: Some(AttributeState::Value(value.to_vec())),
4451            });
4452        }
4453        return;
4454    }
4455    assignments.push(AttributeAssignment {
4456        attribute: field.to_vec(),
4457        state: Some(AttributeState::Set),
4458    });
4459}
4460
/// Build the sort key used when listing all attributes of a path.
///
/// The key is `(rank, first-seen order, name)`: `binary`, `diff`, `merge` and
/// `text` take ranks 0-3, `eol` takes rank 5, and every other attribute takes
/// rank 4. Attributes missing from `order` use `usize::MAX` as their order.
fn attribute_all_rank(
    attribute: &[u8],
    order: &BTreeMap<Vec<u8>, usize>,
) -> (usize, usize, Vec<u8>) {
    const LEADING: [&[u8]; 4] = [b"binary", b"diff", b"merge", b"text"];
    let rank = if attribute == b"eol" {
        5
    } else {
        LEADING
            .iter()
            .position(|name| *name == attribute)
            .unwrap_or(4)
    };
    let seen_at = order.get(attribute).copied().unwrap_or(usize::MAX);
    (rank, seen_at, attribute.to_vec())
}
4476
/// Return `value` with leading and trailing ASCII whitespace removed.
///
/// Whitespace is whatever `u8::is_ascii_whitespace` accepts; interior
/// whitespace is preserved. An all-whitespace input yields an empty slice.
fn trim_ascii_whitespace(value: &[u8]) -> &[u8] {
    let Some(start) = value.iter().position(|byte| !byte.is_ascii_whitespace()) else {
        return &value[..0];
    };
    // A non-whitespace byte exists, so the reverse search finds one as well.
    let end = value
        .iter()
        .rposition(|byte| !byte.is_ascii_whitespace())
        .map_or(start, |last| last + 1);
    &value[start..end]
}
4486
4487impl AttributePattern {
4488    fn matches(&self, path: &[u8]) -> bool {
4489        let path = if self.base.is_empty() {
4490            path
4491        } else {
4492            let Some(rest) = path
4493                .strip_prefix(self.base.as_slice())
4494                .and_then(|rest| rest.strip_prefix(b"/"))
4495            else {
4496                return false;
4497            };
4498            rest
4499        };
4500        if self.anchored || self.has_slash {
4501            return wildcard_path_matches(&self.pattern, path);
4502        }
4503        path.rsplit(|byte| *byte == b'/')
4504            .next()
4505            .is_some_and(|basename| wildcard_path_matches(&self.pattern, basename))
4506    }
4507}
4508
4509// ---------------------------------------------------------------------------
4510// Content filtering on the blob <-> worktree boundary
4511//
4512// Git runs two kinds of conversion when content crosses between the worktree
4513// and the object database:
4514//
4515//   * the line-ending / `core.autocrlf` conversion (driven by the `text`,
4516//     `eol` attributes and the `core.autocrlf` / `core.eol` config), and
//   * the `filter.<name>.clean` / `.smudge` driver filters, each run as a
//     one-shot subprocess per file (selected by the `filter=<name>` attribute
//     and configured commands).
4519//
4520// "clean" runs on the way *into* the object store (worktree -> blob), e.g. on
4521// `git add` / `git hash-object -w`. "smudge" runs on the way *out* (blob ->
4522// worktree), e.g. on checkout / restore. The driver filter, when present,
4523// wraps the EOL conversion: on clean git first runs the configured `clean`
4524// command and then applies CRLF->LF normalization; on smudge git first applies
4525// LF->CRLF and then runs the `smudge` command.
4526// ---------------------------------------------------------------------------
4527
/// The line-ending conversion that applies to a path, derived from its
/// attributes and the repository config.
///
/// The variant only describes the *direction* of conversion; whether any
/// conversion runs at all is decided together with [`TextDecision`] (see
/// `ContentFilterPlan::convert_eol`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EolConversion {
    /// No conversion: binary content, or text with `core.autocrlf=false` and no
    /// `eol`/`text=auto` request to add carriage returns.
    None,
    /// Normalize to LF on clean; no carriage returns on smudge (`eol=lf`, or
    /// `core.autocrlf=input`).
    Lf,
    /// Normalize to LF on clean; emit CRLF on smudge (`eol=crlf`, or
    /// `core.autocrlf=true`).
    Crlf,
}
4542
/// How git should decide whether a path is text for the purpose of EOL
/// conversion.
///
/// `Binary` and `Unspecified` both disable EOL conversion; `Text` converts
/// unconditionally and `Auto` converts only content that does not look binary
/// (see `ContentFilterPlan::convert_eol`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TextDecision {
    /// `-text` / `binary`: never convert.
    Binary,
    /// `text` is set explicitly: always treat as text.
    Text,
    /// `text=auto` (or implied by `core.autocrlf`): treat as text unless the
    /// content looks binary.
    Auto,
    /// No opinion from attributes or config: leave content untouched.
    Unspecified,
}
4557
/// The fully resolved set of conversions that apply to a single path.
///
/// Built by `ContentFilterPlan::resolve` from the path's attribute checks and
/// the repository config.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ContentFilterPlan {
    /// Whether (and how) the path is considered text for EOL conversion.
    text: TextDecision,
    /// The conversion to apply when `text` resolves to "this is text".
    eol: EolConversion,
    /// `filter.<name>` driver, if assigned via attributes and configured.
    driver: Option<FilterDriver>,
}
4567
/// A `filter.<name>` driver selected by a path's `filter=<name>` attribute,
/// as resolved by `resolve_filter_driver`.
#[derive(Debug, Clone, PartialEq, Eq)]
struct FilterDriver {
    /// The driver name, i.e. the raw value of the `filter` attribute.
    name: Vec<u8>,
    /// `filter.<name>.clean` command; `None` when unset or empty.
    clean: Option<String>,
    /// `filter.<name>.smudge` command; `None` when unset or empty.
    smudge: Option<String>,
    /// `filter.<name>.required`; `false` when the key is not set.
    required: bool,
}
4575
4576/// Decode one crlf-family attribute (`text` or its legacy alias `crlf`) into a
4577/// text decision, plus whether the value form forced an EOL direction.
4578///
4579/// Mirrors git's `git_path_check_crlf` (convert.c): a *set* attribute is text,
4580/// an *unset* one is binary, `=auto` is auto, `=input` forces LF while still
4581/// counting as text, and any other value is "undefined" — i.e. no opinion, so
4582/// the caller falls through to the next source (the `crlf` alias, then config).
4583fn decode_crlf_family_attribute(state: Option<&AttributeState>) -> (TextDecision, EolConversion) {
4584    match state {
4585        Some(AttributeState::Set) => (TextDecision::Text, EolConversion::None),
4586        Some(AttributeState::Unset) => (TextDecision::Binary, EolConversion::None),
4587        Some(AttributeState::Value(value)) if value == b"auto" => {
4588            (TextDecision::Auto, EolConversion::None)
4589        }
4590        // `crlf=input` / `text=input`: text content normalized to LF (no CR on
4591        // smudge), exactly like `core.autocrlf=input`.
4592        Some(AttributeState::Value(value)) if value == b"input" => {
4593            (TextDecision::Text, EolConversion::Lf)
4594        }
4595        // `=<other>` is CRLF_UNDEFINED in git for the `crlf` alias: no opinion.
4596        _ => (TextDecision::Unspecified, EolConversion::None),
4597    }
4598}
4599
4600impl ContentFilterPlan {
4601    /// Build the plan for `path` from the parsed attributes and repo config.
4602    fn resolve(config: &GitConfig, checks: &[AttributeCheck]) -> Self {
4603        let text_attr = checks.iter().find(|check| check.attribute == b"text");
4604        let crlf_attr = checks.iter().find(|check| check.attribute == b"crlf");
4605        let eol_attr = checks.iter().find(|check| check.attribute == b"eol");
4606        let filter_attr = checks.iter().find(|check| check.attribute == b"filter");
4607
4608        // Resolve the eol attribute first; `eol=crlf|lf` also forces text.
4609        let eol_value = eol_attr.and_then(|check| match &check.state {
4610            Some(AttributeState::Value(value)) => Some(value.clone()),
4611            _ => None,
4612        });
4613
4614        // The `text` attribute decides first; only when it is unspecified does
4615        // git consult the legacy `crlf` alias (convert.c `convert_attrs`).
4616        let mut forced_eol = EolConversion::None;
4617        let mut text = match text_attr.map(|check| &check.state) {
4618            Some(Some(AttributeState::Set)) => TextDecision::Text,
4619            Some(Some(AttributeState::Unset)) => TextDecision::Binary,
4620            Some(Some(AttributeState::Value(value))) if value == b"auto" => TextDecision::Auto,
4621            Some(Some(AttributeState::Value(value))) if value == b"input" => {
4622                forced_eol = EolConversion::Lf;
4623                TextDecision::Text
4624            }
4625            // `text=<other>` is treated by git as a set text attribute.
4626            Some(Some(AttributeState::Value(_))) => TextDecision::Text,
4627            // `!text` (unspecified) or no text attribute: fall through to `crlf`.
4628            _ => {
4629                let (decision, eol) =
4630                    decode_crlf_family_attribute(crlf_attr.and_then(|check| check.state.as_ref()));
4631                forced_eol = eol;
4632                decision
4633            }
4634        };
4635
4636        // A concrete `eol` attribute implies the path is text even when `text`
4637        // was left unspecified (git: `eol` without `text` is treated as
4638        // `text=auto`-ish; upstream forces conversion). We honour eol only when
4639        // text is not explicitly binary.
4640        let eol = match (&text, eol_value.as_deref()) {
4641            (TextDecision::Binary, _) => EolConversion::None,
4642            (_, Some(b"crlf")) => {
4643                if text == TextDecision::Unspecified {
4644                    text = TextDecision::Text;
4645                }
4646                EolConversion::Crlf
4647            }
4648            (_, Some(b"lf")) => {
4649                if text == TextDecision::Unspecified {
4650                    text = TextDecision::Text;
4651                }
4652                EolConversion::Lf
4653            }
4654            // No explicit `eol` attribute, but `text=input`/`crlf=input` already
4655            // forced the LF direction (git's CRLF_TEXT_INPUT). Honour it over the
4656            // config-derived default.
4657            _ if forced_eol == EolConversion::Lf => EolConversion::Lf,
4658            // No eol attribute: derive direction from config.
4659            _ => eol_from_config(config),
4660        };
4661
4662        // When the path is text but neither `eol` nor `core.autocrlf`/`core.eol`
4663        // asked for carriage returns, we still normalize to LF on clean. That is
4664        // modelled by `EolConversion::Lf` (clean strips CR, smudge adds none).
4665        let eol = match (&text, eol) {
4666            (TextDecision::Text | TextDecision::Auto, EolConversion::None) => EolConversion::Lf,
4667            (_, eol) => eol,
4668        };
4669
4670        // If config does not enable autocrlf and there is no eol/text opinion,
4671        // there is genuinely nothing to do.
4672        let text = match (text, eol_attr.is_some()) {
4673            (TextDecision::Unspecified, _) => {
4674                // Without any text/eol attribute, only `core.autocrlf` can make a
4675                // path eligible, and then it behaves like `text=auto`.
4676                if autocrlf_enabled(config) {
4677                    TextDecision::Auto
4678                } else {
4679                    TextDecision::Unspecified
4680                }
4681            }
4682            (text, _) => text,
4683        };
4684
4685        let driver = resolve_filter_driver(config, filter_attr);
4686
4687        ContentFilterPlan { text, eol, driver }
4688    }
4689
4690    /// Whether EOL conversion should run for the given content.
4691    fn convert_eol(&self, content: &[u8]) -> bool {
4692        match self.text {
4693            TextDecision::Binary | TextDecision::Unspecified => false,
4694            TextDecision::Text => self.eol != EolConversion::None,
4695            // `text=auto`: only when the blob does not look binary.
4696            TextDecision::Auto => self.eol != EolConversion::None && !looks_binary(content),
4697        }
4698    }
4699
4700    /// The smudge-side LF->CRLF safety check, mirroring convert.c
4701    /// `will_convert_lf_to_crlf`. Returns false (no conversion) when:
4702    ///   * there is no naked LF to convert, or
4703    ///   * the action is `text=auto`-derived (the "new safer autocrlf") AND the
4704    ///     content already contains a lone CR or a CRLF pair, or looks binary.
4705    ///
4706    /// An explicit `text`/`eol=crlf` (non-auto) path always converts naked LFs.
4707    fn will_convert_lf_to_crlf(&self, content: &[u8]) -> bool {
4708        let stats = gather_convert_stats(content);
4709        // No naked LF? Nothing to convert.
4710        if stats.lonelf == 0 {
4711            return false;
4712        }
4713        if self.text == TextDecision::Auto {
4714            // Any CR or CRLF already present: leave it untouched (irreversible).
4715            if stats.lonecr > 0 || stats.crlf > 0 {
4716                return false;
4717            }
4718            if convert_is_binary(&stats) {
4719                return false;
4720            }
4721        }
4722        true
4723    }
4724}
4725
4726/// Derive the smudge-direction line ending from `core.autocrlf` / `core.eol`.
4727fn eol_from_config(config: &GitConfig) -> EolConversion {
4728    if let Some(value) = config.get("core", None, "autocrlf") {
4729        match value.to_ascii_lowercase().as_str() {
4730            "input" => return EolConversion::Lf,
4731            "true" | "yes" | "on" | "1" => return EolConversion::Crlf,
4732            _ => {}
4733        }
4734    }
4735    if config.get_bool("core", None, "autocrlf") == Some(true) {
4736        return EolConversion::Crlf;
4737    }
4738    match config
4739        .get("core", None, "eol")
4740        .map(|v| v.to_ascii_lowercase())
4741    {
4742        Some(ref v) if v == "crlf" => EolConversion::Crlf,
4743        Some(ref v) if v == "lf" => EolConversion::Lf,
4744        _ => EolConversion::None,
4745    }
4746}
4747
4748/// Whether `core.autocrlf` is set to anything that enables conversion
4749/// (`true` or `input`).
4750fn autocrlf_enabled(config: &GitConfig) -> bool {
4751    if let Some(value) = config.get("core", None, "autocrlf")
4752        && value.eq_ignore_ascii_case("input")
4753    {
4754        return true;
4755    }
4756    config.get_bool("core", None, "autocrlf") == Some(true)
4757}
4758
4759/// Resolve the `filter=<name>` attribute against `filter.<name>.*` config.
4760fn resolve_filter_driver(
4761    config: &GitConfig,
4762    filter_attr: Option<&AttributeCheck>,
4763) -> Option<FilterDriver> {
4764    let name = match filter_attr.map(|check| &check.state) {
4765        Some(Some(AttributeState::Value(value))) => value.clone(),
4766        // `filter` set/unset without a value selects no driver.
4767        _ => return None,
4768    };
4769    let subsection = String::from_utf8_lossy(&name).into_owned();
4770    let clean = config
4771        .get("filter", Some(&subsection), "clean")
4772        .filter(|cmd| !cmd.is_empty())
4773        .map(str::to_owned);
4774    let smudge = config
4775        .get("filter", Some(&subsection), "smudge")
4776        .filter(|cmd| !cmd.is_empty())
4777        .map(str::to_owned);
4778    let required = config
4779        .get_bool("filter", Some(&subsection), "required")
4780        .unwrap_or(false);
4781    // A filter with neither command and not required is a no-op.
4782    if clean.is_none() && smudge.is_none() && !required {
4783        return None;
4784    }
4785    Some(FilterDriver {
4786        name,
4787        clean,
4788        smudge,
4789        required,
4790    })
4791}
4792
/// Heuristic mirroring git's `buffer_is_binary`: content counts as binary when
/// a NUL byte occurs within its first 8000 bytes.
fn looks_binary(content: &[u8]) -> bool {
    const FIRST_FEW_BYTES: usize = 8000;
    content
        .iter()
        .take(FIRST_FEW_BYTES)
        .any(|&byte| byte == 0)
}
4800
/// Collapse every CRLF pair to a single LF.
///
/// Only a CR immediately followed by LF is removed; a lone CR (old-Mac line
/// ending) is kept, matching git.
fn convert_crlf_to_lf(content: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(content.len());
    let mut bytes = content.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        let starts_crlf = byte == b'\r' && bytes.peek() == Some(&b'\n');
        if !starts_crlf {
            out.push(byte);
        }
    }
    out
}
4819
/// Expand every lone LF to CRLF.
///
/// An LF that already follows a CR is left alone so content is never
/// double-converted, matching git.
fn convert_lf_to_crlf(content: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(content.len() + content.len() / 16);
    for (index, &byte) in content.iter().enumerate() {
        let follows_cr = index > 0 && content[index - 1] == b'\r';
        if byte == b'\n' && !follows_cr {
            out.push(b'\r');
        }
        out.push(byte);
    }
    out
}
4834
4835/// Run a configured `clean`/`smudge` command as a subprocess, feeding `content`
4836/// on stdin and returning its stdout. Errors carry enough context for the
4837/// caller to decide whether the failure is fatal (required filter) or should be
4838/// silently ignored (optional filter passthrough).
4839fn run_filter_command(command: &str, path: &[u8], content: &[u8]) -> Result<Vec<u8>> {
4840    // Git expands `%f` in the filter command to the path of the file being
4841    // filtered (quoted). We perform the same substitution.
4842    let display_path = String::from_utf8_lossy(path);
4843    let expanded = command.replace("%f", &shell_quote(&display_path));
4844    // Run through the platform shell so pipelines / arguments in the configured
4845    // command behave the same way git's `run_command`-with-shell does.
4846    let (shell, flag) = if cfg!(windows) {
4847        ("cmd", "/C")
4848    } else {
4849        ("/bin/sh", "-c")
4850    };
4851    let mut child = Command::new(shell)
4852        .arg(flag)
4853        .arg(&expanded)
4854        .stdin(Stdio::piped())
4855        .stdout(Stdio::piped())
4856        .stderr(Stdio::piped())
4857        .spawn()
4858        .map_err(|err| GitError::Command(format!("failed to spawn filter `{command}`: {err}")))?;
4859    // Write the content to the child's stdin on a separate thread so we never
4860    // deadlock against a filter that streams output before consuming all input.
4861    let mut stdin = child
4862        .stdin
4863        .take()
4864        .ok_or_else(|| GitError::Command(format!("filter `{command}` stdin unavailable")))?;
4865    let payload = content.to_vec();
4866    let writer = std::thread::spawn(move || {
4867        let _ = stdin.write_all(&payload);
4868        // Dropping `stdin` here closes the pipe so the child sees EOF.
4869    });
4870    let output = child
4871        .wait_with_output()
4872        .map_err(|err| GitError::Command(format!("filter `{command}` failed: {err}")))?;
4873    // Join the writer; its own errors (e.g. broken pipe) are non-fatal because
4874    // the child's exit status is the authoritative signal.
4875    let _ = writer.join();
4876    if !output.status.success() {
4877        let stderr = String::from_utf8_lossy(&output.stderr);
4878        return Err(GitError::Command(format!(
4879            "filter `{command}` exited with {}: {}",
4880            output.status,
4881            stderr.trim()
4882        )));
4883    }
4884    Ok(output.stdout)
4885}
4886
4887/// Minimal POSIX single-quote escaping for substituting `%f` into a shell
4888/// command (used only for the path passed to driver filters).
4889fn shell_quote(value: &str) -> String {
4890    let mut out = String::with_capacity(value.len() + 2);
4891    out.push('\'');
4892    for ch in value.chars() {
4893        if ch == '\'' {
4894            out.push_str("'\\''");
4895        } else {
4896            out.push(ch);
4897        }
4898    }
4899    out.push('\'');
4900    out
4901}
4902
4903/// Apply the *clean* conversion to `content` for `path` (worktree -> blob):
4904/// first the configured `filter.<name>.clean` driver (if any), then CRLF->LF
4905/// normalization when EOL conversion applies.
4906///
4907/// `config` is the repository config (`GitConfig`) and `path` is the
4908/// repository-relative path of the file (forward-slash separated, e.g.
4909/// `src/main.rs`). When no filter or EOL conversion applies the input is
4910/// returned unchanged.
4911///
4912/// A *required* driver (`filter.<name>.required=true`) whose `clean` command is
4913/// missing or fails produces a [`GitError::Command`]; a non-required driver
4914/// failure (or absence of a `clean` command) passes the content through
4915/// unfiltered, matching git.
4916pub fn apply_clean_filter(
4917    worktree_root: impl AsRef<Path>,
4918    git_dir: impl AsRef<Path>,
4919    config: &GitConfig,
4920    path: &[u8],
4921    content: &[u8],
4922) -> Result<Vec<u8>> {
4923    // On clean the worktree file exists, so the live `.gitattributes` chain is
4924    // authoritative. `git_dir` is accepted for symmetry with the smudge entry
4925    // point (which falls back to the index) and for future use.
4926    let _ = git_dir.as_ref();
4927    let checks = filter_attribute_checks(worktree_root.as_ref(), path)?;
4928    apply_clean_filter_with_attributes(config, &checks, path, content)
4929}
4930
4931/// A reusable handle that captures the worktree's `.gitattributes` chain once so
4932/// repeated clean-filter calls (e.g. `hash-object --stdin-paths` hashing many
4933/// paths in one process) don't re-walk the worktree and re-read every
4934/// `.gitattributes`/global config per path.
4935///
4936/// Build it once with [`WorktreeAttributes::from_worktree_root`], then call
4937/// [`WorktreeAttributes::apply_clean_filter`] per path. This mirrors
4938/// [`apply_clean_filter`] exactly except the expensive attribute-source scan is
4939/// amortized across calls.
4940pub struct WorktreeAttributes {
4941    matcher: AttributeMatcher,
4942}
4943
4944impl WorktreeAttributes {
4945    /// Read the worktree's attribute sources once (global/`core.attributesFile`,
4946    /// every in-tree `.gitattributes`, and `$GIT_DIR/info/attributes`).
4947    pub fn from_worktree_root(worktree_root: impl AsRef<Path>) -> Result<Self> {
4948        Ok(Self {
4949            matcher: AttributeMatcher::from_worktree_root(worktree_root.as_ref())?,
4950        })
4951    }
4952
4953    /// Apply the clean conversion to `content` for `path`, reusing the cached
4954    /// attribute chain. Behaviourally identical to [`apply_clean_filter`].
4955    pub fn apply_clean_filter(
4956        &self,
4957        config: &GitConfig,
4958        path: &[u8],
4959        content: &[u8],
4960    ) -> Result<Vec<u8>> {
4961        let checks = self
4962            .matcher
4963            .attributes_for_path(path, &filter_attribute_names(), false);
4964        apply_clean_filter_with_attributes(config, &checks, path, content)
4965    }
4966}
4967
/// A reusable handle that captures a *tree's* `.gitattributes` chain once so
/// repeated smudge-filter calls (e.g. `git archive` streaming every blob in a
/// tree) resolve attributes from the tree being processed rather than the live
/// worktree.
///
/// This is the attribute direction `git archive` uses: upstream unpacks the
/// archived tree into a scratch index and sets `GIT_ATTR_INDEX`, so the
/// `.gitattributes` that govern conversion come from the *archived tree* (plus
/// the global/`core.attributesFile` chain and `$GIT_DIR/info/attributes`), not
/// from whatever happens to be checked out. `--worktree-attributes` callers
/// should use [`WorktreeAttributes`] instead.
///
/// Build it once with [`TreeAttributes::from_tree`], then call
/// [`TreeAttributes::apply_smudge_filter`] per blob. Behaviourally this mirrors
/// [`apply_smudge_filter`] except the attribute source is the supplied tree and
/// the expensive source scan is amortized across calls.
pub struct TreeAttributes {
    /// Attribute patterns gathered once at construction and reused for every
    /// per-blob lookup.
    matcher: AttributeMatcher,
}
4987
4988impl TreeAttributes {
4989    /// Read the attribute sources for `tree_oid` once: the global /
4990    /// `core.attributesFile` chain, every `.gitattributes` blob found while
4991    /// walking `tree_oid`, and `$GIT_DIR/info/attributes`.
4992    ///
4993    /// `attr_root` locates the global config (`read_configured_attributes`);
4994    /// pass the worktree root for a non-bare repo, or the git dir for a bare
4995    /// one. `git_dir` locates `info/attributes` directly (so this works for bare
4996    /// repos, where there is no nested `.git`). No worktree `.gitattributes`
4997    /// files are read — use [`WorktreeAttributes`] for the
4998    /// `--worktree-attributes` direction.
4999    pub fn from_tree(
5000        attr_root: impl AsRef<Path>,
5001        git_dir: impl AsRef<Path>,
5002        db: &FileObjectDatabase,
5003        format: ObjectFormat,
5004        tree_oid: &ObjectId,
5005    ) -> Result<Self> {
5006        let attr_root = attr_root.as_ref();
5007        let mut matcher = AttributeMatcher::default();
5008        if !matcher.read_configured_attributes(attr_root) {
5009            matcher.read_default_global_attributes();
5010        }
5011        collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
5012        read_attribute_patterns(
5013            git_dir.as_ref().join("info").join("attributes"),
5014            &mut matcher,
5015            &[],
5016            b"info/attributes",
5017        );
5018        Ok(Self { matcher })
5019    }
5020
5021    /// Apply the smudge conversion (blob -> worktree: EOL `LF`->`CRLF` plus any
5022    /// configured `filter.<name>.smudge` driver) to `content` for `path`,
5023    /// reusing the cached attribute chain. Behaviourally identical to
5024    /// [`apply_smudge_filter`] except attributes come from the tree this handle
5025    /// was built from.
5026    pub fn apply_smudge_filter(
5027        &self,
5028        config: &GitConfig,
5029        path: &[u8],
5030        content: &[u8],
5031    ) -> Result<Vec<u8>> {
5032        let checks = self
5033            .matcher
5034            .attributes_for_path(path, &filter_attribute_names(), false);
5035        apply_smudge_filter_with_attributes(config, &checks, path, content)
5036    }
5037}
5038
5039/// Like [`apply_clean_filter`] but takes already-resolved attribute checks,
5040/// letting callers that have computed attributes once reuse them.
5041pub fn apply_clean_filter_with_attributes(
5042    config: &GitConfig,
5043    attributes: &[AttributeCheck],
5044    path: &[u8],
5045    content: &[u8],
5046) -> Result<Vec<u8>> {
5047    Ok(apply_clean_filter_with_attributes_cow(config, attributes, path, content)?.into_owned())
5048}
5049
5050/// Borrow-first variant of [`apply_clean_filter_with_attributes`].
5051///
5052/// When no filter or EOL conversion changes the content, the returned value
5053/// borrows `content`; callers that can consume a [`Cow`] avoid allocating for
5054/// the common pass-through case.
5055pub fn apply_clean_filter_with_attributes_cow<'a>(
5056    config: &GitConfig,
5057    attributes: &[AttributeCheck],
5058    path: &[u8],
5059    content: &'a [u8],
5060) -> Result<Cow<'a, [u8]>> {
5061    let plan = ContentFilterPlan::resolve(config, attributes);
5062    let mut data = Cow::Borrowed(content);
5063    if let Some(driver) = &plan.driver {
5064        data = run_driver(driver, driver.clean.as_deref(), path, data)?;
5065    }
5066    if plan.convert_eol(&data) {
5067        data = Cow::Owned(convert_crlf_to_lf(&data));
5068    }
5069    Ok(data)
5070}
5071
5072/// Apply the *smudge* conversion to `content` for `path` (blob -> worktree):
5073/// first LF->CRLF when EOL conversion applies, then the configured
5074/// `filter.<name>.smudge` driver (if any).
5075///
5076/// Semantics mirror [`apply_clean_filter`]: a required driver with a missing or
5077/// failing `smudge` command errors, while a non-required one passes the content
5078/// through.
5079pub fn apply_smudge_filter(
5080    worktree_root: impl AsRef<Path>,
5081    git_dir: impl AsRef<Path>,
5082    format: ObjectFormat,
5083    config: &GitConfig,
5084    path: &[u8],
5085    content: &[u8],
5086) -> Result<Vec<u8>> {
5087    // On smudge (checkout) the worktree file may not exist yet, so resolve the
5088    // attributes from the `.gitattributes` recorded in the index.
5089    let checks =
5090        smudge_attribute_checks_from_index(worktree_root.as_ref(), git_dir.as_ref(), format, path)?;
5091    apply_smudge_filter_with_attributes(config, &checks, path, content)
5092}
5093
5094/// Like [`apply_smudge_filter`] but takes already-resolved attribute checks.
5095pub fn apply_smudge_filter_with_attributes(
5096    config: &GitConfig,
5097    attributes: &[AttributeCheck],
5098    path: &[u8],
5099    content: &[u8],
5100) -> Result<Vec<u8>> {
5101    Ok(apply_smudge_filter_with_attributes_cow(config, attributes, path, content)?.into_owned())
5102}
5103
5104/// Borrow-first variant of [`apply_smudge_filter_with_attributes`].
5105///
5106/// When no filter or EOL conversion changes the content, the returned value
5107/// borrows `content`; callers that can consume a [`Cow`] avoid allocating for
5108/// the common pass-through case.
5109pub fn apply_smudge_filter_with_attributes_cow<'a>(
5110    config: &GitConfig,
5111    attributes: &[AttributeCheck],
5112    path: &[u8],
5113    content: &'a [u8],
5114) -> Result<Cow<'a, [u8]>> {
5115    let plan = ContentFilterPlan::resolve(config, attributes);
5116    let mut data = Cow::Borrowed(content);
5117    if plan.eol == EolConversion::Crlf
5118        && plan.convert_eol(&data)
5119        && plan.will_convert_lf_to_crlf(&data)
5120    {
5121        data = Cow::Owned(convert_lf_to_crlf(&data));
5122    }
5123    if let Some(driver) = &plan.driver {
5124        data = run_driver(driver, driver.smudge.as_deref(), path, data)?;
5125    }
5126    Ok(data)
5127}
5128
5129/// Execute one direction of a driver filter, honouring the `required` flag.
5130fn run_driver<'a>(
5131    driver: &FilterDriver,
5132    command: Option<&str>,
5133    path: &[u8],
5134    content: Cow<'a, [u8]>,
5135) -> Result<Cow<'a, [u8]>> {
5136    let Some(command) = command else {
5137        // No command in this direction. Required filters must error; optional
5138        // ones pass content through unchanged.
5139        if driver.required {
5140            return Err(GitError::Command(format!(
5141                "required filter `{}` has no configured command for this direction",
5142                String::from_utf8_lossy(&driver.name)
5143            )));
5144        }
5145        return Ok(content);
5146    };
5147    match run_filter_command(command, path, &content) {
5148        Ok(output) => Ok(Cow::Owned(output)),
5149        Err(err) => {
5150            if driver.required {
5151                Err(err)
5152            } else {
5153                // Non-required filter failure: fall back to the unfiltered
5154                // content, matching git's behaviour.
5155                Ok(content)
5156            }
5157        }
5158    }
5159}
5160
5161/// Compute the attributes relevant to content filtering (`text`, `eol`,
5162/// `filter`) for `path` from the worktree `.gitattributes` chain.
5163fn filter_attribute_checks(worktree_root: &Path, path: &[u8]) -> Result<Vec<AttributeCheck>> {
5164    let requested = filter_attribute_names();
5165    standard_attributes_for_path(worktree_root, path, &requested, false)
5166}
5167
5168/// Compute filtering attributes for a checkout (blob -> worktree), reading
5169/// `.gitattributes` from the index so the rules in the tree being checked out
5170/// apply even before the worktree files exist.
5171fn smudge_attribute_checks_from_index(
5172    worktree_root: &Path,
5173    git_dir: &Path,
5174    format: ObjectFormat,
5175    path: &[u8],
5176) -> Result<Vec<AttributeCheck>> {
5177    let requested = filter_attribute_names();
5178    standard_attributes_for_path_from_index(worktree_root, git_dir, format, path, &requested, false)
5179}
5180
/// Names of the attributes consulted for content filtering, in lookup order:
/// `text`, `crlf`, `eol`, `filter`.
fn filter_attribute_names() -> Vec<Vec<u8>> {
    // `crlf` is git's legacy alias for `text` (convert.c registers both); it is
    // used as a fallback when `text` is unspecified, so it has to be resolved
    // alongside the others.
    const NAMES: [&[u8]; 4] = [b"text", b"crlf", b"eol", b"filter"];
    NAMES.iter().map(|name| name.to_vec()).collect()
}
5191
5192// ---------------------------------------------------------------------------
5193// `ls-files --eol` line-ending information
5194//
5195// Git's `git ls-files --eol` prints, for each path, three fields:
5196//   i/<stat>  — line-ending statistics of the *index* blob content
5197//   w/<stat>  — line-ending statistics of the *worktree* file content
5198//   attr/<a>  — the resolved crlf/eol attribute action (attributes only, no
5199//               config) — `get_convert_attr_ascii` in convert.c
5200// The two stat fields mirror `gather_convert_stats_ascii`; the attr field
5201// mirrors `convert_attrs` up to `ca->attr_action` (i.e. *before* the config
5202// derived `text` -> input/crlf substitution and the `core.autocrlf` fallback).
5203// ---------------------------------------------------------------------------
5204
/// Byte-class counters for a buffer, mirroring convert.c `gather_stats`.
#[derive(Debug, Default)]
struct ConvertStats {
    /// NUL bytes (each is also counted in `nonprintable`).
    nul: u32,
    /// CR bytes not immediately followed by LF.
    lonecr: u32,
    /// LF bytes not preceded by CR.
    lonelf: u32,
    /// CR LF pairs.
    crlf: u32,
    /// Printable bytes, including BS, HT, ESC and FF.
    printable: u32,
    /// Remaining control bytes and DEL.
    nonprintable: u32,
}

/// Classify every byte of `buf` into the [`ConvertStats`] counters.
///
/// CR LF is consumed as one unit; a single trailing `^Z` (0x1a) is not held
/// against the buffer as a non-printable byte.
fn gather_convert_stats(buf: &[u8]) -> ConvertStats {
    let mut stats = ConvertStats::default();
    let mut bytes = buf.iter().copied().peekable();

    while let Some(byte) = bytes.next() {
        match byte {
            b'\r' => {
                // Swallow the LF of a CR LF pair so it is not counted twice.
                if bytes.next_if_eq(&b'\n').is_some() {
                    stats.crlf += 1;
                } else {
                    stats.lonecr += 1;
                }
            }
            b'\n' => stats.lonelf += 1,
            0 => {
                stats.nul += 1;
                stats.nonprintable += 1;
            }
            // BS, HT, FF and ESC are treated as printable.
            0x08 | 0x09 | 0x0c | 0x1b => stats.printable += 1,
            // Every other control byte, plus DEL.
            0x01..=0x1f | 0x7f => stats.nonprintable += 1,
            _ => stats.printable += 1,
        }
    }

    // A trailing EOF marker (^Z) does not count as non-printable.
    if buf.ends_with(&[0x1a]) {
        stats.nonprintable = stats.nonprintable.saturating_sub(1);
    }
    stats
}
5266
5267/// Mirror of convert.c `convert_is_binary`: a lone CR or NUL, or a high
5268/// non-printable ratio, marks the content as binary.
5269fn convert_is_binary(stats: &ConvertStats) -> bool {
5270    if stats.lonecr > 0 {
5271        return true;
5272    }
5273    if stats.nul > 0 {
5274        return true;
5275    }
5276    (stats.printable >> 7) < stats.nonprintable
5277}
5278
5279/// Compute the `i/` or `w/` stat string for `content`, mirroring
5280/// convert.c `gather_convert_stats_ascii`.
5281fn convert_stats_ascii(content: &[u8]) -> &'static str {
5282    if content.is_empty() {
5283        return "none";
5284    }
5285    let stats = gather_convert_stats(content);
5286    if convert_is_binary(&stats) {
5287        return "-text";
5288    }
5289    match (stats.lonelf > 0, stats.crlf > 0) {
5290        (true, false) => "lf",
5291        (false, true) => "crlf",
5292        (true, true) => "mixed",
5293        (false, false) => "none",
5294    }
5295}
5296
5297/// The resolved crlf/eol attribute action for a path, mirroring convert.c
5298/// `convert_attrs` up to `ca->attr_action` (attributes only, no config), and
5299/// `get_convert_attr_ascii` for the ascii spelling.
5300fn convert_attr_ascii(checks: &[AttributeCheck]) -> &'static str {
5301    fn state_of<'a>(checks: &'a [AttributeCheck], name: &[u8]) -> Option<&'a AttributeState> {
5302        checks
5303            .iter()
5304            .find(|check| check.attribute == name)
5305            .and_then(|check| check.state.as_ref())
5306    }
5307
5308    // git_path_check_crlf: ATTR_TRUE -> TEXT, ATTR_FALSE -> BINARY,
5309    // ATTR_UNSET -> (fall through), "input" -> TEXT_INPUT, "auto" -> AUTO,
5310    // anything else -> UNDEFINED.
5311    #[derive(Clone, Copy, PartialEq)]
5312    enum Action {
5313        Undefined,
5314        Binary,
5315        Text,
5316        TextInput,
5317        TextCrlf,
5318        Auto,
5319        AutoCrlf,
5320        AutoInput,
5321    }
5322    fn check_crlf(state: Option<&AttributeState>) -> Action {
5323        match state {
5324            Some(AttributeState::Set) => Action::Text,
5325            Some(AttributeState::Unset) => Action::Binary,
5326            Some(AttributeState::Value(value)) if value == b"input" => Action::TextInput,
5327            Some(AttributeState::Value(value)) if value == b"auto" => Action::Auto,
5328            // ATTR_UNSET / any other value -> CRLF_UNDEFINED.
5329            _ => Action::Undefined,
5330        }
5331    }
5332
5333    // Resolve from the `text` attribute, then fall back to the legacy `crlf`
5334    // alias only when `text` left the action undefined.
5335    let mut action = check_crlf(state_of(checks, b"text"));
5336    if action == Action::Undefined {
5337        action = check_crlf(state_of(checks, b"crlf"));
5338    }
5339
5340    if action != Action::Binary {
5341        // git_path_check_eol: only "lf"/"crlf" values matter.
5342        let eol = match state_of(checks, b"eol") {
5343            Some(AttributeState::Value(value)) if value == b"lf" => Some(false),
5344            Some(AttributeState::Value(value)) if value == b"crlf" => Some(true),
5345            _ => None,
5346        };
5347        action = match (action, eol) {
5348            (Action::Auto, Some(false)) => Action::AutoInput,
5349            (Action::Auto, Some(true)) => Action::AutoCrlf,
5350            (_, Some(false)) if action != Action::Auto => Action::TextInput,
5351            (_, Some(true)) if action != Action::Auto => Action::TextCrlf,
5352            _ => action,
5353        };
5354    }
5355
5356    match action {
5357        Action::Undefined => "",
5358        Action::Binary => "-text",
5359        Action::Text => "text",
5360        Action::TextInput => "text eol=lf",
5361        Action::TextCrlf => "text eol=crlf",
5362        Action::Auto => "text=auto",
5363        Action::AutoCrlf => "text=auto eol=crlf",
5364        Action::AutoInput => "text=auto eol=lf",
5365    }
5366}
5367
/// The three fields `ls-files --eol` prints for one path.
pub struct EolInfo {
    /// Line-ending stat of the index blob (`i/...`); empty when there is no
    /// index blob.
    pub index: &'static str,
    /// Line-ending stat of the worktree file (`w/...`); empty when the file
    /// is absent.
    pub worktree: &'static str,
    /// Resolved crlf/eol attribute action (`attr/...`).
    pub attr: &'static str,
}

impl EolInfo {
    /// Render git's `ls-files --eol` prefix, `i/%-5s w/%-5s attr/%-17s\t`:
    /// each field is left-aligned and space-padded to its width, and the
    /// prefix ends with a tab.
    pub fn format_prefix(&self) -> String {
        let Self {
            index,
            worktree,
            attr,
        } = self;
        format!("i/{index:<5} w/{worktree:<5} attr/{attr:<17}\t")
    }
}
5387
5388/// Compute the `ls-files --eol` info for `path`.
5389///
5390/// `index_content` is the raw index blob bytes (None when the path has no
5391/// index entry or is not a regular file). The worktree file is read from
5392/// `worktree_root/path`; if it is absent or not a regular file the `w/` field
5393/// is empty. Attributes are resolved from the worktree `.gitattributes` chain
5394/// via `attr_checks`.
5395pub fn eol_info_for_path(
5396    worktree_root: impl AsRef<Path>,
5397    path: &[u8],
5398    index_content: Option<&[u8]>,
5399    attr_checks: &[AttributeCheck],
5400) -> EolInfo {
5401    let index = index_content.map(convert_stats_ascii).unwrap_or("");
5402
5403    let worktree_root = worktree_root.as_ref();
5404    let worktree = match repo_path_to_os_path(path) {
5405        Ok(rel) => {
5406            let absolute = worktree_root.join(rel);
5407            match fs::symlink_metadata(&absolute) {
5408                // git: only regular files get a `w/` stat (lstat + S_ISREG).
5409                Ok(meta) if meta.file_type().is_file() => match fs::read(&absolute) {
5410                    Ok(content) => convert_stats_ascii_owned(&content),
5411                    Err(_) => "",
5412                },
5413                _ => "",
5414            }
5415        }
5416        Err(_) => "",
5417    };
5418
5419    let attr = convert_attr_ascii(attr_checks);
5420
5421    EolInfo {
5422        index,
5423        worktree,
5424        attr,
5425    }
5426}
5427
5428/// `convert_stats_ascii` over an owned buffer; the result is a `'static` str so
5429/// the buffer can be dropped.
5430fn convert_stats_ascii_owned(content: &[u8]) -> &'static str {
5431    convert_stats_ascii(content)
5432}
5433
5434/// Resolve the crlf/eol/text/filter attributes for `path` from the worktree
5435/// `.gitattributes` chain (the set `ls-files --eol` needs for its `attr/`
5436/// field).
5437pub fn eol_attribute_checks(
5438    worktree_root: impl AsRef<Path>,
5439    path: &[u8],
5440) -> Result<Vec<AttributeCheck>> {
5441    filter_attribute_checks(worktree_root.as_ref(), path)
5442}
5443
5444pub fn deleted_index_entries(
5445    worktree_root: impl AsRef<Path>,
5446    git_dir: impl AsRef<Path>,
5447    format: ObjectFormat,
5448) -> Result<Vec<IndexEntry>> {
5449    let worktree_root = worktree_root.as_ref();
5450    let git_dir = git_dir.as_ref();
5451    let index_path = repository_index_path(git_dir);
5452    if !index_path.exists() {
5453        return Ok(Vec::new());
5454    }
5455    let index = Index::parse(&fs::read(index_path)?, format)?;
5456    let mut deleted = Vec::new();
5457    for entry in index.entries {
5458        if !worktree_path(worktree_root, entry.path.as_bytes())?.exists() {
5459            deleted.push(entry);
5460        }
5461    }
5462    Ok(deleted)
5463}
5464
5465pub fn modified_index_entries(
5466    worktree_root: impl AsRef<Path>,
5467    git_dir: impl AsRef<Path>,
5468    format: ObjectFormat,
5469) -> Result<Vec<IndexEntry>> {
5470    let worktree_root = worktree_root.as_ref();
5471    let git_dir = git_dir.as_ref();
5472    let index_path = repository_index_path(git_dir);
5473    if !index_path.exists() {
5474        return Ok(Vec::new());
5475    }
5476    let index = Index::parse(&fs::read(&index_path)?, format)?;
5477    // Reuse the same racy-git stat shortcut here: build the cache from the index
5478    // we just parsed (no second parse) so the worktree walk can skip re-hashing
5479    // unchanged files. A cached oid is only trusted on a non-racy stat match, so
5480    // genuinely modified files still fall through to a hash and are reported.
5481    let stat_cache = IndexStatCache::from_index(&index, &index_path);
5482    let worktree = worktree_entries_with_stat_cache(
5483        worktree_root,
5484        git_dir,
5485        format,
5486        Some(&stat_cache),
5487        None,
5488        None,
5489    )?;
5490    let mut modified = Vec::new();
5491    for entry in index.entries {
5492        let Some(worktree_entry) = worktree.get(entry.path.as_bytes()) else {
5493            modified.push(entry);
5494            continue;
5495        };
5496        if worktree_entry.mode != entry.mode || worktree_entry.oid != entry.oid {
5497            modified.push(entry);
5498        }
5499    }
5500    Ok(modified)
5501}
5502
5503pub fn checkout_branch(
5504    worktree_root: impl AsRef<Path>,
5505    git_dir: impl AsRef<Path>,
5506    format: ObjectFormat,
5507    branch: &str,
5508    committer: Vec<u8>,
5509) -> Result<CheckoutResult> {
5510    let worktree_root = worktree_root.as_ref();
5511    let git_dir = git_dir.as_ref();
5512    let branch_ref = branch_ref_name(branch)?;
5513    let refs = FileRefStore::new(git_dir, format);
5514    let target = match sley_refs::resolve_ref_peeled(&refs, &branch_ref)? {
5515        Some(oid) => oid,
5516        None => {
5517            checkout_switch_head_symbolic(&refs, branch_ref, committer, branch, None, None)?;
5518            return Ok(CheckoutResult {
5519                branch: branch.into(),
5520                oid: ObjectId::null(format),
5521                files: 0,
5522            });
5523        }
5524    };
5525    let current_head = resolve_head_commit_oid(git_dir, format)?;
5526    let files = if current_head == Some(target) {
5527        0
5528    } else {
5529        checkout_commit_to_index_and_worktree(worktree_root, git_dir, format, &target)?
5530    };
5531    checkout_switch_head_symbolic(
5532        &refs,
5533        branch_ref,
5534        committer,
5535        branch,
5536        Some(target),
5537        Some(target),
5538    )?;
5539    Ok(CheckoutResult {
5540        branch: branch.into(),
5541        oid: target,
5542        files,
5543    })
5544}
5545
5546pub fn checkout_detached(
5547    worktree_root: impl AsRef<Path>,
5548    git_dir: impl AsRef<Path>,
5549    format: ObjectFormat,
5550    target: &ObjectId,
5551    committer: Vec<u8>,
5552    message: Vec<u8>,
5553) -> Result<CheckoutResult> {
5554    let worktree_root = worktree_root.as_ref();
5555    let git_dir = git_dir.as_ref();
5556    let files = checkout_commit_to_index_and_worktree(worktree_root, git_dir, format, target)?;
5557    let refs = FileRefStore::new(git_dir, format);
5558    let zero = ObjectId::null(format);
5559    let mut tx = refs.transaction();
5560    tx.update(RefUpdate {
5561        name: "HEAD".into(),
5562        expected: None,
5563        new: RefTarget::Direct(*target),
5564        reflog: Some(ReflogEntry {
5565            old_oid: zero,
5566            new_oid: *target,
5567            committer,
5568            message,
5569        }),
5570    });
5571    tx.commit()?;
5572    Ok(CheckoutResult {
5573        branch: target.to_string(),
5574        oid: *target,
5575        files,
5576    })
5577}
5578
5579/// Like [`checkout_branch`], but runs the smudge-side content filters
5580/// (`core.autocrlf`/`text`/`eol` EOL conversion and `filter.<name>.smudge`
5581/// drivers) on each blob as it is written to the worktree. `config` is the
5582/// repository config used to resolve the filters.
5583pub fn checkout_branch_filtered(
5584    worktree_root: impl AsRef<Path>,
5585    git_dir: impl AsRef<Path>,
5586    format: ObjectFormat,
5587    branch: &str,
5588    committer: Vec<u8>,
5589    config: &GitConfig,
5590) -> Result<CheckoutResult> {
5591    let worktree_root = worktree_root.as_ref();
5592    let git_dir = git_dir.as_ref();
5593    let branch_ref = branch_ref_name(branch)?;
5594    let refs = FileRefStore::new(git_dir, format);
5595    let target = match sley_refs::resolve_ref_peeled(&refs, &branch_ref)? {
5596        Some(oid) => oid,
5597        None => {
5598            checkout_switch_head_symbolic(&refs, branch_ref, committer, branch, None, None)?;
5599            return Ok(CheckoutResult {
5600                branch: branch.into(),
5601                oid: ObjectId::null(format),
5602                files: 0,
5603            });
5604        }
5605    };
5606    let current_head = resolve_head_commit_oid(git_dir, format)?;
5607    let files = if current_head == Some(target) {
5608        0
5609    } else {
5610        checkout_commit_to_index_and_worktree_filtered(
5611            worktree_root,
5612            git_dir,
5613            format,
5614            &target,
5615            Some(config),
5616        )?
5617    };
5618    checkout_switch_head_symbolic(
5619        &refs,
5620        branch_ref,
5621        committer,
5622        branch,
5623        Some(target),
5624        Some(target),
5625    )?;
5626    Ok(CheckoutResult {
5627        branch: branch.into(),
5628        oid: target,
5629        files,
5630    })
5631}
5632
5633/// Like [`checkout_detached`], but runs the smudge-side content filters (see
5634/// [`checkout_branch_filtered`]).
5635pub fn checkout_detached_filtered(
5636    worktree_root: impl AsRef<Path>,
5637    git_dir: impl AsRef<Path>,
5638    format: ObjectFormat,
5639    target: &ObjectId,
5640    committer: Vec<u8>,
5641    message: Vec<u8>,
5642    config: &GitConfig,
5643) -> Result<CheckoutResult> {
5644    let worktree_root = worktree_root.as_ref();
5645    let git_dir = git_dir.as_ref();
5646    let files = checkout_commit_to_index_and_worktree_filtered(
5647        worktree_root,
5648        git_dir,
5649        format,
5650        target,
5651        Some(config),
5652    )?;
5653    let refs = FileRefStore::new(git_dir, format);
5654    let zero = ObjectId::null(format);
5655    let mut tx = refs.transaction();
5656    tx.update(RefUpdate {
5657        name: "HEAD".into(),
5658        expected: None,
5659        new: RefTarget::Direct(*target),
5660        reflog: Some(ReflogEntry {
5661            old_oid: zero,
5662            new_oid: *target,
5663            committer,
5664            message,
5665        }),
5666    });
5667    tx.commit()?;
5668    Ok(CheckoutResult {
5669        branch: target.to_string(),
5670        oid: *target,
5671        files,
5672    })
5673}
5674
5675fn checkout_commit_to_index_and_worktree(
5676    worktree_root: &Path,
5677    git_dir: &Path,
5678    format: ObjectFormat,
5679    target: &ObjectId,
5680) -> Result<usize> {
5681    checkout_commit_to_index_and_worktree_filtered(worktree_root, git_dir, format, target, None)
5682}
5683
5684/// Like [`checkout_commit_to_index_and_worktree`] but optionally runs the
5685/// smudge-side content filters (see [`apply_smudge_filter`]) on each blob before
5686/// it is written to the worktree. Attribute lookups use the `.gitattributes`
5687/// recorded in the *target tree* so the rules of the checked-out commit apply.
5688fn checkout_commit_to_index_and_worktree_filtered(
5689    worktree_root: &Path,
5690    git_dir: &Path,
5691    format: ObjectFormat,
5692    target: &ObjectId,
5693    smudge_config: Option<&GitConfig>,
5694) -> Result<usize> {
5695    let status = short_status(worktree_root, git_dir, format)?;
5696    if status
5697        .iter()
5698        .any(|entry| !status_entry_is_untracked_or_ignored(entry))
5699    {
5700        return Err(GitError::Transaction(
5701            "checkout requires a clean working tree".into(),
5702        ));
5703    }
5704    let db = FileObjectDatabase::from_git_dir(git_dir, format);
5705    let commit = read_commit(&db, format, target)?;
5706    let mut target_entries = BTreeMap::new();
5707    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
5708
5709    let attributes = smudge_config
5710        .map(|_| build_tree_attribute_matcher(worktree_root, &db, format, &commit.tree))
5711        .transpose()?;
5712
5713    for path in read_index_entries(git_dir, format)?.keys() {
5714        if !target_entries.contains_key(path) {
5715            remove_worktree_file(worktree_root, path)?;
5716        }
5717    }
5718
5719    let mut index_entries = Vec::new();
5720    for (path, entry) in &target_entries {
5721        // Gitlinks go through the shared materialization step (mkdir + zeroed
5722        // stat); smudge filters never apply to a submodule directory.
5723        if entry.mode == 0o160000 {
5724            index_entries.push(materialize_tree_entry(&db, worktree_root, path, entry)?);
5725            continue;
5726        }
5727        let object = read_expected_object(&db, &entry.oid, ObjectType::Blob)?;
5728        let body: Cow<'_, [u8]> = match (smudge_config, &attributes) {
5729            (Some(config), Some(matcher)) => {
5730                let checks = matcher.attributes_for_path(path, &filter_attribute_names(), false);
5731                apply_smudge_filter_with_attributes_cow(config, &checks, path, &object.body)?
5732            }
5733            _ => Cow::Borrowed(&object.body),
5734        };
5735        let file_path = worktree_path(worktree_root, path)?;
5736        if let Some(parent) = file_path.parent() {
5737            fs::create_dir_all(parent)?;
5738        }
5739        fs::write(&file_path, &body)?;
5740        let metadata = fs::metadata(&file_path)?;
5741        let mut index_entry = index_entry_from_metadata(path.clone(), entry.oid, &metadata);
5742        index_entry.mode = entry.mode;
5743        index_entries.push(index_entry);
5744    }
5745    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
5746    fs::write(
5747        repository_index_path(git_dir),
5748        Index {
5749            version: 2,
5750            entries: index_entries,
5751            extensions: Vec::new(),
5752            checksum: None,
5753        }
5754        .write(format)?,
5755    )?;
5756    Ok(target_entries.len())
5757}
5758
5759/// Build an [`AttributeMatcher`] from the `.gitattributes` files contained in a
5760/// tree, plus the repo-level (`core.attributesFile`, `.git/info/attributes`)
5761/// sources, mirroring [`standard_attributes_for_path_from_tree`].
5762fn build_tree_attribute_matcher(
5763    worktree_root: &Path,
5764    db: &FileObjectDatabase,
5765    format: ObjectFormat,
5766    tree_oid: &ObjectId,
5767) -> Result<AttributeMatcher> {
5768    let mut matcher = AttributeMatcher::default();
5769    if !matcher.read_configured_attributes(worktree_root) {
5770        matcher.read_default_global_attributes();
5771    }
5772    collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
5773    read_attribute_patterns(
5774        worktree_root.join(".git").join("info").join("attributes"),
5775        &mut matcher,
5776        &[],
5777        b".git/info/attributes",
5778    );
5779    Ok(matcher)
5780}
5781
5782/// Sparse- and skip-worktree-aware variant of
5783/// [`checkout_commit_to_index_and_worktree`].
5784///
5785/// When `sparse` is `None` this behaves like the plain checkout except that it
5786/// preserves any pre-existing skip-worktree bits (so an already-sparse worktree
5787/// is not silently re-expanded). When `sparse` is `Some`, every target path is
5788/// additionally classified against the patterns: in-cone paths are written and
5789/// have their skip-worktree bit cleared, while out-of-cone paths are left out
5790/// of the worktree, get their skip-worktree bit set, and have any stale file
5791/// removed.
5792fn checkout_commit_to_index_and_worktree_sparse(
5793    worktree_root: &Path,
5794    git_dir: &Path,
5795    format: ObjectFormat,
5796    target: &ObjectId,
5797    sparse: Option<(&SparseCheckout, SparseCheckoutMode)>,
5798) -> Result<usize> {
5799    let previously_skipped = skip_worktree_paths(git_dir, format)?;
5800    let db = FileObjectDatabase::from_git_dir(git_dir, format);
5801    let commit = read_commit(&db, format, target)?;
5802    let mut target_entries = BTreeMap::new();
5803    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
5804
5805    // Honor skip-worktree: a path whose worktree file is intentionally absent
5806    // must not be treated as a dirty (deleted) change blocking the checkout.
5807    let status = short_status(worktree_root, git_dir, format)?;
5808    if status.iter().any(|entry| {
5809        if previously_skipped.contains(entry.path.as_slice()) {
5810            return false;
5811        }
5812        // Submodule state never blocks a checkout: upstream unpack-trees
5813        // treats gitlinks as always up-to-date (ie_match_stat refuses to pay
5814        // for a submodule dirtiness probe), so new commits / dirty content in
5815        // a submodule must not fail the branch switch.
5816        if entry.index_mode == Some(0o160000) || entry.worktree_mode == Some(0o160000) {
5817            return false;
5818        }
5819        // An untracked embedded repository where the target tree records a
5820        // gitlink is reused as-is (upstream entry.c write_entry: mkdir with
5821        // EEXIST is success), so it does not block the checkout either.
5822        if entry.index == b'?' && entry.worktree == b'?' {
5823            let path = entry
5824                .path
5825                .strip_suffix(b"/")
5826                .unwrap_or(entry.path.as_slice());
5827            if target_entries
5828                .get(path)
5829                .is_some_and(|target| target.mode == 0o160000)
5830            {
5831                return false;
5832            }
5833        }
5834        true
5835    }) {
5836        return Err(GitError::Transaction(
5837            "checkout requires a clean working tree".into(),
5838        ));
5839    }
5840
5841    let matcher = sparse.map(|(spec, mode)| SparseMatcher::new(spec, mode));
5842
5843    for path in read_index_entries(git_dir, format)?.keys() {
5844        if target_entries.contains_key(path) {
5845            continue;
5846        }
5847        // Do not disturb the worktree state of an intentionally skipped path.
5848        if previously_skipped.contains(path) {
5849            continue;
5850        }
5851        remove_worktree_file(worktree_root, path)?;
5852    }
5853
5854    let mut index_entries = Vec::new();
5855    for (path, entry) in &target_entries {
5856        let in_cone = matcher.as_ref().is_none_or(|matcher| {
5857            // A path already marked skip-worktree stays out unless it now
5858            // matches the sparse cone, mirroring upstream "honor skip-worktree".
5859            matcher.includes_file(path)
5860        });
5861        let index_entry = if in_cone {
5862            // `materialize_tree_entry` leaves flags_extended at 0, so the
5863            // skip-worktree bit is already clear for in-cone paths.
5864            materialize_tree_entry(&db, worktree_root, path, entry)?
5865        } else {
5866            // Out of cone: ensure no stale worktree file remains and synthesize
5867            // an index entry straight from the tree (no worktree metadata),
5868            // then mark it skip-worktree.
5869            remove_worktree_file(worktree_root, path)?;
5870            let mut index_entry = restored_head_index_entry(worktree_root, &db, path, entry)?;
5871            set_skip_worktree(&mut index_entry);
5872            index_entry
5873        };
5874        index_entries.push(index_entry);
5875    }
5876    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
5877    let mut index = Index {
5878        version: 2,
5879        entries: index_entries,
5880        extensions: Vec::new(),
5881        checksum: None,
5882    };
5883    normalize_index_version_for_extended_flags(&mut index);
5884    fs::write(repository_index_path(git_dir), index.write(format)?)?;
5885    Ok(target_entries.len())
5886}
5887
5888fn skip_worktree_paths(git_dir: &Path, format: ObjectFormat) -> Result<BTreeSet<Vec<u8>>> {
5889    let index_path = repository_index_path(git_dir);
5890    if !index_path.exists() {
5891        return Ok(BTreeSet::new());
5892    }
5893    let index = Index::parse(&fs::read(index_path)?, format)?;
5894    Ok(index
5895        .entries
5896        .into_iter()
5897        .filter(index_entry_skip_worktree)
5898        .map(|entry| entry.path.into_bytes())
5899        .collect())
5900}
5901
5902pub fn restore_worktree_paths(
5903    worktree_root: impl AsRef<Path>,
5904    git_dir: impl AsRef<Path>,
5905    format: ObjectFormat,
5906    paths: &[PathBuf],
5907) -> Result<RestoreResult> {
5908    restore_worktree_paths_inner(
5909        worktree_root.as_ref(),
5910        git_dir.as_ref(),
5911        format,
5912        paths,
5913        None,
5914    )
5915}
5916
5917/// Like [`restore_worktree_paths`], applying the smudge-side content filters
5918/// (CRLF / ident / filter drivers) the way a checkout writes blobs.
5919pub fn restore_worktree_paths_filtered(
5920    worktree_root: impl AsRef<Path>,
5921    git_dir: impl AsRef<Path>,
5922    format: ObjectFormat,
5923    paths: &[PathBuf],
5924    config: &GitConfig,
5925) -> Result<RestoreResult> {
5926    restore_worktree_paths_inner(
5927        worktree_root.as_ref(),
5928        git_dir.as_ref(),
5929        format,
5930        paths,
5931        Some(config),
5932    )
5933}
5934
5935fn restore_worktree_paths_inner(
5936    worktree_root: &Path,
5937    git_dir: &Path,
5938    format: ObjectFormat,
5939    paths: &[PathBuf],
5940    smudge_config: Option<&GitConfig>,
5941) -> Result<RestoreResult> {
5942    let index_path = repository_index_path(git_dir);
5943    if !index_path.exists() {
5944        return Err(GitError::Exit(1));
5945    }
5946    let index = Index::parse(&fs::read(index_path)?, format)?;
5947    let db = FileObjectDatabase::from_git_dir(git_dir, format);
5948    let mut restored = BTreeSet::new();
5949    for path in paths {
5950        let absolute = if path.is_absolute() {
5951            path.clone()
5952        } else {
5953            worktree_root.join(path)
5954        };
5955        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
5956            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
5957        })?;
5958        let git_path = git_path_bytes(relative)?;
5959        let recursive = path == Path::new(".")
5960            || path.to_string_lossy().ends_with('/')
5961            || absolute.is_dir()
5962            || index_has_entry_under(&index.entries, &git_path);
5963        let mut matched = false;
5964        for entry in &index.entries {
5965            if entry.path.as_bytes() == git_path.as_slice()
5966                || (recursive && index_entry_is_under_path(entry.path.as_bytes(), &git_path))
5967            {
5968                restore_index_entry(worktree_root, git_dir, format, &db, entry, smudge_config)?;
5969                restored.insert(entry.path.clone());
5970                matched = true;
5971            }
5972        }
5973        if !matched {
5974            eprintln!(
5975                "error: pathspec '{}' did not match any file(s) known to git",
5976                path.display()
5977            );
5978            return Err(GitError::Exit(1));
5979        }
5980    }
5981    Ok(RestoreResult {
5982        restored: restored.len(),
5983    })
5984}
5985
5986pub fn restore_index_paths_from_head(
5987    worktree_root: impl AsRef<Path>,
5988    git_dir: impl AsRef<Path>,
5989    format: ObjectFormat,
5990    paths: &[PathBuf],
5991) -> Result<RestoreResult> {
5992    let worktree_root = worktree_root.as_ref();
5993    let git_dir = git_dir.as_ref();
5994    let index_path = repository_index_path(git_dir);
5995    let index = if index_path.exists() {
5996        Index::parse(&fs::read(&index_path)?, format)?
5997    } else {
5998        Index {
5999            version: 2,
6000            entries: Vec::new(),
6001            extensions: Vec::new(),
6002            checksum: None,
6003        }
6004    };
6005    let db = FileObjectDatabase::from_git_dir(git_dir, format);
6006    let head_entries = head_tree_entries(git_dir, format, &db)?;
6007    restore_index_paths_from_entries(
6008        worktree_root,
6009        git_dir,
6010        format,
6011        &db,
6012        index,
6013        &head_entries,
6014        paths,
6015    )
6016}
6017
6018pub fn restore_index_paths_from_tree(
6019    worktree_root: impl AsRef<Path>,
6020    git_dir: impl AsRef<Path>,
6021    format: ObjectFormat,
6022    tree_oid: &ObjectId,
6023    paths: &[PathBuf],
6024) -> Result<RestoreResult> {
6025    let worktree_root = worktree_root.as_ref();
6026    let git_dir = git_dir.as_ref();
6027    let index_path = repository_index_path(git_dir);
6028    let index = if index_path.exists() {
6029        Index::parse(&fs::read(&index_path)?, format)?
6030    } else {
6031        Index {
6032            version: 2,
6033            entries: Vec::new(),
6034            extensions: Vec::new(),
6035            checksum: None,
6036        }
6037    };
6038    let db = FileObjectDatabase::from_git_dir(git_dir, format);
6039    let source_entries = tree_entries(&db, format, tree_oid)?;
6040    restore_index_paths_from_entries(
6041        worktree_root,
6042        git_dir,
6043        format,
6044        &db,
6045        index,
6046        &source_entries,
6047        paths,
6048    )
6049}
6050
6051fn restore_index_paths_from_entries(
6052    worktree_root: &Path,
6053    git_dir: &Path,
6054    format: ObjectFormat,
6055    db: &FileObjectDatabase,
6056    index: Index,
6057    source_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
6058    paths: &[PathBuf],
6059) -> Result<RestoreResult> {
6060    let mut index_entries = index
6061        .entries
6062        .into_iter()
6063        .map(|entry| (entry.path.as_bytes().to_vec(), entry))
6064        .collect::<BTreeMap<_, _>>();
6065    let mut restored = BTreeSet::new();
6066    for path in paths {
6067        let absolute = if path.is_absolute() {
6068            path.clone()
6069        } else {
6070            worktree_root.join(path)
6071        };
6072        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
6073            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
6074        })?;
6075        let git_path = git_path_bytes(relative)?;
6076        let recursive = path == Path::new(".")
6077            || path.to_string_lossy().ends_with('/')
6078            || absolute.is_dir()
6079            || index_entries
6080                .keys()
6081                .any(|entry| index_entry_is_under_path(entry, &git_path))
6082            || source_entries
6083                .keys()
6084                .any(|entry| index_entry_is_under_path(entry, &git_path));
6085        let mut matched_paths = BTreeSet::new();
6086        for path in index_entries.keys().chain(source_entries.keys()) {
6087            if path.as_slice() == git_path.as_slice()
6088                || (recursive && index_entry_is_under_path(path, &git_path))
6089            {
6090                matched_paths.insert(path.clone());
6091            }
6092        }
6093        if matched_paths.is_empty() {
6094            eprintln!(
6095                "error: pathspec '{}' did not match any file(s) known to git",
6096                path.display()
6097            );
6098            return Err(GitError::Exit(1));
6099        }
6100        for path in matched_paths {
6101            if let Some(entry) = source_entries.get(&path) {
6102                index_entries.insert(
6103                    path.clone(),
6104                    restored_head_index_entry(worktree_root, db, &path, entry)?,
6105                );
6106            } else {
6107                index_entries.remove(&path);
6108            }
6109            restored.insert(path);
6110        }
6111    }
6112    let mut entries = index_entries.into_values().collect::<Vec<_>>();
6113    entries.sort_by(|left, right| left.path.cmp(&right.path));
6114    fs::write(
6115        repository_index_path(git_dir),
6116        Index {
6117            version: 2,
6118            entries,
6119            extensions: Vec::new(),
6120            checksum: None,
6121        }
6122        .write(format)?,
6123    )?;
6124    Ok(RestoreResult {
6125        restored: restored.len(),
6126    })
6127}
6128
6129pub fn restore_index_and_worktree_paths_from_head(
6130    worktree_root: impl AsRef<Path>,
6131    git_dir: impl AsRef<Path>,
6132    format: ObjectFormat,
6133    paths: &[PathBuf],
6134) -> Result<RestoreResult> {
6135    let worktree_root = worktree_root.as_ref();
6136    let git_dir = git_dir.as_ref();
6137    let index_path = repository_index_path(git_dir);
6138    let index = if index_path.exists() {
6139        Index::parse(&fs::read(&index_path)?, format)?
6140    } else {
6141        Index {
6142            version: 2,
6143            entries: Vec::new(),
6144            extensions: Vec::new(),
6145            checksum: None,
6146        }
6147    };
6148    let db = FileObjectDatabase::from_git_dir(git_dir, format);
6149    let head_entries = head_tree_entries(git_dir, format, &db)?;
6150    restore_index_and_worktree_paths_from_entries(
6151        worktree_root,
6152        git_dir,
6153        format,
6154        &db,
6155        index,
6156        &head_entries,
6157        paths,
6158    )
6159}
6160
6161pub fn restore_index_and_worktree_paths_from_tree(
6162    worktree_root: impl AsRef<Path>,
6163    git_dir: impl AsRef<Path>,
6164    format: ObjectFormat,
6165    tree_oid: &ObjectId,
6166    paths: &[PathBuf],
6167) -> Result<RestoreResult> {
6168    let worktree_root = worktree_root.as_ref();
6169    let git_dir = git_dir.as_ref();
6170    let index_path = repository_index_path(git_dir);
6171    let index = if index_path.exists() {
6172        Index::parse(&fs::read(&index_path)?, format)?
6173    } else {
6174        Index {
6175            version: 2,
6176            entries: Vec::new(),
6177            extensions: Vec::new(),
6178            checksum: None,
6179        }
6180    };
6181    let db = FileObjectDatabase::from_git_dir(git_dir, format);
6182    let source_entries = tree_entries(&db, format, tree_oid)?;
6183    restore_index_and_worktree_paths_from_entries(
6184        worktree_root,
6185        git_dir,
6186        format,
6187        &db,
6188        index,
6189        &source_entries,
6190        paths,
6191    )
6192}
6193
6194fn restore_index_and_worktree_paths_from_entries(
6195    worktree_root: &Path,
6196    git_dir: &Path,
6197    format: ObjectFormat,
6198    db: &FileObjectDatabase,
6199    index: Index,
6200    source_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
6201    paths: &[PathBuf],
6202) -> Result<RestoreResult> {
6203    let mut index_entries = index
6204        .entries
6205        .into_iter()
6206        .map(|entry| (entry.path.as_bytes().to_vec(), entry))
6207        .collect::<BTreeMap<_, _>>();
6208    let mut restored = BTreeSet::new();
6209    for path in paths {
6210        let absolute = if path.is_absolute() {
6211            path.clone()
6212        } else {
6213            worktree_root.join(path)
6214        };
6215        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
6216            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
6217        })?;
6218        let git_path = git_path_bytes(relative)?;
6219        let recursive = path == Path::new(".")
6220            || path.to_string_lossy().ends_with('/')
6221            || absolute.is_dir()
6222            || index_entries
6223                .keys()
6224                .any(|entry| index_entry_is_under_path(entry, &git_path))
6225            || source_entries
6226                .keys()
6227                .any(|entry| index_entry_is_under_path(entry, &git_path));
6228        let mut matched_paths = BTreeSet::new();
6229        for path in index_entries.keys().chain(source_entries.keys()) {
6230            if path.as_slice() == git_path.as_slice()
6231                || (recursive && index_entry_is_under_path(path, &git_path))
6232            {
6233                matched_paths.insert(path.clone());
6234            }
6235        }
6236        if matched_paths.is_empty() {
6237            eprintln!(
6238                "error: pathspec '{}' did not match any file(s) known to git",
6239                path.display()
6240            );
6241            return Err(GitError::Exit(1));
6242        }
6243        for path in matched_paths {
6244            if let Some(entry) = source_entries.get(&path) {
6245                index_entries.insert(
6246                    path.clone(),
6247                    restore_head_entry_to_worktree_and_index(worktree_root, db, &path, entry)?,
6248                );
6249            } else {
6250                index_entries.remove(&path);
6251                remove_worktree_file(worktree_root, &path)?;
6252            }
6253            restored.insert(path);
6254        }
6255    }
6256    let mut entries = index_entries.into_values().collect::<Vec<_>>();
6257    entries.sort_by(|left, right| left.path.cmp(&right.path));
6258    fs::write(
6259        repository_index_path(git_dir),
6260        Index {
6261            version: 2,
6262            entries,
6263            extensions: Vec::new(),
6264            checksum: None,
6265        }
6266        .write(format)?,
6267    )?;
6268    Ok(RestoreResult {
6269        restored: restored.len(),
6270    })
6271}
6272
6273pub fn reset_index_and_worktree_to_commit(
6274    worktree_root: impl AsRef<Path>,
6275    git_dir: impl AsRef<Path>,
6276    format: ObjectFormat,
6277    commit_oid: &ObjectId,
6278) -> Result<RestoreResult> {
6279    let worktree_root = worktree_root.as_ref();
6280    let git_dir = git_dir.as_ref();
6281    let db = FileObjectDatabase::from_git_dir(git_dir, format);
6282    let commit = read_commit(&db, format, commit_oid)?;
6283    let mut target_entries = BTreeMap::new();
6284    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
6285
6286    for path in read_index_entries(git_dir, format)?.keys() {
6287        if !target_entries.contains_key(path) {
6288            remove_worktree_file(worktree_root, path)?;
6289        }
6290    }
6291
6292    let mut index_entries = Vec::new();
6293    for (path, entry) in &target_entries {
6294        index_entries.push(materialize_tree_entry(&db, worktree_root, path, entry)?);
6295    }
6296    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
6297    fs::write(
6298        repository_index_path(git_dir),
6299        Index {
6300            version: 2,
6301            entries: index_entries,
6302            extensions: Vec::new(),
6303            checksum: None,
6304        }
6305        .write(format)?,
6306    )?;
6307    Ok(RestoreResult {
6308        restored: target_entries.len(),
6309    })
6310}
6311
6312/// Write one target tree entry into the worktree and return its index entry —
6313/// the shared materialization step for every checkout/reset worktree rebuild.
6314///
6315/// Gitlinks (mode 160000) never touch the object database: their oid names a
6316/// commit in the *submodule's* repository, not an object here. Upstream
6317/// (entry.c `write_entry` S_IFGITLINK) just mkdirs the path — an
6318/// already-populated submodule is left untouched (EEXIST is success) — and
6319/// records the oid in the index with a zeroed stat so status re-evaluates the
6320/// gitlink against the embedded repository's HEAD.
6321fn materialize_tree_entry(
6322    db: &FileObjectDatabase,
6323    worktree_root: &Path,
6324    path: &[u8],
6325    entry: &TrackedEntry,
6326) -> Result<IndexEntry> {
6327    if entry.mode == 0o160000 {
6328        let dir_path = worktree_path(worktree_root, path)?;
6329        fs::create_dir_all(&dir_path)?;
6330        return Ok(IndexEntry {
6331            ctime_seconds: 0,
6332            ctime_nanoseconds: 0,
6333            mtime_seconds: 0,
6334            mtime_nanoseconds: 0,
6335            dev: 0,
6336            ino: 0,
6337            mode: entry.mode,
6338            uid: 0,
6339            gid: 0,
6340            size: 0,
6341            oid: entry.oid,
6342            flags: path.len().min(0x0fff) as u16,
6343            flags_extended: 0,
6344            path: BString::from(path),
6345        });
6346    }
6347    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
6348    let file_path = worktree_path(worktree_root, path)?;
6349    if let Some(parent) = file_path.parent() {
6350        fs::create_dir_all(parent)?;
6351    }
6352    fs::write(&file_path, &object.body)?;
6353    let metadata = fs::metadata(&file_path)?;
6354    let mut index_entry = index_entry_from_metadata(path.to_vec(), entry.oid, &metadata);
6355    index_entry.mode = entry.mode;
6356    Ok(index_entry)
6357}
6358
6359/// Materialize a tree object into the index and worktree.
6360pub fn checkout_tree_to_index_and_worktree(
6361    worktree_root: impl AsRef<Path>,
6362    git_dir: impl AsRef<Path>,
6363    format: ObjectFormat,
6364    tree_oid: &ObjectId,
6365) -> Result<RestoreResult> {
6366    let worktree_root = worktree_root.as_ref();
6367    let git_dir = git_dir.as_ref();
6368    let db = FileObjectDatabase::from_git_dir(git_dir, format);
6369    let mut target_entries = BTreeMap::new();
6370    collect_tree_entries(&db, format, tree_oid, &mut target_entries)?;
6371
6372    for path in read_index_entries(git_dir, format)?.keys() {
6373        if !target_entries.contains_key(path) {
6374            remove_worktree_file(worktree_root, path)?;
6375        }
6376    }
6377
6378    let mut index_entries = Vec::new();
6379    for (path, entry) in &target_entries {
6380        index_entries.push(materialize_tree_entry(&db, worktree_root, path, entry)?);
6381    }
6382    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
6383    fs::write(
6384        repository_index_path(git_dir),
6385        Index {
6386            version: 2,
6387            entries: index_entries,
6388            extensions: Vec::new(),
6389            checksum: None,
6390        }
6391        .write(format)?,
6392    )?;
6393    Ok(RestoreResult {
6394        restored: target_entries.len(),
6395    })
6396}
6397
6398pub fn reset_index_to_commit(
6399    worktree_root: impl AsRef<Path>,
6400    git_dir: impl AsRef<Path>,
6401    format: ObjectFormat,
6402    commit_oid: &ObjectId,
6403) -> Result<RestoreResult> {
6404    let worktree_root = worktree_root.as_ref();
6405    let git_dir = git_dir.as_ref();
6406    let db = FileObjectDatabase::from_git_dir(git_dir, format);
6407    let commit = read_commit(&db, format, commit_oid)?;
6408    let mut target_entries = BTreeMap::new();
6409    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
6410    let mut index_entries = Vec::new();
6411    for (path, entry) in &target_entries {
6412        index_entries.push(restored_head_index_entry(worktree_root, &db, path, entry)?);
6413    }
6414    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
6415    fs::write(
6416        repository_index_path(git_dir),
6417        Index {
6418            version: 2,
6419            entries: index_entries,
6420            extensions: Vec::new(),
6421            checksum: None,
6422        }
6423        .write(format)?,
6424    )?;
6425    Ok(RestoreResult {
6426        restored: target_entries.len(),
6427    })
6428}
6429
6430/// Build a fresh in-memory index that mirrors the tree `tree_oid`, the way
6431/// `git read-tree <tree>` does: every blob, symlink, and gitlink leaf (found by
6432/// recursing subtrees) becomes a stage-0 entry carrying the tree mode and oid,
6433/// with a fully zeroed stat (so nothing is treated as stat-clean) and size 0.
6434/// Entries are sorted by path; the index is version 2 with no extensions.
6435///
6436/// This does not touch the worktree or write anything to disk — serialize the
6437/// result with [`Index::write`] (and persist it) when you want to replace
6438/// `.git/index`.
6439pub fn index_from_tree(
6440    db: &FileObjectDatabase,
6441    format: ObjectFormat,
6442    tree_oid: &ObjectId,
6443) -> Result<Index> {
6444    let mut entries: Vec<IndexEntry> = Vec::new();
6445    if *tree_oid != ObjectId::empty_tree(format) {
6446        let mut tree_entries = BTreeMap::new();
6447        collect_tree_entries(db, format, tree_oid, &mut tree_entries)?;
6448        entries.reserve(tree_entries.len());
6449        for (path, entry) in tree_entries {
6450            let name_len = (path.len().min(0x0fff)) as u16;
6451            entries.push(IndexEntry {
6452                ctime_seconds: 0,
6453                ctime_nanoseconds: 0,
6454                mtime_seconds: 0,
6455                mtime_nanoseconds: 0,
6456                dev: 0,
6457                ino: 0,
6458                mode: entry.mode,
6459                uid: 0,
6460                gid: 0,
6461                size: 0,
6462                oid: entry.oid,
6463                flags: name_len,
6464                flags_extended: 0,
6465                path: path.into(),
6466            });
6467        }
6468    }
6469    // git orders index entries by path bytes; BTreeMap already yields that, but
6470    // sort explicitly so the contract holds regardless of how entries arrive.
6471    entries.sort_by(|left, right| left.path.cmp(&right.path));
6472    Ok(Index {
6473        version: 2,
6474        entries,
6475        extensions: Vec::new(),
6476        checksum: None,
6477    })
6478}
6479
6480/// Enforces a [`SparseCheckout`] against the current index and worktree.
6481///
6482/// Every stage-0 index entry is classified with the sparse patterns (see
6483/// [`SparseCheckoutMode`] for the matching semantics):
6484///
6485/// * **In cone**: the skip-worktree bit is cleared and, if the worktree file is
6486///   missing, it is re-materialized from the entry's blob in the object
6487///   database. Existing worktree files are left untouched so local content is
6488///   preserved.
6489/// * **Out of cone**: the skip-worktree bit is set and any existing worktree
6490///   file is removed (empty parent directories are pruned).
6491///
6492/// Conflicted entries (stage != 0) are never given the skip-worktree bit and
6493/// are left alone, matching upstream Git. The index is rewritten in place.
6494pub fn apply_sparse_checkout(
6495    worktree_root: impl AsRef<Path>,
6496    git_dir: impl AsRef<Path>,
6497    format: ObjectFormat,
6498    sparse: &SparseCheckout,
6499) -> Result<ApplySparseResult> {
6500    apply_sparse_checkout_with_mode(
6501        worktree_root,
6502        git_dir,
6503        format,
6504        sparse,
6505        SparseCheckoutMode::Auto,
6506    )
6507}
6508
6509/// Like [`apply_sparse_checkout`] but lets the caller force the pattern
6510/// interpretation instead of auto-detecting it.
6511pub fn apply_sparse_checkout_with_mode(
6512    worktree_root: impl AsRef<Path>,
6513    git_dir: impl AsRef<Path>,
6514    format: ObjectFormat,
6515    sparse: &SparseCheckout,
6516    mode: SparseCheckoutMode,
6517) -> Result<ApplySparseResult> {
6518    let worktree_root = worktree_root.as_ref();
6519    let git_dir = git_dir.as_ref();
6520    let index_path = repository_index_path(git_dir);
6521    let mut index = if index_path.exists() {
6522        Index::parse(&fs::read(&index_path)?, format)?
6523    } else {
6524        return Ok(ApplySparseResult {
6525            materialized: Vec::new(),
6526            skipped: Vec::new(),
6527            not_up_to_date: Vec::new(),
6528        });
6529    };
6530    let matcher = SparseMatcher::new(sparse, mode);
6531    let db = FileObjectDatabase::from_git_dir(git_dir, format);
6532    let mut materialized = Vec::new();
6533    let mut skipped = Vec::new();
6534    let mut not_up_to_date = Vec::new();
6535    for entry in &mut index.entries {
6536        // Never touch conflicted entries.
6537        if index_entry_stage(entry) != 0 {
6538            continue;
6539        }
6540        if matcher.includes_file(entry.path.as_bytes()) {
6541            clear_skip_worktree(entry);
6542            let file_path = worktree_path(worktree_root, entry.path.as_bytes())?;
6543            if !file_path.exists() {
6544                materialize_index_entry_file(&db, &file_path, entry)?;
6545            }
6546            materialized.push(entry.path.as_bytes().to_vec());
6547        } else {
6548            // The path is out of cone, so its worktree file should be removed and
6549            // the entry marked skip-worktree. But git refuses to delete a file
6550            // that is *not up to date* with the index (e.g. one that reappeared in
6551            // the worktree after the path was already sparse): it leaves the file,
6552            // leaves the skip-worktree bit clear, and reports the path in its "not
6553            // up to date" warning. Mirror that to avoid silent data loss.
6554            let file_path = worktree_path(worktree_root, entry.path.as_bytes())?;
6555            match fs::symlink_metadata(&file_path) {
6556                Ok(metadata) if !worktree_entry_is_uptodate(entry, &metadata) => {
6557                    clear_skip_worktree(entry);
6558                    not_up_to_date.push(entry.path.as_bytes().to_vec());
6559                }
6560                _ => {
6561                    set_skip_worktree(entry);
6562                    remove_worktree_file(worktree_root, entry.path.as_bytes())?;
6563                    skipped.push(entry.path.as_bytes().to_vec());
6564                }
6565            }
6566        }
6567    }
6568    not_up_to_date.sort();
6569    normalize_index_version_for_extended_flags(&mut index);
6570    fs::write(index_path, index.write(format)?)?;
6571    Ok(ApplySparseResult {
6572        materialized,
6573        skipped,
6574        not_up_to_date,
6575    })
6576}
6577
6578/// Whether the worktree file described by `metadata` is up to date with `entry`'s
6579/// cached index stat, using the size + mtime heuristic at the core of git's
6580/// `ie_match_stat`. A freshly-checked-out (clean) file matches; a file that was
6581/// deleted and later recreated — as happens when an out-of-cone path reappears in
6582/// the worktree — gets a fresh mtime and so reads as modified, which is exactly
6583/// the state git declines to overwrite during a sparse update.
6584fn worktree_entry_is_uptodate(entry: &IndexEntry, metadata: &fs::Metadata) -> bool {
6585    if u64::from(entry.size) != metadata.len() {
6586        return false;
6587    }
6588    let Some((mtime_seconds, mtime_nanoseconds)) = file_mtime_parts(metadata) else {
6589        // Without a usable mtime we cannot prove the file is clean; treat it as
6590        // not up to date so a present file is never silently discarded.
6591        return false;
6592    };
6593    u64::from(entry.mtime_seconds) == mtime_seconds
6594        && u64::from(entry.mtime_nanoseconds) == mtime_nanoseconds
6595}
6596
/// Split the file's modification time into whole seconds since the Unix
/// epoch and the sub-second nanosecond remainder, matching how git stores
/// `mtime` in the index.
///
/// Returns `None` when the modification time is unavailable or lies before
/// the Unix epoch.
fn file_mtime_parts(metadata: &fs::Metadata) -> Option<(u64, u64)> {
    metadata
        .modified()
        .ok()
        .and_then(|modified| modified.duration_since(UNIX_EPOCH).ok())
        .map(|elapsed| (elapsed.as_secs(), u64::from(elapsed.subsec_nanos())))
}
6604
6605/// Write a git metadata file through a sibling `.lock` file and atomic rename.
6606///
6607/// This helper is intended for small repository/worktree metadata files such as
6608/// `HEAD`, `config.worktree`, or state files under `.git/`. It deliberately does
6609/// not try to replace object or pack writers, which have their own durability
6610/// and naming rules.
6611pub fn write_metadata_file_atomic(
6612    path: impl AsRef<Path>,
6613    bytes: &[u8],
6614    options: AtomicMetadataWriteOptions,
6615) -> Result<AtomicMetadataWriteResult> {
6616    let path = path.as_ref();
6617    let parent = path.parent().ok_or_else(|| {
6618        GitError::InvalidPath(format!("metadata path has no parent: {}", path.display()))
6619    })?;
6620    if !parent.as_os_str().is_empty() {
6621        fs::create_dir_all(parent)?;
6622    }
6623    let lock_path = metadata_lock_path(path)?;
6624    let mut lock = match fs::OpenOptions::new()
6625        .write(true)
6626        .create_new(true)
6627        .open(&lock_path)
6628    {
6629        Ok(lock) => lock,
6630        Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {
6631            return Err(GitError::Transaction(format!(
6632                "metadata lock already exists: {}",
6633                lock_path.display()
6634            )));
6635        }
6636        Err(err) => return Err(err.into()),
6637    };
6638    if let Err(err) = lock.write_all(bytes) {
6639        let _ = fs::remove_file(&lock_path);
6640        return Err(err.into());
6641    }
6642    if options.fsync_file
6643        && let Err(err) = lock.sync_all()
6644    {
6645        let _ = fs::remove_file(&lock_path);
6646        return Err(err.into());
6647    }
6648    drop(lock);
6649    if let Err(err) = fs::rename(&lock_path, path) {
6650        let _ = fs::remove_file(&lock_path);
6651        return Err(err.into());
6652    }
6653    if options.fsync_dir
6654        && let Ok(dir) = fs::File::open(parent)
6655    {
6656        dir.sync_all()?;
6657    }
6658    let metadata = fs::metadata(path)?;
6659    Ok(AtomicMetadataWriteResult {
6660        path: path.to_path_buf(),
6661        len: metadata.len(),
6662        mtime: file_mtime_parts(&metadata),
6663    })
6664}
6665
6666fn metadata_lock_path(path: &Path) -> Result<PathBuf> {
6667    let file_name = path.file_name().ok_or_else(|| {
6668        GitError::InvalidPath(format!("metadata path has no filename: {}", path.display()))
6669    })?;
6670    let mut lock_name = file_name.to_os_string();
6671    lock_name.push(".lock");
6672    Ok(path.with_file_name(lock_name))
6673}
6674
6675/// Checks out `target` like [`checkout_detached`], but materializes the
6676/// worktree through the supplied [`SparseCheckout`]: out-of-cone paths are not
6677/// written, get their skip-worktree bit set, and have any stale worktree file
6678/// removed. Existing public checkout entry points are unchanged; this is an
6679/// additive sparse-aware variant.
6680///
6681/// The pattern interpretation is auto-detected ([`SparseCheckoutMode::Auto`]);
6682/// to reconcile an existing checkout under an explicit mode use
6683/// [`apply_sparse_checkout_with_mode`].
6684pub fn checkout_detached_sparse(
6685    worktree_root: impl AsRef<Path>,
6686    git_dir: impl AsRef<Path>,
6687    format: ObjectFormat,
6688    target: &ObjectId,
6689    committer: Vec<u8>,
6690    message: Vec<u8>,
6691    sparse: &SparseCheckout,
6692) -> Result<CheckoutResult> {
6693    let worktree_root = worktree_root.as_ref();
6694    let git_dir = git_dir.as_ref();
6695    let files = checkout_commit_to_index_and_worktree_sparse(
6696        worktree_root,
6697        git_dir,
6698        format,
6699        target,
6700        Some((sparse, SparseCheckoutMode::Auto)),
6701    )?;
6702    let refs = FileRefStore::new(git_dir, format);
6703    let zero = ObjectId::null(format);
6704    let mut tx = refs.transaction();
6705    tx.update(RefUpdate {
6706        name: "HEAD".into(),
6707        expected: None,
6708        new: RefTarget::Direct(*target),
6709        reflog: Some(ReflogEntry {
6710            old_oid: zero,
6711            new_oid: *target,
6712            committer,
6713            message,
6714        }),
6715    });
6716    tx.commit()?;
6717    Ok(CheckoutResult {
6718        branch: target.to_string(),
6719        oid: *target,
6720        files,
6721    })
6722}
6723
6724fn materialize_index_entry_file(
6725    db: &FileObjectDatabase,
6726    file_path: &Path,
6727    entry: &IndexEntry,
6728) -> Result<()> {
6729    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
6730    if let Some(parent) = file_path.parent() {
6731        fs::create_dir_all(parent)?;
6732    }
6733    fs::write(file_path, &object.body)?;
6734    Ok(())
6735}
6736
6737fn set_skip_worktree(entry: &mut IndexEntry) {
6738    entry.flags |= INDEX_FLAG_EXTENDED;
6739    entry.flags_extended |= INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
6740}
6741
6742fn clear_skip_worktree(entry: &mut IndexEntry) {
6743    entry.flags_extended &= !INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
6744    if entry.flags_extended == 0 {
6745        entry.flags &= !INDEX_FLAG_EXTENDED;
6746    }
6747}
6748
6749pub fn restore_worktree_paths_from_head(
6750    worktree_root: impl AsRef<Path>,
6751    git_dir: impl AsRef<Path>,
6752    format: ObjectFormat,
6753    paths: &[PathBuf],
6754) -> Result<RestoreResult> {
6755    let worktree_root = worktree_root.as_ref();
6756    let git_dir = git_dir.as_ref();
6757    let index_path = repository_index_path(git_dir);
6758    let index = if index_path.exists() {
6759        Index::parse(&fs::read(&index_path)?, format)?
6760    } else {
6761        Index {
6762            version: 2,
6763            entries: Vec::new(),
6764            extensions: Vec::new(),
6765            checksum: None,
6766        }
6767    };
6768    let db = FileObjectDatabase::from_git_dir(git_dir, format);
6769    let head_entries = head_tree_entries(git_dir, format, &db)?;
6770    restore_worktree_paths_from_entries(worktree_root, &db, index, &head_entries, paths)
6771}
6772
6773pub fn restore_worktree_paths_from_tree(
6774    worktree_root: impl AsRef<Path>,
6775    git_dir: impl AsRef<Path>,
6776    format: ObjectFormat,
6777    tree_oid: &ObjectId,
6778    paths: &[PathBuf],
6779) -> Result<RestoreResult> {
6780    let worktree_root = worktree_root.as_ref();
6781    let git_dir = git_dir.as_ref();
6782    let index_path = repository_index_path(git_dir);
6783    let index = if index_path.exists() {
6784        Index::parse(&fs::read(&index_path)?, format)?
6785    } else {
6786        Index {
6787            version: 2,
6788            entries: Vec::new(),
6789            extensions: Vec::new(),
6790            checksum: None,
6791        }
6792    };
6793    let db = FileObjectDatabase::from_git_dir(git_dir, format);
6794    let source_entries = tree_entries(&db, format, tree_oid)?;
6795    restore_worktree_paths_from_entries(worktree_root, &db, index, &source_entries, paths)
6796}
6797
6798fn restore_worktree_paths_from_entries(
6799    worktree_root: &Path,
6800    db: &FileObjectDatabase,
6801    index: Index,
6802    source_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
6803    paths: &[PathBuf],
6804) -> Result<RestoreResult> {
6805    let index_entries = index
6806        .entries
6807        .into_iter()
6808        .map(|entry| entry.path.into_bytes())
6809        .collect::<BTreeSet<_>>();
6810    let mut restored = BTreeSet::new();
6811    for path in paths {
6812        let absolute = if path.is_absolute() {
6813            path.clone()
6814        } else {
6815            worktree_root.join(path)
6816        };
6817        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
6818            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
6819        })?;
6820        let git_path = git_path_bytes(relative)?;
6821        let recursive = path == Path::new(".")
6822            || path.to_string_lossy().ends_with('/')
6823            || absolute.is_dir()
6824            || index_entries
6825                .iter()
6826                .any(|entry| index_entry_is_under_path(entry, &git_path))
6827            || source_entries
6828                .keys()
6829                .any(|entry| index_entry_is_under_path(entry, &git_path));
6830        let mut matched_paths = BTreeSet::new();
6831        for path in index_entries.iter().chain(source_entries.keys()) {
6832            if path.as_slice() == git_path.as_slice()
6833                || (recursive && index_entry_is_under_path(path, &git_path))
6834            {
6835                matched_paths.insert(path.clone());
6836            }
6837        }
6838        if matched_paths.is_empty() {
6839            eprintln!(
6840                "error: pathspec '{}' did not match any file(s) known to git",
6841                path.display()
6842            );
6843            return Err(GitError::Exit(1));
6844        }
6845        for path in matched_paths {
6846            if let Some(entry) = source_entries.get(&path) {
6847                restore_head_entry_to_worktree(worktree_root, db, &path, entry)?;
6848            } else {
6849                remove_worktree_file(worktree_root, &path)?;
6850            }
6851            restored.insert(path);
6852        }
6853    }
6854    Ok(RestoreResult {
6855        restored: restored.len(),
6856    })
6857}
6858
6859pub fn remove_index_and_worktree_paths(
6860    worktree_root: impl AsRef<Path>,
6861    git_dir: impl AsRef<Path>,
6862    format: ObjectFormat,
6863    paths: &[PathBuf],
6864    options: RemoveOptions,
6865) -> Result<RemoveResult> {
6866    let worktree_root = worktree_root.as_ref();
6867    let git_dir = git_dir.as_ref();
6868    let index_path = repository_index_path(git_dir);
6869    let index = if index_path.exists() {
6870        Index::parse(&fs::read(&index_path)?, format)?
6871    } else {
6872        Index {
6873            version: 2,
6874            entries: Vec::new(),
6875            extensions: Vec::new(),
6876            checksum: None,
6877        }
6878    };
6879    let db = FileObjectDatabase::from_git_dir(git_dir, format);
6880    let head_entries = head_tree_entries(git_dir, format, &db)?;
6881    let mut index_entries = index
6882        .entries
6883        .into_iter()
6884        .map(|entry| (entry.path.as_bytes().to_vec(), entry))
6885        .collect::<BTreeMap<_, _>>();
6886    let mut selected = BTreeSet::new();
6887    for path in paths {
6888        let absolute = if path.is_absolute() {
6889            path.clone()
6890        } else {
6891            worktree_root.join(path)
6892        };
6893        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
6894            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
6895        })?;
6896        let git_path = git_path_bytes(relative)?;
6897        if index_entries.contains_key(&git_path) {
6898            selected.insert(git_path);
6899            continue;
6900        }
6901        // A wildcard pathspec (e.g. `git rm "*"` or `git rm "dir/*.c"`) matches
6902        // index entries by git's pathspec matcher rather than by literal path or
6903        // directory prefix. Try the glob match first when the spec contains
6904        // wildcard metacharacters; a glob match removes the entries directly
6905        // (no `-r` needed — the pathspec already names the files).
6906        if pathspec_is_glob(&git_path) {
6907            let glob_matched = index_entries
6908                .keys()
6909                .filter(|entry| {
6910                    pathspec_item_matches(&git_path, entry, PathspecMatchMagic::default())
6911                })
6912                .cloned()
6913                .collect::<Vec<_>>();
6914            if !glob_matched.is_empty() {
6915                selected.extend(glob_matched);
6916                continue;
6917            }
6918            if options.ignore_unmatch {
6919                continue;
6920            }
6921            eprintln!(
6922                "fatal: pathspec '{}' did not match any files",
6923                String::from_utf8_lossy(&git_path)
6924            );
6925            return Err(GitError::Exit(128));
6926        }
6927        let matched = index_entries
6928            .keys()
6929            .filter(|entry| index_entry_is_under_path(entry, &git_path))
6930            .cloned()
6931            .collect::<Vec<_>>();
6932        if matched.is_empty() {
6933            if options.ignore_unmatch {
6934                continue;
6935            }
6936            eprintln!(
6937                "fatal: pathspec '{}' did not match any files",
6938                String::from_utf8_lossy(&git_path)
6939            );
6940            return Err(GitError::Exit(128));
6941        }
6942        if !options.recursive {
6943            eprintln!(
6944                "fatal: not removing '{}' recursively without -r",
6945                String::from_utf8_lossy(&git_path)
6946            );
6947            return Err(GitError::Exit(128));
6948        }
6949        selected.extend(matched);
6950    }
6951    if !options.cached && !options.force {
6952        let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
6953        for path in &selected {
6954            let Some(index_entry) = index_entries.get(path) else {
6955                continue;
6956            };
6957            match head_entries.get(path) {
6958                Some(head_entry)
6959                    if head_entry.oid == index_entry.oid && head_entry.mode == index_entry.mode => {
6960                }
6961                _ => {
6962                    eprintln!("error: the following file has changes staged in the index:");
6963                    eprintln!("    {}", String::from_utf8_lossy(path));
6964                    eprintln!("(use --cached to keep the file, or -f to force removal)");
6965                    return Err(GitError::Exit(1));
6966                }
6967            }
6968            let worktree_file = worktree_path(worktree_root, path)?;
6969            if worktree_file.exists() {
6970                let object = read_expected_object(&db, &index_entry.oid, ObjectType::Blob)?;
6971                let worktree_bytes = apply_clean_filter(
6972                    worktree_root,
6973                    git_dir,
6974                    &config,
6975                    path,
6976                    &fs::read(&worktree_file)?,
6977                )?;
6978                if worktree_bytes != object.body {
6979                    eprintln!("error: the following file has local modifications:");
6980                    eprintln!("    {}", String::from_utf8_lossy(path));
6981                    eprintln!("(use --cached to keep the file, or -f to force removal)");
6982                    return Err(GitError::Exit(1));
6983                }
6984            }
6985        }
6986    }
6987    for path in &selected {
6988        if options.dry_run {
6989            continue;
6990        }
6991        if !options.cached {
6992            remove_worktree_file(worktree_root, path)?;
6993        }
6994        index_entries.remove(path);
6995    }
6996    if options.dry_run {
6997        return Ok(RemoveResult {
6998            removed: selected.into_iter().collect(),
6999        });
7000    }
7001    let entries = index_entries.into_values().collect::<Vec<_>>();
7002    fs::write(
7003        index_path,
7004        Index {
7005            version: 2,
7006            entries,
7007            extensions: Vec::new(),
7008            checksum: None,
7009        }
7010        .write(format)?,
7011    )?;
7012    Ok(RemoveResult {
7013        removed: selected.into_iter().collect(),
7014    })
7015}
7016
7017pub fn move_index_and_worktree_path(
7018    worktree_root: impl AsRef<Path>,
7019    git_dir: impl AsRef<Path>,
7020    format: ObjectFormat,
7021    source: &Path,
7022    destination: &Path,
7023    options: MoveOptions,
7024) -> Result<MoveResult> {
7025    let worktree_root = worktree_root.as_ref();
7026    let git_dir = git_dir.as_ref();
7027    let index_path = repository_index_path(git_dir);
7028    let mut index = if index_path.exists() {
7029        Index::parse(&fs::read(&index_path)?, format)?
7030    } else {
7031        Index {
7032            version: 2,
7033            entries: Vec::new(),
7034            extensions: Vec::new(),
7035            checksum: None,
7036        }
7037    };
7038    let source_absolute = if source.is_absolute() {
7039        source.to_path_buf()
7040    } else {
7041        worktree_root.join(source)
7042    };
7043    let destination_absolute = if destination.is_absolute() {
7044        destination.to_path_buf()
7045    } else {
7046        worktree_root.join(destination)
7047    };
7048    let destination_absolute = if destination_absolute.is_dir() {
7049        let Some(file_name) = source_absolute.file_name() else {
7050            return Err(GitError::InvalidPath(format!(
7051                "invalid source path {}",
7052                source.display()
7053            )));
7054        };
7055        destination_absolute.join(file_name)
7056    } else {
7057        destination_absolute
7058    };
7059    let source_relative = source_absolute.strip_prefix(worktree_root).map_err(|_| {
7060        GitError::InvalidPath(format!("path {} is outside worktree", source.display()))
7061    })?;
7062    let destination_relative = destination_absolute
7063        .strip_prefix(worktree_root)
7064        .map_err(|_| {
7065            GitError::InvalidPath(format!(
7066                "path {} is outside worktree",
7067                destination.display()
7068            ))
7069        })?;
7070    let source_path = git_path_bytes(source_relative)?;
7071    let destination_path = git_path_bytes(destination_relative)?;
7072    let destination_has_trailing_separator = path_has_trailing_separator(&destination_absolute);
7073    if destination_has_trailing_separator && !destination_absolute.is_dir() {
7074        if options.skip_errors {
7075            return Ok(MoveResult {
7076                source: source_path,
7077                destination: destination_path,
7078                skipped: true,
7079                fatal: None,
7080                details: Vec::new(),
7081            });
7082        }
7083        let mut destination = String::from_utf8_lossy(&destination_path).into_owned();
7084        destination.push('/');
7085        if options.dry_run {
7086            let fatal = format!(
7087                "fatal: destination directory does not exist, source={}, destination={destination}",
7088                String::from_utf8_lossy(&source_path),
7089            );
7090            return Ok(MoveResult {
7091                source: source_path,
7092                destination: destination.clone().into_bytes(),
7093                skipped: false,
7094                fatal: Some(fatal),
7095                details: Vec::new(),
7096            });
7097        }
7098        eprintln!(
7099            "fatal: destination directory does not exist, source={}, destination={destination}",
7100            String::from_utf8_lossy(&source_path),
7101        );
7102        return Err(GitError::Exit(128));
7103    }
7104    if destination_absolute.exists() {
7105        if !options.force {
7106            if options.skip_errors {
7107                return Ok(MoveResult {
7108                    source: source_path,
7109                    destination: destination_path,
7110                    skipped: true,
7111                    fatal: None,
7112                    details: Vec::new(),
7113                });
7114            }
7115            if options.dry_run {
7116                let fatal = format!(
7117                    "fatal: destination exists, source={}, destination={}",
7118                    String::from_utf8_lossy(&source_path),
7119                    String::from_utf8_lossy(&destination_path)
7120                );
7121                return Ok(MoveResult {
7122                    source: source_path,
7123                    destination: destination_path,
7124                    skipped: false,
7125                    fatal: Some(fatal),
7126                    details: Vec::new(),
7127                });
7128            }
7129            eprintln!(
7130                "fatal: destination exists, source={}, destination={}",
7131                String::from_utf8_lossy(&source_path),
7132                String::from_utf8_lossy(&destination_path)
7133            );
7134            return Err(GitError::Exit(128));
7135        }
7136        if !options.dry_run && destination_absolute.is_dir() {
7137            fs::remove_dir_all(&destination_absolute)?;
7138        } else if !options.dry_run {
7139            fs::remove_file(&destination_absolute)?;
7140        }
7141    }
7142    let directory_prefix = {
7143        let mut prefix = source_path.clone();
7144        prefix.push(b'/');
7145        prefix
7146    };
7147    let directory_entries: Vec<_> = index
7148        .entries
7149        .iter()
7150        .filter(|entry| entry.path.as_bytes().starts_with(&directory_prefix))
7151        .cloned()
7152        .collect();
7153    if !directory_entries.is_empty() {
7154        let details: Vec<_> = directory_entries
7155            .iter()
7156            .map(|entry| {
7157                let suffix = &entry.path.as_bytes()[source_path.len()..];
7158                let mut destination = destination_path.clone();
7159                destination.extend_from_slice(suffix);
7160                MoveDetail {
7161                    source: entry.path.as_bytes().to_vec(),
7162                    destination,
7163                    skipped: false,
7164                }
7165            })
7166            .collect();
7167        if options.dry_run {
7168            return Ok(MoveResult {
7169                source: source_path,
7170                destination: destination_path,
7171                skipped: false,
7172                fatal: None,
7173                details,
7174            });
7175        }
7176        fs::rename(&source_absolute, &destination_absolute)?;
7177        let moved_paths: Vec<_> = details
7178            .iter()
7179            .map(|detail| detail.destination.clone())
7180            .collect();
7181        index.entries.retain(|entry| {
7182            !entry.path.as_bytes().starts_with(&directory_prefix)
7183                && !moved_paths
7184                    .iter()
7185                    .any(|m| m.as_slice() == entry.path.as_bytes())
7186        });
7187        for (source_entry, detail) in directory_entries.into_iter().zip(details.iter()) {
7188            let relative_path = git_path_to_relative_path(&detail.destination)?;
7189            let metadata = fs::metadata(worktree_root.join(relative_path))?;
7190            let mut destination_entry =
7191                index_entry_from_metadata(detail.destination.clone(), source_entry.oid, &metadata);
7192            destination_entry.mode = source_entry.mode;
7193            index.entries.push(destination_entry);
7194        }
7195        index
7196            .entries
7197            .sort_by(|left, right| left.path.cmp(&right.path));
7198        index.extensions.clear();
7199        fs::write(index_path, index.write(format)?)?;
7200        return Ok(MoveResult {
7201            source: source_path,
7202            destination: destination_path,
7203            skipped: false,
7204            fatal: None,
7205            details,
7206        });
7207    }
7208
7209    let Some(position) = index
7210        .entries
7211        .iter()
7212        .position(|entry| entry.path == source_path)
7213    else {
7214        if options.skip_errors {
7215            return Ok(MoveResult {
7216                source: source_path,
7217                destination: destination_path,
7218                skipped: true,
7219                fatal: None,
7220                details: Vec::new(),
7221            });
7222        }
7223        let source_kind = if source_absolute.exists() {
7224            "not under version control"
7225        } else {
7226            "bad source"
7227        };
7228        if options.dry_run {
7229            let fatal = format!(
7230                "fatal: {source_kind}, source={}, destination={}",
7231                String::from_utf8_lossy(&source_path),
7232                String::from_utf8_lossy(&destination_path)
7233            );
7234            return Ok(MoveResult {
7235                source: source_path,
7236                destination: destination_path,
7237                skipped: false,
7238                fatal: Some(fatal),
7239                details: Vec::new(),
7240            });
7241        }
7242        eprintln!(
7243            "fatal: {source_kind}, source={}, destination={}",
7244            String::from_utf8_lossy(&source_path),
7245            String::from_utf8_lossy(&destination_path)
7246        );
7247        return Err(GitError::Exit(128));
7248    };
7249    if options.dry_run {
7250        return Ok(MoveResult {
7251            source: source_path,
7252            destination: destination_path,
7253            skipped: false,
7254            fatal: None,
7255            details: Vec::new(),
7256        });
7257    }
7258    if let Some(parent) = destination_absolute.parent()
7259        && !parent.exists()
7260    {
7261        if options.skip_errors {
7262            return Ok(MoveResult {
7263                source: source_path,
7264                destination: destination_path,
7265                skipped: true,
7266                fatal: None,
7267                details: Vec::new(),
7268            });
7269        }
7270        eprintln!(
7271            "fatal: renaming '{}' failed: No such file or directory",
7272            String::from_utf8_lossy(&source_path)
7273        );
7274        return Err(GitError::Exit(128));
7275    }
7276    fs::rename(&source_absolute, &destination_absolute)?;
7277    let metadata = fs::metadata(&destination_absolute)?;
7278    let source_entry = index.entries.remove(position);
7279    let mut destination_entry =
7280        index_entry_from_metadata(destination_path.clone(), source_entry.oid, &metadata);
7281    destination_entry.mode = source_entry.mode;
7282    index.entries.retain(|entry| entry.path != destination_path);
7283    index.entries.push(destination_entry);
7284    index
7285        .entries
7286        .sort_by(|left, right| left.path.cmp(&right.path));
7287    index.extensions.clear();
7288    fs::write(index_path, index.write(format)?)?;
7289    Ok(MoveResult {
7290        source: source_path,
7291        destination: destination_path,
7292        skipped: false,
7293        fatal: None,
7294        details: Vec::new(),
7295    })
7296}
7297
7298fn restore_index_entry(
7299    worktree_root: &Path,
7300    git_dir: &Path,
7301    format: ObjectFormat,
7302    db: &FileObjectDatabase,
7303    entry: &IndexEntry,
7304    smudge_config: Option<&GitConfig>,
7305) -> Result<()> {
7306    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
7307    let body: Cow<'_, [u8]> = match smudge_config {
7308        Some(config) => {
7309            let checks = smudge_attribute_checks_from_index(
7310                worktree_root,
7311                git_dir,
7312                format,
7313                entry.path.as_bytes(),
7314            )?;
7315            apply_smudge_filter_with_attributes_cow(
7316                config,
7317                &checks,
7318                entry.path.as_bytes(),
7319                &object.body,
7320            )?
7321        }
7322        None => Cow::Borrowed(&object.body),
7323    };
7324    let file_path = worktree_path(worktree_root, entry.path.as_bytes())?;
7325    if let Some(parent) = file_path.parent() {
7326        fs::create_dir_all(parent)?;
7327    }
7328    fs::write(file_path, &body)?;
7329    Ok(())
7330}
7331
7332fn restored_head_index_entry(
7333    worktree_root: &Path,
7334    db: &FileObjectDatabase,
7335    path: &[u8],
7336    entry: &TrackedEntry,
7337) -> Result<IndexEntry> {
7338    let file_path = worktree_path(worktree_root, path)?;
7339    // This restores the index from a tree (reset --mixed / stash / sparse) WITHOUT
7340    // rewriting the worktree file, so the file on disk may hold different content
7341    // than `entry.oid`. Crucially we must NOT copy the worktree file's stat onto
7342    // this entry: that would make the cached stat match a file whose real content
7343    // hashes to a DIFFERENT oid, breaking git's "stat-match implies oid-match"
7344    // invariant that the status stat-cache relies on. Leave the stat zeroed so
7345    // status always re-hashes this path and detects any modification -- exactly
7346    // git's behavior for tree-sourced entries until a later refresh validates them.
7347    let size = if entry.mode == 0o160000 {
7348        // A gitlink's oid names a commit in the submodule's repository — it is
7349        // not readable here, and a tree-sourced gitlink entry carries size 0.
7350        0
7351    } else {
7352        match fs::metadata(&file_path) {
7353            Ok(metadata) => metadata.len().min(u32::MAX as u64) as u32,
7354            Err(_) => {
7355                let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
7356                object.body.len().min(u32::MAX as usize) as u32
7357            }
7358        }
7359    };
7360    Ok(IndexEntry {
7361        ctime_seconds: 0,
7362        ctime_nanoseconds: 0,
7363        mtime_seconds: 0,
7364        mtime_nanoseconds: 0,
7365        dev: 0,
7366        ino: 0,
7367        mode: entry.mode,
7368        uid: 0,
7369        gid: 0,
7370        size,
7371        oid: entry.oid,
7372        flags: path.len().min(0x0fff) as u16,
7373        flags_extended: 0,
7374        path: BString::from(path),
7375    })
7376}
7377
7378fn restore_head_entry_to_worktree(
7379    worktree_root: &Path,
7380    db: &FileObjectDatabase,
7381    path: &[u8],
7382    entry: &TrackedEntry,
7383) -> Result<()> {
7384    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
7385    let file_path = worktree_path(worktree_root, path)?;
7386    if let Some(parent) = file_path.parent() {
7387        fs::create_dir_all(parent)?;
7388    }
7389    fs::write(file_path, &object.body)?;
7390    Ok(())
7391}
7392
7393fn restore_head_entry_to_worktree_and_index(
7394    worktree_root: &Path,
7395    db: &FileObjectDatabase,
7396    path: &[u8],
7397    entry: &TrackedEntry,
7398) -> Result<IndexEntry> {
7399    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
7400    let file_path = worktree_path(worktree_root, path)?;
7401    if let Some(parent) = file_path.parent() {
7402        fs::create_dir_all(parent)?;
7403    }
7404    fs::write(&file_path, &object.body)?;
7405    let metadata = fs::metadata(&file_path)?;
7406    let mut index_entry = index_entry_from_metadata(path.to_vec(), entry.oid, &metadata);
7407    index_entry.mode = entry.mode;
7408    Ok(index_entry)
7409}
7410
7411fn index_has_entry_under(entries: &[IndexEntry], directory: &[u8]) -> bool {
7412    entries
7413        .iter()
7414        .any(|entry| index_entry_is_under_path(entry.path.as_bytes(), directory))
7415}
7416
/// Returns `true` when `entry_path` lies strictly beneath `directory`, i.e. it
/// starts with `directory` immediately followed by `/`. An empty `directory`
/// stands for the root and contains every path. A path equal to `directory`
/// itself is not considered to be under it.
fn index_entry_is_under_path(entry_path: &[u8], directory: &[u8]) -> bool {
    directory.is_empty() || matches!(entry_path.strip_prefix(directory), Some([b'/', ..]))
}
7426
7427fn index_entry_from_metadata(
7428    path: impl Into<BString>,
7429    oid: ObjectId,
7430    metadata: &fs::Metadata,
7431) -> IndexEntry {
7432    let modified = metadata.modified().ok();
7433    let duration = modified
7434        .and_then(|time| time.duration_since(UNIX_EPOCH).ok())
7435        .unwrap_or_default();
7436    let mode = file_mode(metadata);
7437    let path = path.into();
7438    let flags = path.len().min(0x0fff) as u16;
7439    IndexEntry {
7440        ctime_seconds: duration.as_secs().min(u32::MAX as u64) as u32,
7441        ctime_nanoseconds: duration.subsec_nanos(),
7442        mtime_seconds: duration.as_secs().min(u32::MAX as u64) as u32,
7443        mtime_nanoseconds: duration.subsec_nanos(),
7444        dev: 0,
7445        ino: 0,
7446        mode,
7447        uid: 0,
7448        gid: 0,
7449        size: metadata.len().min(u32::MAX as u64) as u32,
7450        oid,
7451        flags,
7452        flags_extended: 0,
7453        path,
7454    }
7455}
7456
7457fn read_expected_object(
7458    db: &FileObjectDatabase,
7459    oid: &ObjectId,
7460    expected: ObjectType,
7461) -> Result<std::sync::Arc<EncodedObject>> {
7462    let object = db
7463        .read_object(oid)
7464        .map_err(|err| expect_missing_object_kind(err, *oid, missing_kind_for_type(expected)))?;
7465    if object.object_type != expected {
7466        return Err(GitError::InvalidObject(format!(
7467            "expected {} {}, found {}",
7468            expected.as_str(),
7469            oid,
7470            object.object_type.as_str()
7471        )));
7472    }
7473    Ok(object)
7474}
7475
7476fn expect_missing_object_kind(
7477    err: GitError,
7478    oid: ObjectId,
7479    expected: MissingObjectKind,
7480) -> GitError {
7481    match err.not_found_kind() {
7482        Some(sley_core::NotFoundKind::Object { .. }) => GitError::object_kind_not_found_in(
7483            oid,
7484            expected,
7485            MissingObjectContext::WorktreeMaterialize,
7486        ),
7487        _ => err,
7488    }
7489}
7490
7491fn missing_kind_for_type(object_type: ObjectType) -> MissingObjectKind {
7492    match object_type {
7493        ObjectType::Blob => MissingObjectKind::Blob,
7494        ObjectType::Tree => MissingObjectKind::Tree,
7495        ObjectType::Commit => MissingObjectKind::Commit,
7496        ObjectType::Tag => MissingObjectKind::Tag,
7497    }
7498}
7499
7500fn read_commit(db: &FileObjectDatabase, format: ObjectFormat, oid: &ObjectId) -> Result<Commit> {
7501    let object = read_expected_object(db, oid, ObjectType::Commit)?;
7502    Commit::parse(format, &object.body)
7503}
7504
/// The (mode, object id) pair recorded for a single path.
///
/// Used as the value type of the path-keyed maps this module builds from the
/// index, from trees, and from the worktree walk, so the three sources can be
/// compared entry by entry.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TrackedEntry {
    /// Git file mode of the entry (e.g. `0o160000` marks a gitlink).
    mode: u32,
    /// Object id of the entry's content.
    oid: ObjectId,
}
7510
/// git's racy-git stat cache: the stage-0 index entries keyed by path (so the
/// worktree walk can reuse a cached oid when a file's stat shows it is unchanged
/// since it was staged) plus the index *file's* own mtime, which git uses as the
/// racy-clean reference timestamp.
///
/// SAFETY INVARIANT: trusting a cached oid by stat alone is only sound because
/// every code path that stamps a worktree stat onto an index entry also hashed
/// that exact file content (see `index_entry_from_metadata`), while tree-sourced
/// restores (reset --mixed / stash / sparse) leave the stat zeroed
/// (`restored_head_index_entry`). So a non-zero, non-racy stat match implies the
/// cached oid is the file's true content. When that does not hold we fall through
/// to a full read+filter+hash, so a modified file is never reported clean.
///
/// The `Default` value (no entries, unknown index mtime) is what callers get
/// when there is no index file on disk.
#[derive(Debug, Clone, Default)]
struct IndexStatCache {
    /// Stage-0 index entries keyed by their repository path bytes.
    entries: HashMap<Vec<u8>, IndexEntry>,
    /// The index file's modification time as `(seconds, nanoseconds)`, or `None`
    /// when it could not be determined. Used as git's racy-clean reference.
    index_mtime: Option<(u64, u64)>,
}
7530
7531impl IndexStatCache {
7532    /// Builds the cache from an already-parsed index plus the path of the index
7533    /// file on disk (whose mtime becomes the racy-clean reference). Only stage-0
7534    /// entries are retained; higher merge stages never describe a worktree file.
7535    fn from_index(index: &Index, index_path: &Path) -> Self {
7536        let index_mtime = fs::metadata(index_path)
7537            .ok()
7538            .and_then(|metadata| file_mtime_parts(&metadata));
7539        Self::from_index_mtime(index, index_mtime)
7540    }
7541
7542    fn from_index_mtime(index: &Index, index_mtime: Option<(u64, u64)>) -> Self {
7543        IndexStatCache {
7544            entries: stage0_index_entries(index),
7545            index_mtime,
7546        }
7547    }
7548
7549    /// Whether `entry` is "racily clean" in git's sense: its cached mtime is not
7550    /// strictly older than the index file's mtime, so a same-timestamp write
7551    /// could have changed the content without moving the stat. Such entries must
7552    /// always be re-hashed.
7553    ///
7554    /// Conservative by construction: if the index mtime is unknown, or either
7555    /// side's mtime is zero (e.g. a tree-sourced entry whose stat was left
7556    /// zeroed), this returns `true` so the caller re-hashes rather than trusting
7557    /// a stat we cannot prove safe.
7558    fn is_racily_clean(&self, entry: &IndexEntry) -> bool {
7559        let Some(index_mtime) = self.index_mtime else {
7560            return true;
7561        };
7562        if index_mtime == (0, 0) {
7563            return true;
7564        }
7565        let entry_mtime = (
7566            u64::from(entry.mtime_seconds),
7567            u64::from(entry.mtime_nanoseconds),
7568        );
7569        if entry_mtime == (0, 0) {
7570            return true;
7571        }
7572        // Racy unless the index was written strictly after the entry's mtime.
7573        index_mtime <= entry_mtime
7574    }
7575
7576    /// Whether the index has a stage-0 entry for `git_path` (i.e. the path is
7577    /// tracked). Used to skip hashing untracked worktree files.
7578    fn contains(&self, git_path: &[u8]) -> bool {
7579        self.entries.contains_key(git_path)
7580    }
7581
7582    fn tracked_entry(&self, git_path: &[u8]) -> Option<TrackedEntry> {
7583        self.entries.get(git_path).map(|entry| TrackedEntry {
7584            mode: entry.mode,
7585            oid: entry.oid,
7586        })
7587    }
7588
7589    /// Returns the cached [`TrackedEntry`] for `git_path` (reusing its stored
7590    /// oid, so the caller can SKIP reading, filtering, and hashing the file) only
7591    /// when the worktree file is provably unchanged since it was staged: a
7592    /// stage-0 entry exists, its recorded mode matches the file's current mode
7593    /// (catching pure `chmod`s that do not move mtime), the size+mtime stat
7594    /// check passes, and the entry is not racily clean. Otherwise returns `None`
7595    /// and the caller hashes the file as usual.
7596    fn reuse_tracked_entry(
7597        &self,
7598        git_path: &[u8],
7599        worktree_metadata: &fs::Metadata,
7600    ) -> Option<TrackedEntry> {
7601        let entry = self.entries.get(git_path)?;
7602        self.reuse_index_entry(entry, worktree_metadata)
7603    }
7604
7605    fn reuse_index_entry(
7606        &self,
7607        entry: &IndexEntry,
7608        worktree_metadata: &fs::Metadata,
7609    ) -> Option<TrackedEntry> {
7610        if entry.mode != worktree_entry_mode(worktree_metadata) {
7611            return None;
7612        }
7613        if !worktree_entry_is_uptodate(entry, worktree_metadata) {
7614            return None;
7615        }
7616        if self.is_racily_clean(entry) {
7617            return None;
7618        }
7619        Some(TrackedEntry {
7620            mode: entry.mode,
7621            oid: entry.oid,
7622        })
7623    }
7624
7625    /// The stage-0 gitlink (mode 160000) index entry at `git_path`, if any.
7626    fn gitlink_entry(&self, git_path: &[u8]) -> Option<&IndexEntry> {
7627        self.entries
7628            .get(git_path)
7629            .filter(|entry| entry.mode == 0o160000)
7630    }
7631}
7632
7633fn read_index_entries(
7634    git_dir: &Path,
7635    format: ObjectFormat,
7636) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
7637    let db = FileObjectDatabase::from_git_dir(git_dir, format);
7638    Ok(read_index_entries_with_stat_cache(git_dir, format, &db)?.0)
7639}
7640
7641fn resolve_head_tree_oid(
7642    git_dir: &Path,
7643    format: ObjectFormat,
7644    db: &FileObjectDatabase,
7645) -> Result<Option<ObjectId>> {
7646    let Some(commit_oid) = resolve_head_commit_oid(git_dir, format)? else {
7647        return Ok(None);
7648    };
7649    let object = read_expected_object(db, &commit_oid, ObjectType::Commit)?;
7650    let commit = Commit::parse_ref(format, &object.body)?;
7651    Ok(Some(commit.tree))
7652}
7653
7654fn resolve_head_commit_oid(git_dir: &Path, format: ObjectFormat) -> Result<Option<ObjectId>> {
7655    let refs = FileRefStore::new(git_dir, format);
7656    sley_refs::resolve_ref_peeled(&refs, "HEAD")
7657}
7658
7659fn status_entry_is_untracked_or_ignored(entry: &ShortStatusEntry) -> bool {
7660    matches!((entry.index, entry.worktree), (b'?', b'?') | (b'!', b'!'))
7661}
7662
7663fn checkout_switch_head_symbolic(
7664    refs: &FileRefStore,
7665    branch_ref: String,
7666    committer: Vec<u8>,
7667    branch: &str,
7668    old_oid: Option<ObjectId>,
7669    new_oid: Option<ObjectId>,
7670) -> Result<()> {
7671    // Reflog "from" side: the previous branch's short name, or the commit id
7672    // when HEAD was detached (git's `checkout: moving from X to Y` shape,
7673    // which `@{-N}` resolution parses).
7674    let from = match refs.read_ref("HEAD") {
7675        Ok(Some(RefTarget::Symbolic(name))) => name
7676            .strip_prefix("refs/heads/")
7677            .unwrap_or(&name)
7678            .to_string(),
7679        Ok(Some(RefTarget::Direct(oid))) => oid.to_hex(),
7680        _ => "HEAD".to_string(),
7681    };
7682    let mut tx = refs.transaction();
7683    let reflog = match (old_oid, new_oid) {
7684        (Some(old_oid), Some(new_oid)) => Some(ReflogEntry {
7685            old_oid,
7686            new_oid,
7687            committer,
7688            message: format!("checkout: moving from {from} to {branch}").into_bytes(),
7689        }),
7690        _ => None,
7691    };
7692    tx.update(RefUpdate {
7693        name: "HEAD".into(),
7694        expected: None,
7695        new: RefTarget::Symbolic(branch_ref),
7696        reflog,
7697    });
7698    tx.commit()
7699}
7700
7701fn cache_tree_is_valid(tree: &CacheTree) -> bool {
7702    if tree.entry_count < 0 || tree.oid.is_none() {
7703        return false;
7704    }
7705    tree.subtrees
7706        .iter()
7707        .all(|child| cache_tree_is_valid(&child.tree))
7708}
7709
7710fn head_matches_index_from_cache_tree(
7711    index: &Index,
7712    format: ObjectFormat,
7713    head_tree_oid: &ObjectId,
7714    stage0_entry_count: usize,
7715) -> Result<bool> {
7716    let cache_tree = match index.cache_tree(format) {
7717        Ok(Some(cache_tree)) => cache_tree,
7718        Ok(None) | Err(_) => return Ok(false),
7719    };
7720    if !cache_tree_is_valid(&cache_tree) {
7721        return Ok(false);
7722    }
7723    let Some(root_oid) = cache_tree.oid.as_ref() else {
7724        return Ok(false);
7725    };
7726    if root_oid != head_tree_oid {
7727        return Ok(false);
7728    }
7729    Ok(cache_tree.entry_count as usize == stage0_entry_count)
7730}
7731
7732/// Parses the index a single time and returns both the path -> [`TrackedEntry`]
7733/// map used for status comparisons AND the [`IndexStatCache`] used to short-cut
7734/// the worktree walk, avoiding a second parse of the same file.
7735fn read_index_entries_with_stat_cache(
7736    git_dir: &Path,
7737    format: ObjectFormat,
7738    db: &FileObjectDatabase,
7739) -> Result<(BTreeMap<Vec<u8>, TrackedEntry>, IndexStatCache, bool)> {
7740    let (index, stat_cache, head_matches_index) = read_index_with_stat_cache(git_dir, format, db)?;
7741    let tracked = index_entries_from_index(index);
7742    Ok((tracked, stat_cache, head_matches_index))
7743}
7744
7745fn index_entries_from_index(index: Index) -> BTreeMap<Vec<u8>, TrackedEntry> {
7746    index
7747        .entries
7748        .into_iter()
7749        .filter(|entry| entry.stage() == Stage::Normal)
7750        .map(|entry| {
7751            (
7752                entry.path.into_bytes(),
7753                TrackedEntry {
7754                    mode: entry.mode,
7755                    oid: entry.oid,
7756                },
7757            )
7758        })
7759        .collect()
7760}
7761
7762fn read_index_with_stat_cache(
7763    git_dir: &Path,
7764    format: ObjectFormat,
7765    db: &FileObjectDatabase,
7766) -> Result<(Index, IndexStatCache, bool)> {
7767    let index_path = repository_index_path(git_dir);
7768    let index_metadata = match fs::metadata(&index_path) {
7769        Ok(metadata) => metadata,
7770        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
7771            return Ok((
7772                Index {
7773                    version: 2,
7774                    entries: Vec::new(),
7775                    extensions: Vec::new(),
7776                    checksum: None,
7777                },
7778                IndexStatCache::default(),
7779                false,
7780            ));
7781        }
7782        Err(err) => return Err(err.into()),
7783    };
7784    let index = Index::parse(&fs::read(&index_path)?, format)?;
7785    let stat_cache = IndexStatCache::from_index_mtime(&index, file_mtime_parts(&index_metadata));
7786    let head_matches_index = match resolve_head_tree_oid(git_dir, format, db)? {
7787        Some(head_tree_oid) => head_matches_index_from_cache_tree(
7788            &index,
7789            format,
7790            &head_tree_oid,
7791            stat_cache.entries.len(),
7792        )?,
7793        None => false,
7794    };
7795    Ok((index, stat_cache, head_matches_index))
7796}
7797
7798fn head_tree_entries(
7799    git_dir: &Path,
7800    format: ObjectFormat,
7801    db: &FileObjectDatabase,
7802) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
7803    let refs = FileRefStore::new(git_dir, format);
7804    let Some(head) = refs.read_ref("HEAD")? else {
7805        return Ok(BTreeMap::new());
7806    };
7807    let commit_oid = match head {
7808        RefTarget::Direct(oid) => Some(oid),
7809        RefTarget::Symbolic(name) => match refs.read_ref(&name)? {
7810            Some(RefTarget::Direct(oid)) => Some(oid),
7811            _ => None,
7812        },
7813    };
7814    let Some(commit_oid) = commit_oid else {
7815        return Ok(BTreeMap::new());
7816    };
7817    let object = read_expected_object(db, &commit_oid, ObjectType::Commit)?;
7818    let commit = Commit::parse_ref(format, &object.body)?;
7819    let mut entries = BTreeMap::new();
7820    collect_tree_entries(db, format, &commit.tree, &mut entries)?;
7821    Ok(entries)
7822}
7823
7824fn tree_entries(
7825    db: &FileObjectDatabase,
7826    format: ObjectFormat,
7827    tree_oid: &ObjectId,
7828) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
7829    let mut entries = BTreeMap::new();
7830    collect_tree_entries(db, format, tree_oid, &mut entries)?;
7831    Ok(entries)
7832}
7833
7834/// Flatten a tree's blob leaves into `entries`, keyed by full path.
7835///
7836/// Delegates to the canonical [`sley_diff_merge::flatten_tree`] (the local
7837/// recursive flattener was a byte-identical copy) and adapts its
7838/// `(mode, oid)` tuples into this module's [`TrackedEntry`]. Entries already
7839/// present in `entries` are overwritten, matching the previous insert-based
7840/// behaviour.
7841fn collect_tree_entries(
7842    db: &FileObjectDatabase,
7843    format: ObjectFormat,
7844    tree_oid: &ObjectId,
7845    entries: &mut BTreeMap<Vec<u8>, TrackedEntry>,
7846) -> Result<()> {
7847    for (path, (mode, oid)) in sley_diff_merge::flatten_tree(db, format, tree_oid)? {
7848        entries.insert(path, TrackedEntry { mode, oid });
7849    }
7850    Ok(())
7851}
7852
7853/// Like a full worktree walk, but accepts the index's [`IndexStatCache`] so the
7854/// walk can reuse a cached oid for files that are provably unchanged since they
7855/// were staged, skipping the read+filter+hash for those paths. Passing `None`
7856/// hashes every file when no stat cache is supplied.
7857fn worktree_entries_with_stat_cache(
7858    worktree_root: &Path,
7859    git_dir: &Path,
7860    format: ObjectFormat,
7861    stat_cache: Option<&IndexStatCache>,
7862    tracked_paths: Option<&BTreeSet<Vec<u8>>>,
7863    ignores: Option<&mut IgnoreMatcher>,
7864) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
7865    Ok(worktree_entries_with_submodule_dirt(
7866        worktree_root,
7867        git_dir,
7868        format,
7869        stat_cache,
7870        tracked_paths,
7871        ignores,
7872    )?
7873    .0)
7874}
7875
/// Tracked worktree entries keyed by repo path, plus the dirt mask
/// ([`DIRTY_SUBMODULE_MODIFIED`] / [`DIRTY_SUBMODULE_UNTRACKED`]) for every
/// tracked gitlink path whose submodule working tree is dirty.
///
/// Tuple layout: `.0` is the entry map, `.1` is the gitlink path -> dirt mask
/// map.
type WorktreeEntriesWithDirt = (BTreeMap<Vec<u8>, TrackedEntry>, BTreeMap<Vec<u8>, u8>);
7880
/// Status worktree snapshot: tracked/untracked entries, gitlink dirt masks, and
/// tracked paths observed in the worktree.
///
/// Tuple layout: `.0` is the entry map keyed by repo path, `.1` is the gitlink
/// path -> dirt mask map, and `.2` is the set of tracked paths the walk marked
/// as present.
type StatusWorktreeSnapshot = (
    BTreeMap<Vec<u8>, TrackedEntry>,
    BTreeMap<Vec<u8>, u8>,
    HashSet<Vec<u8>>,
);
7888
7889/// Like [`worktree_entries_with_stat_cache`], but also reports, for every
7890/// tracked gitlink path whose submodule working tree is dirty, the dirt mask
7891/// ([`DIRTY_SUBMODULE_MODIFIED`] / [`DIRTY_SUBMODULE_UNTRACKED`]).
7892fn worktree_entries_with_submodule_dirt(
7893    worktree_root: &Path,
7894    git_dir: &Path,
7895    format: ObjectFormat,
7896    stat_cache: Option<&IndexStatCache>,
7897    tracked_paths: Option<&BTreeSet<Vec<u8>>>,
7898    ignores: Option<&mut IgnoreMatcher>,
7899) -> Result<WorktreeEntriesWithDirt> {
7900    let mut entries = BTreeMap::new();
7901    let mut submodule_dirt_map = BTreeMap::new();
7902    let mut tracked_presence = HashSet::new();
7903    // Worktree blobs are compared to the index by OID, so they must be passed
7904    // through the clean filter (core.autocrlf / .gitattributes) first -- exactly
7905    // as `git add` would store them. With no filter configured this is an exact
7906    // passthrough, so unfiltered repositories see identical OIDs.
7907    let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
7908    // Seed the matcher with the repo-wide sources only; each directory's
7909    // `.gitattributes` is folded in by `collect_worktree_entries` as it descends,
7910    // so the worktree is read exactly once (a separate full-tree attribute pass was
7911    // a second traversal of every directory).
7912    let mut attr_matcher = AttributeMatcher::from_worktree_base(worktree_root);
7913    let attr_requested = filter_attribute_names();
7914    let mut context = WorktreeEntriesWalk {
7915        git_dir,
7916        format,
7917        config: &config,
7918        matcher: &mut attr_matcher,
7919        requested: &attr_requested,
7920        stat_cache,
7921        tracked_paths,
7922        ignores,
7923        entries: &mut entries,
7924        submodule_dirt: &mut submodule_dirt_map,
7925        tracked_presence: &mut tracked_presence,
7926        record_clean_tracked: true,
7927    };
7928    collect_worktree_entries(&mut context, worktree_root, &[])?;
7929    Ok((entries, submodule_dirt_map))
7930}
7931
7932fn status_worktree_entries_with_submodule_dirt(
7933    worktree_root: &Path,
7934    git_dir: &Path,
7935    format: ObjectFormat,
7936    stat_cache: &IndexStatCache,
7937    tracked_paths: Option<&BTreeSet<Vec<u8>>>,
7938    ignores: Option<&mut IgnoreMatcher>,
7939) -> Result<StatusWorktreeSnapshot> {
7940    let mut entries = BTreeMap::new();
7941    let mut submodule_dirt_map = BTreeMap::new();
7942    let mut tracked_presence = HashSet::new();
7943    let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
7944    let mut attr_matcher = AttributeMatcher::from_worktree_base(worktree_root);
7945    let attr_requested = filter_attribute_names();
7946    let mut context = WorktreeEntriesWalk {
7947        git_dir,
7948        format,
7949        config: &config,
7950        matcher: &mut attr_matcher,
7951        requested: &attr_requested,
7952        stat_cache: Some(stat_cache),
7953        tracked_paths,
7954        ignores,
7955        entries: &mut entries,
7956        submodule_dirt: &mut submodule_dirt_map,
7957        tracked_presence: &mut tracked_presence,
7958        record_clean_tracked: false,
7959    };
7960    collect_worktree_entries(&mut context, worktree_root, &[])?;
7961    Ok((entries, submodule_dirt_map, tracked_presence))
7962}
7963
7964fn worktree_entry_for_git_path(
7965    worktree_root: &Path,
7966    git_dir: &Path,
7967    format: ObjectFormat,
7968    git_path: &[u8],
7969    expected_oid: &ObjectId,
7970    expected_mode: u32,
7971    stat_cache: Option<&IndexStatCache>,
7972) -> Result<Option<TrackedEntry>> {
7973    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
7974    let metadata = match fs::symlink_metadata(&absolute) {
7975        Ok(metadata) => metadata,
7976        Err(err)
7977            if matches!(
7978                err.kind(),
7979                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
7980            ) =>
7981        {
7982            return Ok(None);
7983        }
7984        Err(err) => return Err(err.into()),
7985    };
7986
7987    if expected_mode == 0o160000 {
7988        if !metadata.is_dir() {
7989            return Ok(Some(TrackedEntry {
7990                mode: worktree_entry_mode(&metadata),
7991                oid: ObjectId::null(format),
7992            }));
7993        }
7994        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(*expected_oid);
7995        return Ok(Some(TrackedEntry {
7996            mode: 0o160000,
7997            oid,
7998        }));
7999    }
8000
8001    if metadata.is_dir() {
8002        return Ok(Some(TrackedEntry {
8003            mode: worktree_entry_mode(&metadata),
8004            oid: ObjectId::null(format),
8005        }));
8006    }
8007
8008    if !(metadata.is_file() || metadata.file_type().is_symlink()) {
8009        return Ok(Some(TrackedEntry {
8010            mode: worktree_entry_mode(&metadata),
8011            oid: ObjectId::null(format),
8012        }));
8013    }
8014
8015    if let Some(tracked) =
8016        stat_cache.and_then(|cache| cache.reuse_tracked_entry(git_path, &metadata))
8017    {
8018        return Ok(Some(tracked));
8019    }
8020
8021    let mode = worktree_entry_mode(&metadata);
8022    let body = if metadata.file_type().is_symlink() {
8023        symlink_target_bytes(&absolute)?
8024    } else {
8025        let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
8026        let body = fs::read(&absolute)?;
8027        apply_clean_filter(worktree_root, git_dir, &config, git_path, &body)?
8028    };
8029    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
8030    Ok(Some(TrackedEntry { mode, oid }))
8031}
8032
8033fn worktree_entry_for_index_entry_with_attributes(
8034    worktree_root: &Path,
8035    git_dir: &Path,
8036    format: ObjectFormat,
8037    index_entry: &IndexEntry,
8038    stat_cache: &IndexStatCache,
8039    clean_filter: &mut Option<TrackedOnlyCleanFilter>,
8040) -> Result<Option<TrackedEntry>> {
8041    let git_path = index_entry.path.as_bytes();
8042    let expected_mode = index_entry.mode;
8043    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
8044    let metadata = match fs::symlink_metadata(&absolute) {
8045        Ok(metadata) => metadata,
8046        Err(err)
8047            if matches!(
8048                err.kind(),
8049                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
8050            ) =>
8051        {
8052            return Ok(None);
8053        }
8054        Err(err) => return Err(err.into()),
8055    };
8056    let file_type = metadata.file_type();
8057
8058    if expected_mode == 0o160000 {
8059        if !file_type.is_dir() {
8060            return Ok(Some(TrackedEntry {
8061                mode: worktree_entry_mode(&metadata),
8062                oid: ObjectId::null(format),
8063            }));
8064        }
8065        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(index_entry.oid);
8066        return Ok(Some(TrackedEntry {
8067            mode: 0o160000,
8068            oid,
8069        }));
8070    }
8071
8072    if file_type.is_dir() {
8073        return Ok(Some(TrackedEntry {
8074            mode: worktree_entry_mode(&metadata),
8075            oid: ObjectId::null(format),
8076        }));
8077    }
8078
8079    if !(file_type.is_file() || file_type.is_symlink()) {
8080        return Ok(Some(TrackedEntry {
8081            mode: worktree_entry_mode(&metadata),
8082            oid: ObjectId::null(format),
8083        }));
8084    }
8085
8086    if let Some(tracked) = stat_cache.reuse_index_entry(index_entry, &metadata) {
8087        return Ok(Some(tracked));
8088    }
8089
8090    let mode = worktree_entry_mode(&metadata);
8091    let body = if file_type.is_symlink() {
8092        symlink_target_bytes(&absolute)?
8093    } else {
8094        let body = fs::read(&absolute)?;
8095        let clean_filter = tracked_only_clean_filter(clean_filter, worktree_root, git_dir);
8096        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
8097        let checks =
8098            clean_filter
8099                .matcher
8100                .attributes_for_path(git_path, &clean_filter.requested, false);
8101        apply_clean_filter_with_attributes(&clean_filter.config, &checks, git_path, &body)?
8102    };
8103    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
8104    Ok(Some(TrackedEntry { mode, oid }))
8105}
8106
/// Lazily built clean-filter state for code paths that look at individual
/// tracked files instead of walking the whole worktree. Created on first use
/// by `tracked_only_clean_filter`.
struct TrackedOnlyCleanFilter {
    /// Repository configuration (defaulted when it cannot be read).
    config: GitConfig,
    /// Attribute matcher seeded from the worktree base; per-directory
    /// attribute patterns are folded in on demand.
    matcher: AttributeMatcher,
    /// Attribute names to look up, as returned by `filter_attribute_names`.
    requested: Vec<Vec<u8>>,
    /// Repository-relative directories whose attribute patterns have already
    /// been read into `matcher` (the empty path is the worktree root), so each
    /// directory is read at most once.
    attribute_dirs: BTreeSet<Vec<u8>>,
}
8113
8114impl TrackedOnlyCleanFilter {
8115    fn read_attributes_for_path(&mut self, worktree_root: &Path, git_path: &[u8]) -> Result<()> {
8116        self.read_attribute_dir(worktree_root, &[])?;
8117        let mut prefix = Vec::new();
8118        let mut parts = git_path.split(|byte| *byte == b'/').peekable();
8119        while let Some(part) = parts.next() {
8120            if parts.peek().is_none() {
8121                break;
8122            }
8123            if !prefix.is_empty() {
8124                prefix.push(b'/');
8125            }
8126            prefix.extend_from_slice(part);
8127            self.read_attribute_dir(worktree_root, &prefix)?;
8128        }
8129        Ok(())
8130    }
8131
8132    fn read_attribute_dir(&mut self, worktree_root: &Path, git_path: &[u8]) -> Result<()> {
8133        if !self.attribute_dirs.insert(git_path.to_vec()) {
8134            return Ok(());
8135        }
8136        let dir = if git_path.is_empty() {
8137            worktree_root.to_path_buf()
8138        } else {
8139            worktree_root.join(repo_path_to_os_path(git_path)?)
8140        };
8141        read_dir_attribute_patterns(worktree_root, &dir, &mut self.matcher)
8142    }
8143}
8144
8145fn tracked_only_clean_filter<'a>(
8146    clean_filter: &'a mut Option<TrackedOnlyCleanFilter>,
8147    worktree_root: &Path,
8148    git_dir: &Path,
8149) -> &'a mut TrackedOnlyCleanFilter {
8150    if clean_filter.is_none() {
8151        *clean_filter = Some(TrackedOnlyCleanFilter {
8152            config: sley_config::read_repo_config(git_dir, None).unwrap_or_default(),
8153            matcher: AttributeMatcher::from_worktree_base(worktree_root),
8154            requested: filter_attribute_names(),
8155            attribute_dirs: BTreeSet::new(),
8156        });
8157    }
8158    clean_filter
8159        .as_mut()
8160        .expect("tracked-only clean filter initialized")
8161}
8162
/// Shared state for one recursive worktree walk (`collect_worktree_entries`):
/// the inputs that steer the walk plus the output collections it fills.
struct WorktreeEntriesWalk<'a> {
    /// The repository's git directory; the walk never descends into it.
    git_dir: &'a Path,
    /// Object format, used e.g. for null oids and gitlink HEAD lookups.
    format: ObjectFormat,
    /// Repository configuration.
    /// NOTE(review): presumably consumed by the clean filter — confirm in the
    /// walk body.
    config: &'a GitConfig,
    /// Attribute matcher; each visited directory's attribute patterns are
    /// folded in as the walk descends.
    matcher: &'a mut AttributeMatcher,
    /// Attribute names to look up (from `filter_attribute_names`).
    requested: &'a [Vec<u8>],
    /// Optional index stat cache, used for gitlink lookups and cached entries.
    stat_cache: Option<&'a IndexStatCache>,
    /// Optional set of tracked paths used to prune directories that cannot
    /// contain tracked content.
    tracked_paths: Option<&'a BTreeSet<Vec<u8>>>,
    /// Optional ignore matcher; each visited directory's ignore patterns are
    /// folded in as the walk descends.
    ignores: Option<&'a mut IgnoreMatcher>,
    /// Output: worktree entries keyed by repository path.
    entries: &'a mut BTreeMap<Vec<u8>, TrackedEntry>,
    /// Dirt masks for tracked gitlink paths whose submodule worktree is dirty.
    submodule_dirt: &'a mut BTreeMap<Vec<u8>, u8>,
    /// Output: tracked paths the walk marked as present in the worktree.
    tracked_presence: &'a mut HashSet<Vec<u8>>,
    /// When `false`, a tracked entry identical to the stat cache's entry for
    /// the same path is not recorded (see `should_record_tracked_entry`).
    record_clean_tracked: bool,
}
8178
8179impl WorktreeEntriesWalk<'_> {
8180    fn mark_tracked_present(&mut self, git_path: &[u8]) {
8181        self.tracked_presence.insert(git_path.to_vec());
8182    }
8183
8184    fn tracked_entry_for(&self, git_path: &[u8]) -> Option<TrackedEntry> {
8185        self.stat_cache
8186            .and_then(|cache| cache.tracked_entry(git_path))
8187    }
8188
8189    fn should_record_tracked_entry(&self, git_path: &[u8], entry: &TrackedEntry) -> bool {
8190        self.record_clean_tracked
8191            || self
8192                .tracked_entry_for(git_path)
8193                .is_none_or(|tracked| tracked != *entry)
8194    }
8195}
8196
8197fn git_path_append_component(parent: &[u8], component: &std::ffi::OsStr) -> Vec<u8> {
8198    let component = os_str_component_bytes(component);
8199    let separator = usize::from(!parent.is_empty());
8200    let mut path = Vec::with_capacity(parent.len() + separator + component.len());
8201    if !parent.is_empty() {
8202        path.extend_from_slice(parent);
8203        path.push(b'/');
8204    }
8205    path.extend_from_slice(component.as_ref());
8206    path
8207}
8208
/// Unix: a path component's raw bytes, borrowed without copying.
#[cfg(unix)]
fn os_str_component_bytes(component: &std::ffi::OsStr) -> Cow<'_, [u8]> {
    let raw: &[u8] = std::os::unix::ffi::OsStrExt::as_bytes(component);
    Cow::Borrowed(raw)
}
8215
/// Non-Unix: a path component converted lossily to UTF-8 and returned as
/// owned bytes.
#[cfg(not(unix))]
fn os_str_component_bytes(component: &std::ffi::OsStr) -> Cow<'_, [u8]> {
    let lossy = component.to_string_lossy().into_owned();
    Cow::Owned(lossy.into_bytes())
}
8220
8221fn collect_worktree_entries(
8222    context: &mut WorktreeEntriesWalk<'_>,
8223    dir: &Path,
8224    dir_git_path: &[u8],
8225) -> Result<()> {
8226    if is_same_path(dir, context.git_dir) {
8227        return Ok(());
8228    }
8229    // Fold this directory's `.gitattributes` into the matcher before processing its
8230    // files, so lookups for files here (and below) see it. This is what lets the
8231    // walk read the tree once instead of doing a separate full-tree attribute pass.
8232    read_dir_attribute_patterns_for_base(dir, dir_git_path, context.matcher)?;
8233    if let Some(ignores) = context.ignores.as_deref_mut() {
8234        read_dir_ignore_patterns_for_base(dir, dir_git_path, ignores)?;
8235    }
8236    for entry in fs::read_dir(dir)? {
8237        let entry = entry?;
8238        let file_name = entry.file_name();
8239        let path = entry.path();
8240        if is_dot_git_entry(&path) {
8241            continue;
8242        }
8243        if is_same_path(&path, context.git_dir) {
8244            continue;
8245        }
8246        let metadata = entry.metadata()?;
8247        let git_path = git_path_append_component(dir_git_path, &file_name);
8248        if context
8249            .ignores
8250            .as_ref()
8251            .is_some_and(|ignores| ignores.is_ignored(&git_path, metadata.is_dir()))
8252        {
8253            if metadata.is_dir()
8254                && context.tracked_paths.is_some_and(|tracked_paths| {
8255                    tracked_paths_may_contain(tracked_paths, &git_path)
8256                })
8257            {
8258                collect_worktree_entries(context, &path, &git_path)?;
8259            }
8260            continue;
8261        }
8262        if metadata.is_dir() {
8263            // A directory staged as a gitlink (mode 160000) is opaque: the walk
8264            // never descends into it. Its worktree "content" is the commit the
8265            // embedded repository has checked out (upstream ce_compare_gitlink):
8266            // a populated submodule reports its HEAD (plus a dirt mask when its
8267            // own tree has modified/untracked content); an unpopulated
8268            // directory — no repository, or no commit checked out — always
8269            // matches the staged oid.
8270            if let Some(index_entry) = context
8271                .stat_cache
8272                .and_then(|cache| cache.gitlink_entry(&git_path))
8273            {
8274                context.mark_tracked_present(&git_path);
8275                let oid = sley_diff_merge::gitlink_head_oid(&path, context.format)
8276                    .unwrap_or(index_entry.oid);
8277                let dirt = submodule_dirt(&path);
8278                if dirt != 0 {
8279                    context.submodule_dirt.insert(git_path.clone(), dirt);
8280                }
8281                let tracked = TrackedEntry {
8282                    mode: 0o160000,
8283                    oid,
8284                };
8285                if dirt != 0 || context.should_record_tracked_entry(&git_path, &tracked) {
8286                    context.entries.insert(git_path, tracked);
8287                }
8288                continue;
8289            }
8290            if is_nested_repository_boundary(&path) {
8291                if let Some(tracked_paths) = context.tracked_paths
8292                    && !tracked_paths_may_contain(tracked_paths, &git_path)
8293                {
8294                    continue;
8295                }
8296                context.entries.insert(
8297                    git_path,
8298                    TrackedEntry {
8299                        mode: 0o040000,
8300                        oid: ObjectId::null(context.format),
8301                    },
8302                );
8303                continue;
8304            }
8305            if let Some(tracked_paths) = context.tracked_paths
8306                && !tracked_paths_may_contain(tracked_paths, &git_path)
8307            {
8308                continue;
8309            }
8310            collect_worktree_entries(context, &path, &git_path)?;
8311        } else if metadata.is_file() || metadata.file_type().is_symlink() {
8312            if let Some(tracked_paths) = context.tracked_paths
8313                && !tracked_paths.contains(&git_path)
8314            {
8315                continue;
8316            }
8317            let entry_mode = worktree_entry_mode(&metadata);
8318            // git's racy-git stat shortcut: when the index's cached stat proves
8319            // this file is unchanged since it was staged, reuse the staged oid
8320            // and skip the read+filter+hash entirely. `reuse_tracked_entry`
8321            // returns `Some` ONLY for a non-racy size+mtime+mode match, so a
8322            // modified file always falls through to the full hash below and is
8323            // never silently reported clean.
8324            if let Some(tracked) = context
8325                .stat_cache
8326                .and_then(|cache| cache.reuse_tracked_entry(&git_path, &metadata))
8327            {
8328                context.mark_tracked_present(&git_path);
8329                if context.record_clean_tracked {
8330                    context.entries.insert(git_path, tracked);
8331                }
8332                continue;
8333            }
8334            // A file absent from the index is untracked: status and the
8335            // index-vs-worktree diff report it by *presence* (`??` / nothing), never
8336            // by content, so computing its oid is wasted work — git never hashes
8337            // untracked files. Record presence with a null oid and skip the
8338            // read+filter+hash. Without a stat cache we cannot tell tracked from
8339            // untracked, so fall through and hash as before.
8340            if context
8341                .stat_cache
8342                .is_some_and(|cache| !cache.contains(&git_path))
8343            {
8344                context.entries.insert(
8345                    git_path,
8346                    TrackedEntry {
8347                        mode: entry_mode,
8348                        oid: ObjectId::null(context.format),
8349                    },
8350                );
8351                continue;
8352            }
8353            let body = if metadata.file_type().is_symlink() {
8354                // The blob for a symlink is the raw link target; clean filters
8355                // never apply because git treats symlink content as opaque.
8356                symlink_target_bytes(&path)?
8357            } else {
8358                let body = fs::read(&path)?;
8359                // Resolve this path's attributes against the prebuilt matcher (a cheap
8360                // pattern match) and apply the clean filter -- no per-file matcher
8361                // rebuild. With no attributes/autocrlf configured this is an exact
8362                // passthrough, so the stored OID is unchanged.
8363                let checks =
8364                    context
8365                        .matcher
8366                        .attributes_for_path(&git_path, context.requested, false);
8367                apply_clean_filter_with_attributes(context.config, &checks, &git_path, &body)?
8368            };
8369            let oid = EncodedObject::new(ObjectType::Blob, body).object_id(context.format)?;
8370            let tracked = TrackedEntry {
8371                mode: entry_mode,
8372                oid,
8373            };
8374            if context
8375                .stat_cache
8376                .is_some_and(|cache| cache.contains(&git_path))
8377            {
8378                context.mark_tracked_present(&git_path);
8379                if context.should_record_tracked_entry(&git_path, &tracked) {
8380                    context.entries.insert(git_path, tracked);
8381                }
8382            } else {
8383                context.entries.insert(git_path, tracked);
8384            }
8385        }
8386    }
8387    Ok(())
8388}
8389
/// Reports whether `directory` is itself a tracked path or is the parent of at
/// least one tracked path.
///
/// The set is ordered, so every path below `directory` sorts at or after
/// `directory/`; only the first such candidate has to be inspected.
fn tracked_paths_may_contain(tracked_paths: &BTreeSet<Vec<u8>>, directory: &[u8]) -> bool {
    if tracked_paths.contains(directory) {
        return true;
    }
    let child_prefix = [directory, &b"/"[..]].concat();
    let first_candidate = tracked_paths
        .range::<[u8], _>((
            std::ops::Bound::Included(child_prefix.as_slice()),
            std::ops::Bound::Unbounded,
        ))
        .next();
    match first_candidate {
        Some(candidate) => candidate.starts_with(&child_prefix),
        None => false,
    }
}
8405
/// Whether `left` and `right` consist of the same path components. This is a
/// purely lexical comparison: nothing is resolved against the filesystem.
fn is_same_path(left: &Path, right: &Path) -> bool {
    left.components().eq(right.components())
}
8409
/// Whether the final component of `path` is exactly `.git`. Git never lists a
/// `.git` entry at any depth (a repository's own `.git`, a submodule gitlink
/// file, or an embedded repository's `.git` directory) as untracked content.
fn is_dot_git_entry(path: &Path) -> bool {
    path.file_name().is_some_and(|name| name == ".git")
}
8416
8417/// Whether `path` is a directory containing an embedded repository's `.git`
8418/// *directory*, or a `.git` file whose `gitdir:` pointer resolves to an
8419/// existing directory (a submodule worktree). Git treats both as a repository
8420/// boundary (listing the directory as `dir/`); an *invalid* `.git` file (no
8421/// resolvable `gitdir:` target) is not a boundary — Git descends into the
8422/// directory and lists its other untracked contents normally.
8423fn is_nested_repository_boundary(path: &Path) -> bool {
8424    if path.join(".git").is_dir() {
8425        return true;
8426    }
8427    sley_diff_merge::gitlink_git_dir(path).is_some()
8428}
8429
/// Whether `path` is the `.git` directory of a repository embedded below
/// `root`, or a path inside such a directory.
///
/// `root`'s own `.git` never counts, and a path that does not lie under `root`
/// is never considered embedded.
fn is_embedded_git_internals(root: &Path, path: &Path) -> bool {
    let relative = match path.strip_prefix(root) {
        Ok(relative) => relative,
        Err(_) => return false,
    };
    // `parent` is always the directory that contains the component under test.
    let mut parent = root.to_path_buf();
    for component in relative.components() {
        let names_dot_git = match component {
            std::path::Component::Normal(name) => name == ".git",
            _ => false,
        };
        if names_dot_git && parent != root && parent.join(".git").is_dir() {
            return true;
        }
        parent.push(component);
    }
    false
}
8447
8448fn worktree_entry_mode(metadata: &fs::Metadata) -> u32 {
8449    if metadata.file_type().is_symlink() {
8450        0o120000
8451    } else if metadata.is_dir() {
8452        0o040000
8453    } else {
8454        file_mode(metadata)
8455    }
8456}
8457
8458fn worktree_path(root: &Path, path: &[u8]) -> Result<PathBuf> {
8459    let text = std::str::from_utf8(path).map_err(|err| GitError::InvalidPath(err.to_string()))?;
8460    let relative = PathBuf::from(text);
8461    if relative.is_absolute()
8462        || relative.components().any(|component| {
8463            matches!(
8464                component,
8465                std::path::Component::ParentDir | std::path::Component::Prefix(_)
8466            )
8467        })
8468    {
8469        return Err(GitError::InvalidPath(format!(
8470            "invalid worktree path {text}"
8471        )));
8472    }
8473    Ok(root.join(relative))
8474}
8475
8476fn remove_worktree_file(root: &Path, path: &[u8]) -> Result<()> {
8477    let file = worktree_path(root, path)?;
8478    if !file.exists() {
8479        return Ok(());
8480    }
8481    if file.is_dir() {
8482        // A tracked path that is a directory on disk is a gitlink: upstream
8483        // checkout/reset never recurses into a submodule's working tree. It
8484        // rmdirs the path when empty (remove_scheduled_dirs) and leaves a
8485        // populated submodule in place.
8486        match fs::remove_dir(&file) {
8487            Ok(()) => prune_empty_parents(root, file.parent())?,
8488            Err(err) if err.kind() == std::io::ErrorKind::DirectoryNotEmpty => {}
8489            Err(err) => return Err(err.into()),
8490        }
8491        return Ok(());
8492    }
8493    fs::remove_file(&file)?;
8494    prune_empty_parents(root, file.parent())?;
8495    Ok(())
8496}
8497
8498fn prune_empty_parents(root: &Path, mut dir: Option<&Path>) -> Result<()> {
8499    while let Some(path) = dir {
8500        if path == root {
8501            break;
8502        }
8503        match fs::remove_dir(path) {
8504            Ok(()) => dir = path.parent(),
8505            Err(err) if err.kind() == std::io::ErrorKind::NotFound => dir = path.parent(),
8506            Err(err) if err.kind() == std::io::ErrorKind::DirectoryNotEmpty => break,
8507            Err(err) => return Err(err.into()),
8508        }
8509    }
8510    Ok(())
8511}
8512
8513#[derive(Debug, Default)]
8514struct TreeNode {
8515    files: Vec<TreeFile>,
8516    directories: BTreeMap<Vec<u8>, TreeNode>,
8517}
8518
8519#[derive(Debug)]
8520struct TreeFile {
8521    name: Vec<u8>,
8522    mode: u32,
8523    oid: ObjectId,
8524}
8525
8526impl TreeNode {
8527    fn insert(&mut self, entry: &IndexEntry) -> Result<()> {
8528        let components = entry
8529            .path
8530            .as_bytes()
8531            .split(|byte| *byte == b'/')
8532            .collect::<Vec<_>>();
8533        if components.iter().any(|component| component.is_empty()) {
8534            return Err(GitError::InvalidPath(format!(
8535                "invalid index path {}",
8536                String::from_utf8_lossy(entry.path.as_bytes())
8537            )));
8538        }
8539        self.insert_components(&components, entry)
8540    }
8541
8542    fn insert_components(&mut self, components: &[&[u8]], entry: &IndexEntry) -> Result<()> {
8543        match components {
8544            [] => Err(GitError::InvalidPath("empty index path".into())),
8545            [name] => {
8546                self.files.push(TreeFile {
8547                    name: name.to_vec(),
8548                    mode: entry.mode,
8549                    oid: entry.oid,
8550                });
8551                Ok(())
8552            }
8553            [directory, rest @ ..] => self
8554                .directories
8555                .entry(directory.to_vec())
8556                .or_default()
8557                .insert_components(rest, entry),
8558        }
8559    }
8560}
8561
8562fn write_tree_node(node: &TreeNode, odb: &mut FileObjectDatabase) -> Result<ObjectId> {
8563    let mut entries = Vec::with_capacity(node.files.len() + node.directories.len());
8564    for file in &node.files {
8565        entries.push(TreeEntry {
8566            mode: file.mode,
8567            name: BString::from(file.name.as_slice()),
8568            oid: file.oid,
8569        });
8570    }
8571    for (name, child) in &node.directories {
8572        let oid = write_tree_node(child, odb)?;
8573        entries.push(TreeEntry {
8574            mode: 0o040000,
8575            name: BString::from(name.as_slice()),
8576            oid,
8577        });
8578    }
8579    entries.sort_by(|left, right| {
8580        git_tree_entry_cmp(
8581            left.name.as_bytes(),
8582            left.mode,
8583            right.name.as_bytes(),
8584            right.mode,
8585        )
8586    });
8587    odb.write_object(EncodedObject::new(
8588        ObjectType::Tree,
8589        Tree { entries }.write(),
8590    ))
8591}
8592
/// Orders two tree entries by name, where a name that ends first is compared
/// as if it were followed by its terminator byte (see `tree_name_terminator`):
/// `/` for a directory, NUL for anything else.
///
/// Two entries with identical names compare equal regardless of mode.
fn git_tree_entry_cmp(
    left_name: &[u8],
    left_mode: u32,
    right_name: &[u8],
    right_mode: u32,
) -> Ordering {
    let shared = left_name.len().min(right_name.len());
    let (left_head, left_rest) = left_name.split_at(shared);
    let (right_head, right_rest) = right_name.split_at(shared);
    match left_head.cmp(right_head) {
        Ordering::Equal => {}
        decided => return decided,
    }
    // The common prefix is identical; at least one name ends here.
    match (left_rest.first(), right_rest.first()) {
        (None, Some(&next)) => tree_name_terminator(left_mode).cmp(&next),
        (Some(&next), None) => next.cmp(&tree_name_terminator(right_mode)),
        _ => Ordering::Equal,
    }
}

/// The byte treated as following an entry's name when ordering: `/` for a
/// directory (mode `040000`), NUL for every other mode.
fn tree_name_terminator(mode: u32) -> u8 {
    match mode {
        0o040000 => b'/',
        _ => 0,
    }
}
8617
/// The git mode for a non-directory, non-symlink entry on Unix: `100755` when
/// any execute bit (user, group or other) is set, `100644` otherwise.
#[cfg(unix)]
fn file_mode(metadata: &fs::Metadata) -> u32 {
    use std::os::unix::fs::PermissionsExt;
    const ANY_EXECUTE_BIT: u32 = 0o111;
    let executable = metadata.permissions().mode() & ANY_EXECUTE_BIT != 0;
    if executable { 0o100755 } else { 0o100644 }
}
8627
/// The git mode for a non-directory, non-symlink entry on non-Unix targets:
/// always `100644`, because the metadata is not consulted here.
#[cfg(not(unix))]
fn file_mode(_metadata: &fs::Metadata) -> u32 {
    const REGULAR_FILE_MODE: u32 = 0o100644;
    REGULAR_FILE_MODE
}
8632
8633/// The blob content git stores for a symlink: the raw bytes of the link target
8634/// exactly as `readlink(2)` returns them. On Unix the target is an opaque byte
8635/// string, so we take the `OsStr` bytes verbatim (no UTF-8 round-trip, no path
8636/// re-componentization that could rewrite separators).
8637#[cfg(unix)]
8638fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
8639    use std::os::unix::ffi::OsStrExt;
8640    let target = fs::read_link(path)?;
8641    Ok(target.as_os_str().as_bytes().to_vec())
8642}
8643
/// The blob content git stores for a symlink on non-Unix targets: the link
/// target converted (lossily) to text, with every `\` replaced by `/`.
#[cfg(not(unix))]
fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
    let link_target = fs::read_link(path)?;
    // git normalizes symlink targets to forward slashes on platforms whose
    // native separator is `\`.
    let normalized: String = link_target
        .to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect();
    Ok(normalized.into_bytes())
}
8651
8652fn git_path_bytes(path: &Path) -> Result<Vec<u8>> {
8653    if path.components().any(|component| {
8654        matches!(
8655            component,
8656            std::path::Component::ParentDir | std::path::Component::Prefix(_)
8657        )
8658    }) {
8659        return Err(GitError::InvalidPath(format!(
8660            "invalid index path {}",
8661            path.display()
8662        )));
8663    }
8664    Ok(path
8665        .components()
8666        .filter_map(|component| match component {
8667            std::path::Component::Normal(value) => Some(value.to_string_lossy().into_owned()),
8668            _ => None,
8669        })
8670        .collect::<Vec<_>>()
8671        .join("/")
8672        .into_bytes())
8673}
8674
8675fn repo_path_to_os_path(path: &[u8]) -> Result<PathBuf> {
8676    #[cfg(unix)]
8677    {
8678        use std::os::unix::ffi::OsStrExt;
8679
8680        Ok(PathBuf::from(std::ffi::OsStr::from_bytes(path)))
8681    }
8682
8683    #[cfg(not(unix))]
8684    {
8685        let path = std::str::from_utf8(path)
8686            .map_err(|_| GitError::InvalidPath("index path is not utf8".into()))?;
8687        Ok(path.split('/').collect())
8688    }
8689}
8690
8691fn git_path_to_relative_path(path: &[u8]) -> Result<PathBuf> {
8692    let path = std::str::from_utf8(path)
8693        .map_err(|err| GitError::InvalidPath(format!("invalid utf-8 index path: {err}")))?;
8694    Ok(path.split('/').collect())
8695}
8696
/// Whether the textual form of `path` ends with a path separator.
///
/// Every separator the platform accepts counts, so on Windows both `\` and
/// `/` are recognised; on Unix only `/` is a separator. An empty path has no
/// trailing separator.
fn path_has_trailing_separator(path: &Path) -> bool {
    path.as_os_str()
        .to_string_lossy()
        .chars()
        .next_back()
        .is_some_and(std::path::is_separator)
}
8702
8703#[cfg(test)]
8704mod tests {
8705    use super::*;
8706    use sley_odb::ObjectReader;
8707    use std::sync::atomic::{AtomicU64, Ordering};
8708
8709    static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
8710
8711    #[test]
8712    fn atomic_metadata_writer_writes_and_reports_stat() {
8713        let root = temp_root();
8714        let path = root.join(".git").join("HEAD");
8715
8716        let result = write_metadata_file_atomic(
8717            &path,
8718            b"ref: refs/heads/main\n",
8719            AtomicMetadataWriteOptions::default(),
8720        )
8721        .expect("write metadata");
8722
8723        assert_eq!(
8724            fs::read(&path).expect("read metadata"),
8725            b"ref: refs/heads/main\n"
8726        );
8727        assert_eq!(result.path, path);
8728        assert_eq!(result.len, b"ref: refs/heads/main\n".len() as u64);
8729        assert!(result.mtime.is_some());
8730        assert!(!path.with_file_name("HEAD.lock").exists());
8731        fs::remove_dir_all(root).expect("test operation should succeed");
8732    }
8733
8734    #[test]
8735    fn atomic_metadata_writer_existing_lock_preserves_original() {
8736        let root = temp_root();
8737        let git_dir = root.join(".git");
8738        fs::create_dir_all(&git_dir).expect("create git dir");
8739        let path = git_dir.join("HEAD");
8740        let lock = git_dir.join("HEAD.lock");
8741        fs::write(&path, b"ref: refs/heads/main\n").expect("write original");
8742        fs::write(&lock, b"held\n").expect("write lock");
8743
8744        let err = write_metadata_file_atomic(
8745            &path,
8746            b"ref: refs/heads/other\n",
8747            AtomicMetadataWriteOptions::default(),
8748        )
8749        .expect_err("held lock must fail");
8750
8751        assert!(matches!(err, GitError::Transaction(_)));
8752        assert_eq!(
8753            fs::read(&path).expect("read original"),
8754            b"ref: refs/heads/main\n"
8755        );
8756        assert_eq!(fs::read(&lock).expect("read lock"), b"held\n");
8757        fs::remove_dir_all(root).expect("test operation should succeed");
8758    }
8759
8760    // --- `ls-files --eol` stat/attr helpers (mirror convert.c) ---------------
8761
8762    #[test]
8763    fn convert_stats_ascii_classifies_eol_content() {
8764        assert_eq!(convert_stats_ascii(b""), "none");
8765        assert_eq!(convert_stats_ascii(b"abc"), "none");
8766        assert_eq!(convert_stats_ascii(b"a\nb\n"), "lf");
8767        assert_eq!(convert_stats_ascii(b"a\r\nb\r\n"), "crlf");
8768        assert_eq!(convert_stats_ascii(b"a\r\nb\n"), "mixed");
8769        // A lone CR makes the content binary (-text), matching git.
8770        assert_eq!(convert_stats_ascii(b"a\rb"), "-text");
8771        // A NUL byte is binary.
8772        assert_eq!(convert_stats_ascii(b"a\0b\n"), "-text");
8773        // A trailing ^Z (EOF) is not counted as non-printable.
8774        assert_eq!(convert_stats_ascii(b"abc\n\x1a"), "lf");
8775    }
8776
8777    fn attr_check(name: &[u8], state: Option<AttributeState>) -> AttributeCheck {
8778        AttributeCheck {
8779            attribute: name.to_vec(),
8780            state,
8781        }
8782    }
8783
8784    #[test]
8785    fn convert_attr_ascii_matches_git_attr_action() {
8786        // No attributes at all: empty attr field.
8787        assert_eq!(convert_attr_ascii(&[]), "");
8788        // text (set) -> "text"; -text (unset) -> "-text".
8789        assert_eq!(
8790            convert_attr_ascii(&[attr_check(b"text", Some(AttributeState::Set))]),
8791            "text"
8792        );
8793        assert_eq!(
8794            convert_attr_ascii(&[attr_check(b"text", Some(AttributeState::Unset))]),
8795            "-text"
8796        );
8797        // text=auto -> "text=auto"; with eol=crlf/lf the AUTO variants.
8798        assert_eq!(
8799            convert_attr_ascii(&[attr_check(
8800                b"text",
8801                Some(AttributeState::Value(b"auto".to_vec()))
8802            )]),
8803            "text=auto"
8804        );
8805        assert_eq!(
8806            convert_attr_ascii(&[
8807                attr_check(b"text", Some(AttributeState::Value(b"auto".to_vec()))),
8808                attr_check(b"eol", Some(AttributeState::Value(b"crlf".to_vec()))),
8809            ]),
8810            "text=auto eol=crlf"
8811        );
8812        assert_eq!(
8813            convert_attr_ascii(&[
8814                attr_check(b"text", Some(AttributeState::Value(b"auto".to_vec()))),
8815                attr_check(b"eol", Some(AttributeState::Value(b"lf".to_vec()))),
8816            ]),
8817            "text=auto eol=lf"
8818        );
8819        // eol=crlf/lf alone (no text) forces text + the eol direction.
8820        assert_eq!(
8821            convert_attr_ascii(&[attr_check(
8822                b"eol",
8823                Some(AttributeState::Value(b"crlf".to_vec()))
8824            )]),
8825            "text eol=crlf"
8826        );
8827        assert_eq!(
8828            convert_attr_ascii(&[attr_check(
8829                b"eol",
8830                Some(AttributeState::Value(b"lf".to_vec()))
8831            )]),
8832            "text eol=lf"
8833        );
8834        // -text overrides any eol attribute (binary wins).
8835        assert_eq!(
8836            convert_attr_ascii(&[
8837                attr_check(b"text", Some(AttributeState::Unset)),
8838                attr_check(b"eol", Some(AttributeState::Value(b"crlf".to_vec()))),
8839            ]),
8840            "-text"
8841        );
8842    }
8843
8844    #[test]
8845    fn smudge_safety_guard_skips_irreversible_autocrlf() {
8846        // text=auto eol=crlf (AUTO_CRLF): convert pure-LF, but leave content
8847        // alone when it already has a CR or CRLF, or is binary.
8848        let auto = ContentFilterPlan {
8849            text: TextDecision::Auto,
8850            eol: EolConversion::Crlf,
8851            driver: None,
8852        };
8853        assert!(auto.will_convert_lf_to_crlf(b"a\nb\n"));
8854        assert!(!auto.will_convert_lf_to_crlf(b"a\r\nb\n")); // has CRLF
8855        assert!(!auto.will_convert_lf_to_crlf(b"a\nb\rc")); // lone CR (binary)
8856        assert!(!auto.will_convert_lf_to_crlf(b"abc")); // no naked LF
8857
8858        // text eol=crlf (TEXT_CRLF): no safety guard — always convert naked LF
8859        // even when a CR/CRLF is already present.
8860        let text = ContentFilterPlan {
8861            text: TextDecision::Text,
8862            eol: EolConversion::Crlf,
8863            driver: None,
8864        };
8865        assert!(text.will_convert_lf_to_crlf(b"a\r\nb\nc\n"));
8866        assert!(!text.will_convert_lf_to_crlf(b"a\r\nb\r\n")); // no naked LF
8867    }
8868
8869    /// Build an in-memory ignore matcher from raw `.gitignore` lines (no disk).
8870    fn ignore_matcher(patterns: &[&[u8]]) -> IgnoreMatcher {
8871        let mut matcher = IgnoreMatcher::default();
8872        let owned: Vec<Vec<u8>> = patterns.iter().map(|p| p.to_vec()).collect();
8873        matcher.extend_patterns(&owned);
8874        matcher
8875    }
8876
8877    #[test]
8878    fn ignore_match_kind_fast_paths_match_the_wildcard_engine() {
8879        // Literal: exact basename anywhere; not a superstring.
8880        let matcher = ignore_matcher(&[b"Pods"]);
8881        assert!(matcher.is_ignored(b"a/b/Pods", true));
8882        assert!(matcher.is_ignored(b"Pods", false));
8883        assert!(!matcher.is_ignored(b"Pods_not", false));
8884        assert!(matches!(
8885            classify_ignore_pattern(b"Pods"),
8886            MatchKind::Literal
8887        ));
8888
8889        // Suffix `*.log`: basename ending in `.log` at any depth.
8890        let matcher = ignore_matcher(&[b"*.log"]);
8891        assert!(matcher.is_ignored(b"x.log", false));
8892        assert!(matcher.is_ignored(b"a/b/x.log", false));
8893        assert!(matcher.is_ignored(b".log", false));
8894        assert!(!matcher.is_ignored(b"x.logx", false));
8895        assert!(matches!(
8896            classify_ignore_pattern(b"*.log"),
8897            MatchKind::Suffix
8898        ));
8899
8900        // Prefix `build*`: basename starting with `build`.
8901        let matcher = ignore_matcher(&[b"build*"]);
8902        assert!(matcher.is_ignored(b"buildfoo", false));
8903        assert!(matcher.is_ignored(b"a/build", false));
8904        assert!(!matcher.is_ignored(b"xbuild", false));
8905        assert!(matches!(
8906            classify_ignore_pattern(b"build*"),
8907            MatchKind::Prefix
8908        ));
8909    }
8910
8911    #[test]
8912    fn ignore_anchored_suffix_does_not_cross_slash() {
8913        // `/*.log` is anchored: matches `.log` files only at the matcher base,
8914        // never in a subdirectory — the slash guard in `match_segment`.
8915        let matcher = ignore_matcher(&[b"/*.log"]);
8916        assert!(matcher.is_ignored(b"x.log", false));
8917        assert!(!matcher.is_ignored(b"sub/x.log", false));
8918
8919        // Anchored literal likewise only matches at root.
8920        let matcher = ignore_matcher(&[b"/foo"]);
8921        assert!(matcher.is_ignored(b"foo", false));
8922        assert!(!matcher.is_ignored(b"a/foo", false));
8923    }
8924
8925    #[test]
8926    fn ignore_double_star_prefix_collapses_to_basename() {
8927        // `**/X` ≡ `X` for slash-free X (verified against `git check-ignore`).
8928        let matcher = ignore_matcher(&[b"**/Pods"]);
8929        assert!(matcher.is_ignored(b"a/b/Pods", true));
8930        assert!(matcher.is_ignored(b"Pods", true));
8931        assert!(!matcher.is_ignored(b"Pods_not", false));
8932
8933        let matcher = ignore_matcher(&[b"**/*.jks"]);
8934        assert!(matcher.is_ignored(b"x.jks", false));
8935        assert!(matcher.is_ignored(b"a/deep/y.jks", false));
8936        assert!(!matcher.is_ignored(b"x.jksx", false));
8937
8938        // `**/A/B` keeps a slash in the tail, so it stays a real glob and must
8939        // match the trailing path at any depth.
8940        let matcher = ignore_matcher(&[b"**/Flutter/ephemeral"]);
8941        assert!(matcher.is_ignored(b"Flutter/ephemeral", true));
8942        assert!(matcher.is_ignored(b"a/Flutter/ephemeral", true));
8943        assert!(!matcher.is_ignored(b"Flutter/other", true));
8944    }
8945
8946    #[test]
8947    fn ignore_complex_globs_still_use_the_engine() {
8948        let matcher = ignore_matcher(&[b"*.[Cc]ache"]);
8949        assert!(matcher.is_ignored(b"x.cache", false));
8950        assert!(matcher.is_ignored(b"x.Cache", false));
8951        assert!(!matcher.is_ignored(b"x.xache", false));
8952        assert!(matches!(
8953            classify_ignore_pattern(b"*.[Cc]ache"),
8954            MatchKind::Glob
8955        ));
8956
8957        let matcher = ignore_matcher(&[b"Icon?"]);
8958        assert!(matcher.is_ignored(b"IconA", false));
8959        assert!(!matcher.is_ignored(b"Icon", false));
8960        assert!(!matcher.is_ignored(b"IconAB", false));
8961
8962        // Multi-star is not a simple prefix/suffix.
8963        assert!(matches!(
8964            classify_ignore_pattern(b"app.*.symbols"),
8965            MatchKind::Glob
8966        ));
8967        assert!(matches!(classify_ignore_pattern(b"a*b*c"), MatchKind::Glob));
8968    }
8969
8970    #[test]
8971    fn ignore_negation_still_applies_after_fast_paths() {
8972        // Last match wins: a negated literal un-ignores a suffix-matched file.
8973        let matcher = ignore_matcher(&[b"*.log", b"!keep.log"]);
8974        assert!(matcher.is_ignored(b"a/x.log", false));
8975        assert!(!matcher.is_ignored(b"a/keep.log", false));
8976    }
8977
8978    #[test]
8979    fn read_expected_object_missing_blob_exposes_oid_and_kind() {
8980        let root = temp_root();
8981        let git_dir = root.join(".git");
8982        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
8983        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
8984        let missing = ObjectId::empty_blob(ObjectFormat::Sha1);
8985
8986        let err = read_expected_object(&db, &missing, ObjectType::Blob)
8987            .expect_err("missing blob should error");
8988        let kind = err.not_found_kind().expect("typed not found");
8989        assert_eq!(kind.object_id(), Some(missing));
8990        assert_eq!(kind.missing_object_kind(), Some(MissingObjectKind::Blob));
8991        assert_eq!(
8992            kind.missing_object_context(),
8993            Some(MissingObjectContext::WorktreeMaterialize)
8994        );
8995        fs::remove_dir_all(root).expect("test operation should succeed");
8996    }
8997
8998    #[test]
8999    fn update_index_adds_file_entry_and_blob() {
9000        let root = temp_root();
9001        let git_dir = root.join(".git");
9002        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
9003        fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
9004        let result = add_paths_to_index(
9005            &root,
9006            &git_dir,
9007            ObjectFormat::Sha1,
9008            &[PathBuf::from("hello.txt")],
9009        )
9010        .expect("test operation should succeed");
9011        assert_eq!(result.entries, 1);
9012        let index = Index::parse_v2_sha1(
9013            &fs::read(repository_index_path(git_dir)).expect("test operation should succeed"),
9014        )
9015        .expect("test operation should succeed");
9016        assert_eq!(index.entries[0].path, b"hello.txt");
9017        fs::remove_dir_all(root).expect("test operation should succeed");
9018    }
9019
9020    #[test]
9021    fn update_index_and_write_tree_support_sha256() {
9022        let root = temp_root();
9023        let git_dir = root.join(".git");
9024        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
9025        fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
9026        let result = add_paths_to_index(
9027            &root,
9028            &git_dir,
9029            ObjectFormat::Sha256,
9030            &[PathBuf::from("hello.txt")],
9031        )
9032        .expect("test operation should succeed");
9033        assert_eq!(result.entries, 1);
9034
9035        let index = Index::parse(
9036            &fs::read(repository_index_path(&git_dir)).expect("test operation should succeed"),
9037            ObjectFormat::Sha256,
9038        )
9039        .expect("test operation should succeed");
9040        assert_eq!(index.entries[0].path, b"hello.txt");
9041        assert_eq!(index.entries[0].oid.format(), ObjectFormat::Sha256);
9042
9043        let tree_oid = write_tree_from_index(&git_dir, ObjectFormat::Sha256)
9044            .expect("test operation should succeed");
9045        assert_eq!(tree_oid.format(), ObjectFormat::Sha256);
9046        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
9047        let tree = odb
9048            .read_object(&tree_oid)
9049            .expect("test operation should succeed");
9050        assert_eq!(tree.object_type, ObjectType::Tree);
9051        fs::remove_dir_all(root).expect("test operation should succeed");
9052    }
9053
9054    #[test]
9055    fn write_tree_from_index_writes_nested_tree_objects() {
9056        let root = temp_root();
9057        let git_dir = root.join(".git");
9058        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
9059        fs::create_dir_all(root.join("src")).expect("test operation should succeed");
9060        fs::write(root.join("README.md"), b"readme\n").expect("test operation should succeed");
9061        fs::write(root.join("src").join("lib.rs"), b"pub fn demo() {}\n")
9062            .expect("test operation should succeed");
9063        let result = add_paths_to_index(
9064            &root,
9065            &git_dir,
9066            ObjectFormat::Sha1,
9067            &[PathBuf::from("README.md"), PathBuf::from("src/lib.rs")],
9068        )
9069        .expect("test operation should succeed");
9070        assert_eq!(result.entries, 2);
9071        let tree_oid = write_tree_from_index(&git_dir, ObjectFormat::Sha1)
9072            .expect("test operation should succeed");
9073        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
9074        let tree = odb
9075            .read_object(&tree_oid)
9076            .expect("test operation should succeed");
9077        assert_eq!(tree.object_type, ObjectType::Tree);
9078        fs::remove_dir_all(root).expect("test operation should succeed");
9079    }
9080
9081    #[test]
9082    fn short_status_reports_added_and_untracked_paths() {
9083        let root = temp_root();
9084        let git_dir = root.join(".git");
9085        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
9086        fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
9087        fs::write(root.join("extra.txt"), b"extra\n").expect("test operation should succeed");
9088        add_paths_to_index(
9089            &root,
9090            &git_dir,
9091            ObjectFormat::Sha1,
9092            &[PathBuf::from("hello.txt")],
9093        )
9094        .expect("test operation should succeed");
9095        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
9096            .expect("test operation should succeed");
9097        assert_eq!(
9098            status
9099                .iter()
9100                .map(ShortStatusEntry::line)
9101                .collect::<Vec<_>>(),
9102            vec!["A  hello.txt", "?? extra.txt"]
9103        );
9104        fs::remove_dir_all(root).expect("test operation should succeed");
9105    }
9106
9107    #[test]
9108    fn worktree_root_is_none_for_bare_repository() {
9109        // A bare git_dir (basename `.git`) with `core.bare = true` must resolve to
9110        // `Ok(None)` rather than falling through to the "parent of .git" case.
9111        let root = temp_root();
9112        let git_dir = root.join(".git");
9113        fs::create_dir_all(&git_dir).expect("create bare git dir");
9114        // Hermetic minimal config — do not depend on host gitconfig.
9115        fs::write(git_dir.join("config"), b"[core]\n\tbare = true\n").expect("write bare config");
9116
9117        assert_eq!(
9118            worktree_root_for_git_dir(&git_dir).expect("resolve bare worktree root"),
9119            None,
9120            "a bare repository has no working tree"
9121        );
9122
9123        fs::remove_dir_all(root).expect("test operation should succeed");
9124    }
9125
9126    #[test]
9127    fn worktree_root_is_parent_for_non_bare_dot_git() {
9128        // A non-bare `.git` directory (no core.bare / core.bare = false) still
9129        // resolves to its parent — the ordinary non-bare layout.
9130        let root = temp_root();
9131        let work = root.join("work");
9132        let git_dir = work.join(".git");
9133        fs::create_dir_all(&git_dir).expect("create non-bare git dir");
9134        fs::write(
9135            git_dir.join("config"),
9136            b"[core]\n\tbare = false\n",
9137        )
9138        .expect("write non-bare config");
9139
9140        assert_eq!(
9141            worktree_root_for_git_dir(&git_dir).expect("resolve non-bare worktree root"),
9142            Some(work.clone()),
9143            "a non-bare .git dir resolves to its parent"
9144        );
9145
9146        fs::remove_dir_all(root).expect("test operation should succeed");
9147    }
9148
9149    fn temp_root() -> PathBuf {
9150        let path = std::env::temp_dir().join(format!(
9151            "sley-worktree-{}-{}",
9152            std::process::id(),
9153            TEMP_COUNTER.fetch_add(1, Ordering::Relaxed)
9154        ));
9155        fs::create_dir_all(&path).expect("test operation should succeed");
9156        path
9157    }
9158
9159    fn index_entry_for<'a>(index: &'a Index, path: &[u8]) -> &'a IndexEntry {
9160        index
9161            .entries
9162            .iter()
9163            .find(|entry| entry.path == path)
9164            .unwrap_or_else(|| panic!("missing index entry for {}", String::from_utf8_lossy(path)))
9165    }
9166
9167    fn read_index(git_dir: &Path) -> Index {
9168        Index::parse(
9169            &fs::read(repository_index_path(git_dir)).expect("test operation should succeed"),
9170            ObjectFormat::Sha1,
9171        )
9172        .expect("test operation should succeed")
9173    }
9174
9175    /// Stages `paths` from the worktree, writes their tree, wraps it in a commit
9176    /// object, and points `refs/heads/main` + `HEAD` at it. Returns the commit
9177    /// id. After this call the index reflects the committed tree.
9178    fn build_commit(root: &Path, git_dir: &Path, paths: &[&str]) -> ObjectId {
9179        let path_bufs = paths.iter().map(PathBuf::from).collect::<Vec<_>>();
9180        add_paths_to_index(root, git_dir, ObjectFormat::Sha1, &path_bufs)
9181            .expect("test operation should succeed");
9182        let tree = write_tree_from_index(git_dir, ObjectFormat::Sha1)
9183            .expect("test operation should succeed");
9184        let mut body = Vec::new();
9185        body.extend_from_slice(format!("tree {tree}\n").as_bytes());
9186        body.extend_from_slice(b"author Test <test@example.com> 0 +0000\n");
9187        body.extend_from_slice(b"committer Test <test@example.com> 0 +0000\n");
9188        body.extend_from_slice(b"\n");
9189        body.extend_from_slice(b"sparse fixture\n");
9190        let odb = FileObjectDatabase::from_git_dir(git_dir, ObjectFormat::Sha1);
9191        let commit = odb
9192            .write_object(EncodedObject::new(ObjectType::Commit, body))
9193            .expect("test operation should succeed");
9194        let refs = FileRefStore::new(git_dir, ObjectFormat::Sha1);
9195        let mut tx = refs.transaction();
9196        tx.update(RefUpdate {
9197            name: "refs/heads/main".into(),
9198            expected: None,
9199            new: RefTarget::Direct(commit),
9200            reflog: None,
9201        });
9202        tx.update(RefUpdate {
9203            name: "HEAD".into(),
9204            expected: None,
9205            new: RefTarget::Symbolic("refs/heads/main".into()),
9206            reflog: None,
9207        });
9208        tx.commit().expect("test operation should succeed");
9209        commit
9210    }
9211
9212    fn full_sparse(patterns: &[&[u8]]) -> SparseCheckout {
9213        SparseCheckout {
9214            patterns: patterns.iter().map(|pattern| pattern.to_vec()).collect(),
9215            sparse_index: false,
9216        }
9217    }
9218
9219    #[test]
9220    fn apply_sparse_checkout_full_mode_skips_out_of_cone_paths() {
9221        let root = temp_root();
9222        let git_dir = root.join(".git");
9223        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
9224        fs::create_dir_all(root.join("in")).expect("test operation should succeed");
9225        fs::create_dir_all(root.join("out")).expect("test operation should succeed");
9226        fs::write(root.join("in").join("keep.txt"), b"keep\n")
9227            .expect("test operation should succeed");
9228        fs::write(root.join("out").join("drop.txt"), b"drop\n")
9229            .expect("test operation should succeed");
9230        fs::write(root.join("top.txt"), b"top\n").expect("test operation should succeed");
9231        build_commit(&root, &git_dir, &["in/keep.txt", "out/drop.txt", "top.txt"]);
9232
9233        // Full (non-cone) pattern: keep only the `in/` subtree.
9234        let sparse = full_sparse(&[b"/in/"]);
9235        let result = apply_sparse_checkout_with_mode(
9236            &root,
9237            &git_dir,
9238            ObjectFormat::Sha1,
9239            &sparse,
9240            SparseCheckoutMode::Full,
9241        )
9242        .expect("test operation should succeed");
9243
9244        assert!(root.join("in").join("keep.txt").exists());
9245        assert!(!root.join("out").join("drop.txt").exists());
9246        assert!(!root.join("top.txt").exists());
9247        assert!(result.materialized.contains(&b"in/keep.txt".to_vec()));
9248        assert!(result.skipped.contains(&b"out/drop.txt".to_vec()));
9249        assert!(result.skipped.contains(&b"top.txt".to_vec()));
9250
9251        let index = read_index(&git_dir);
9252        assert!(!index_entry_skip_worktree(index_entry_for(
9253            &index,
9254            b"in/keep.txt"
9255        )));
9256        assert!(index_entry_skip_worktree(index_entry_for(
9257            &index,
9258            b"out/drop.txt"
9259        )));
9260        assert!(index_entry_skip_worktree(index_entry_for(
9261            &index, b"top.txt"
9262        )));
9263        // Out-of-cone entries are preserved in the index, just not on disk.
9264        assert_eq!(index.entries.len(), 3);
9265        fs::remove_dir_all(root).expect("test operation should succeed");
9266    }
9267
9268    #[test]
9269    fn apply_sparse_checkout_toggle_rematerializes() {
9270        let root = temp_root();
9271        let git_dir = root.join(".git");
9272        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
9273        fs::create_dir_all(root.join("a")).expect("test operation should succeed");
9274        fs::create_dir_all(root.join("b")).expect("test operation should succeed");
9275        fs::write(root.join("a").join("file.txt"), b"a\n").expect("test operation should succeed");
9276        fs::write(root.join("b").join("file.txt"), b"b\n").expect("test operation should succeed");
9277        build_commit(&root, &git_dir, &["a/file.txt", "b/file.txt"]);
9278
9279        // First narrow to `a/`.
9280        apply_sparse_checkout_with_mode(
9281            &root,
9282            &git_dir,
9283            ObjectFormat::Sha1,
9284            &full_sparse(&[b"/a/"]),
9285            SparseCheckoutMode::Full,
9286        )
9287        .expect("test operation should succeed");
9288        assert!(root.join("a").join("file.txt").exists());
9289        assert!(!root.join("b").join("file.txt").exists());
9290        let index = read_index(&git_dir);
9291        assert!(index_entry_skip_worktree(index_entry_for(
9292            &index,
9293            b"b/file.txt"
9294        )));
9295
9296        // Now switch the cone to `b/`: `a/` must leave, `b/` must come back with
9297        // the correct content, and the skip-worktree bits must flip.
9298        apply_sparse_checkout_with_mode(
9299            &root,
9300            &git_dir,
9301            ObjectFormat::Sha1,
9302            &full_sparse(&[b"/b/"]),
9303            SparseCheckoutMode::Full,
9304        )
9305        .expect("test operation should succeed");
9306        assert!(!root.join("a").join("file.txt").exists());
9307        assert!(root.join("b").join("file.txt").exists());
9308        assert_eq!(
9309            fs::read(root.join("b").join("file.txt")).expect("test operation should succeed"),
9310            b"b\n"
9311        );
9312        let index = read_index(&git_dir);
9313        assert!(index_entry_skip_worktree(index_entry_for(
9314            &index,
9315            b"a/file.txt"
9316        )));
9317        assert!(!index_entry_skip_worktree(index_entry_for(
9318            &index,
9319            b"b/file.txt"
9320        )));
9321        fs::remove_dir_all(root).expect("test operation should succeed");
9322    }
9323
9324    #[test]
9325    fn apply_sparse_checkout_cone_mode_matches_directory_prefixes() {
9326        let root = temp_root();
9327        let git_dir = root.join(".git");
9328        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
9329        fs::create_dir_all(root.join("kept").join("nested"))
9330            .expect("test operation should succeed");
9331        fs::create_dir_all(root.join("other")).expect("test operation should succeed");
9332        fs::write(root.join("kept").join("a.txt"), b"a\n").expect("test operation should succeed");
9333        fs::write(root.join("kept").join("nested").join("b.txt"), b"b\n")
9334            .expect("test operation should succeed");
9335        fs::write(root.join("other").join("c.txt"), b"c\n").expect("test operation should succeed");
9336        fs::write(root.join("root.txt"), b"r\n").expect("test operation should succeed");
9337        build_commit(
9338            &root,
9339            &git_dir,
9340            &["kept/a.txt", "kept/nested/b.txt", "other/c.txt", "root.txt"],
9341        );
9342
9343        // Standard cone patterns: top-level files plus the whole `kept/` tree.
9344        let sparse = SparseCheckout {
9345            patterns: vec![b"/*".to_vec(), b"!/*/".to_vec(), b"/kept/".to_vec()],
9346            sparse_index: false,
9347        };
9348        // Auto mode should detect cone shape on its own.
9349        assert!(patterns_are_cone(&sparse.patterns));
9350        apply_sparse_checkout(&root, &git_dir, ObjectFormat::Sha1, &sparse)
9351            .expect("test operation should succeed");
9352
9353        assert!(root.join("root.txt").exists());
9354        assert!(root.join("kept").join("a.txt").exists());
9355        assert!(root.join("kept").join("nested").join("b.txt").exists());
9356        assert!(!root.join("other").join("c.txt").exists());
9357
9358        let index = read_index(&git_dir);
9359        assert!(!index_entry_skip_worktree(index_entry_for(
9360            &index,
9361            b"root.txt"
9362        )));
9363        assert!(!index_entry_skip_worktree(index_entry_for(
9364            &index,
9365            b"kept/a.txt"
9366        )));
9367        assert!(!index_entry_skip_worktree(index_entry_for(
9368            &index,
9369            b"kept/nested/b.txt"
9370        )));
9371        assert!(index_entry_skip_worktree(index_entry_for(
9372            &index,
9373            b"other/c.txt"
9374        )));
9375        fs::remove_dir_all(root).expect("test operation should succeed");
9376    }
9377
9378    #[test]
9379    fn apply_sparse_checkout_honors_preexisting_skip_worktree_via_idempotence() {
9380        let root = temp_root();
9381        let git_dir = root.join(".git");
9382        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
9383        fs::create_dir_all(root.join("in")).expect("test operation should succeed");
9384        fs::create_dir_all(root.join("out")).expect("test operation should succeed");
9385        fs::write(root.join("in").join("keep.txt"), b"keep\n")
9386            .expect("test operation should succeed");
9387        fs::write(root.join("out").join("drop.txt"), b"drop\n")
9388            .expect("test operation should succeed");
9389        build_commit(&root, &git_dir, &["in/keep.txt", "out/drop.txt"]);
9390
9391        let sparse = full_sparse(&[b"/in/"]);
9392        apply_sparse_checkout_with_mode(
9393            &root,
9394            &git_dir,
9395            ObjectFormat::Sha1,
9396            &sparse,
9397            SparseCheckoutMode::Full,
9398        )
9399        .expect("test operation should succeed");
9400        assert!(!root.join("out").join("drop.txt").exists());
9401
9402        // Re-applying the same spec is a no-op: the already-skipped file stays
9403        // absent and the bit stays set (we do not resurrect it).
9404        let result = apply_sparse_checkout_with_mode(
9405            &root,
9406            &git_dir,
9407            ObjectFormat::Sha1,
9408            &sparse,
9409            SparseCheckoutMode::Full,
9410        )
9411        .expect("test operation should succeed");
9412        assert!(!root.join("out").join("drop.txt").exists());
9413        assert!(root.join("in").join("keep.txt").exists());
9414        assert!(result.skipped.contains(&b"out/drop.txt".to_vec()));
9415        let index = read_index(&git_dir);
9416        assert!(index_entry_skip_worktree(index_entry_for(
9417            &index,
9418            b"out/drop.txt"
9419        )));
9420        fs::remove_dir_all(root).expect("test operation should succeed");
9421    }
9422
9423    #[test]
9424    fn checkout_detached_sparse_only_writes_in_cone_paths() {
9425        let root = temp_root();
9426        let git_dir = root.join(".git");
9427        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
9428        fs::create_dir_all(root.join("keep")).expect("test operation should succeed");
9429        fs::create_dir_all(root.join("skip")).expect("test operation should succeed");
9430        fs::write(root.join("keep").join("a.txt"), b"a\n").expect("test operation should succeed");
9431        fs::write(root.join("skip").join("b.txt"), b"b\n").expect("test operation should succeed");
9432        let commit = build_commit(&root, &git_dir, &["keep/a.txt", "skip/b.txt"]);
9433
9434        // The worktree is clean and matches the commit. A sparse checkout must
9435        // keep the in-cone file and evict the out-of-cone one.
9436        let sparse = full_sparse(&[b"/keep/"]);
9437        let result = checkout_detached_sparse(
9438            &root,
9439            &git_dir,
9440            ObjectFormat::Sha1,
9441            &commit,
9442            b"Test <test@example.com> 0 +0000".to_vec(),
9443            b"checkout".to_vec(),
9444            &sparse,
9445        )
9446        .expect("test operation should succeed");
9447        assert_eq!(result.files, 2);
9448
9449        assert!(root.join("keep").join("a.txt").exists());
9450        assert_eq!(
9451            fs::read(root.join("keep").join("a.txt")).expect("test operation should succeed"),
9452            b"a\n"
9453        );
9454        assert!(!root.join("skip").join("b.txt").exists());
9455
9456        let index = read_index(&git_dir);
9457        assert_eq!(index.entries.len(), 2);
9458        assert!(!index_entry_skip_worktree(index_entry_for(
9459            &index,
9460            b"keep/a.txt"
9461        )));
9462        let skipped = index_entry_for(&index, b"skip/b.txt");
9463        assert!(index_entry_skip_worktree(skipped));
9464        // The skipped entry still carries the committed blob id and mode.
9465        assert_eq!(skipped.mode, 0o100644);
9466        fs::remove_dir_all(root).expect("test operation should succeed");
9467    }
9468
9469    // ----- content filtering: EOL / autocrlf + clean/smudge drivers -----
9470
9471    /// Build a [`GitConfig`] from raw config text.
9472    fn config_from(text: &str) -> GitConfig {
9473        GitConfig::parse(text.as_bytes()).expect("test operation should succeed")
9474    }
9475
9476    /// Resolve attribute checks against an on-disk `.gitattributes` in `root`.
9477    fn attrs(root: &Path, path: &[u8]) -> Vec<AttributeCheck> {
9478        filter_attribute_checks(root, path).expect("test operation should succeed")
9479    }
9480
9481    #[test]
9482    fn standard_attribute_matcher_matches_per_path_lookup() {
9483        let root = temp_root();
9484        fs::create_dir_all(root.join(".git").join("info"))
9485            .expect("test operation should succeed");
9486        fs::create_dir_all(root.join("src").join("nested"))
9487            .expect("test operation should succeed");
9488        fs::write(root.join(".gitattributes"), b"*.rs diff=rust\n")
9489            .expect("test operation should succeed");
9490        fs::write(root.join("src").join(".gitattributes"), b"*.rs diff=python\n")
9491            .expect("test operation should succeed");
9492        fs::write(
9493            root.join(".git").join("info").join("attributes"),
9494            b"src/nested/*.rs diff=java\n",
9495        )
9496        .expect("test operation should succeed");
9497
9498        let requested = vec![b"diff".to_vec()];
9499        let path = b"src/nested/file.rs";
9500        let per_path = standard_attributes_for_path(&root, path, &requested, false)
9501            .expect("test operation should succeed");
9502        let matcher = StandardAttributeMatcher::from_worktree_root(&root)
9503            .expect("test operation should succeed");
9504        assert_eq!(
9505            matcher.attributes_for_path(path, &requested, false),
9506            per_path
9507        );
9508
9509        fs::remove_dir_all(root).expect("test operation should succeed");
9510    }
9511
9512    #[test]
9513    fn crlf_to_lf_collapses_only_pairs() {
9514        assert_eq!(convert_crlf_to_lf(b"a\r\nb\r\n"), b"a\nb\n");
9515        // A lone CR (no following LF) is preserved.
9516        assert_eq!(convert_crlf_to_lf(b"a\rb"), b"a\rb");
9517        // An already-LF stream is unchanged.
9518        assert_eq!(convert_crlf_to_lf(b"a\nb\n"), b"a\nb\n");
9519    }
9520
9521    #[test]
9522    fn lf_to_crlf_does_not_double_convert() {
9523        assert_eq!(convert_lf_to_crlf(b"a\nb\n"), b"a\r\nb\r\n");
9524        // Existing CRLF is left intact (no extra CR added).
9525        assert_eq!(convert_lf_to_crlf(b"a\r\nb\r\n"), b"a\r\nb\r\n");
9526    }
9527
9528    #[test]
9529    fn autocrlf_round_trip_clean_then_smudge() {
9530        // autocrlf=true: worktree CRLF -> blob LF on clean, blob LF -> worktree
9531        // CRLF on smudge.
9532        let config = config_from("[core]\n\tautocrlf = true\n");
9533        let checks: Vec<AttributeCheck> = Vec::new();
9534        let worktree = b"line1\r\nline2\r\n";
9535        let blob = apply_clean_filter_with_attributes(&config, &checks, b"file.txt", worktree)
9536            .expect("test operation should succeed");
9537        assert_eq!(blob, b"line1\nline2\n", "clean must normalize CRLF to LF");
9538        let restored = apply_smudge_filter_with_attributes(&config, &checks, b"file.txt", &blob)
9539            .expect("test operation should succeed");
9540        assert_eq!(
9541            restored, worktree,
9542            "smudge must restore CRLF from the LF blob"
9543        );
9544    }
9545
9546    #[test]
9547    fn autocrlf_input_normalizes_on_clean_but_not_smudge() {
9548        // autocrlf=input: clean normalizes to LF, smudge leaves LF as-is.
9549        let config = config_from("[core]\n\tautocrlf = input\n");
9550        let checks: Vec<AttributeCheck> = Vec::new();
9551        let blob = apply_clean_filter_with_attributes(&config, &checks, b"file.txt", b"a\r\nb\r\n")
9552            .expect("test operation should succeed");
9553        assert_eq!(blob, b"a\nb\n");
9554        let smudged = apply_smudge_filter_with_attributes(&config, &checks, b"file.txt", &blob)
9555            .expect("test operation should succeed");
9556        assert_eq!(
9557            smudged, b"a\nb\n",
9558            "input mode must not add carriage returns"
9559        );
9560    }
9561
9562    #[test]
9563    fn eol_crlf_attribute_drives_conversion_without_config() {
9564        // No core.autocrlf; the `eol=crlf` attribute alone forces conversion.
9565        let config = config_from("");
9566        let checks = vec![AttributeCheck {
9567            attribute: b"eol".to_vec(),
9568            state: Some(AttributeState::Value(b"crlf".to_vec())),
9569        }];
9570        let blob = apply_clean_filter_with_attributes(&config, &checks, b"a.txt", b"x\r\ny\r\n")
9571            .expect("test operation should succeed");
9572        assert_eq!(blob, b"x\ny\n");
9573        let smudged = apply_smudge_filter_with_attributes(&config, &checks, b"a.txt", &blob)
9574            .expect("test operation should succeed");
9575        assert_eq!(smudged, b"x\r\ny\r\n");
9576    }
9577
9578    #[test]
9579    fn binary_attribute_disables_eol_conversion() {
9580        // `-text` (binary) must leave CRLF/NUL content untouched in both
9581        // directions even when autocrlf=true.
9582        let config = config_from("[core]\n\tautocrlf = true\n");
9583        let checks = vec![AttributeCheck {
9584            attribute: b"text".to_vec(),
9585            state: Some(AttributeState::Unset),
9586        }];
9587        let content = b"\x00\x01\r\n\x02\r\n".to_vec();
9588        let blob = apply_clean_filter_with_attributes(&config, &checks, b"data.bin", &content)
9589            .expect("test operation should succeed");
9590        assert_eq!(blob, content, "binary file must not be CRLF-normalized");
9591        let smudged = apply_smudge_filter_with_attributes(&config, &checks, b"data.bin", &blob)
9592            .expect("test operation should succeed");
9593        assert_eq!(
9594            smudged, content,
9595            "binary file must not gain carriage returns"
9596        );
9597    }
9598
9599    #[test]
9600    fn autocrlf_auto_skips_binary_looking_content() {
9601        // text=auto (via autocrlf) must not convert content that contains NUL.
9602        let config = config_from("[core]\n\tautocrlf = true\n");
9603        let checks: Vec<AttributeCheck> = Vec::new();
9604        let content = b"a\r\n\x00b\r\n".to_vec();
9605        let blob = apply_clean_filter_with_attributes(&config, &checks, b"f", &content)
9606            .expect("test operation should succeed");
9607        assert_eq!(blob, content, "binary-looking content stays untouched");
9608    }
9609
9610    #[test]
9611    fn autocrlf_via_add_and_checkout_round_trips() {
9612        // End-to-end: a CRLF worktree file is stored as an LF blob by the
9613        // filtered add path, and restored as CRLF by the filtered checkout.
9614        let root = temp_root();
9615        let git_dir = root.join(".git");
9616        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
9617        let config = config_from("[core]\n\tautocrlf = true\n");
9618
9619        fs::write(root.join("crlf.txt"), b"alpha\r\nbeta\r\n")
9620            .expect("test operation should succeed");
9621        add_paths_to_index_filtered(
9622            &root,
9623            &git_dir,
9624            ObjectFormat::Sha1,
9625            &[PathBuf::from("crlf.txt")],
9626            &config,
9627        )
9628        .expect("test operation should succeed");
9629
9630        // The stored blob must be LF-normalized.
9631        let index = read_index(&git_dir);
9632        let entry = index_entry_for(&index, b"crlf.txt");
9633        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
9634        let blob = odb
9635            .read_object(&entry.oid)
9636            .expect("test operation should succeed");
9637        assert_eq!(blob.body, b"alpha\nbeta\n");
9638
9639        // Commit and point HEAD at it, then re-checkout with smudge filtering.
9640        let tree = write_tree_from_index(&git_dir, ObjectFormat::Sha1)
9641            .expect("test operation should succeed");
9642        let mut body = Vec::new();
9643        body.extend_from_slice(format!("tree {tree}\n").as_bytes());
9644        body.extend_from_slice(b"author T <t@e> 0 +0000\ncommitter T <t@e> 0 +0000\n\nm\n");
9645        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
9646        let commit = odb
9647            .write_object(EncodedObject::new(ObjectType::Commit, body))
9648            .expect("test operation should succeed");
9649        let refs = FileRefStore::new(&git_dir, ObjectFormat::Sha1);
9650        let mut tx = refs.transaction();
9651        tx.update(RefUpdate {
9652            name: "HEAD".into(),
9653            expected: None,
9654            new: RefTarget::Direct(commit),
9655            reflog: None,
9656        });
9657        tx.commit().expect("test operation should succeed");
9658
9659        // Make the worktree match the committed (LF) blob so the tree is clean
9660        // for checkout; `short_status`/`worktree_entries` compare by content
9661        // hash and are not filter-aware. Checkout will then smudge it to CRLF.
9662        fs::write(root.join("crlf.txt"), b"alpha\nbeta\n").expect("test operation should succeed");
9663        checkout_detached_filtered(
9664            &root,
9665            &git_dir,
9666            ObjectFormat::Sha1,
9667            &commit,
9668            b"T <t@e> 0 +0000".to_vec(),
9669            b"co".to_vec(),
9670            &config,
9671        )
9672        .expect("test operation should succeed");
9673        assert_eq!(
9674            fs::read(root.join("crlf.txt")).expect("test operation should succeed"),
9675            b"alpha\r\nbeta\r\n",
9676            "checkout must restore CRLF line endings"
9677        );
9678        fs::remove_dir_all(root).expect("test operation should succeed");
9679    }
9680
9681    #[test]
9682    fn driver_filter_clean_and_smudge_transform_both_directions() {
9683        // filter=case: clean upper-cases (worktree -> blob), smudge lower-cases
9684        // (blob -> worktree).
9685        let config =
9686            config_from("[filter \"case\"]\n\tclean = tr a-z A-Z\n\tsmudge = tr A-Z a-z\n");
9687        let checks = vec![AttributeCheck {
9688            attribute: b"filter".to_vec(),
9689            state: Some(AttributeState::Value(b"case".to_vec())),
9690        }];
9691        let blob = apply_clean_filter_with_attributes(&config, &checks, b"f.txt", b"Hello World")
9692            .expect("test operation should succeed");
9693        assert_eq!(blob, b"HELLO WORLD", "clean driver must upper-case");
9694        let worktree =
9695            apply_smudge_filter_with_attributes(&config, &checks, b"f.txt", b"HELLO WORLD")
9696                .expect("test operation should succeed");
9697        assert_eq!(worktree, b"hello world", "smudge driver must lower-case");
9698    }
9699
9700    #[test]
9701    fn driver_filter_resolved_from_gitattributes_file() {
9702        // The filter name is read from a real `.gitattributes`, the commands from
9703        // config; exercises the public worktree-rooted entry points.
9704        let root = temp_root();
9705        let git_dir = root.join(".git");
9706        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
9707        fs::write(root.join(".gitattributes"), b"*.dat filter=rot\n")
9708            .expect("test operation should succeed");
9709        let config =
9710            config_from("[filter \"rot\"]\n\tclean = sed s/a/b/g\n\tsmudge = sed s/b/a/g\n");
9711        // Clean reads attributes from the live worktree `.gitattributes`.
9712        let blob = apply_clean_filter(&root, &git_dir, &config, b"x.dat", b"banana")
9713            .expect("test operation should succeed");
9714        assert_eq!(blob, b"bbnbnb");
9715        // Smudge reads attributes from the index (the worktree file may not
9716        // exist yet during checkout), so stage `.gitattributes` first.
9717        add_paths_to_index(
9718            &root,
9719            &git_dir,
9720            ObjectFormat::Sha1,
9721            &[PathBuf::from(".gitattributes")],
9722        )
9723        .expect("test operation should succeed");
9724        let smudged = apply_smudge_filter(
9725            &root,
9726            &git_dir,
9727            ObjectFormat::Sha1,
9728            &config,
9729            b"x.dat",
9730            &blob,
9731        )
9732        .expect("test operation should succeed");
9733        // sed s/b/a/g is not a perfect inverse, but verifies the smudge command
9734        // ran on the blob bytes.
9735        assert_eq!(smudged, b"aanana");
9736        fs::remove_dir_all(root).expect("test operation should succeed");
9737    }
9738
9739    #[test]
9740    fn required_filter_failure_is_fatal() {
9741        // A required filter whose command fails must surface an error.
9742        let config = config_from("[filter \"boom\"]\n\tclean = false\n\trequired = true\n");
9743        let checks = vec![AttributeCheck {
9744            attribute: b"filter".to_vec(),
9745            state: Some(AttributeState::Value(b"boom".to_vec())),
9746        }];
9747        let err = apply_clean_filter_with_attributes(&config, &checks, b"f", b"data")
9748            .expect_err("required filter failure must error");
9749        assert!(matches!(err, GitError::Command(_)), "got {err:?}");
9750    }
9751
9752    #[test]
9753    fn required_filter_missing_command_is_fatal() {
9754        // required=true but no clean command for this direction is also fatal.
9755        let config = config_from("[filter \"need\"]\n\tsmudge = cat\n\trequired = true\n");
9756        let checks = vec![AttributeCheck {
9757            attribute: b"filter".to_vec(),
9758            state: Some(AttributeState::Value(b"need".to_vec())),
9759        }];
9760        let err = apply_clean_filter_with_attributes(&config, &checks, b"f", b"data")
9761            .expect_err("required filter without a clean command must error");
9762        assert!(matches!(err, GitError::Command(_)), "got {err:?}");
9763    }
9764
9765    #[test]
9766    fn non_required_filter_failure_passes_through() {
9767        // A non-required filter that fails must pass the content through
9768        // unchanged rather than erroring.
9769        let config = config_from("[filter \"opt\"]\n\tclean = false\n");
9770        let checks = vec![AttributeCheck {
9771            attribute: b"filter".to_vec(),
9772            state: Some(AttributeState::Value(b"opt".to_vec())),
9773        }];
9774        let out = apply_clean_filter_with_attributes(&config, &checks, b"f", b"keepme")
9775            .expect("test operation should succeed");
9776        assert_eq!(
9777            out, b"keepme",
9778            "optional filter failure passes content through"
9779        );
9780    }
9781
9782    #[test]
9783    fn filter_with_no_command_is_noop() {
9784        // filter=name with no configured commands and not required is ignored.
9785        let config = config_from("");
9786        let checks = vec![AttributeCheck {
9787            attribute: b"filter".to_vec(),
9788            state: Some(AttributeState::Value(b"ghost".to_vec())),
9789        }];
9790        let out = apply_clean_filter_with_attributes(&config, &checks, b"f", b"unchanged")
9791            .expect("test operation should succeed");
9792        assert_eq!(out, b"unchanged");
9793    }
9794
9795    #[test]
9796    fn driver_and_eol_compose_on_clean_and_smudge() {
9797        // filter=case + autocrlf=true: clean runs the driver then CRLF->LF;
9798        // smudge runs LF->CRLF then the driver.
9799        let config = config_from(
9800            "[core]\n\tautocrlf = true\n[filter \"case\"]\n\tclean = tr a-z A-Z\n\tsmudge = tr A-Z a-z\n",
9801        );
9802        let checks = vec![
9803            AttributeCheck {
9804                attribute: b"filter".to_vec(),
9805                state: Some(AttributeState::Value(b"case".to_vec())),
9806            },
9807            AttributeCheck {
9808                attribute: b"text".to_vec(),
9809                state: Some(AttributeState::Set),
9810            },
9811        ];
9812        let blob = apply_clean_filter_with_attributes(&config, &checks, b"f.txt", b"ab\r\ncd\r\n")
9813            .expect("test operation should succeed");
9814        assert_eq!(blob, b"AB\nCD\n", "clean: upper-case then CRLF->LF");
9815        let worktree = apply_smudge_filter_with_attributes(&config, &checks, b"f.txt", &blob)
9816            .expect("test operation should succeed");
9817        assert_eq!(
9818            worktree, b"ab\r\ncd\r\n",
9819            "smudge: LF->CRLF then lower-case"
9820        );
9821    }
9822
9823    #[test]
9824    fn attrs_helper_reads_filter_from_disk() {
9825        let root = temp_root();
9826        fs::write(root.join(".gitattributes"), b"*.txt text\n*.bin -text\n")
9827            .expect("test operation should succeed");
9828        let text = attrs(&root, b"a.txt");
9829        assert!(
9830            text.iter()
9831                .any(|c| c.attribute == b"text" && c.state == Some(AttributeState::Set))
9832        );
9833        let bin = attrs(&root, b"a.bin");
9834        assert!(
9835            bin.iter()
9836                .any(|c| c.attribute == b"text" && c.state == Some(AttributeState::Unset))
9837        );
9838        fs::remove_dir_all(root).expect("test operation should succeed");
9839    }
9840
9841    /// Builds a stat cache holding a single stage-0 entry whose size+mtime match
9842    /// `file`'s real metadata, with the index-file mtime placed strictly after
9843    /// the entry mtime so the entry reads as non-racy by default. The entry's oid
9844    /// is `oid` and its mode is `mode`.
9845    fn stat_cache_for(file: &Path, oid: ObjectId, mode: u32) -> (IndexStatCache, IndexEntry) {
9846        let metadata = fs::metadata(file).expect("test operation should succeed");
9847        let mut entry = index_entry_from_metadata(b"f.txt".to_vec(), oid, &metadata);
9848        entry.mode = mode;
9849        let index_mtime = Some((u64::from(entry.mtime_seconds) + 10, 0));
9850        let mut entries = HashMap::new();
9851        entries.insert(entry.path.as_bytes().to_vec(), entry.clone());
9852        (
9853            IndexStatCache {
9854                entries,
9855                index_mtime,
9856            },
9857            entry,
9858        )
9859    }
9860
9861    #[test]
9862    fn reuse_tracked_entry_only_reuses_clean_non_racy_match() {
9863        let root = temp_root();
9864        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
9865        let file = root.join("f.txt");
9866        let metadata = fs::metadata(&file).expect("test operation should succeed");
9867        let real_mode = file_mode(&metadata);
9868        let oid = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec())
9869            .object_id(ObjectFormat::Sha1)
9870            .expect("test operation should succeed");
9871
9872        // Clean, non-racy, matching stat + mode -> reuse the cached oid.
9873        let (cache, _) = stat_cache_for(&file, oid, real_mode);
9874        let reused = cache.reuse_tracked_entry(b"f.txt", &metadata);
9875        assert_eq!(
9876            reused,
9877            Some(TrackedEntry {
9878                mode: real_mode,
9879                oid,
9880            }),
9881            "a clean non-racy stat+mode match must reuse the staged oid"
9882        );
9883
9884        // No stage-0 entry for the path -> must hash.
9885        assert_eq!(
9886            cache.reuse_tracked_entry(b"other.txt", &metadata),
9887            None,
9888            "a path with no cached entry must fall through to hashing"
9889        );
9890
9891        // Size differs from the file -> must hash.
9892        let (mut size_cache, mut shrunk) = stat_cache_for(&file, oid, real_mode);
9893        shrunk.size = shrunk.size.saturating_sub(1);
9894        size_cache.entries.insert(shrunk.path.to_vec(), shrunk);
9895        assert_eq!(
9896            size_cache.reuse_tracked_entry(b"f.txt", &metadata),
9897            None,
9898            "a size mismatch must fall through to hashing"
9899        );
9900
9901        // Mode differs (e.g. a chmod that did not move mtime) -> must hash.
9902        let (mode_cache, _) = stat_cache_for(&file, oid, 0o100755);
9903        assert_eq!(
9904            mode_cache.reuse_tracked_entry(b"f.txt", &metadata),
9905            None,
9906            "a mode mismatch must fall through to hashing"
9907        );
9908
9909        // Racily clean (index mtime not strictly after the entry mtime) -> hash.
9910        let (mut racy_cache, entry) = stat_cache_for(&file, oid, real_mode);
9911        racy_cache.index_mtime = Some((
9912            u64::from(entry.mtime_seconds),
9913            u64::from(entry.mtime_nanoseconds),
9914        ));
9915        assert_eq!(
9916            racy_cache.reuse_tracked_entry(b"f.txt", &metadata),
9917            None,
9918            "a racily-clean entry must always be re-hashed"
9919        );
9920
9921        // Unknown index mtime is treated as racy -> hash.
9922        let (mut unknown_cache, _) = stat_cache_for(
9923            &file,
9924            EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec())
9925                .object_id(ObjectFormat::Sha1)
9926                .expect("test operation should succeed"),
9927            real_mode,
9928        );
9929        unknown_cache.index_mtime = None;
9930        assert_eq!(
9931            unknown_cache.reuse_tracked_entry(b"f.txt", &metadata),
9932            None,
9933            "an unknown index mtime must be treated conservatively as racy"
9934        );
9935
9936        fs::remove_dir_all(root).expect("test operation should succeed");
9937    }
9938
9939    #[test]
9940    fn index_stat_probe_cache_serves_many_paths_from_one_index_parse() {
9941        let root = temp_root();
9942        let git_dir = root.join(".git");
9943        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
9944        fs::write(root.join("a.txt"), b"alpha\n").expect("test operation should succeed");
9945        fs::write(root.join("b.txt"), b"bravo\n").expect("test operation should succeed");
9946        build_commit(&root, &git_dir, &["a.txt", "b.txt"]);
9947
9948        let cache = IndexStatProbeCache::from_repository_index(&git_dir, ObjectFormat::Sha1)
9949            .expect("probe cache");
9950        assert_eq!(cache.len(), 2);
9951        assert!(cache.contains_git_path(b"a.txt"));
9952        assert!(cache.contains_git_path(b"b.txt"));
9953        let a = cache.probe_for_git_path(b"a.txt").expect("a probe");
9954        let b = cache.probe_for_git_path(b"b.txt").expect("b probe");
9955        assert_eq!(a.entry().path, b"a.txt");
9956        assert_eq!(b.entry().path, b"b.txt");
9957        assert_eq!(a.index_mtime(), cache.index_mtime());
9958        assert_eq!(b.index_mtime(), cache.index_mtime());
9959        assert!(
9960            cache.probe_for_git_path(b"missing.txt").is_none(),
9961            "missing paths should not allocate probes"
9962        );
9963
9964        let one_shot =
9965            IndexStatProbe::from_repository_index(&git_dir, ObjectFormat::Sha1, b"a.txt")
9966                .expect("legacy one-shot probe")
9967                .expect("a probe");
9968        assert_eq!(one_shot.entry().path, b"a.txt");
9969        assert_eq!(one_shot.index_mtime(), cache.index_mtime());
9970
9971        fs::remove_dir_all(root).expect("test operation should succeed");
9972    }
9973
9974    #[test]
9975    fn short_status_detects_same_length_content_change() {
9976        let root = temp_root();
9977        let git_dir = root.join(".git");
9978        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
9979        fs::write(root.join("f.txt"), b"aaaa\n").expect("test operation should succeed");
9980        build_commit(&root, &git_dir, &["f.txt"]);
9981        // Overwrite with the SAME byte length but different content. Right after
9982        // staging the entry is racily clean (index mtime >= entry mtime), so the
9983        // stat shortcut must not be trusted and the change must surface as M.
9984        fs::write(root.join("f.txt"), b"bbbb\n").expect("test operation should succeed");
9985        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
9986            .expect("test operation should succeed");
9987        assert_eq!(
9988            status
9989                .iter()
9990                .map(ShortStatusEntry::line)
9991                .collect::<Vec<_>>(),
9992            vec![" M f.txt"],
9993            "a same-length content change must be reported modified"
9994        );
9995        fs::remove_dir_all(root).expect("test operation should succeed");
9996    }
9997
9998    #[test]
9999    fn short_status_clean_after_byte_identical_rewrite() {
10000        let root = temp_root();
10001        let git_dir = root.join(".git");
10002        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
10003        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
10004        build_commit(&root, &git_dir, &["f.txt"]);
10005        // Rewrite with byte-identical content; the mtime moves so the stat
10006        // shortcut declines to reuse and the fallback hash proves it clean.
10007        std::thread::sleep(std::time::Duration::from_millis(20));
10008        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
10009        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
10010            .expect("test operation should succeed");
10011        assert!(
10012            status.is_empty(),
10013            "a byte-identical rewrite must be clean via the fallback hash, got {status:?}"
10014        );
10015        fs::remove_dir_all(root).expect("test operation should succeed");
10016    }
10017
10018    #[test]
10019    fn short_status_trusts_stat_cache_and_skips_rehash() {
10020        let root = temp_root();
10021        let git_dir = root.join(".git");
10022        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
10023        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
10024        build_commit(&root, &git_dir, &["f.txt"]);
10025
10026        // Plant a BOGUS oid in the stage-0 entry while preserving its size+mtime,
10027        // so a real re-hash of the (unchanged) worktree file would NOT match it.
10028        let index_path = repository_index_path(&git_dir);
10029        let mut index = read_index(&git_dir);
10030        let bogus = ObjectId::from_hex(ObjectFormat::Sha1, &"0".repeat(40))
10031            .expect("test operation should succeed");
10032        let real_oid = index_entry_for(&index, b"f.txt").oid;
10033        assert_ne!(
10034            real_oid, bogus,
10035            "fixture oid should differ from the bogus oid"
10036        );
10037        index
10038            .entries
10039            .iter_mut()
10040            .find(|entry| entry.path == b"f.txt")
10041            .expect("test operation should succeed")
10042            .oid = bogus.clone();
10043        fs::write(
10044            &index_path,
10045            index
10046                .write(ObjectFormat::Sha1)
10047                .expect("test operation should succeed"),
10048        )
10049        .expect("test operation should succeed");
10050
10051        // Make the index file STRICTLY newer than the entry mtime (non-racy) by
10052        // waiting past one-second filesystem granularity and rewriting it, so the
10053        // racy-clean guard does not force a re-hash.
10054        std::thread::sleep(std::time::Duration::from_millis(1100));
10055        fs::write(
10056            &index_path,
10057            fs::read(&index_path).expect("test operation should succeed"),
10058        )
10059        .expect("test operation should succeed");
10060
10061        // The file is unchanged on disk, so a trusted stat reuses the bogus index
10062        // oid for the worktree entry: worktree-oid == index-oid == bogus, so the
10063        // WORKTREE column is clean. Had status re-hashed the file, the real oid
10064        // would differ from the bogus index oid and the worktree column would be
10065        // 'M'. (The index-vs-HEAD column is 'M' because we corrupted the index
10066        // oid away from HEAD; that is expected and not what this test asserts.)
10067        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
10068            .expect("test operation should succeed");
10069        let entry = status
10070            .iter()
10071            .find(|entry| entry.path == b"f.txt")
10072            .expect("f.txt should appear (its index oid now differs from HEAD)");
10073        assert_eq!(
10074            entry.worktree, b' ',
10075            "non-racy stat match must trust the cached oid (no re-hash); worktree column was {}",
10076            entry.worktree as char
10077        );
10078        assert_eq!(
10079            entry.index_oid.as_ref(),
10080            Some(&bogus),
10081            "the worktree entry must have reused the planted bogus index oid, not the real hash"
10082        );
10083
10084        fs::remove_dir_all(root).expect("test operation should succeed");
10085    }
10086
10087    #[test]
10088    fn worktree_entry_state_detects_same_size_content_change() {
10089        let root = temp_root();
10090        let git_dir = root.join(".git");
10091        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
10092        fs::write(root.join("f.txt"), b"aaaa\n").expect("test operation should succeed");
10093        build_commit(&root, &git_dir, &["f.txt"]);
10094        let index = read_index(&git_dir);
10095        let entry = index_entry_for(&index, b"f.txt").clone();
10096        let probe = IndexStatProbe::from_index_entry_and_index_path(
10097            entry.clone(),
10098            repository_index_path(&git_dir),
10099        );
10100
10101        fs::write(root.join("f.txt"), b"bbbb\n").expect("test operation should succeed");
10102        let state = worktree_entry_state(
10103            &root,
10104            &git_dir,
10105            ObjectFormat::Sha1,
10106            Path::new("f.txt"),
10107            &entry.oid,
10108            entry.mode,
10109            Some(&probe),
10110        )
10111        .expect("test operation should succeed");
10112        assert_eq!(state, WorktreeEntryState::Modified);
10113
10114        fs::remove_dir_all(root).expect("test operation should succeed");
10115    }
10116
10117    #[test]
10118    fn worktree_entry_state_reports_deleted_for_missing_and_parent_not_directory() {
10119        let root = temp_root();
10120        let git_dir = root.join(".git");
10121        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
10122        fs::create_dir_all(root.join("dir")).expect("test operation should succeed");
10123        fs::write(root.join("dir").join("f.txt"), b"hello\n")
10124            .expect("test operation should succeed");
10125        build_commit(&root, &git_dir, &["dir/f.txt"]);
10126        let index = read_index(&git_dir);
10127        let entry = index_entry_for(&index, b"dir/f.txt").clone();
10128
10129        fs::remove_file(root.join("dir").join("f.txt")).expect("test operation should succeed");
10130        let missing = worktree_entry_state_by_git_path(
10131            &root,
10132            &git_dir,
10133            ObjectFormat::Sha1,
10134            b"dir/f.txt",
10135            &entry.oid,
10136            entry.mode,
10137            None,
10138        )
10139        .expect("test operation should succeed");
10140        assert_eq!(missing, WorktreeEntryState::Deleted);
10141
10142        fs::remove_dir(root.join("dir")).expect("test operation should succeed");
10143        fs::write(root.join("dir"), b"not a directory").expect("test operation should succeed");
10144        let parent_not_directory = worktree_entry_state_by_git_path(
10145            &root,
10146            &git_dir,
10147            ObjectFormat::Sha1,
10148            b"dir/f.txt",
10149            &entry.oid,
10150            entry.mode,
10151            None,
10152        )
10153        .expect("test operation should succeed");
10154        assert_eq!(parent_not_directory, WorktreeEntryState::Deleted);
10155
10156        fs::remove_dir_all(root).expect("test operation should succeed");
10157    }
10158
10159    #[test]
10160    fn worktree_entry_state_trusts_clean_non_racy_probe() {
10161        let root = temp_root();
10162        let git_dir = root.join(".git");
10163        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
10164        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
10165        build_commit(&root, &git_dir, &["f.txt"]);
10166        let index_path = repository_index_path(&git_dir);
10167        let mut index = read_index(&git_dir);
10168        let bogus = ObjectId::from_hex(ObjectFormat::Sha1, &"1".repeat(40))
10169            .expect("test operation should succeed");
10170        index
10171            .entries
10172            .iter_mut()
10173            .find(|entry| entry.path == b"f.txt")
10174            .expect("test operation should succeed")
10175            .oid = bogus;
10176        fs::write(
10177            &index_path,
10178            index
10179                .write(ObjectFormat::Sha1)
10180                .expect("test operation should succeed"),
10181        )
10182        .expect("test operation should succeed");
10183        std::thread::sleep(std::time::Duration::from_millis(1100));
10184        fs::write(
10185            &index_path,
10186            fs::read(&index_path).expect("test operation should succeed"),
10187        )
10188        .expect("test operation should succeed");
10189        let index = read_index(&git_dir);
10190        let entry = index_entry_for(&index, b"f.txt").clone();
10191        let probe = IndexStatProbe::from_index_entry_and_index_path(
10192            entry.clone(),
10193            repository_index_path(&git_dir),
10194        );
10195
10196        let state = worktree_entry_state(
10197            &root,
10198            &git_dir,
10199            ObjectFormat::Sha1,
10200            Path::new("f.txt"),
10201            &entry.oid,
10202            entry.mode,
10203            Some(&probe),
10204        )
10205        .expect("test operation should succeed");
10206        assert_eq!(
10207            state,
10208            WorktreeEntryState::Clean,
10209            "a non-racy stat match must be enough to prove this path clean"
10210        );
10211
10212        fs::remove_dir_all(root).expect("test operation should succeed");
10213    }
10214
10215    #[test]
10216    fn worktree_entry_state_rehashes_racy_probe() {
10217        let root = temp_root();
10218        let git_dir = root.join(".git");
10219        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
10220        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
10221        build_commit(&root, &git_dir, &["f.txt"]);
10222        let index = read_index(&git_dir);
10223        let mut entry = index_entry_for(&index, b"f.txt").clone();
10224        entry.oid = ObjectId::from_hex(ObjectFormat::Sha1, &"2".repeat(40))
10225            .expect("test operation should succeed");
10226        let probe = IndexStatProbe::from_index_entry(
10227            entry.clone(),
10228            Some((
10229                u64::from(entry.mtime_seconds),
10230                u64::from(entry.mtime_nanoseconds),
10231            )),
10232        );
10233
10234        let state = worktree_entry_state(
10235            &root,
10236            &git_dir,
10237            ObjectFormat::Sha1,
10238            Path::new("f.txt"),
10239            &entry.oid,
10240            entry.mode,
10241            Some(&probe),
10242        )
10243        .expect("test operation should succeed");
10244        assert_eq!(
10245            state,
10246            WorktreeEntryState::Modified,
10247            "a racily-clean stat match must fall through to hashing"
10248        );
10249
10250        fs::remove_dir_all(root).expect("test operation should succeed");
10251    }
10252
10253    #[cfg(unix)]
10254    #[test]
10255    fn worktree_entry_state_detects_chmod_only_change() {
10256        use std::os::unix::fs::PermissionsExt;
10257
10258        let root = temp_root();
10259        let git_dir = root.join(".git");
10260        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
10261        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
10262        build_commit(&root, &git_dir, &["f.txt"]);
10263        let index = read_index(&git_dir);
10264        let entry = index_entry_for(&index, b"f.txt").clone();
10265
10266        let file = root.join("f.txt");
10267        let mut permissions = fs::metadata(&file)
10268            .expect("test operation should succeed")
10269            .permissions();
10270        permissions.set_mode(permissions.mode() | 0o111);
10271        fs::set_permissions(&file, permissions).expect("test operation should succeed");
10272        let state = worktree_entry_state(
10273            &root,
10274            &git_dir,
10275            ObjectFormat::Sha1,
10276            Path::new("f.txt"),
10277            &entry.oid,
10278            entry.mode,
10279            None,
10280        )
10281        .expect("test operation should succeed");
10282        assert_eq!(state, WorktreeEntryState::Modified);
10283
10284        fs::remove_dir_all(root).expect("test operation should succeed");
10285    }
10286
10287    #[cfg(unix)]
10288    #[test]
10289    fn worktree_entry_state_detects_symlink_target_change() {
10290        use std::os::unix::fs::symlink;
10291
10292        let root = temp_root();
10293        let git_dir = root.join(".git");
10294        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
10295        symlink("one", root.join("link")).expect("test operation should succeed");
10296        build_commit(&root, &git_dir, &["link"]);
10297        let index = read_index(&git_dir);
10298        let entry = index_entry_for(&index, b"link").clone();
10299
10300        fs::remove_file(root.join("link")).expect("test operation should succeed");
10301        symlink("two", root.join("link")).expect("test operation should succeed");
10302        let state = worktree_entry_state(
10303            &root,
10304            &git_dir,
10305            ObjectFormat::Sha1,
10306            Path::new("link"),
10307            &entry.oid,
10308            entry.mode,
10309            None,
10310        )
10311        .expect("test operation should succeed");
10312        assert_eq!(state, WorktreeEntryState::Modified);
10313
10314        fs::remove_dir_all(root).expect("test operation should succeed");
10315    }
10316
10317    #[test]
10318    fn worktree_entry_state_treats_present_unpopulated_gitlink_directory_as_clean() {
10319        let root = temp_root();
10320        let git_dir = root.join(".git");
10321        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
10322        fs::create_dir_all(root.join("submodule")).expect("test operation should succeed");
10323        let oid = ObjectId::from_hex(ObjectFormat::Sha1, &"3".repeat(40))
10324            .expect("test operation should succeed");
10325
10326        let state = worktree_entry_state(
10327            &root,
10328            &git_dir,
10329            ObjectFormat::Sha1,
10330            Path::new("submodule"),
10331            &oid,
10332            0o160000,
10333            None,
10334        )
10335        .expect("test operation should succeed");
10336        assert_eq!(state, WorktreeEntryState::Clean);
10337
10338        fs::remove_dir_all(root).expect("test operation should succeed");
10339    }
10340
10341    #[test]
10342    fn short_status_empty_on_unborn_repository() {
10343        let root = temp_root();
10344        let git_dir = root.join(".git");
10345        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
10346        fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n")
10347            .expect("test operation should succeed");
10348        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
10349            .expect("test operation should succeed");
10350        assert!(
10351            status.is_empty(),
10352            "an unborn repository with an empty worktree must be clean, got {status:?}"
10353        );
10354        fs::remove_dir_all(root).expect("test operation should succeed");
10355    }
10356
10357    #[test]
10358    fn untracked_paths_skips_embedded_git_internals() {
10359        let root = temp_root();
10360        let git_dir = root.join(".git");
10361        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
10362        fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n")
10363            .expect("test operation should succeed");
10364        let nested = root.join("not-a-submodule");
10365        fs::create_dir_all(nested.join(".git")).expect("test operation should succeed");
10366        fs::write(nested.join(".git/HEAD"), "ref: refs/heads/main\n")
10367            .expect("test operation should succeed");
10368        fs::write(nested.join("file.txt"), b"inside\n").expect("test operation should succeed");
10369        let paths = untracked_paths(&root, &git_dir, ObjectFormat::Sha1)
10370            .expect("test operation should succeed");
10371        assert!(
10372            paths.iter().any(|path| path == b"not-a-submodule/"),
10373            "embedded repository directory should be listed, got {paths:?}"
10374        );
10375        assert!(
10376            !paths
10377                .iter()
10378                .any(|path| path.starts_with(b"not-a-submodule/.git")),
10379            "embedded .git internals must not be listed, got {paths:?}"
10380        );
10381        fs::remove_dir_all(root).expect("test operation should succeed");
10382    }
10383
10384    #[cfg(unix)]
10385    #[test]
10386    fn untracked_paths_lists_symlink() {
10387        use std::os::unix::fs::symlink;
10388
10389        let root = temp_root();
10390        let git_dir = root.join(".git");
10391        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
10392        fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n")
10393            .expect("test operation should succeed");
10394        fs::write(root.join("target.txt"), b"target\n").expect("test operation should succeed");
10395        symlink(root.join("target.txt"), root.join("path1")).expect("create symlink");
10396        let paths = untracked_paths(&root, &git_dir, ObjectFormat::Sha1)
10397            .expect("test operation should succeed");
10398        assert!(
10399            paths.contains(&b"path1".to_vec()),
10400            "untracked symlink must be listed, got {paths:?}"
10401        );
10402        fs::remove_dir_all(root).expect("test operation should succeed");
10403    }
10404}