Skip to main content

sley_worktree/
lib.rs

1#![allow(
2    clippy::collapsible_if,
3    clippy::if_same_then_else,
4    clippy::ptr_arg,
5    clippy::too_many_arguments
6)]
7
8use sley_config::GitConfig;
9use sley_core::{
10    BString, GitError, MissingObjectContext, MissingObjectKind, ObjectFormat, ObjectId, RepoPath,
11    Result,
12};
13use sley_index::{
14    BorrowedIndex, CacheTree, Index, IndexEntry, IndexEntryRef, SPARSE_DIR_MODE, SplitIndexLink,
15    Stage, UntrackedCache, UntrackedCacheDir, UntrackedCacheOidStat, UntrackedCacheStatData,
16};
17use sley_object::{Commit, EncodedObject, ObjectType, Tree, TreeEntry, tree_entry_object_type};
18use sley_odb::{FileObjectDatabase, ObjectPresenceChecker, ObjectReader, ObjectWriter};
19use sley_refs::{FileRefStore, RefTarget, RefUpdate, ReflogEntry, branch_ref_name};
20use std::borrow::Cow;
21use std::cell::{Cell, RefCell};
22use std::cmp::Ordering;
23use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
24use std::io::{Read, Write};
25use std::ops::Range;
26use std::path::{Path, PathBuf};
27use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio};
28use std::sync::{Mutex, OnceLock};
29use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
30use std::{env, fs};
31
32#[derive(Debug, Clone, PartialEq, Eq)]
33pub enum WorktreeStatus {
34    Clean,
35    Modified(RepoPath),
36    Added(RepoPath),
37    Deleted(RepoPath),
38    Untracked(RepoPath),
39}
40
41pub trait WorktreeScanner {
42    fn status(&self) -> Result<Vec<WorktreeStatus>>;
43}
44
/// A sparse-checkout specification: the raw pattern lines plus the
/// sparse-index toggle.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SparseCheckout {
    /// Pattern lines as raw bytes, in order. How they are interpreted is
    /// selected separately via [`SparseCheckoutMode`].
    pub patterns: Vec<Vec<u8>>,
    /// NOTE(review): presumably requests a sparse index for this checkout —
    /// confirm against the code that consumes this flag.
    pub sparse_index: bool,
}
50
/// How the patterns of a [`SparseCheckout`] are read when deciding whether an
/// index path is "in cone" (i.e. kept in the worktree).
///
/// * [`SparseCheckoutMode::Auto`] (the default) looks at the patterns
///   themselves: if every one of them fits the cone grammar described below,
///   cone matching is used; otherwise full matching is used.
/// * [`SparseCheckoutMode::Full`] treats each pattern like a `.gitignore`
///   line — `*`, `?`, `**`, character classes, a leading `/` anchor, a
///   trailing `/` for directory-only patterns, and `!` negation all apply. A
///   path is included when the *last* pattern matching it is not negated,
///   mirroring upstream Git's non-cone `core.sparseCheckout` behaviour.
/// * [`SparseCheckoutMode::Cone`] accepts only the restricted directory-prefix
///   form Git writes for `core.sparseCheckoutCone`: the literal `/*`
///   (top-level files), the recursive-parent guard `!/*/`, anchored directory
///   patterns such as `/dir/` (everything below `dir/`), and the parent guards
///   `/dir/*` and `!/dir/*/`. Matching is purely prefix based, so glob
///   metacharacters are taken literally.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum SparseCheckoutMode {
    /// Choose between cone and full matching by inspecting the patterns.
    #[default]
    Auto,
    /// `.gitignore`-style full pattern matching.
    Full,
    /// Prefix-only cone matching.
    Cone,
}
75
/// Per-path outcome of applying a sparse-checkout specification.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ApplySparseResult {
    /// In-cone paths whose worktree file was written (or rewritten).
    pub materialized: Vec<Vec<u8>>,
    /// Out-of-cone paths that were removed from the worktree; each of their
    /// index entries now carries the skip-worktree bit.
    pub skipped: Vec<Vec<u8>>,
    /// Out-of-cone paths whose worktree file did *not* match the index. To
    /// avoid losing data (as git does) the file was left where it is and the
    /// skip-worktree bit was left clear. Callers report these with git's
    /// "The following paths are not up to date …" warning. Sorted by path.
    pub not_up_to_date: Vec<Vec<u8>>,
}
89
90#[derive(Debug, Clone, PartialEq, Eq)]
91pub struct UpdateIndexResult {
92    pub entries: usize,
93    pub updated: Vec<ObjectId>,
94}
95
/// What happened to a tracked path during an `add`-style update; the payload
/// is the path as raw bytes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AddUpdateTrackedAction {
    /// The path was added/updated.
    Add(Vec<u8>),
    /// The path was removed.
    Remove(Vec<u8>),
}
101
102#[derive(Debug, Clone, PartialEq, Eq)]
103pub enum AddExactTrackedPathResult {
104    Handled(Option<AddUpdateTrackedAction>),
105    Unsupported,
106}
107
108#[derive(Debug, Clone, PartialEq, Eq)]
109pub struct CacheInfoEntry {
110    pub mode: u32,
111    pub oid: ObjectId,
112    pub path: Vec<u8>,
113    pub stage: u16,
114}
115
116#[derive(Debug, Clone, PartialEq, Eq)]
117pub enum IndexInfoRecord {
118    Add(CacheInfoEntry),
119    Remove { path: Vec<u8> },
120}
121
/// Options shared by a whole batch, used by the `git add`-style callers that
/// apply a single uniform mode to every path.
///
/// `add`, `remove`, `force_remove`, `info_only` and `chmod` together describe
/// that uniform per-path mode. `ignore_skip_worktree_entries` is a true
/// whole-invocation switch (it is not positional in git either).
///
/// `git update-index <flag> <path>...` does NOT go through this type for its
/// per-path mode: it constructs [`UpdateIndexPath`] values directly, each one
/// carrying the sticky mode that was in effect when that path was parsed. See
/// [`UpdateIndexPath`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct UpdateIndexOptions {
    /// Uniform `add` mode flag.
    pub add: bool,
    /// Uniform `remove` mode flag.
    pub remove: bool,
    /// Uniform `force_remove` mode flag.
    pub force_remove: bool,
    /// Uniform chmod request, if any.
    pub chmod: Option<bool>,
    /// Uniform `info_only` mode flag.
    pub info_only: bool,
    /// Whole-invocation toggle; not part of the per-path mode.
    pub ignore_skip_worktree_entries: bool,
    /// NOTE(review): not described by the original documentation; presumably
    /// permits operating on skip-worktree entries — confirm with callers.
    pub allow_skip_worktree_entries: bool,
}
140
141impl UpdateIndexOptions {
142    /// The uniform per-path mode this batch applies to every path.
143    fn path_mode(&self) -> UpdateIndexPathMode {
144        UpdateIndexPathMode {
145            add: self.add,
146            remove: self.remove,
147            force_remove: self.force_remove,
148            info_only: self.info_only,
149            chmod: self.chmod,
150        }
151    }
152}
153
/// Configuration-derived settings for handling large objects.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct LargeObjectPolicy {
    /// Size threshold taken from `core.bigfilethreshold`.
    threshold: u64,
    /// Compression level taken from `pack.compression` / `core.compression`.
    compression_level: u32,
    /// Positive `pack.packSizeLimit`, when one is configured.
    pack_size_limit: Option<u64>,
}
160
161impl LargeObjectPolicy {
162    fn from_config(git_dir: &Path, parameters_env: Option<&str>) -> Result<Self> {
163        let config = effective_worktree_config(git_dir, parameters_env)?;
164        let threshold = match config.get("core", None, "bigfilethreshold") {
165            Some(value) => match sley_config::parse_config_int(value) {
166                Some(value) if value >= 0 => value as u64,
167                _ => {
168                    eprintln!(
169                        "fatal: bad numeric config value '{value}' for 'core.bigfilethreshold': invalid unit"
170                    );
171                    return Err(GitError::Exit(128));
172                }
173            },
174            None => 512 * 1024 * 1024,
175        };
176        let compression_level = pack_compression_level(&config);
177        let pack_size_limit = config
178            .get("pack", None, "packSizeLimit")
179            .and_then(sley_config::parse_config_int)
180            .and_then(|value| (value > 0).then_some(value as u64));
181        Ok(Self {
182            threshold,
183            compression_level,
184            pack_size_limit,
185        })
186    }
187}
188
189fn effective_worktree_config(git_dir: &Path, parameters_env: Option<&str>) -> Result<GitConfig> {
190    let common = common_git_dir_for_worktree_config(git_dir);
191    let context = sley_config::ConfigIncludeContext::new(
192        Some(common.clone()),
193        sley_config::repo_current_branch_name(git_dir),
194    );
195    let mut config = sley_config::load_effective_config(&common, &context)?;
196    if let Ok(parameters) = sley_config::injected_config_parameters(parameters_env) {
197        let base = env::current_dir().unwrap_or_else(|_| PathBuf::from("."));
198        sley_config::append_injected_config_sections_with_includes(
199            &mut config,
200            &parameters,
201            &context,
202            &base,
203        )?;
204    }
205    Ok(config)
206}
207
/// Resolve the common git directory used for configuration lookups.
///
/// If `<git_dir>/commondir` can be read, its trimmed contents are taken as a
/// path: an absolute path is returned unchanged, a relative one is joined
/// onto `git_dir`. If the file cannot be read, `git_dir` itself is returned.
/// The result is not canonicalised.
fn common_git_dir_for_worktree_config(git_dir: &Path) -> PathBuf {
    let Ok(contents) = fs::read_to_string(git_dir.join("commondir")) else {
        return git_dir.to_path_buf();
    };
    let target = Path::new(contents.trim());
    if target.is_absolute() {
        target.to_path_buf()
    } else {
        git_dir.join(target)
    }
}
218
219fn pack_compression_level(config: &GitConfig) -> u32 {
220    config_int_in_range(config.get("pack", None, "compression"))
221        .or_else(|| config_int_in_range(config.get("core", None, "compression")))
222        .unwrap_or(6)
223}
224
225fn config_int_in_range(value: Option<&str>) -> Option<u32> {
226    let parsed = sley_config::parse_config_int(value?)?;
227    (0..=9).contains(&parsed).then_some(parsed as u32)
228}
229
/// The per-path *mode* attached to a positional `update-index` path: a
/// snapshot of the mode flags that were active when the path appeared on the
/// command line.
///
/// git's `update-index` walks argv left to right using `parse_options_step`
/// (`PARSE_OPT_STOP_AT_NON_OPTION`). The mode flags `--add`, `--remove`,
/// `--force-remove`, `--info-only` and `--chmod` update sticky global state,
/// and every non-option path is passed to `update_one()` under the state in
/// effect *at that moment*. Consequently `--add foo --force-remove bar` ADDs
/// `foo` and FORCE-REMOVEs `bar`: the flags are positional, not global. That
/// is reproduced here by recording the mode on each path as it is parsed
/// instead of applying one batch-wide `UpdateIndexOptions` to all paths.
///
/// `--chmod=(+|-)x` is sticky in the same way (`--chmod=+x A --chmod=-x B`
/// makes A executable and B non-executable). Under `--verbose` each path
/// prints its own action (`add '<p>'`, `remove '<p>'`,
/// `chmod (+|-)x '<p>'`) inline, interleaved in command-line order — the
/// reason the mode has to travel together with the path.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct UpdateIndexPathMode {
    /// `--add` was in effect.
    pub add: bool,
    /// `--remove` was in effect.
    pub remove: bool,
    /// `--force-remove` was in effect.
    pub force_remove: bool,
    /// `--info-only` was in effect.
    pub info_only: bool,
    /// `Some(true)` for `--chmod=+x`, `Some(false)` for `--chmod=-x`,
    /// otherwise `None`.
    pub chmod: Option<bool>,
}
256
257#[derive(Debug, Clone)]
258pub struct UpdateIndexPath {
259    pub path: PathBuf,
260    pub mode: UpdateIndexPathMode,
261}
262
/// Options for writing a tree; the default is `missing_ok: false` with no
/// prefix.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct WriteTreeOptions {
    /// NOTE(review): presumably tolerates missing objects, like
    /// `git write-tree --missing-ok` — confirm.
    pub missing_ok: bool,
    /// Optional path prefix as raw bytes.
    /// NOTE(review): presumably restricts the write to that subdirectory,
    /// like `git write-tree --prefix` — confirm.
    pub prefix: Option<Vec<u8>>,
}
268
269#[derive(Debug, Clone, PartialEq, Eq)]
270pub struct ShortStatusEntry {
271    pub index: u8,
272    pub worktree: u8,
273    pub path: Vec<u8>,
274    pub head_mode: Option<u32>,
275    pub index_mode: Option<u32>,
276    pub worktree_mode: Option<u32>,
277    pub head_oid: Option<ObjectId>,
278    pub index_oid: Option<ObjectId>,
279    /// For a tracked gitlink (submodule) path: how the submodule's working
280    /// state differs from the staged gitlink. `None` for ordinary paths.
281    pub submodule: Option<SubmoduleStatus>,
282}
283
284#[derive(Debug, Clone, Copy, PartialEq, Eq)]
285pub struct ShortStatusRow<'a> {
286    pub index: u8,
287    pub worktree: u8,
288    pub path: &'a [u8],
289    pub head_mode: Option<u32>,
290    pub index_mode: Option<u32>,
291    pub worktree_mode: Option<u32>,
292    pub head_oid: Option<ObjectId>,
293    pub index_oid: Option<ObjectId>,
294    /// For a tracked gitlink (submodule) path: how the submodule's working
295    /// state differs from the staged gitlink. `None` for ordinary paths.
296    pub submodule: Option<SubmoduleStatus>,
297}
298
/// Signal returned by a streaming callback to say whether the producer
/// should keep going. The default is [`StreamControl::Continue`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StreamControl {
    /// Keep streaming.
    #[default]
    Continue,
    /// Stop streaming.
    Stop,
}

impl StreamControl {
    /// `true` exactly when this is [`StreamControl::Stop`].
    fn is_stop(self) -> bool {
        self == Self::Stop
    }
}
311
/// Extra change detail attached to a status entry for a submodule. It mirrors
/// upstream's `wt_status_change_data` trio: `new_submodule_commits` and the
/// two dirty bits `DIRTY_SUBMODULE_MODIFIED` / `DIRTY_SUBMODULE_UNTRACKED`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct SubmoduleStatus {
    /// The HEAD checked out in the submodule is not the staged gitlink oid.
    pub new_commits: bool,
    /// Tracked files in the submodule have staged or unstaged changes.
    pub modified_content: bool,
    /// The submodule contains untracked files.
    pub untracked_content: bool,
}

impl SubmoduleStatus {
    /// `true` when at least one of the three flags is set.
    pub fn any(&self) -> bool {
        [
            self.new_commits,
            self.modified_content,
            self.untracked_content,
        ]
        .contains(&true)
    }
}
330
/// Dirt-mask bit for a submodule whose tracked files have staged or unstaged
/// changes (upstream `DIRTY_SUBMODULE_MODIFIED`).
pub const DIRTY_SUBMODULE_MODIFIED: u8 = 0b01;
/// Dirt-mask bit for a submodule that contains untracked files (upstream
/// `DIRTY_SUBMODULE_UNTRACKED`).
pub const DIRTY_SUBMODULE_UNTRACKED: u8 = 0b10;
337
338/// Inspect the working state of the submodule whose worktree is at `sub_root`
339/// and report its dirt mask: [`DIRTY_SUBMODULE_MODIFIED`] for staged/unstaged
340/// changes to tracked files, [`DIRTY_SUBMODULE_UNTRACKED`] for untracked
341/// files. Returns 0 for a clean submodule — and for a directory that is not a
342/// populated repository at all (upstream treats an unpopulated gitlink as
343/// always unchanged). The native equivalent of upstream's
344/// `is_submodule_modified()` (which runs `git status --porcelain=2` inside the
345/// submodule and classifies `?` lines as untracked, everything else as
346/// modified).
347pub fn submodule_dirt(sub_root: &Path) -> u8 {
348    let Some(git_dir) = sley_diff_merge::gitlink_git_dir(sub_root) else {
349        return 0;
350    };
351    let Ok(config) = sley_config::read_repo_config(&git_dir, None) else {
352        return 0;
353    };
354    let Ok(format) = config.repository_object_format() else {
355        return 0;
356    };
357    let mut dirt = 0;
358    let status_result = stream_short_status_with_options(
359        sub_root,
360        &git_dir,
361        format,
362        ShortStatusOptions {
363            include_ignored: false,
364            ignored_mode: StatusIgnoredMode::Traditional,
365            untracked_mode: StatusUntrackedMode::Normal,
366        },
367        |entry| {
368            if let Some(submodule) = entry.submodule {
369                if submodule.new_commits || submodule.modified_content {
370                    dirt |= DIRTY_SUBMODULE_MODIFIED;
371                }
372                if submodule.untracked_content {
373                    dirt |= DIRTY_SUBMODULE_UNTRACKED;
374                }
375            } else if entry.index == b'?' && entry.worktree == b'?' {
376                dirt |= DIRTY_SUBMODULE_UNTRACKED;
377            } else {
378                dirt |= DIRTY_SUBMODULE_MODIFIED;
379            }
380            let complete = DIRTY_SUBMODULE_MODIFIED | DIRTY_SUBMODULE_UNTRACKED;
381            Ok(if dirt == complete {
382                StreamControl::Stop
383            } else {
384                StreamControl::Continue
385            })
386        },
387    );
388    if status_result.is_err() {
389        return 0;
390    }
391    dirt
392}
393
394fn embedded_repo_object_format(sub_root: &Path) -> Option<ObjectFormat> {
395    let git_dir = sley_diff_merge::gitlink_git_dir(sub_root)?;
396    sley_config::read_repo_config(&git_dir, None)
397        .ok()?
398        .repository_object_format()
399        .ok()
400}
401
/// How untracked files are reported by status; defaults to
/// [`StatusUntrackedMode::All`].
/// NOTE(review): variant meanings presumably follow git's
/// `--untracked-files=(all|normal|no)` — confirm against the status code.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StatusUntrackedMode {
    /// The default mode.
    #[default]
    All,
    /// The mode used when inspecting a submodule's dirt.
    Normal,
    /// The remaining mode.
    None,
}
409
/// How ignored files are reported by status; defaults to
/// [`StatusIgnoredMode::Traditional`].
/// NOTE(review): variant meanings presumably follow git's
/// `--ignored=(traditional|matching)` — confirm against the status code.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum StatusIgnoredMode {
    /// The default mode.
    #[default]
    Traditional,
    /// The alternative mode.
    Matching,
}
416
417#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
418pub struct ShortStatusOptions {
419    pub include_ignored: bool,
420    pub ignored_mode: StatusIgnoredMode,
421    pub untracked_mode: StatusUntrackedMode,
422}
423
/// How a single tracked path in the worktree compares with the index/tree
/// entry that was expected for it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WorktreeEntryState {
    /// The path is present and its mode and object id match the expectation.
    Clean,
    /// The path is present but differs from the expected entry in type, mode,
    /// filtered content, symlink target, or gitlink HEAD.
    Modified,
    /// The path itself, or one of its parent directories, does not exist in
    /// the worktree.
    Deleted,
}
436
/// Durability switches for an atomic metadata write; both default to `false`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct AtomicMetadataWriteOptions {
    /// NOTE(review): presumably fsyncs the written file — confirm at the
    /// writer.
    pub fsync_file: bool,
    /// NOTE(review): presumably fsyncs the containing directory — confirm at
    /// the writer.
    pub fsync_dir: bool,
}
442
/// What an atomic metadata write reports back about the file it produced.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AtomicMetadataWriteResult {
    /// Path of the written file.
    pub path: PathBuf,
    /// Length in bytes.
    pub len: u64,
    /// Modification time, when known.
    /// NOTE(review): presumably `(seconds, nanoseconds)` like the index mtime
    /// pairs elsewhere in this file — confirm.
    pub mtime: Option<(u64, u64)>,
}
449
450/// Stage-0 index stat data that can prove a worktree path clean without
451/// re-reading and re-hashing it.
452///
453/// This is the public carrier for sley's racy-git shortcut. Callers that already
454/// parsed `.git/index` can build a probe from the matching [`IndexEntry`] and
455/// the index file's mtime, then pass it to [`worktree_entry_state`] or
456/// [`worktree_entry_state_by_git_path`]. The probe is trusted only when its path,
457/// mode, and object id match the expected entry and the cached stat is not
458/// racily clean; otherwise the helper falls back to the same content hashing
459/// path used by [`stream_short_status_with_options`].
460#[derive(Debug, Clone, PartialEq, Eq)]
461pub struct IndexStatProbe {
462    entry: IndexEntry,
463    index_mtime: Option<(u64, u64)>,
464}
465
466/// Reusable stage-0 index stat probes for many worktree paths.
467///
468/// Prefer this over repeated [`IndexStatProbe::from_repository_index`] calls
469/// when an embedder needs to verify many paths. It parses `.git/index` once,
470/// records the index file mtime used for racy-git checks, and serves cheap
471/// per-path probes from memory.
472#[derive(Debug, Clone, PartialEq, Eq, Default)]
473pub struct IndexStatProbeCache {
474    entries: HashMap<Vec<u8>, IndexEntry>,
475    index_mtime: Option<(u64, u64)>,
476}
477
478impl IndexStatProbe {
479    /// Build a probe from a parsed stage-0 index entry and the index file's mtime
480    /// split as `(seconds, nanoseconds)`.
481    pub fn from_index_entry(entry: IndexEntry, index_mtime: Option<(u64, u64)>) -> Self {
482        Self { entry, index_mtime }
483    }
484
485    /// Build a probe from a parsed index entry and the path of the index file on
486    /// disk, using that file's mtime as the racy-clean reference timestamp.
487    pub fn from_index_entry_and_index_path(
488        entry: IndexEntry,
489        index_path: impl AsRef<Path>,
490    ) -> Self {
491        let index_mtime = fs::metadata(index_path.as_ref())
492            .ok()
493            .and_then(|metadata| file_mtime_parts(&metadata));
494        Self { entry, index_mtime }
495    }
496
497    /// Read this repository's index and return a probe for `git_path` when a
498    /// stage-0 entry exists.
499    ///
500    /// For repeated lookups prefer [`IndexStatProbeCache::from_repository_index`]
501    /// and [`IndexStatProbeCache::probe_for_git_path`]. This one-shot helper
502    /// keeps a small process-local cache for back-to-back calls against an
503    /// unchanged index, but the explicit cache makes ownership and invalidation
504    /// clearer for high-volume embedders.
505    pub fn from_repository_index(
506        git_dir: impl AsRef<Path>,
507        format: ObjectFormat,
508        git_path: &[u8],
509    ) -> Result<Option<Self>> {
510        let index_path = repository_index_path(git_dir);
511        cached_repository_index_stat_probe(&index_path, format, git_path)
512    }
513
514    /// The parsed index entry this probe was built from.
515    pub fn entry(&self) -> &IndexEntry {
516        &self.entry
517    }
518
519    /// The index file mtime used as the racy-clean reference timestamp.
520    pub fn index_mtime(&self) -> Option<(u64, u64)> {
521        self.index_mtime
522    }
523
524    fn stat_cache_for(
525        &self,
526        git_path: &[u8],
527        expected_oid: &ObjectId,
528        expected_mode: u32,
529    ) -> Option<IndexStatCache> {
530        if index_entry_stage(&self.entry) != 0
531            || self.entry.path.as_bytes() != git_path
532            || self.entry.oid != *expected_oid
533            || self.entry.mode != expected_mode
534        {
535            return None;
536        }
537        let mut entries = HashMap::new();
538        entries.insert(git_path.to_vec(), self.entry.clone());
539        Some(IndexStatCache {
540            entries,
541            index_mtime: self.index_mtime,
542        })
543    }
544}
545
546impl IndexStatProbeCache {
547    /// Build a reusable probe cache from an already parsed index and index-file
548    /// mtime.
549    pub fn from_index(index: &Index, index_mtime: Option<(u64, u64)>) -> Self {
550        Self {
551            entries: stage0_index_entries(index),
552            index_mtime,
553        }
554    }
555
556    /// Read this repository's index once and build reusable stat probes.
557    ///
558    /// A missing index returns an empty cache, matching the one-shot helper's
559    /// `Ok(None)` result for every path.
560    pub fn from_repository_index(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Result<Self> {
561        let index_path = repository_index_path(git_dir);
562        read_index_stat_probe_cache(&index_path, format)
563    }
564
565    /// Return a per-path probe for a stage-0 entry, if present.
566    pub fn probe_for_git_path(&self, git_path: &[u8]) -> Option<IndexStatProbe> {
567        self.entries
568            .get(git_path)
569            .cloned()
570            .map(|entry| IndexStatProbe {
571                entry,
572                index_mtime: self.index_mtime,
573            })
574    }
575
576    /// Whether this cache has a stage-0 entry for `git_path`.
577    pub fn contains_git_path(&self, git_path: &[u8]) -> bool {
578        self.entries.contains_key(git_path)
579    }
580
581    /// Number of stage-0 entries in the cache.
582    pub fn len(&self) -> usize {
583        self.entries.len()
584    }
585
586    /// Whether the cache has no stage-0 entries.
587    pub fn is_empty(&self) -> bool {
588        self.entries.is_empty()
589    }
590
591    /// The index file mtime used as the racy-clean reference timestamp.
592    pub fn index_mtime(&self) -> Option<(u64, u64)> {
593        self.index_mtime
594    }
595}
596
597#[derive(Clone)]
598struct CachedRepositoryIndexStatProbes {
599    index_path: PathBuf,
600    format: ObjectFormat,
601    len: u64,
602    mtime: Option<(u64, u64)>,
603    probes: IndexStatProbeCache,
604}
605
606static REPOSITORY_INDEX_STAT_PROBES: OnceLock<Mutex<Option<CachedRepositoryIndexStatProbes>>> =
607    OnceLock::new();
608
609fn cached_repository_index_stat_probe(
610    index_path: &Path,
611    format: ObjectFormat,
612    git_path: &[u8],
613) -> Result<Option<IndexStatProbe>> {
614    let metadata = match fs::metadata(index_path) {
615        Ok(metadata) => metadata,
616        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
617            if let Some(cache) = REPOSITORY_INDEX_STAT_PROBES.get()
618                && let Ok(mut guard) = cache.lock()
619            {
620                *guard = None;
621            }
622            return Ok(None);
623        }
624        Err(err) => return Err(err.into()),
625    };
626    let len = metadata.len();
627    let mtime = file_mtime_parts(&metadata);
628    let cache = REPOSITORY_INDEX_STAT_PROBES.get_or_init(|| Mutex::new(None));
629    if let Ok(guard) = cache.lock()
630        && let Some(cached) = guard.as_ref()
631        && cached.index_path == index_path
632        && cached.format == format
633        && cached.len == len
634        && cached.mtime == mtime
635    {
636        return Ok(cached.probes.probe_for_git_path(git_path));
637    }
638
639    let probes = read_index_stat_probe_cache_with_metadata(index_path, format, mtime)?;
640    let probe = probes.probe_for_git_path(git_path);
641    if let Ok(mut guard) = cache.lock() {
642        *guard = Some(CachedRepositoryIndexStatProbes {
643            index_path: index_path.to_path_buf(),
644            format,
645            len,
646            mtime,
647            probes: probes.clone(),
648        });
649    }
650    Ok(probe)
651}
652
653fn read_index_stat_probe_cache(
654    index_path: &Path,
655    format: ObjectFormat,
656) -> Result<IndexStatProbeCache> {
657    let metadata = match fs::metadata(index_path) {
658        Ok(metadata) => metadata,
659        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
660            return Ok(IndexStatProbeCache::default());
661        }
662        Err(err) => return Err(err.into()),
663    };
664    read_index_stat_probe_cache_with_metadata(index_path, format, file_mtime_parts(&metadata))
665}
666
667fn read_index_stat_probe_cache_with_metadata(
668    index_path: &Path,
669    format: ObjectFormat,
670    index_mtime: Option<(u64, u64)>,
671) -> Result<IndexStatProbeCache> {
672    let bytes = fs::read(index_path)?;
673    let index = Index::parse(&bytes, format)?;
674    Ok(IndexStatProbeCache::from_index(&index, index_mtime))
675}
676
677fn stage0_index_entries(index: &Index) -> HashMap<Vec<u8>, IndexEntry> {
678    let mut entries = HashMap::new();
679    for entry in &index.entries {
680        if index_entry_stage(entry) == 0 {
681            entries.insert(entry.path.as_bytes().to_vec(), entry.clone());
682        }
683    }
684    entries
685}
686
687#[derive(Debug, Clone, PartialEq, Eq)]
688pub struct CheckoutResult {
689    pub branch: String,
690    pub oid: ObjectId,
691    pub files: usize,
692}
693
/// Summary of a restore operation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RestoreResult {
    /// Count of restored items.
    pub restored: usize,
}
698
/// Which side of a conflicted path a checkout should take.
/// NOTE(review): presumably corresponds to git's `--ours` / `--theirs` —
/// confirm at the call site.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CheckoutStage {
    /// The "ours" side.
    Ours,
    /// The "theirs" side.
    Theirs,
}
704
/// Conflict presentation style used by checkout.
/// NOTE(review): presumably mirrors git's `merge.conflictStyle` values
/// `merge` and `diff3` — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CheckoutConflictStyle {
    /// The `Merge` style.
    Merge,
    /// The `Diff3` style.
    Diff3,
}
710
711#[derive(Debug, Clone, Copy)]
712pub struct CheckoutIndexPathOptions<'a> {
713    pub force: bool,
714    pub merge: bool,
715    pub stage: Option<CheckoutStage>,
716    pub conflict_style: CheckoutConflictStyle,
717    pub smudge_config: Option<&'a GitConfig>,
718}
719
/// Summary of a remove operation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RemoveResult {
    /// The removed paths, each as raw bytes.
    pub removed: Vec<Vec<u8>>,
}
724
725#[derive(Debug, Clone, PartialEq, Eq)]
726pub struct MoveResult {
727    pub source: Vec<u8>,
728    pub destination: Vec<u8>,
729    pub skipped: bool,
730    pub fatal: Option<String>,
731    pub details: Vec<MoveDetail>,
732}
733
/// One source → destination pair recorded as part of a move.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MoveDetail {
    /// Source path as raw bytes.
    pub source: Vec<u8>,
    /// Destination path as raw bytes.
    pub destination: Vec<u8>,
    /// Whether this pair was skipped.
    pub skipped: bool,
}
740
/// A source/destination path pair.
/// NOTE(review): presumably a pending `.gitmodules` path rewrite caused by a
/// move — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
struct GitmodulesMove {
    /// Source path as raw bytes.
    source: Vec<u8>,
    /// Destination path as raw bytes.
    destination: Vec<u8>,
}
746
/// A git directory paired with a destination root.
/// NOTE(review): presumably a pending fix-up of a moved gitlink's git
/// directory — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
struct GitlinkGitdirMove {
    /// The git directory concerned.
    git_dir: PathBuf,
    /// The destination root.
    destination_root: PathBuf,
}
752
/// Path of the index file for `git_dir`: the value of the `GIT_INDEX_FILE`
/// environment variable when it is set, otherwise `<git_dir>/index`.
pub fn repository_index_path(git_dir: impl AsRef<Path>) -> PathBuf {
    match env::var_os("GIT_INDEX_FILE") {
        Some(overridden) => PathBuf::from(overridden),
        None => git_dir.as_ref().join("index"),
    }
}
758
759pub fn read_repository_index(
760    git_dir: impl AsRef<Path>,
761    format: ObjectFormat,
762) -> Result<Option<Index>> {
763    let git_dir = git_dir.as_ref();
764    let index_path = repository_index_path(git_dir);
765    if !index_path.exists() {
766        return Ok(None);
767    }
768    Ok(Some(sley_index::read_repository_index(git_dir, format)?))
769}
770
771fn empty_index() -> Index {
772    Index {
773        version: 2,
774        entries: Vec::new(),
775        extensions: Vec::new(),
776        checksum: None,
777    }
778}
779
780/// Resolve the working-tree root for a repository identified by its git
781/// directory, returning `Ok(None)` for a bare repository.
782///
783/// This is the repository-intrinsic worktree resolution (it does *not* consult
784/// `GIT_WORK_TREE`/`GIT_DIR` or CLI overrides — those are the caller's job):
785///
786/// 0. for a linked worktree (a git directory that has both a `commondir` and a
787///    `gitdir` administrative file), the directory containing the worktree's
788///    `.git` link, canonicalised;
789/// 1. otherwise, if `core.bare` is true the repository is bare and `Ok(None)` is
790///    returned immediately — `core.bare` takes precedence for the main repo, so
791///    a bare repo ignores `core.worktree` and the `.git`-parent fallback;
792/// 2. otherwise, a `core.worktree` setting in `<git_dir>/config` (absolute, or
793///    relative to the git directory), canonicalised;
794/// 3. otherwise, when the git directory is a `.git` directory, its parent (the
795///    ordinary non-bare layout) — returned verbatim, not canonicalised;
796/// 4. otherwise the repository is bare and `Ok(None)` is returned.
797///
798/// `Ok(None)` means specifically "bare" (case 0 or case 4). A [`GitError::Io`] is
799/// returned if a path that should exist cannot be canonicalised, and a
800/// [`GitError::InvalidPath`] if a `.git` directory has no parent (a malformed
801/// layout).
802pub fn worktree_root_for_git_dir(git_dir: &Path) -> Result<Option<PathBuf>> {
803    if git_dir.join("commondir").is_file() {
804        let gitdir_file = git_dir.join("gitdir");
805        if gitdir_file.is_file() {
806            let value = fs::read_to_string(&gitdir_file)?;
807            let worktree_git_file = resolve_worktree_admin_path(git_dir, value.trim());
808            if let Some(worktree) = worktree_git_file.parent() {
809                return fs::canonicalize(worktree)
810                    .map(Some)
811                    .map_err(|err| GitError::Io(err.to_string()));
812            }
813        }
814    }
815    if let Ok(config) = sley_config::read_repo_config(git_dir, None) {
816        // A bare repository has no working tree, and `core.bare` takes precedence:
817        // a bare repo ignores `core.worktree`. Check it before any worktree
818        // resolution so a bare `.git`-named directory does not fall through to the
819        // "parent of .git" case below.
820        if config.get_bool("core", None, "bare") == Some(true) {
821            return Ok(None);
822        }
823        if let Some(worktree) = config.get("core", None, "worktree") {
824            let worktree = PathBuf::from(worktree);
825            let worktree = if worktree.is_absolute() {
826                worktree
827            } else {
828                git_dir.join(worktree)
829            };
830            return fs::canonicalize(worktree)
831                .map(Some)
832                .map_err(|err| GitError::Io(err.to_string()));
833        }
834    }
835    if git_dir.file_name().and_then(|name| name.to_str()) != Some(".git") {
836        return Ok(None);
837    }
838    git_dir
839        .parent()
840        .map(Path::to_path_buf)
841        .map(Some)
842        .ok_or_else(|| GitError::InvalidPath("git dir has no parent worktree".into()))
843}
844
845pub fn common_git_dir_for_git_dir(git_dir: &Path) -> Result<PathBuf> {
846    if let Some(common_dir) = env::var_os("GIT_COMMON_DIR") {
847        return Ok(PathBuf::from(common_dir));
848    }
849    let commondir = git_dir.join("commondir");
850    if commondir.is_file() {
851        let value = fs::read_to_string(&commondir)?;
852        let path = PathBuf::from(value.trim());
853        let common = if path.is_absolute() {
854            path
855        } else {
856            git_dir.join(path)
857        };
858        return fs::canonicalize(common).map_err(|err| GitError::Io(err.to_string()));
859    }
860    fs::canonicalize(git_dir).map_err(|err| GitError::Io(err.to_string()))
861}
862
/// A worktree found by [`find_shared_symref`] to already be using a ref.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SharedSymrefWorktree {
    /// The ref that matched (the `target` handed to [`find_shared_symref`]).
    pub refname: String,
    /// The worktree's root path, or its git directory when no root is known.
    pub path: PathBuf,
}
868
/// One entry produced by `worktree_admins`: a git directory paired with the
/// worktree root it serves.
struct WorktreeAdmin {
    /// Either the common git directory (main worktree) or an entry of
    /// `<common>/worktrees/` (linked worktree).
    git_dir: PathBuf,
    /// Root of the working tree, when one could be determined.
    path: Option<PathBuf>,
}
873
874pub fn find_shared_symref(
875    git_dir: &Path,
876    symref: &str,
877    target: &str,
878) -> Result<Option<SharedSymrefWorktree>> {
879    let common_git_dir = common_git_dir_for_git_dir(git_dir)?;
880    for admin in worktree_admins(&common_git_dir)? {
881        if worktree_uses_symref(&admin.git_dir, symref, target)? {
882            let path = admin
883                .path
884                .unwrap_or_else(|| admin.git_dir.clone())
885                .to_string_lossy()
886                .into_owned();
887            return Ok(Some(SharedSymrefWorktree {
888                refname: target.to_string(),
889                path: PathBuf::from(path),
890            }));
891        }
892    }
893    Ok(None)
894}
895
896pub fn worktree_refs_in_use(git_dir: &Path) -> Result<HashSet<String>> {
897    let common_git_dir = common_git_dir_for_git_dir(git_dir)?;
898    let mut refs = HashSet::new();
899    for admin in worktree_admins(&common_git_dir)? {
900        if let Ok(head) = fs::read_to_string(admin.git_dir.join("HEAD")) {
901            let head = head.trim();
902            if let Some(target) = head.strip_prefix("ref: ") {
903                refs.insert(target.to_string());
904            }
905            refs.extend(worktree_detached_operation_refs(&admin.git_dir));
906        }
907    }
908    Ok(refs)
909}
910
911fn worktree_admins(common_git_dir: &Path) -> Result<Vec<WorktreeAdmin>> {
912    let mut admins = Vec::new();
913    admins.push(WorktreeAdmin {
914        git_dir: common_git_dir.to_path_buf(),
915        path: worktree_root_for_git_dir(common_git_dir)?,
916    });
917    let worktrees_dir = common_git_dir.join("worktrees");
918    let Ok(entries) = fs::read_dir(worktrees_dir) else {
919        return Ok(admins);
920    };
921    for entry in entries {
922        let entry = entry?;
923        let git_dir = entry.path();
924        let path = linked_worktree_path(&git_dir);
925        admins.push(WorktreeAdmin { git_dir, path });
926    }
927    Ok(admins)
928}
929
930fn linked_worktree_path(admin_dir: &Path) -> Option<PathBuf> {
931    let gitdir = fs::read_to_string(admin_dir.join("gitdir")).ok()?;
932    let gitdir = gitdir.trim();
933    if gitdir.is_empty() {
934        return None;
935    }
936    let gitdir_path = resolve_worktree_admin_path(admin_dir, gitdir);
937    gitdir_path.parent().map(|path| {
938        fs::canonicalize(path).unwrap_or_else(|_| normalize_lexical_worktree_path(path))
939    })
940}
941
/// Normalize `path` purely lexically, without touching the filesystem.
///
/// `.` components are dropped and each `..` cancels the preceding normal
/// component. A `..` directly under the root stays at the root, while a `..`
/// with nothing left to cancel (the start of a relative path, or after another
/// kept `..`) is preserved, so `../a` stays `../a` instead of silently turning
/// into `a`.
fn normalize_lexical_worktree_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut out = PathBuf::new();
    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => match out.components().next_back() {
                // Cancel the last real directory name.
                Some(Component::Normal(_)) => {
                    out.pop();
                }
                // The parent of the root is the root itself.
                Some(Component::RootDir | Component::Prefix(_)) => {}
                // Nothing to cancel: keep the `..` so the path keeps its meaning.
                Some(Component::ParentDir | Component::CurDir) | None => out.push(".."),
            },
            _ => out.push(component.as_os_str()),
        }
    }
    out
}
955
956fn worktree_uses_symref(git_dir: &Path, symref: &str, target: &str) -> Result<bool> {
957    if symref != "HEAD" {
958        return Ok(false);
959    }
960    let Ok(head) = fs::read_to_string(git_dir.join(symref)) else {
961        return Ok(false);
962    };
963    let head = head.trim();
964    if head.strip_prefix("ref: ") == Some(target) {
965        return Ok(true);
966    }
967    if worktree_rebase_update_refs(git_dir)
968        .iter()
969        .any(|name| name == target)
970    {
971        return Ok(true);
972    }
973    if worktree_detached_operation_uses_ref(git_dir, target) {
974        return Ok(true);
975    }
976    Ok(false)
977}
978
979fn worktree_detached_operation_uses_ref(git_dir: &Path, target: &str) -> bool {
980    worktree_detached_operation_refs(git_dir)
981        .iter()
982        .any(|name| name == target)
983}
984
985fn worktree_detached_operation_refs(git_dir: &Path) -> Vec<String> {
986    let mut refs = Vec::new();
987    for dir in ["rebase-merge", "rebase-apply"] {
988        let Some(refname) = operation_head_name_ref(git_dir.join(dir).join("head-name")) else {
989            continue;
990        };
991        refs.push(refname);
992    }
993    refs.extend(worktree_rebase_update_refs(git_dir));
994    if let Some(refname) = operation_head_name_ref(git_dir.join("BISECT_START")) {
995        refs.push(refname);
996    }
997    refs
998}
999
/// Read the ref names stored in `<git_dir>/rebase-merge/update-refs`.
///
/// Every third line, starting with the first, is taken as a ref name; it is
/// trimmed and kept when non-empty. An unreadable file yields an empty list.
fn worktree_rebase_update_refs(git_dir: &Path) -> Vec<String> {
    let update_refs = git_dir.join("rebase-merge").join("update-refs");
    let contents = match fs::read_to_string(update_refs) {
        Ok(contents) => contents,
        Err(_) => return Vec::new(),
    };
    let mut names = Vec::new();
    for (position, raw) in contents.lines().enumerate() {
        if position % 3 != 0 {
            continue;
        }
        let name = raw.trim();
        if !name.is_empty() {
            names.push(name.to_owned());
        }
    }
    names
}
1012
/// Read a branch name from an operation state file (such as a rebase
/// `head-name` or `BISECT_START`) and return it as a full `refs/heads/...` name.
///
/// Returns `None` when the file is unreadable or blank, and also when it holds
/// the literal `detached HEAD`. Git writes that marker into `head-name` when a
/// rebase starts from a detached `HEAD`; it names no branch and must not be
/// turned into the bogus ref `refs/heads/detached HEAD`.
fn operation_head_name_ref(path: PathBuf) -> Option<String> {
    let contents = fs::read_to_string(path).ok()?;
    let value = contents.trim();
    if value.is_empty() || value == "detached HEAD" {
        return None;
    }
    if value.starts_with("refs/heads/") {
        Some(value.to_string())
    } else {
        // Short branch names are qualified so they compare equal to full refs.
        Some(format!("refs/heads/{value}"))
    }
}
1025
/// Turn a path stored in a git-directory administrative file (for example the
/// `gitdir` link of a linked worktree) into a usable path: an absolute value is
/// returned unchanged, a relative one is interpreted relative to `admin_dir`.
fn resolve_worktree_admin_path(admin_dir: &Path, value: &str) -> PathBuf {
    // `Path::join` already implements exactly this rule: joining an absolute
    // path replaces the base, joining a relative one appends to it.
    admin_dir.join(value)
}
1037
/// Report whether the repository at `git_dir` is shallow, meaning a `shallow`
/// entry exists directly inside it (the file records the grafted commit
/// boundaries left by `git clone --depth`).
pub fn is_shallow_repository(git_dir: &Path) -> bool {
    let shallow_marker = git_dir.join("shallow");
    // Same probe `Path::exists` performs: the entry exists when its metadata
    // can be read.
    fs::metadata(shallow_marker).is_ok()
}
1043
/// Flags controlling a path-removal operation.
///
/// NOTE(review): the consumers of this struct are outside the reviewed chunk;
/// the field names look like the `git rm` switches (`-r`, `--cached`, `-f`,
/// `--dry-run`, `--ignore-unmatch`) — confirm against the caller.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct RemoveOptions {
    pub recursive: bool,
    pub cached: bool,
    pub force: bool,
    pub dry_run: bool,
    pub ignore_unmatch: bool,
}
1052
/// Flags controlling a path-move operation.
///
/// NOTE(review): the consumers of this struct are outside the reviewed chunk;
/// the field names look like the `git mv` switches (`-f`, `--dry-run`, `-k`) —
/// confirm against the caller.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct MoveOptions {
    pub force: bool,
    pub dry_run: bool,
    pub skip_errors: bool,
}
1059
1060impl ShortStatusEntry {
1061    pub fn as_row(&self) -> ShortStatusRow<'_> {
1062        ShortStatusRow {
1063            index: self.index,
1064            worktree: self.worktree,
1065            path: &self.path,
1066            head_mode: self.head_mode,
1067            index_mode: self.index_mode,
1068            worktree_mode: self.worktree_mode,
1069            head_oid: self.head_oid,
1070            index_oid: self.index_oid,
1071            submodule: self.submodule,
1072        }
1073    }
1074
1075    pub fn line(&self) -> String {
1076        format!(
1077            "{}{} {}",
1078            self.index as char,
1079            self.worktree as char,
1080            String::from_utf8_lossy(&self.path)
1081        )
1082    }
1083}
1084
1085impl ShortStatusRow<'_> {
1086    pub fn to_owned_entry(self) -> ShortStatusEntry {
1087        ShortStatusEntry {
1088            index: self.index,
1089            worktree: self.worktree,
1090            path: self.path.to_vec(),
1091            head_mode: self.head_mode,
1092            index_mode: self.index_mode,
1093            worktree_mode: self.worktree_mode,
1094            head_oid: self.head_oid,
1095            index_oid: self.index_oid,
1096            submodule: self.submodule,
1097        }
1098    }
1099
1100    pub fn line(&self) -> String {
1101        format!(
1102            "{}{} {}",
1103            self.index as char,
1104            self.worktree as char,
1105            String::from_utf8_lossy(self.path)
1106        )
1107    }
1108}
1109
1110pub fn add_paths_to_index(
1111    worktree_root: impl AsRef<Path>,
1112    git_dir: impl AsRef<Path>,
1113    format: ObjectFormat,
1114    paths: &[PathBuf],
1115) -> Result<UpdateIndexResult> {
1116    update_index_paths(
1117        worktree_root,
1118        git_dir,
1119        format,
1120        paths,
1121        UpdateIndexOptions {
1122            add: true,
1123            remove: false,
1124            force_remove: false,
1125            chmod: None,
1126            info_only: false,
1127            ignore_skip_worktree_entries: false,
1128            allow_skip_worktree_entries: false,
1129        },
1130    )
1131}
1132
1133pub fn update_index_paths(
1134    worktree_root: impl AsRef<Path>,
1135    git_dir: impl AsRef<Path>,
1136    format: ObjectFormat,
1137    paths: &[PathBuf],
1138    options: UpdateIndexOptions,
1139) -> Result<UpdateIndexResult> {
1140    let git_dir = git_dir.as_ref();
1141    let index = read_repository_index(git_dir, format)?.unwrap_or_else(empty_index);
1142    update_index_paths_with_index(worktree_root, git_dir, format, index, paths, options)
1143}
1144
1145pub fn update_index_paths_with_index(
1146    worktree_root: impl AsRef<Path>,
1147    git_dir: impl AsRef<Path>,
1148    format: ObjectFormat,
1149    index: Index,
1150    paths: &[PathBuf],
1151    options: UpdateIndexOptions,
1152) -> Result<UpdateIndexResult> {
1153    let ordered = ordered_paths_from_plain(paths, options);
1154    update_index_paths_impl(
1155        worktree_root.as_ref(),
1156        git_dir.as_ref(),
1157        format,
1158        index,
1159        &ordered,
1160        options,
1161        None,
1162        false,
1163    )
1164}
1165
1166/// Stamp a single uniform mode (from a batch-wide [`UpdateIndexOptions`]) onto
1167/// every path. Used by the `git add`-style callers that genuinely apply one
1168/// mode to all paths; the positional `git update-index <flag> <path>...` path
1169/// instead snapshots a distinct mode per path in the CLI parse walk.
1170fn ordered_paths_from_plain(
1171    paths: &[PathBuf],
1172    options: UpdateIndexOptions,
1173) -> Vec<UpdateIndexPath> {
1174    let mode = options.path_mode();
1175    paths
1176        .iter()
1177        .map(|path| UpdateIndexPath {
1178            path: path.clone(),
1179            mode,
1180        })
1181        .collect()
1182}
1183
1184/// Stage an ordered list of paths, each carrying its own `--chmod` state, and
1185/// (under `verbose`) print the `add`/`remove`/`chmod` action lines inline in
1186/// command-line order. This is the entry point `git update-index <path>...`
1187/// uses so that `--chmod=+x A --chmod=-x B --verbose` produces the interleaved
1188/// `add 'A'` / `chmod +x 'A'` / `add 'B'` / `chmod -x 'B'` output git emits.
1189pub fn update_index_ordered_paths_filtered(
1190    worktree_root: impl AsRef<Path>,
1191    git_dir: impl AsRef<Path>,
1192    format: ObjectFormat,
1193    paths: &[UpdateIndexPath],
1194    options: UpdateIndexOptions,
1195    config: &GitConfig,
1196    verbose: bool,
1197) -> Result<UpdateIndexResult> {
1198    let git_dir = git_dir.as_ref();
1199    let index = read_repository_index(git_dir, format)?.unwrap_or_else(empty_index);
1200    update_index_ordered_paths_filtered_with_index(
1201        worktree_root,
1202        git_dir,
1203        format,
1204        index,
1205        paths,
1206        options,
1207        config,
1208        verbose,
1209    )
1210}
1211
1212pub fn update_index_ordered_paths_filtered_with_index(
1213    worktree_root: impl AsRef<Path>,
1214    git_dir: impl AsRef<Path>,
1215    format: ObjectFormat,
1216    index: Index,
1217    paths: &[UpdateIndexPath],
1218    options: UpdateIndexOptions,
1219    config: &GitConfig,
1220    verbose: bool,
1221) -> Result<UpdateIndexResult> {
1222    update_index_paths_impl(
1223        worktree_root.as_ref(),
1224        git_dir.as_ref(),
1225        format,
1226        index,
1227        paths,
1228        options,
1229        Some(config),
1230        verbose,
1231    )
1232}
1233
1234/// Like [`add_paths_to_index`], but runs the configured content filters
1235/// (`core.autocrlf`/`text`/`eol` EOL conversion and `filter.<name>.clean`
1236/// drivers) on each file's contents before hashing it into the object store.
1237///
1238/// `config` is the repository config used to resolve the filters; pass the
1239/// parsed `<git_dir>/config` (the orchestrator typically already has this).
1240pub fn add_paths_to_index_filtered(
1241    worktree_root: impl AsRef<Path>,
1242    git_dir: impl AsRef<Path>,
1243    format: ObjectFormat,
1244    paths: &[PathBuf],
1245    config: &GitConfig,
1246) -> Result<UpdateIndexResult> {
1247    update_index_paths_filtered(
1248        worktree_root,
1249        git_dir,
1250        format,
1251        paths,
1252        UpdateIndexOptions {
1253            add: true,
1254            remove: false,
1255            force_remove: false,
1256            chmod: None,
1257            info_only: false,
1258            ignore_skip_worktree_entries: false,
1259            allow_skip_worktree_entries: false,
1260        },
1261        config,
1262    )
1263}
1264
1265/// Like [`update_index_paths`], but applies the clean-side content filters (see
1266/// [`apply_clean_filter`]) to file contents before they are hashed/written.
1267pub fn update_index_paths_filtered(
1268    worktree_root: impl AsRef<Path>,
1269    git_dir: impl AsRef<Path>,
1270    format: ObjectFormat,
1271    paths: &[PathBuf],
1272    options: UpdateIndexOptions,
1273    config: &GitConfig,
1274) -> Result<UpdateIndexResult> {
1275    let git_dir = git_dir.as_ref();
1276    let index = read_repository_index(git_dir, format)?.unwrap_or_else(empty_index);
1277    update_index_paths_filtered_with_index(
1278        worktree_root,
1279        git_dir,
1280        format,
1281        index,
1282        paths,
1283        options,
1284        config,
1285    )
1286}
1287
1288pub fn update_index_paths_filtered_with_index(
1289    worktree_root: impl AsRef<Path>,
1290    git_dir: impl AsRef<Path>,
1291    format: ObjectFormat,
1292    index: Index,
1293    paths: &[PathBuf],
1294    options: UpdateIndexOptions,
1295    config: &GitConfig,
1296) -> Result<UpdateIndexResult> {
1297    let ordered = ordered_paths_from_plain(paths, options);
1298    update_index_paths_impl(
1299        worktree_root.as_ref(),
1300        git_dir.as_ref(),
1301        format,
1302        index,
1303        &ordered,
1304        options,
1305        Some(config),
1306        false,
1307    )
1308}
1309
1310pub fn add_update_all_tracked_filtered(
1311    worktree_root: impl AsRef<Path>,
1312    git_dir: impl AsRef<Path>,
1313    format: ObjectFormat,
1314    clean_config: &GitConfig,
1315) -> Result<Vec<AddUpdateTrackedAction>> {
1316    let worktree_root = worktree_root.as_ref();
1317    let git_dir = git_dir.as_ref();
1318    let index_path = repository_index_path(git_dir);
1319    if !index_path.exists() {
1320        return Ok(Vec::new());
1321    }
1322    let mut index = Index::parse(&fs::read(&index_path)?, format)?;
1323    let index_mtime = fs::metadata(&index_path)
1324        .ok()
1325        .and_then(|metadata| file_mtime_parts(&metadata));
1326    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
1327    let prechecks =
1328        tracked_only_non_clean_prechecks_parallel(worktree_root, &index, &stat_cache, false)?;
1329    if prechecks.is_empty() {
1330        return Ok(Vec::new());
1331    }
1332
1333    let pending = prechecks
1334        .into_iter()
1335        .map(|precheck| match precheck {
1336            TrackedOnlyPrecheck::Deleted(idx) => {
1337                (precheck, index.entries[idx].path.as_bytes().to_vec())
1338            }
1339            TrackedOnlyPrecheck::Slow(idx) => {
1340                (precheck, index.entries[idx].path.as_bytes().to_vec())
1341            }
1342        })
1343        .collect::<Vec<_>>();
1344    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
1345    let mut actions = Vec::new();
1346    let mut index_dirty = false;
1347    let mut clean_filter = None;
1348    let trust_filemode = trust_executable_bit(clean_config);
1349    for (precheck, path) in pending {
1350        match precheck {
1351            TrackedOnlyPrecheck::Deleted(_) => {
1352                if remove_index_entries_with_path(&mut index.entries, &path) {
1353                    actions.push(AddUpdateTrackedAction::Remove(path));
1354                    index_dirty = true;
1355                }
1356            }
1357            TrackedOnlyPrecheck::Slow(_) => {
1358                let (action, dirty) = add_update_tracked_path(
1359                    worktree_root,
1360                    git_dir,
1361                    format,
1362                    Some(clean_config),
1363                    trust_filemode,
1364                    &odb,
1365                    &stat_cache,
1366                    &mut clean_filter,
1367                    &mut index,
1368                    &path,
1369                )?;
1370                index_dirty |= dirty;
1371                if let Some(action) = action {
1372                    actions.push(action);
1373                }
1374            }
1375        }
1376    }
1377
1378    if index_dirty {
1379        normalize_index_version_for_extended_flags(&mut index);
1380        index.extensions = index_extensions_without_cache_tree(&index.extensions);
1381        write_repository_index_ref(git_dir, format, &index)?;
1382    }
1383    Ok(actions)
1384}
1385
1386pub fn add_exact_tracked_path_from_disk(
1387    worktree_root: impl AsRef<Path>,
1388    git_dir: impl AsRef<Path>,
1389    format: ObjectFormat,
1390    git_path: &[u8],
1391    ignore_removal: bool,
1392    config_parameters_env: Option<&str>,
1393) -> Result<AddExactTrackedPathResult> {
1394    let worktree_root = worktree_root.as_ref();
1395    let git_dir = git_dir.as_ref();
1396    let index_path = repository_index_path(git_dir);
1397    let index_metadata = match fs::metadata(&index_path) {
1398        Ok(metadata) => metadata,
1399        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
1400            return Ok(AddExactTrackedPathResult::Unsupported);
1401        }
1402        Err(err) => return Err(err.into()),
1403    };
1404    let mut index_bytes = fs::read(&index_path)?;
1405    let Some(raw) = raw_exact_index_entry(&index_bytes, format, git_path)? else {
1406        return Ok(AddExactTrackedPathResult::Unsupported);
1407    };
1408    if !raw_exact_entry_can_patch(&raw, git_path) {
1409        return Ok(AddExactTrackedPathResult::Unsupported);
1410    }
1411    if !raw_index_extensions_are_filterable(&index_bytes, raw.entries_end, raw.checksum_offset) {
1412        return Ok(AddExactTrackedPathResult::Unsupported);
1413    }
1414
1415    let entry = raw.entry.clone();
1416    if entry.stage() != Stage::Normal
1417        || index_entry_skip_worktree(&entry)
1418        || sley_index::is_gitlink(entry.mode)
1419    {
1420        return Ok(AddExactTrackedPathResult::Unsupported);
1421    }
1422    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
1423    let metadata = match fs::symlink_metadata(&absolute) {
1424        Ok(metadata) => metadata,
1425        Err(err)
1426            if matches!(
1427                err.kind(),
1428                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
1429            ) =>
1430        {
1431            return Ok(if ignore_removal {
1432                AddExactTrackedPathResult::Handled(None)
1433            } else {
1434                AddExactTrackedPathResult::Unsupported
1435            });
1436        }
1437        Err(err) => return Err(err.into()),
1438    };
1439    let file_type = metadata.file_type();
1440    if metadata.is_dir() || !(file_type.is_file() || file_type.is_symlink()) {
1441        return Ok(AddExactTrackedPathResult::Unsupported);
1442    }
1443    let index_mtime = file_mtime_parts(&index_metadata);
1444    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
1445    if stat_cache.reuse_index_entry(&entry, &metadata).is_some() {
1446        return Ok(AddExactTrackedPathResult::Handled(None));
1447    }
1448
1449    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
1450    let is_symlink = file_type.is_symlink();
1451    let body = if is_symlink {
1452        symlink_target_bytes(&absolute)?
1453    } else {
1454        let body = fs::read(&absolute)?;
1455        // Resolve the effective config WITH command-line `-c` / `--config-env`
1456        // overrides folded in (e.g. upstream t0027's `git -c core.autocrlf=true
1457        // add`); the plain repo-config reader would drop them and the fast path
1458        // would convert/warn against the wrong EOL policy.
1459        let config =
1460            sley_config::read_repo_config(git_dir, config_parameters_env).unwrap_or_default();
1461        let mut clean_filter = None;
1462        let clean_filter =
1463            tracked_only_clean_filter_with_config(&mut clean_filter, worktree_root, &config);
1464        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
1465        let checks =
1466            clean_filter
1467                .matcher
1468                .attributes_for_path(git_path, &clean_filter.requested, false);
1469        // git's index update folds in `global_conv_flags_eol`, so `git add`
1470        // emits the `core.safecrlf` round-trip warning (default: warn). The
1471        // current index blob (`entry.oid`) drives the auto-crlf
1472        // `has_crlf_in_index` decision. Mirror the slow `add_update_tracked_path`
1473        // path here so the exact-patch fast path does not silently drop the
1474        // warning (upstream t0020 'safecrlf: print warning only once').
1475        let conv_flags = ConvFlags::from_config(&clean_filter.config);
1476        let index_blob = match conv_flags {
1477            ConvFlags::Off => SafeCrlfIndexBlob::None,
1478            _ => SafeCrlfIndexBlob::Lookup {
1479                odb: &odb,
1480                oid: entry.oid,
1481            },
1482        };
1483        apply_clean_filter_with_attributes_cow_safecrlf(
1484            &clean_filter.config,
1485            &checks,
1486            git_path,
1487            &body,
1488            conv_flags,
1489            index_blob,
1490        )?
1491        .into_owned()
1492    };
1493    let object = EncodedObject::new(ObjectType::Blob, body);
1494    let oid = object.object_id(format)?;
1495    if oid != entry.oid || entry.is_intent_to_add() {
1496        odb.write_object(object)?;
1497    }
1498
1499    let config = sley_config::read_repo_config(git_dir, config_parameters_env).unwrap_or_default();
1500    let trust_filemode = trust_executable_bit(&config);
1501    let mut updated_entry =
1502        index_entry_from_metadata_with_filemode(entry.path.clone(), oid, &metadata, trust_filemode);
1503    if is_symlink {
1504        updated_entry.mode = 0o120000;
1505    }
1506    if updated_entry == entry {
1507        return Ok(AddExactTrackedPathResult::Handled(None));
1508    }
1509    if !raw_updated_entry_can_patch(&entry, &updated_entry, git_path) {
1510        return Ok(AddExactTrackedPathResult::Unsupported);
1511    }
1512    patch_raw_index_entry(&mut index_bytes, format, &raw, &updated_entry)?;
1513    fs::write(index_path, index_bytes)?;
1514    let changed = updated_entry.oid != entry.oid || updated_entry.mode != entry.mode;
1515    Ok(AddExactTrackedPathResult::Handled(
1516        changed.then(|| AddUpdateTrackedAction::Add(git_path.to_vec())),
1517    ))
1518}
1519
1520pub fn add_exact_tracked_path_with_index(
1521    worktree_root: impl AsRef<Path>,
1522    git_dir: impl AsRef<Path>,
1523    format: ObjectFormat,
1524    mut index: Index,
1525    git_path: &[u8],
1526) -> Result<Option<AddUpdateTrackedAction>> {
1527    let worktree_root = worktree_root.as_ref();
1528    let git_dir = git_dir.as_ref();
1529    let range = index_entries_path_range(&index.entries, git_path);
1530    if range.len() != 1 {
1531        return Ok(None);
1532    }
1533    let entry = &index.entries[range.start];
1534    if entry.stage() != Stage::Normal || index_entry_skip_worktree(entry) {
1535        return Ok(None);
1536    }
1537    let index_path = repository_index_path(git_dir);
1538    let index_mtime = fs::metadata(&index_path)
1539        .ok()
1540        .and_then(|metadata| file_mtime_parts(&metadata));
1541    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
1542    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
1543    let trust_filemode = trust_executable_bit_from_git_dir(git_dir, None);
1544    let mut clean_filter = None;
1545    let (action, dirty) = add_update_tracked_path(
1546        worktree_root,
1547        git_dir,
1548        format,
1549        None,
1550        trust_filemode,
1551        &odb,
1552        &stat_cache,
1553        &mut clean_filter,
1554        &mut index,
1555        git_path,
1556    )?;
1557    if dirty {
1558        normalize_index_version_for_extended_flags(&mut index);
1559        index.extensions = index_extensions_without_cache_tree(&index.extensions);
1560        write_repository_index_ref(git_dir, format, &index)?;
1561    }
1562    Ok(action)
1563}
1564
/// Result of locating one path inside the raw bytes of an index file (see
/// `raw_exact_index_entry`): the decoded entry plus the byte offsets needed to
/// patch it in place.
struct RawExactIndexEntry {
    /// Index format version read from the header.
    version: u32,
    /// The decoded entry for the requested path.
    entry: IndexEntry,
    /// Byte offset at which the entry starts.
    entry_start: usize,
    /// Byte offset just past the last entry (where the extension area begins).
    entries_end: usize,
    /// Byte offset of the trailing checksum.
    checksum_offset: usize,
}
1572
/// Scan the raw bytes of an index file for the single entry whose path equals
/// `git_path`, without building a full `Index`.
///
/// The trailing checksum and the `DIRC` signature are verified first. Only
/// index versions 2 and 3 are handled; any other version yields `Ok(None)`.
///
/// Returns `Ok(None)` when the path is absent, or when it occurs more than
/// once. Otherwise returns the decoded entry together with its start offset,
/// the offset just past the last entry, and the checksum offset.
///
/// # Errors
/// `GitError::InvalidFormat` for a too-short buffer, a checksum mismatch, a
/// missing signature, or a truncated/unterminated entry.
fn raw_exact_index_entry(
    bytes: &[u8],
    format: ObjectFormat,
    git_path: &[u8],
) -> Result<Option<RawExactIndexEntry>> {
    let hash_len = format.raw_len();
    // 12-byte header (signature, version, entry count) plus the trailing hash.
    if bytes.len() < 12 + hash_len {
        return Err(GitError::InvalidFormat("index header too short".into()));
    }
    let checksum_offset = bytes.len() - hash_len;
    let actual_checksum = sley_core::digest_bytes(format, &bytes[..checksum_offset])?;
    let expected_checksum = ObjectId::from_raw(format, &bytes[checksum_offset..])?;
    if actual_checksum != expected_checksum {
        return Err(GitError::InvalidFormat(format!(
            "index checksum mismatch: expected {expected_checksum}, got {actual_checksum}"
        )));
    }
    if &bytes[..4] != b"DIRC" {
        return Err(GitError::InvalidFormat("missing DIRC signature".into()));
    }
    let version = u32_from_be(&bytes[4..8]);
    if !(2..=3).contains(&version) {
        return Ok(None);
    }
    let count = u32_from_be(&bytes[8..12]) as usize;
    let mut offset = 12;
    let mut found = None;
    for _ in 0..count {
        // Fixed part of an entry: 40 bytes of stat fields, the object id, and
        // the 2-byte flags word.
        let entry_header_len = 40 + hash_len + 2;
        if checksum_offset.saturating_sub(offset) < entry_header_len {
            return Err(GitError::InvalidFormat("truncated index entry".into()));
        }
        let start = offset;
        let oid_start = offset + 40;
        let oid_end = oid_start + hash_len;
        let flags = u16_from_be(&bytes[oid_end..oid_end + 2]);
        offset = oid_end + 2;
        // An extra 2-byte flags word follows when the extended bit is set.
        let flags_extended = if flags & INDEX_FLAG_EXTENDED != 0 {
            if checksum_offset.saturating_sub(offset) < 2 {
                return Err(GitError::InvalidFormat(
                    "truncated index extended flags".into(),
                ));
            }
            let flags_extended = u16_from_be(&bytes[offset..offset + 2]);
            offset += 2;
            flags_extended
        } else {
            0
        };
        // The path runs up to the next NUL byte.
        let path_start = offset;
        while bytes.get(offset).copied() != Some(0) {
            offset += 1;
            if offset >= checksum_offset {
                return Err(GitError::InvalidFormat("unterminated index path".into()));
            }
        }
        let path = &bytes[path_start..offset];
        // Step over the terminating NUL, then over padding until the entry
        // length (measured from `start`) is a multiple of 8.
        offset += 1;
        while (offset - start) % 8 != 0 {
            offset += 1;
            if offset > checksum_offset {
                return Err(GitError::InvalidFormat("truncated index padding".into()));
            }
        }
        if path == git_path {
            // A second entry for the same path: not a single exact match.
            if found.is_some() {
                return Ok(None);
            }
            let oid = ObjectId::from_raw(format, &bytes[oid_start..oid_end])?;
            found = Some(RawExactIndexEntry {
                version,
                entry: IndexEntry {
                    ctime_seconds: u32_from_be(&bytes[start..start + 4]),
                    ctime_nanoseconds: u32_from_be(&bytes[start + 4..start + 8]),
                    mtime_seconds: u32_from_be(&bytes[start + 8..start + 12]),
                    mtime_nanoseconds: u32_from_be(&bytes[start + 12..start + 16]),
                    dev: u32_from_be(&bytes[start + 16..start + 20]),
                    ino: u32_from_be(&bytes[start + 20..start + 24]),
                    mode: u32_from_be(&bytes[start + 24..start + 28]),
                    uid: u32_from_be(&bytes[start + 28..start + 32]),
                    gid: u32_from_be(&bytes[start + 32..start + 36]),
                    size: u32_from_be(&bytes[start + 36..start + 40]),
                    oid,
                    flags,
                    flags_extended,
                    path: BString::from(path),
                },
                entry_start: start,
                // Filled in once the scan has passed the last entry.
                entries_end: 0,
                checksum_offset,
            });
        } else if found.is_none() && path > git_path {
            // Reached a path that compares greater before finding a match;
            // give up early (this presumes entries are sorted by path bytes).
            return Ok(None);
        }
    }
    if let Some(mut found) = found {
        found.entries_end = offset;
        Ok(Some(found))
    } else {
        Ok(None)
    }
}
1675
1676fn raw_exact_entry_can_patch(raw: &RawExactIndexEntry, git_path: &[u8]) -> bool {
1677    raw.version == 2
1678        && raw.entry.flags_extended == 0
1679        && raw.entry.flags & INDEX_FLAG_EXTENDED == 0
1680        && raw.entry.flags == index_flags(git_path.len(), 0)
1681        && raw.entry.path.as_bytes() == git_path
1682}
1683
1684fn raw_updated_entry_can_patch(
1685    previous: &IndexEntry,
1686    updated: &IndexEntry,
1687    git_path: &[u8],
1688) -> bool {
1689    updated.path.as_bytes() == git_path
1690        && updated.flags_extended == 0
1691        && updated.flags & INDEX_FLAG_EXTENDED == 0
1692        && updated.flags == previous.flags
1693}
1694
1695fn raw_index_extensions_are_filterable(
1696    bytes: &[u8],
1697    entries_end: usize,
1698    checksum_offset: usize,
1699) -> bool {
1700    let mut offset = entries_end;
1701    while offset < checksum_offset {
1702        if checksum_offset.saturating_sub(offset) < 8 {
1703            return false;
1704        }
1705        let size = u32_from_be(&bytes[offset + 4..offset + 8]) as usize;
1706        let Some(end) = offset
1707            .checked_add(8)
1708            .and_then(|offset| offset.checked_add(size))
1709        else {
1710            return false;
1711        };
1712        if end > checksum_offset {
1713            return false;
1714        }
1715        offset = end;
1716    }
1717    true
1718}
1719
1720fn patch_raw_index_entry(
1721    bytes: &mut Vec<u8>,
1722    format: ObjectFormat,
1723    raw: &RawExactIndexEntry,
1724    entry: &IndexEntry,
1725) -> Result<()> {
1726    let hash_len = format.raw_len();
1727    let start = raw.entry_start;
1728    bytes[start..start + 4].copy_from_slice(&entry.ctime_seconds.to_be_bytes());
1729    bytes[start + 4..start + 8].copy_from_slice(&entry.ctime_nanoseconds.to_be_bytes());
1730    bytes[start + 8..start + 12].copy_from_slice(&entry.mtime_seconds.to_be_bytes());
1731    bytes[start + 12..start + 16].copy_from_slice(&entry.mtime_nanoseconds.to_be_bytes());
1732    bytes[start + 16..start + 20].copy_from_slice(&entry.dev.to_be_bytes());
1733    bytes[start + 20..start + 24].copy_from_slice(&entry.ino.to_be_bytes());
1734    bytes[start + 24..start + 28].copy_from_slice(&entry.mode.to_be_bytes());
1735    bytes[start + 28..start + 32].copy_from_slice(&entry.uid.to_be_bytes());
1736    bytes[start + 32..start + 36].copy_from_slice(&entry.gid.to_be_bytes());
1737    bytes[start + 36..start + 40].copy_from_slice(&entry.size.to_be_bytes());
1738    bytes[start + 40..start + 40 + hash_len].copy_from_slice(entry.oid.as_bytes());
1739    bytes[start + 40 + hash_len..start + 40 + hash_len + 2]
1740        .copy_from_slice(&entry.flags.to_be_bytes());
1741
1742    let mut extension_offset = raw.entries_end;
1743    let mut removed_cache_tree = false;
1744    let mut rewritten = Vec::new();
1745    while extension_offset < raw.checksum_offset {
1746        let signature = &bytes[extension_offset..extension_offset + 4];
1747        let size = u32_from_be(&bytes[extension_offset + 4..extension_offset + 8]) as usize;
1748        let end = extension_offset + 8 + size;
1749        if signature == b"TREE" {
1750            removed_cache_tree = true;
1751        } else {
1752            rewritten.extend_from_slice(&bytes[extension_offset..end]);
1753        }
1754        extension_offset = end;
1755    }
1756
1757    if removed_cache_tree {
1758        bytes.truncate(raw.entries_end);
1759        bytes.extend_from_slice(&rewritten);
1760        let checksum = sley_core::digest_bytes(format, bytes)?;
1761        bytes.extend_from_slice(checksum.as_bytes());
1762    } else {
1763        let checksum = sley_core::digest_bytes(format, &bytes[..raw.checksum_offset])?;
1764        bytes[raw.checksum_offset..raw.checksum_offset + hash_len]
1765            .copy_from_slice(checksum.as_bytes());
1766    }
1767    Ok(())
1768}
1769
/// Decodes the first four bytes of `bytes` as a big-endian `u32`.
///
/// Any bytes past the fourth are ignored; panics if fewer than four are given.
fn u32_from_be(bytes: &[u8]) -> u32 {
    let (b0, b1, b2, b3) = (bytes[0], bytes[1], bytes[2], bytes[3]);
    (u32::from(b0) << 24) | (u32::from(b1) << 16) | (u32::from(b2) << 8) | u32::from(b3)
}
1773
/// Decodes the first two bytes of `bytes` as a big-endian `u16`.
///
/// Any bytes past the second are ignored; panics if fewer than two are given.
fn u16_from_be(bytes: &[u8]) -> u16 {
    let (high, low) = (bytes[0], bytes[1]);
    (u16::from(high) << 8) | u16::from(low)
}
1777
1778fn add_update_tracked_path(
1779    worktree_root: &Path,
1780    git_dir: &Path,
1781    format: ObjectFormat,
1782    clean_config: Option<&GitConfig>,
1783    trust_filemode: bool,
1784    odb: &FileObjectDatabase,
1785    stat_cache: &IndexStatCache,
1786    clean_filter: &mut Option<TrackedOnlyCleanFilter>,
1787    index: &mut Index,
1788    git_path: &[u8],
1789) -> Result<(Option<AddUpdateTrackedAction>, bool)> {
1790    let range = index_entries_path_range(&index.entries, git_path);
1791    if range.is_empty() {
1792        return Ok((None, false));
1793    }
1794    let entry = index.entries[range.start].clone();
1795    if entry.stage() != Stage::Normal {
1796        return Ok((None, false));
1797    }
1798    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
1799    let metadata = match fs::symlink_metadata(&absolute) {
1800        Ok(metadata) => metadata,
1801        Err(err)
1802            if matches!(
1803                err.kind(),
1804                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
1805            ) =>
1806        {
1807            if remove_index_entries_with_path(&mut index.entries, git_path) {
1808                return Ok((
1809                    Some(AddUpdateTrackedAction::Remove(git_path.to_vec())),
1810                    true,
1811                ));
1812            }
1813            return Ok((None, false));
1814        }
1815        Err(err) => return Err(err.into()),
1816    };
1817    if metadata.is_dir() {
1818        if !sley_index::is_gitlink(entry.mode) {
1819            return Ok((None, false));
1820        }
1821        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(entry.oid);
1822        let mut updated_entry = index_entry_from_metadata_with_filemode(
1823            entry.path.clone(),
1824            oid,
1825            &metadata,
1826            trust_filemode,
1827        );
1828        updated_entry.mode = sley_index::GITLINK_MODE;
1829        let changed = updated_entry.oid != entry.oid || updated_entry.mode != entry.mode;
1830        if updated_entry != entry {
1831            replace_index_entries_with_entry(&mut index.entries, updated_entry);
1832            return Ok((
1833                changed.then(|| AddUpdateTrackedAction::Add(git_path.to_vec())),
1834                true,
1835            ));
1836        }
1837        return Ok((None, false));
1838    }
1839    if !(metadata.is_file() || metadata.file_type().is_symlink()) {
1840        return Ok((None, false));
1841    }
1842    if stat_cache.reuse_index_entry(&entry, &metadata).is_some() {
1843        return Ok((None, false));
1844    }
1845
1846    let is_symlink = metadata.file_type().is_symlink();
1847    let body = if is_symlink {
1848        symlink_target_bytes(&absolute)?
1849    } else {
1850        let body = fs::read(&absolute)?;
1851        let clean_filter = match clean_config {
1852            Some(config) => {
1853                tracked_only_clean_filter_with_config(clean_filter, worktree_root, config)
1854            }
1855            None => tracked_only_clean_filter(clean_filter, worktree_root, git_dir),
1856        };
1857        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
1858        let checks =
1859            clean_filter
1860                .matcher
1861                .attributes_for_path(git_path, &clean_filter.requested, false);
1862        // git's `add -u` index update folds in `global_conv_flags_eol`, so emit
1863        // the `core.safecrlf` round-trip warning (default: warn). The current
1864        // index blob (`entry.oid`) drives the auto-crlf `has_crlf_in_index`
1865        // decision.
1866        let conv_flags = ConvFlags::from_config(&clean_filter.config);
1867        let index_blob = match conv_flags {
1868            ConvFlags::Off => SafeCrlfIndexBlob::None,
1869            _ => SafeCrlfIndexBlob::Lookup {
1870                odb,
1871                oid: entry.oid,
1872            },
1873        };
1874        apply_clean_filter_with_attributes_cow_safecrlf(
1875            &clean_filter.config,
1876            &checks,
1877            git_path,
1878            &body,
1879            conv_flags,
1880            index_blob,
1881        )?
1882        .into_owned()
1883    };
1884    let object = EncodedObject::new(ObjectType::Blob, body);
1885    let oid = object.object_id(format)?;
1886    if oid != entry.oid || entry.is_intent_to_add() {
1887        odb.write_object(object)?;
1888    }
1889    let mut updated_entry =
1890        index_entry_from_metadata_with_filemode(entry.path.clone(), oid, &metadata, trust_filemode);
1891    if is_symlink {
1892        updated_entry.mode = 0o120000;
1893    }
1894    let changed = updated_entry.oid != entry.oid || updated_entry.mode != entry.mode;
1895    if updated_entry != entry {
1896        replace_index_entries_with_entry(&mut index.entries, updated_entry);
1897        return Ok((
1898            changed.then(|| AddUpdateTrackedAction::Add(git_path.to_vec())),
1899            true,
1900        ));
1901    }
1902    Ok((None, false))
1903}
1904
/// Attribute-lookup strategy used by `update_index_paths_impl` when it runs the
/// clean filter over staged files.
1905enum UpdateIndexCleanFilter {
    /// One `AttributeMatcher` built from the worktree root up front and reused
    /// for every path in the batch.
1906    Full(AttributeMatcher),
    /// No shared matcher: the attribute checks are resolved per path instead.
1907    PathLocal,
1908}
1909
1910fn index_entries_path_range(entries: &[IndexEntry], path: &[u8]) -> std::ops::Range<usize> {
1911    let mut start = match entries.binary_search_by(|entry| entry.path.as_bytes().cmp(path)) {
1912        Ok(index) => index,
1913        Err(insert) => return insert..insert,
1914    };
1915    while start > 0 && entries[start - 1].path.as_bytes() == path {
1916        start -= 1;
1917    }
1918    let mut end = start;
1919    while end < entries.len() && entries[end].path.as_bytes() == path {
1920        end += 1;
1921    }
1922    start..end
1923}
1924
1925fn remove_index_entries_with_path(entries: &mut Vec<IndexEntry>, path: &[u8]) -> bool {
1926    let range = index_entries_path_range(entries, path);
1927    if range.is_empty() {
1928        return false;
1929    }
1930    entries.drain(range);
1931    true
1932}
1933
1934/// Remove every index entry whose path lives *under* `name/` (a strict
1935/// directory-prefix collision). Mirrors git's `has_file_name`
1936/// (read-cache.c): when a *file* entry `a/b` is being added, any entry
1937/// `a/b/...` already in the index would produce a tree that records `a/b`
1938/// both as a blob and as a tree — `write-tree` would emit a malformed tree.
1939/// Entries are sorted by path, so the conflicting children form a contiguous
1940/// run immediately after `name`'s insertion point.
1941fn remove_index_entries_under_dir(entries: &mut Vec<IndexEntry>, name: &[u8]) {
1942    let start = match entries.binary_search_by(|entry| entry.path.as_bytes().cmp(name)) {
1943        Ok(found) => found + 1,
1944        Err(insert) => insert,
1945    };
1946    let mut end = start;
1947    while end < entries.len() {
1948        let candidate = entries[end].path.as_bytes();
1949        // `candidate` is under `name/` iff it is strictly longer, shares the
1950        // `name` prefix, and the next byte is the path separator.
1951        if candidate.len() > name.len()
1952            && candidate[name.len()] == b'/'
1953            && candidate[..name.len()] == *name
1954        {
1955            end += 1;
1956        } else {
1957            break;
1958        }
1959    }
1960    if end > start {
1961        entries.drain(start..end);
1962    }
1963}
1964
1965/// Remove any *file* entry that is a strict directory-prefix of `name` (e.g.
1966/// when adding `a/b/c`, drop a file entry `a/b` or `a`). Mirrors git's
1967/// `has_dir_name` (read-cache.c): such an entry would make the resulting tree
1968/// record the prefix both as a blob and as the directory containing `name`.
1969/// We walk every parent directory of `name`, longest first; the moment a
1970/// real subdirectory already exists at a prefix, no shorter prefix can
1971/// conflict, so we stop early (git's "already matches the sub-directory"
1972/// trivial optimization).
1973fn remove_index_dir_name_conflicts(entries: &mut Vec<IndexEntry>, name: &[u8]) {
1974    let mut slash = name.len();
1975    // Walk back over each '/' (longest parent dir first) until the path has no
1976    // more components.
1977    while let Some(pos) = name[..slash].iter().rposition(|&byte| byte == b'/') {
1978        slash = pos;
1979        let prefix = &name[..slash];
1980        match entries.binary_search_by(|entry| entry.path.as_bytes().cmp(prefix)) {
1981            Ok(found) => {
1982                // A file entry sits exactly at this directory prefix — drop it.
1983                entries.remove(found);
1984            }
1985            Err(insert) => {
1986                // No file at `prefix`. If a child `prefix/...` already exists,
1987                // the directory is established and nothing at this prefix (or
1988                // any shorter one) can conflict; stop.
1989                if insert < entries.len() {
1990                    let candidate = entries[insert].path.as_bytes();
1991                    if candidate.len() > prefix.len()
1992                        && candidate[prefix.len()] == b'/'
1993                        && candidate[..prefix.len()] == *prefix
1994                    {
1995                        break;
1996                    }
1997                }
1998            }
1999        }
2000    }
2001}
2002
2003fn replace_index_entries_with_entry(entries: &mut Vec<IndexEntry>, entry: IndexEntry) {
2004    let path = entry.path.as_bytes().to_vec();
2005    // Enforce directory/file replacement *before* computing the insert
2006    // position: git's `add_index_entry_with_check` removes the conflicting
2007    // entries, then recomputes where the new entry lands. Adding the entry
2008    // as a file drops any `path/...` children; adding it drops any file that
2009    // is a directory-prefix of `path`. Skipping this leaves a D/F-corrupt
2010    // index that `write-tree` turns into a malformed tree.
2011    remove_index_entries_under_dir(entries, &path);
2012    remove_index_dir_name_conflicts(entries, &path);
2013    let range = index_entries_path_range(entries, &path);
2014    if range.is_empty() {
2015        entries.insert(range.start, entry);
2016    } else {
2017        entries.splice(range, [entry]);
2018    }
2019}
2020
2021fn write_index_blob_object(
2022    odb: &FileObjectDatabase,
2023    format: ObjectFormat,
2024    object: EncodedObject,
2025    large_policy: LargeObjectPolicy,
2026    pending_large: &mut Vec<(ObjectId, EncodedObject)>,
2027) -> Result<ObjectId> {
2028    let oid = object.object_id(format)?;
2029    if object.object_type == ObjectType::Blob && object.body.len() as u64 >= large_policy.threshold
2030    {
2031        if !odb.contains(&oid)? {
2032            pending_large.push((oid, object));
2033        }
2034        return Ok(oid);
2035    }
2036    odb.write_object(object)
2037}
2038
2039fn write_pending_large_blobs(
2040    odb: &FileObjectDatabase,
2041    objects: &[(ObjectId, EncodedObject)],
2042    policy: LargeObjectPolicy,
2043) -> Result<()> {
2044    let Some(limit) = policy.pack_size_limit else {
2045        return odb.write_blobs_as_pack(objects, policy.compression_level);
2046    };
2047    let mut start = 0usize;
2048    let mut current_size = 0u64;
2049    for (idx, (_, object)) in objects.iter().enumerate() {
2050        let estimate = object.body.len() as u64 + 32;
2051        if idx > start && current_size.saturating_add(estimate) > limit {
2052            odb.write_blobs_as_pack(&objects[start..idx], policy.compression_level)?;
2053            start = idx;
2054            current_size = 0;
2055        }
2056        current_size = current_size.saturating_add(estimate);
2057    }
2058    if start < objects.len() {
2059        odb.write_blobs_as_pack(&objects[start..], policy.compression_level)?;
2060    }
2061    Ok(())
2062}
2063
/// Applies a batch of `update-index`-style path operations to `index` and
/// writes the result back through `write_repository_index_ref`.
///
/// Paths are handled in order, each according to its own `UpdateIndexPath::mode`
/// (`force_remove`, `remove`, `add`, `info_only`, `chmod`):
/// * a force-removed path is dropped from the index without looking at the
///   worktree;
/// * a path missing from the worktree is dropped when its mode carries `remove`,
///   otherwise the call fails;
/// * a directory is staged only as a gitlink (`sley_index::GITLINK_MODE`, oid
///   taken from `gitlink_head_oid`);
/// * a regular file or symlink is turned into a blob (files pass through the
///   clean filter when `clean_config` is given) and replaces the existing
///   entries for the path.
///
/// `clean_config` doubles as the switch for content filtering, the safecrlf
/// conversion flags, and the pack settings used for large blobs. When `verbose`
/// is set, the report lines collected during the loop are printed to stdout
/// after the index has been written.
///
/// Returns the final number of index entries together with the oid recorded for
/// each staged path.
///
/// # Errors
/// Returns `GitError::InvalidPath` for a path outside `worktree_root` and
/// `GitError::Exit(128)` (after printing a diagnostic) for paths that cannot be
/// staged; other failures from the helpers it calls are propagated with `?`.
/// Every error raised inside the loop returns before
/// `write_repository_index_ref` is reached. The one deferred failure is a
/// symlink chmod when `non_atomic_chmod_errors` is set: the index is still
/// written and `Exit(128)` is returned at the very end.
2064fn update_index_paths_impl(
2065    worktree_root: &Path,
2066    git_dir: &Path,
2067    format: ObjectFormat,
2068    mut index: Index,
2069    paths: &[UpdateIndexPath],
2070    options: UpdateIndexOptions,
2071    clean_config: Option<&GitConfig>,
2072    verbose: bool,
2073) -> Result<UpdateIndexResult> {
2074    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
    // Start from the policy derived from `git_dir`; an explicit config then
    // overrides the compression level and — when `pack.packSizeLimit` parses to
    // a positive value — the pack size limit.
2075    let mut large_policy = LargeObjectPolicy::from_config(git_dir, None)?;
2076    if let Some(config) = clean_config {
2077        large_policy.compression_level = pack_compression_level(config);
2078        large_policy.pack_size_limit = config
2079            .get("pack", None, "packSizeLimit")
2080            .and_then(sley_config::parse_config_int)
2081            .and_then(|value| (value > 0).then_some(value as u64))
2082            .or(large_policy.pack_size_limit);
2083    }
2084    let trust_filemode = clean_config
2085        .map(trust_executable_bit)
2086        .unwrap_or_else(|| trust_executable_bit_from_git_dir(git_dir, None));
2087    let trust_symlinks = clean_config
2088        .map(trust_symlinks)
2089        .unwrap_or_else(|| trust_symlinks_from_git_dir(git_dir, None));
2090    if options.allow_skip_worktree_entries {
2091        expand_sparse_index(&mut index, &odb, format)?;
2092    }
2093    let sparse_checkout_active = sparse_checkout_config_enabled(git_dir)
2094        || index.is_sparse()
2095        || index.entries.iter().any(IndexEntry::is_sparse_dir);
2096    // For small batches, read only each path's `.gitattributes` chain; a
2097    // whole-worktree matcher can dominate `add -u` when only a few files are
2098    // dirty in a huge checkout. Large batches still amortize the full matcher.
2099    let clean_filter = match clean_config {
2100        Some(_) if paths.len() >= 64 => Some(UpdateIndexCleanFilter::Full(
2101            AttributeMatcher::from_worktree_root(worktree_root)?,
2102        )),
2103        Some(_) => Some(UpdateIndexCleanFilter::PathLocal),
2104        None => None,
2105    };
2106    // git's index-update path (object-file.c `get_conv_flags`) folds in
2107    // `global_conv_flags_eol`, so `git add`/`commit` emit the `core.safecrlf`
2108    // round-trip warning (default: warn). It only applies when content filters
2109    // run at all (i.e. when we have a config).
2110    let conv_flags = clean_config.map_or(ConvFlags::Off, ConvFlags::from_config);
    // With a config and both `add` and `remove` enabled, a refused symlink chmod
    // is remembered in `chmod_error` instead of aborting the batch on the spot.
2111    let non_atomic_chmod_errors = clean_config.is_some() && options.add && options.remove;
2112    let requested_filter_attrs = filter_attribute_names();
2113    let mut updated = Vec::new();
2114    let mut reports: Vec<String> = Vec::new();
2115    let mut untracked_cache_invalidation_paths = Vec::new();
2116    let mut pending_large = Vec::new();
2117    let mut chmod_error = false;
2118    for update_path in paths {
2119        let path = &update_path.path;
2120        // Each path carries the sticky mode that was in effect when it was
2121        // parsed on the command line (git processes argv left-to-right). Read
2122        // the action from the path's own mode, NOT a batch-wide flag, so
2123        // `--add foo --force-remove bar` adds foo and force-removes bar.
2124        let path_mode = update_path.mode;
2125        let path_chmod = path_mode.chmod;
2126        let absolute = if path.is_absolute() {
2127            path.clone()
2128        } else {
2129            worktree_root.join(path)
2130        };
2131        let absolute = normalize_absolute_path_lexically(&absolute);
2132        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2133            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2134        })?;
2135        let git_path = git_path_bytes(relative)?;
        // If `index_sparse_dir_contains_path` reports a hit for this path, expand
        // the sparse index before looking the path up.
2136        if index_sparse_dir_contains_path(&index, &git_path) {
2137            expand_sparse_index(&mut index, &odb, format)?;
2138        }
2139        let existing_range = index_entries_path_range(&index.entries, &git_path);
2140        if path_mode.force_remove {
2141            record_resolve_undo_for_range(&mut index, format, &git_path, existing_range)?;
2142            remove_index_entries_with_path(&mut index.entries, &git_path);
2143            untracked_cache_invalidation_paths.push(git_path.clone());
2144            // git's update_one() reports `remove` for a --force-remove path.
2145            reports.push(format!("remove '{}'", String::from_utf8_lossy(&git_path)));
2146            continue;
2147        }
2148        // lstat (not stat): a symlink must be inspected as the link itself, never
2149        // followed to its target. `Path::exists`/`fs::metadata` both stat through
2150        // the link, which makes a symlink-to-directory look like a directory
2151        // (fs::read then fails with "Is a directory") and a symlink-to-file get
2152        // staged with the target's content + a regular-file mode. git stages a
2153        // symlink as mode 120000 whose blob is the link target string, regardless
2154        // of what (if anything) the target resolves to.
2155        let symlink_metadata = match fs::symlink_metadata(&absolute) {
2156            Ok(metadata) => Some(metadata),
2157            // ENOTDIR (a leading path component is now a file, e.g. staging the
2158            // stale `a/b/c` entry after `a/b` became a regular file in a D/F
2159            // flip) means the path no longer exists as a file — git's lstat
2160            // returns ENOTDIR here and treats it exactly like ENOENT. Fold both
2161            // into the "missing" arm so the `--remove` path drops the stale
2162            // entry instead of aborting the whole add with an I/O error.
2163            Err(err)
2164                if matches!(
2165                    err.kind(),
2166                    std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
2167                ) =>
2168            {
2169                None
2170            }
2171            Err(err) => return Err(err.into()),
2172        };
        // Existing entries matching `index_entry_skip_worktree` are left alone
        // unless the caller opted in via `allow_skip_worktree_entries`. A
        // `remove` drops them (unless skip-worktree entries are being ignored)
        // and moves on. Otherwise the path is skipped when it is missing from
        // the worktree, when skip-worktree entries are ignored, or when sparse
        // checkout is not active; only a present path under an active sparse
        // checkout falls through to be staged.
2173        if !options.allow_skip_worktree_entries
2174            && index.entries[existing_range.clone()]
2175                .iter()
2176                .any(index_entry_skip_worktree)
2177        {
2178            if path_mode.remove {
2179                if !options.ignore_skip_worktree_entries {
2180                    index.entries.drain(existing_range);
2181                }
2182                continue;
2183            }
2184            if symlink_metadata.is_none()
2185                || options.ignore_skip_worktree_entries
2186                || !sparse_checkout_active
2187            {
2188                continue;
2189            }
2190        }
2191        let Some(metadata) = symlink_metadata else {
2192            if path_mode.remove {
2193                record_resolve_undo_for_range(&mut index, format, &git_path, existing_range)?;
2194                remove_index_entries_with_path(&mut index.entries, &git_path);
2195                untracked_cache_invalidation_paths.push(git_path.clone());
2196                // git's update_one() unconditionally reports `add '<path>'`
2197                // after process_path(), even when the missing file was removed
2198                // from the index via the `--remove` (not --force-remove) path.
2199                reports.push(format!("add '{}'", String::from_utf8_lossy(&git_path)));
2200                continue;
2201            }
2202            print_update_index_path_error(&git_path, "does not exist and --remove not passed");
2203            return Err(GitError::Exit(128));
2204        };
        // A path with no existing entry may only be staged when its mode
        // carries `add`.
2205        if !path_mode.add && index_entries_path_range(&index.entries, &git_path).is_empty() {
2206            print_update_index_path_error(
2207                &git_path,
2208                "cannot add to the index - missing --add option?",
2209            );
2210            return Err(GitError::Exit(128));
2211        }
2212        if metadata.is_dir() {
            // A tracked path that is now a directory for which
            // `gitlink_head_oid` yields nothing is dropped when the mode
            // carries `remove`.
2213            if path_mode.remove
2214                && !existing_range.is_empty()
2215                && sley_diff_merge::gitlink_head_oid(&absolute, format).is_none()
2216            {
2217                record_resolve_undo_for_range(
2218                    &mut index,
2219                    format,
2220                    &git_path,
2221                    existing_range.clone(),
2222                )?;
2223                remove_index_entries_with_path(&mut index.entries, &git_path);
2224                untracked_cache_invalidation_paths.push(git_path.clone());
2225                reports.push(format!("add '{}'", String::from_utf8_lossy(&git_path)));
2226                continue;
2227            }
2228            // A directory is stageable only as a gitlink: when it is an
2229            // embedded repository with a commit checked out, git records a
2230            // mode-160000 entry whose oid is that commit (no object is
2231            // written). Otherwise it errors — with upstream's exact messages
2232            // for the embedded-repo-without-commit and plain-directory cases
2233            // (object-file.c index_path / builtin/update-index.c
2234            // process_directory).
2235            let display = String::from_utf8_lossy(&git_path).into_owned();
2236            let has_dot_git = absolute.join(".git").exists();
2237            if let Some(submodule_format) = embedded_repo_object_format(&absolute)
2238                && submodule_format != format
2239            {
2240                eprintln!("fatal: cannot add a submodule of a different hash algorithm");
2241                return Err(GitError::Exit(128));
2242            }
2243            let Some(head_oid) = sley_diff_merge::gitlink_head_oid(&absolute, format) else {
2244                if has_dot_git {
                    // The wording of the diagnostic depends on whether a config
                    // was supplied.
2245                    if clean_config.is_some() {
2246                        let display_dir = if display.ends_with('/') {
2247                            display.clone()
2248                        } else {
2249                            format!("{display}/")
2250                        };
2251                        eprintln!("error: '{display_dir}' does not have a commit checked out");
2252                        eprintln!("error: unable to index file '{display_dir}'");
2253                        eprintln!("fatal: adding files failed");
2254                    } else {
2255                        eprintln!("error: '{display}' does not have a commit checked out");
2256                        eprintln!("fatal: Unable to process path {display}");
2257                    }
2258                } else {
2259                    eprintln!("error: {display}: is a directory - add files inside instead");
2260                    eprintln!("fatal: Unable to process path {display}");
2261                }
2262                return Err(GitError::Exit(128));
2263            };
2264            if path_chmod.is_some() {
2265                eprintln!(
2266                    "fatal: git update-index: cannot chmod {}x '{display}'",
2267                    if path_chmod == Some(true) { '+' } else { '-' },
2268                );
2269                return Err(GitError::Exit(128));
2270            }
2271            let mut entry = index_entry_from_metadata_with_filemode(
2272                git_path.clone(),
2273                head_oid,
2274                &metadata,
2275                trust_filemode,
2276            );
2277            entry.mode = sley_index::GITLINK_MODE;
2278            reports.push(format!("add '{display}'"));
2279            record_resolve_undo_for_range(&mut index, format, &git_path, existing_range.clone())?;
2280            replace_index_entries_with_entry(&mut index.entries, entry);
2281            untracked_cache_invalidation_paths.push(git_path.clone());
2282            updated.push(head_oid);
2283            continue;
2284        }
2285        let is_symlink = metadata.file_type().is_symlink();
2286        let body = if is_symlink {
2287            // The blob is the raw link target bytes; clean filters never apply to
2288            // a symlink (git treats it as binary content, not a text path).
2289            symlink_target_bytes(&absolute)?
2290        } else {
2291            let body = fs::read(&absolute)?;
2292            // The safecrlf auto-crlf decision needs the path's *current* index
2293            // blob (git's `has_crlf_in_index`); the stage-0 entry, if any, has it.
2294            let index_blob = match conv_flags {
2295                ConvFlags::Off => SafeCrlfIndexBlob::None,
2296                _ => stage0_oid_in_range(&index.entries, existing_range.clone()).map_or(
2297                    SafeCrlfIndexBlob::None,
2298                    |oid| SafeCrlfIndexBlob::Lookup { odb: &odb, oid },
2299                ),
2300            };
2301            match (clean_config, &clean_filter) {
2302                (Some(config), Some(UpdateIndexCleanFilter::Full(matcher))) => {
2303                    // Identical to `apply_clean_filter`, but reuses the batch's
2304                    // matcher instead of rebuilding it (and re-walking the tree)
2305                    // for this path.
2306                    let checks =
2307                        matcher.attributes_for_path(&git_path, &requested_filter_attrs, false);
2308                    apply_clean_filter_with_attributes_cow_safecrlf(
2309                        config, &checks, &git_path, &body, conv_flags, index_blob,
2310                    )?
2311                    .into_owned()
2312                }
2313                (Some(config), Some(UpdateIndexCleanFilter::PathLocal)) => {
2314                    let checks = filter_attribute_checks(worktree_root, &git_path)?;
2315                    apply_clean_filter_with_attributes_cow_safecrlf(
2316                        config, &checks, &git_path, &body, conv_flags, index_blob,
2317                    )?
2318                    .into_owned()
2319                }
                // No config: the file content is staged as read.
2320                _ => body,
2321            }
2322        };
2323        let object = EncodedObject::new(ObjectType::Blob, body);
        // `info_only` computes the oid without writing the object.
2324        let oid = if path_mode.info_only {
2325            object.object_id(format)?
2326        } else {
2327            write_index_blob_object(&odb, format, object, large_policy, &mut pending_large)?
2328        };
2329        let mut entry = index_entry_from_metadata_with_filemode(
2330            git_path.clone(),
2331            oid,
2332            &metadata,
2333            trust_filemode,
2334        );
2335        if is_symlink {
2336            entry.mode = 0o120000;
2337        }
        // If `preferred_unmerged_mode_for_untrusted_worktree` yields a mode for
        // the entries being replaced, it overrides the mode derived above.
2338        if let Some(mode) = preferred_unmerged_mode_for_untrusted_worktree(
2339            &index.entries[existing_range.clone()],
2340            trust_filemode,
2341            trust_symlinks,
2342        ) {
2343            entry.mode = mode;
2344        }
2345        // git's update_one() reports `add` for every staged path (whether the
2346        // entry is new or an update), then chmod_path() reports the chmod after.
2347        reports.push(format!("add '{}'", String::from_utf8_lossy(&git_path)));
2348        if let Some(executable) = path_chmod {
2349            // git's chmod_path() refuses to flip the executable bit on anything
2350            // that is not a regular file (a symlink/gitlink has no such bit). It
2351            // writes the blob first, reports the error, and still writes the
2352            // other index updates.
2353            if is_symlink {
2354                eprintln!(
2355                    "fatal: git update-index: cannot chmod {}x '{}'",
2356                    if executable { '+' } else { '-' },
2357                    String::from_utf8_lossy(&git_path)
2358                );
2359                if !non_atomic_chmod_errors {
2360                    return Err(GitError::Exit(128));
2361                }
2362                chmod_error = true;
2363            } else {
2364                entry.mode = if executable { 0o100755 } else { 0o100644 };
2365                reports.push(format!(
2366                    "chmod {}x '{}'",
2367                    if executable { '+' } else { '-' },
2368                    String::from_utf8_lossy(&git_path)
2369                ));
2370            }
2371        }
2372        record_resolve_undo_for_range(&mut index, format, &git_path, existing_range.clone())?;
2373        replace_index_entries_with_entry(&mut index.entries, entry);
2374        untracked_cache_invalidation_paths.push(git_path);
2375        updated.push(oid);
2376    }
2377    normalize_index_version_for_extended_flags(&mut index);
    // NOTE(review): judging by the helper's name this rebuilds the extension
    // list without the cache-tree extension — confirm.
2378    index.extensions = index_extensions_without_cache_tree(&index.extensions);
2379    invalidate_untracked_cache_for_git_paths(
2380        &mut index,
2381        format,
2382        &untracked_cache_invalidation_paths,
2383    )?;
    // Deferred large blobs are written before the index that references them.
2384    if !pending_large.is_empty() {
2385        write_pending_large_blobs(&odb, &pending_large, large_policy)?;
2386    }
2387    write_repository_index_ref(git_dir, format, &index)?;
2388    if verbose {
2389        let mut stdout = std::io::stdout().lock();
2390        for line in &reports {
2391            writeln!(stdout, "{line}")?;
2392        }
2393        stdout.flush()?;
2394    }
    // A chmod refusal deferred by `non_atomic_chmod_errors` fails the call only
    // now, after the index has been written.
2395    if chmod_error {
2396        return Err(GitError::Exit(128));
2397    }
2398    Ok(UpdateIndexResult {
2399        entries: index.entries.len(),
2400        updated,
2401    })
2402}
2403
2404pub fn refresh_index_paths(
2405    worktree_root: impl AsRef<Path>,
2406    git_dir: impl AsRef<Path>,
2407    format: ObjectFormat,
2408    paths: &[PathBuf],
2409    quiet: bool,
2410    ignore_missing: bool,
2411    really_refresh: bool,
2412) -> Result<UpdateIndexResult> {
2413    let worktree_root = worktree_root.as_ref();
2414    let git_dir = git_dir.as_ref();
2415    let index_path = repository_index_path(git_dir);
2416    if !index_path.exists() {
2417        return Ok(UpdateIndexResult {
2418            entries: 0,
2419            updated: Vec::new(),
2420        });
2421    }
2422    let mut index = Index::parse(&fs::read(&index_path)?, format)?;
2423    let trust_filemode = trust_executable_bit_from_git_dir(git_dir, None);
2424    // git's `update-index --refresh` trusts the cached stat: a stage-0 entry
2425    // whose size+mtime still match the worktree file (and is not racily clean) is
2426    // known unchanged, so its content is NOT re-read or re-hashed
2427    // (read-cache.c `refresh_cache_ent` → `ie_match_stat`). Without this shortcut
2428    // sley re-hashed every tracked file on every refresh — the 3.2x slowdown in
2429    // sley#27. We build the cache from the same parsed index + the index file's
2430    // own mtime (the racy-clean reference) so no extra parse is needed.
2431    let index_mtime = fs::metadata(&index_path)
2432        .ok()
2433        .and_then(|metadata| file_mtime_parts(&metadata));
2434    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
2435    let selected_paths = paths
2436        .iter()
2437        .map(|path| {
2438            let absolute = if path.is_absolute() {
2439                path.clone()
2440            } else {
2441                worktree_root.join(path)
2442            };
2443            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2444                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2445            })?;
2446            git_path_bytes(relative)
2447        })
2448        .collect::<Result<Vec<_>>>()?;
2449    let selected_paths = selected_paths.into_iter().collect::<BTreeSet<_>>();
2450    if selected_paths.is_empty()
2451        && !really_refresh
2452        && !index
2453            .entries
2454            .iter()
2455            .any(|entry| entry.flags & INDEX_FLAG_ASSUME_UNCHANGED != 0)
2456    {
2457        return refresh_all_index_paths_parallel(
2458            worktree_root,
2459            git_dir,
2460            format,
2461            index,
2462            stat_cache,
2463            quiet,
2464            ignore_missing,
2465            trust_filemode,
2466        );
2467    }
2468    let mut needs_update = false;
2469    let mut index_dirty = false;
2470    for entry in &mut index.entries {
2471        if index_entry_stage(entry) != 0 {
2472            continue;
2473        }
2474        if entry.flags & INDEX_FLAG_ASSUME_UNCHANGED != 0 {
2475            if !really_refresh {
2476                continue;
2477            }
2478            entry.flags &= !INDEX_FLAG_ASSUME_UNCHANGED;
2479            index_dirty = true;
2480        }
2481        let absolute = worktree_root.join(repo_path_to_os_path(entry.path.as_bytes())?);
2482        let Ok(metadata) = fs::metadata(&absolute) else {
2483            if ignore_missing {
2484                continue;
2485            }
2486            if !quiet {
2487                print_update_index_needs_update(entry.path.as_bytes());
2488            }
2489            needs_update = true;
2490            continue;
2491        };
2492        // git's `refresh_cache_ent` runs `ie_match_stat`, whose `S_IFGITLINK`
2493        // arm never re-reads content: a gitlink whose worktree path is a
2494        // directory is up to date (an unpopulated/HEAD-matching submodule), so
2495        // `--refresh` leaves it untouched and silent. Only a gitlink that is no
2496        // longer a directory (replaced by a file, or removed) is `TYPE_CHANGED`.
2497        // This is the single `sley_index::gitlink_stat_verdict` rule; without it
2498        // the `!is_file()` guard below mis-flagged every populated submodule as
2499        // "needs update". The populated-HEAD comparison is deliberately left to
2500        // status/diff (the unpopulated default is clean).
2501        if sley_index::is_gitlink(entry.mode) {
2502            match sley_index::gitlink_stat_verdict(&metadata) {
2503                sley_index::GitlinkStatVerdict::Populated => continue,
2504                sley_index::GitlinkStatVerdict::TypeChanged => {
2505                    if !quiet {
2506                        print_update_index_needs_update(entry.path.as_bytes());
2507                    }
2508                    needs_update = true;
2509                    continue;
2510                }
2511            }
2512        }
2513        if !metadata.is_file() {
2514            if !quiet {
2515                print_update_index_needs_update(entry.path.as_bytes());
2516            }
2517            needs_update = true;
2518            continue;
2519        }
2520        // Stat shortcut: when the cached stat proves the file is unchanged since
2521        // it was staged, its content hashes to the cached oid by construction
2522        // (see `IndexStatCache`'s safety invariant). Skip the read+hash and just
2523        // refresh the stat fields from current metadata — byte-identical to the
2524        // clean arm below, since the oid stamped is the cached one and the
2525        // metadata is the same one that re-stamp would read.
2526        if stat_cache.reuse_index_entry(entry, &metadata).is_some() {
2527            continue;
2528        }
2529        let body = fs::read(&absolute)?;
2530        let object = EncodedObject::new(ObjectType::Blob, body);
2531        let oid = object.object_id(format)?;
2532        if oid != entry.oid || file_mode_with_trust(&metadata, trust_filemode) != entry.mode {
2533            if !quiet {
2534                print_update_index_needs_update(entry.path.as_bytes());
2535            }
2536            needs_update = true;
2537            if really_refresh
2538                && !selected_paths.is_empty()
2539                && selected_paths.contains(entry.path.as_bytes())
2540            {
2541                let updated_entry = index_entry_from_metadata_with_filemode(
2542                    entry.path.clone(),
2543                    oid,
2544                    &metadata,
2545                    trust_filemode,
2546                );
2547                if updated_entry != *entry {
2548                    *entry = updated_entry;
2549                    index_dirty = true;
2550                }
2551            }
2552            continue;
2553        }
2554        let updated_entry = index_entry_from_metadata_with_filemode(
2555            entry.path.clone(),
2556            oid,
2557            &metadata,
2558            trust_filemode,
2559        );
2560        if updated_entry != *entry {
2561            *entry = updated_entry;
2562            index_dirty = true;
2563        }
2564    }
2565    if index_dirty {
2566        write_repository_index_ref(git_dir, format, &index)?;
2567    }
2568    if needs_update && !quiet {
2569        return Err(GitError::Exit(1));
2570    }
2571    Ok(UpdateIndexResult {
2572        entries: index.entries.len(),
2573        updated: Vec::new(),
2574    })
2575}
2576
2577fn refresh_all_index_paths_parallel(
2578    worktree_root: &Path,
2579    git_dir: &Path,
2580    format: ObjectFormat,
2581    mut index: Index,
2582    stat_cache: IndexStatCache,
2583    quiet: bool,
2584    ignore_missing: bool,
2585    trust_filemode: bool,
2586) -> Result<UpdateIndexResult> {
2587    let prechecks =
2588        tracked_only_non_clean_prechecks_parallel(worktree_root, &index, &stat_cache, false)?;
2589    let mut needs_update = false;
2590    let mut index_dirty = false;
2591    for precheck in prechecks {
2592        match precheck {
2593            TrackedOnlyPrecheck::Deleted(idx) => {
2594                if ignore_missing {
2595                    continue;
2596                }
2597                if !quiet {
2598                    print_update_index_needs_update(index.entries[idx].path.as_bytes());
2599                }
2600                needs_update = true;
2601            }
2602            TrackedOnlyPrecheck::Slow(idx) => {
2603                let entry = &mut index.entries[idx];
2604                let path = entry.path.as_bytes().to_vec();
2605                let absolute = worktree_root.join(repo_path_to_os_path(&path)?);
2606                let Ok(metadata) = fs::metadata(&absolute) else {
2607                    if ignore_missing {
2608                        continue;
2609                    }
2610                    if !quiet {
2611                        print_update_index_needs_update(&path);
2612                    }
2613                    needs_update = true;
2614                    continue;
2615                };
2616                // Gitlink: never re-read; a directory on disk is up to date (the
2617                // single `sley_index::gitlink_stat_verdict` rule, matching the
2618                // serial path above). Only a type-changed gitlink needs update.
2619                if sley_index::is_gitlink(entry.mode) {
2620                    match sley_index::gitlink_stat_verdict(&metadata) {
2621                        sley_index::GitlinkStatVerdict::Populated => continue,
2622                        sley_index::GitlinkStatVerdict::TypeChanged => {
2623                            if !quiet {
2624                                print_update_index_needs_update(&path);
2625                            }
2626                            needs_update = true;
2627                            continue;
2628                        }
2629                    }
2630                }
2631                if !metadata.is_file() {
2632                    if !quiet {
2633                        print_update_index_needs_update(&path);
2634                    }
2635                    needs_update = true;
2636                    continue;
2637                }
2638                if stat_cache.reuse_index_entry(entry, &metadata).is_some() {
2639                    continue;
2640                }
2641                let body = fs::read(&absolute)?;
2642                let object = EncodedObject::new(ObjectType::Blob, body);
2643                let oid = object.object_id(format)?;
2644                if oid != entry.oid || file_mode_with_trust(&metadata, trust_filemode) != entry.mode
2645                {
2646                    if !quiet {
2647                        print_update_index_needs_update(&path);
2648                    }
2649                    needs_update = true;
2650                    continue;
2651                }
2652                let updated_entry = index_entry_from_metadata_with_filemode(
2653                    entry.path.clone(),
2654                    oid,
2655                    &metadata,
2656                    trust_filemode,
2657                );
2658                if updated_entry != *entry {
2659                    *entry = updated_entry;
2660                    index_dirty = true;
2661                }
2662            }
2663        }
2664    }
2665    if index_dirty {
2666        write_repository_index_ref(git_dir, format, &index)?;
2667    }
2668    if needs_update && !quiet {
2669        return Err(GitError::Exit(1));
2670    }
2671    Ok(UpdateIndexResult {
2672        entries: index.entries.len(),
2673        updated: Vec::new(),
2674    })
2675}
2676
2677pub fn update_index_again(
2678    worktree_root: impl AsRef<Path>,
2679    git_dir: impl AsRef<Path>,
2680    format: ObjectFormat,
2681    paths: &[PathBuf],
2682    options: UpdateIndexOptions,
2683) -> Result<UpdateIndexResult> {
2684    let worktree_root = worktree_root.as_ref();
2685    let git_dir = git_dir.as_ref();
2686    let index_path = repository_index_path(git_dir);
2687    if !index_path.exists() {
2688        return Ok(UpdateIndexResult {
2689            entries: 0,
2690            updated: Vec::new(),
2691        });
2692    }
2693    let index = Index::parse(&fs::read(&index_path)?, format)?;
2694    let db = FileObjectDatabase::from_git_dir(git_dir, format);
2695    let head_entries = head_tree_entries(git_dir, format, &db)?;
2696    let selected_paths = selected_git_paths(worktree_root, paths)?;
2697    let mut again_paths = Vec::new();
2698    for entry in &index.entries {
2699        if index_entry_stage(entry) != 0 {
2700            continue;
2701        }
2702        if !selected_paths.is_empty() && !git_path_selected(entry.path.as_bytes(), &selected_paths)
2703        {
2704            continue;
2705        }
2706        let differs_from_head = match head_entries.get(entry.path.as_bytes()) {
2707            Some(head_entry) => head_entry.oid != entry.oid || head_entry.mode != entry.mode,
2708            None => true,
2709        };
2710        if differs_from_head {
2711            again_paths.push(worktree_root.join(repo_path_to_os_path(entry.path.as_bytes())?));
2712        }
2713    }
2714    if again_paths.is_empty() {
2715        return Ok(UpdateIndexResult {
2716            entries: index.entries.len(),
2717            updated: Vec::new(),
2718        });
2719    }
2720    update_index_paths(worktree_root, git_dir, format, &again_paths, options)
2721}
2722
2723pub fn set_index_assume_unchanged_paths(
2724    worktree_root: impl AsRef<Path>,
2725    git_dir: impl AsRef<Path>,
2726    format: ObjectFormat,
2727    paths: &[PathBuf],
2728    assume_unchanged: bool,
2729) -> Result<UpdateIndexResult> {
2730    let worktree_root = worktree_root.as_ref();
2731    let git_dir = git_dir.as_ref();
2732    let index_path = repository_index_path(git_dir);
2733    let mut index = if index_path.exists() {
2734        Index::parse(&fs::read(&index_path)?, format)?
2735    } else {
2736        Index {
2737            version: 2,
2738            entries: Vec::new(),
2739            extensions: Vec::new(),
2740            checksum: None,
2741        }
2742    };
2743    let sparse = active_sparse_checkout(git_dir)?;
2744    let db = FileObjectDatabase::from_git_dir(git_dir, format);
2745    if index.is_sparse() {
2746        expand_sparse_index(&mut index, &db, format)?;
2747    }
2748    let selected_paths = paths
2749        .iter()
2750        .map(|path| {
2751            let absolute = if path.is_absolute() {
2752                path.clone()
2753            } else {
2754                worktree_root.join(path)
2755            };
2756            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2757                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2758            })?;
2759            git_path_bytes(relative)
2760        })
2761        .collect::<Result<Vec<_>>>()?;
2762    for path in selected_paths {
2763        if let Some(entry) = index.entries.iter_mut().find(|entry| entry.path == path) {
2764            if assume_unchanged {
2765                entry.flags |= INDEX_FLAG_ASSUME_UNCHANGED;
2766            } else {
2767                entry.flags &= !INDEX_FLAG_ASSUME_UNCHANGED;
2768            }
2769        }
2770    }
2771    normalize_index_version_for_extended_flags(&mut index);
2772    if let Some((sparse, mode)) = sparse
2773        && sparse.sparse_index
2774    {
2775        let matcher = SparseMatcher::new(&sparse, mode);
2776        collapse_to_sparse_index(&mut index, &matcher, &db, format)?;
2777    }
2778    write_repository_index_ref(git_dir, format, &index)?;
2779    Ok(UpdateIndexResult {
2780        entries: index.entries.len(),
2781        updated: Vec::new(),
2782    })
2783}
2784
2785fn selected_git_paths(worktree_root: &Path, paths: &[PathBuf]) -> Result<BTreeSet<Vec<u8>>> {
2786    paths
2787        .iter()
2788        .map(|path| {
2789            let absolute = if path.is_absolute() {
2790                path.clone()
2791            } else {
2792                worktree_root.join(path)
2793            };
2794            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2795                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2796            })?;
2797            git_path_bytes(relative)
2798        })
2799        .collect()
2800}
2801
2802fn git_path_selected(path: &[u8], selected_paths: &BTreeSet<Vec<u8>>) -> bool {
2803    selected_paths
2804        .iter()
2805        .any(|selected| path == selected || index_entry_is_under_path(path, selected))
2806}
2807
2808pub fn set_index_skip_worktree_paths(
2809    worktree_root: impl AsRef<Path>,
2810    git_dir: impl AsRef<Path>,
2811    format: ObjectFormat,
2812    paths: &[PathBuf],
2813    skip_worktree: bool,
2814) -> Result<UpdateIndexResult> {
2815    let worktree_root = worktree_root.as_ref();
2816    let git_dir = git_dir.as_ref();
2817    let index_path = repository_index_path(git_dir);
2818    let mut index = if index_path.exists() {
2819        Index::parse(&fs::read(&index_path)?, format)?
2820    } else {
2821        Index {
2822            version: 2,
2823            entries: Vec::new(),
2824            extensions: Vec::new(),
2825            checksum: None,
2826        }
2827    };
2828    let sparse = active_sparse_checkout(git_dir)?;
2829    let db = FileObjectDatabase::from_git_dir(git_dir, format);
2830    if index.is_sparse() {
2831        expand_sparse_index(&mut index, &db, format)?;
2832    }
2833    let selected_paths = paths
2834        .iter()
2835        .map(|path| {
2836            let absolute = if path.is_absolute() {
2837                path.clone()
2838            } else {
2839                worktree_root.join(path)
2840            };
2841            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2842                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2843            })?;
2844            git_path_bytes(relative)
2845        })
2846        .collect::<Result<Vec<_>>>()?;
2847    for path in selected_paths {
2848        if let Some(entry) = index.entries.iter_mut().find(|entry| entry.path == path) {
2849            if skip_worktree {
2850                entry.flags |= INDEX_FLAG_EXTENDED;
2851                entry.flags_extended |= INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
2852            } else {
2853                entry.flags_extended &= !INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
2854                if entry.flags_extended == 0 {
2855                    entry.flags &= !INDEX_FLAG_EXTENDED;
2856                }
2857            }
2858        }
2859    }
2860    normalize_index_version_for_extended_flags(&mut index);
2861    if let Some((sparse, mode)) = sparse
2862        && sparse.sparse_index
2863    {
2864        let matcher = SparseMatcher::new(&sparse, mode);
2865        collapse_to_sparse_index(&mut index, &matcher, &db, format)?;
2866    }
2867    write_repository_index_ref(git_dir, format, &index)?;
2868    Ok(UpdateIndexResult {
2869        entries: index.entries.len(),
2870        updated: Vec::new(),
2871    })
2872}
2873
2874pub fn set_index_fsmonitor_valid_paths(
2875    worktree_root: impl AsRef<Path>,
2876    git_dir: impl AsRef<Path>,
2877    format: ObjectFormat,
2878    paths: &[PathBuf],
2879    _fsmonitor_valid: bool,
2880) -> Result<UpdateIndexResult> {
2881    let worktree_root = worktree_root.as_ref();
2882    let git_dir = git_dir.as_ref();
2883    let index_path = repository_index_path(git_dir);
2884    let index = if index_path.exists() {
2885        Index::parse(&fs::read(&index_path)?, format)?
2886    } else {
2887        Index {
2888            version: 2,
2889            entries: Vec::new(),
2890            extensions: Vec::new(),
2891            checksum: None,
2892        }
2893    };
2894    let selected_paths = paths
2895        .iter()
2896        .map(|path| {
2897            let absolute = if path.is_absolute() {
2898                path.clone()
2899            } else {
2900                worktree_root.join(path)
2901            };
2902            let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
2903                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
2904            })?;
2905            git_path_bytes(relative)
2906        })
2907        .collect::<Result<Vec<_>>>()?;
2908    for path in selected_paths {
2909        if !index.entries.iter().any(|entry| entry.path == path) {
2910            eprintln!(
2911                "fatal: Unable to mark file {}",
2912                String::from_utf8_lossy(&path)
2913            );
2914            return Err(GitError::Exit(128));
2915        }
2916    }
2917    Ok(UpdateIndexResult {
2918        entries: index.entries.len(),
2919        updated: Vec::new(),
2920    })
2921}
2922
2923pub fn set_index_version(
2924    git_dir: impl AsRef<Path>,
2925    format: ObjectFormat,
2926    version: u32,
2927    verbose: bool,
2928) -> Result<UpdateIndexResult> {
2929    if !matches!(version, 2..=4) {
2930        return Err(GitError::Unsupported(format!(
2931            "update-index currently supports --index-version 2, 3, or 4, got {version}"
2932        )));
2933    }
2934    let git_dir = git_dir.as_ref();
2935    let index_path = repository_index_path(git_dir);
2936    let mut index = if index_path.exists() {
2937        Index::parse(&fs::read(&index_path)?, format)?
2938    } else {
2939        Index {
2940            version: 2,
2941            entries: Vec::new(),
2942            extensions: Vec::new(),
2943            checksum: None,
2944        }
2945    };
2946    // git reports the transition unconditionally under --verbose, even when the
2947    // requested version equals the current one ("was 4, set to 4").
2948    let previous = index.version;
2949    if verbose {
2950        println!("index-version: was {previous}, set to {version}");
2951    }
2952    index.version = version;
2953    normalize_index_version_for_extended_flags(&mut index);
2954    write_repository_index_ref(git_dir, format, &index)?;
2955    Ok(UpdateIndexResult {
2956        entries: index.entries.len(),
2957        updated: Vec::new(),
2958    })
2959}
2960
2961pub fn force_write_index(
2962    git_dir: impl AsRef<Path>,
2963    format: ObjectFormat,
2964) -> Result<UpdateIndexResult> {
2965    let git_dir = git_dir.as_ref();
2966    let index_path = repository_index_path(git_dir);
2967    let mut index = if index_path.exists() {
2968        Index::parse(&fs::read(&index_path)?, format)?
2969    } else {
2970        Index {
2971            version: 2,
2972            entries: Vec::new(),
2973            extensions: Vec::new(),
2974            checksum: None,
2975        }
2976    };
2977    normalize_index_version_for_extended_flags(&mut index);
2978    write_repository_index_ref(git_dir, format, &index)?;
2979    Ok(UpdateIndexResult {
2980        entries: index.entries.len(),
2981        updated: Vec::new(),
2982    })
2983}
2984
2985pub fn enable_untracked_cache(
2986    worktree_root: impl AsRef<Path>,
2987    git_dir: impl AsRef<Path>,
2988    format: ObjectFormat,
2989) -> Result<()> {
2990    let worktree_root = worktree_root.as_ref();
2991    let git_dir = git_dir.as_ref();
2992    let index_path = repository_index_path(git_dir);
2993    let mut index = if index_path.exists() {
2994        Index::parse(&fs::read(&index_path)?, format)?
2995    } else {
2996        empty_index()
2997    };
2998    let ident = untracked_cache_ident(worktree_root);
2999    let dir_flags = untracked_cache_dir_flags(StatusUntrackedMode::Normal);
3000    let cache = match index.untracked_cache(format)? {
3001        Some(mut cache) if cache.ident == ident => {
3002            cache.dir_flags = dir_flags;
3003            cache
3004        }
3005        _ => UntrackedCache::new(format, ident, dir_flags),
3006    };
3007    index.set_untracked_cache(format, Some(&cache))?;
3008    write_repository_index_ref(git_dir, format, &index)?;
3009    Ok(())
3010}
3011
3012pub fn disable_untracked_cache(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Result<()> {
3013    let git_dir = git_dir.as_ref();
3014    let index_path = repository_index_path(git_dir);
3015    if !index_path.exists() {
3016        return Ok(());
3017    }
3018    let mut index = Index::parse(&fs::read(&index_path)?, format)?;
3019    index.set_untracked_cache(format, None)?;
3020    write_repository_index_ref(git_dir, format, &index)?;
3021    Ok(())
3022}
3023
3024pub fn refresh_untracked_cache_after_status(
3025    worktree_root: impl AsRef<Path>,
3026    git_dir: impl AsRef<Path>,
3027    format: ObjectFormat,
3028    config: &GitConfig,
3029    untracked_mode: StatusUntrackedMode,
3030) -> Result<()> {
3031    if matches!(untracked_mode, StatusUntrackedMode::None) {
3032        return Ok(());
3033    }
3034    let worktree_root = worktree_root.as_ref();
3035    let git_dir = git_dir.as_ref();
3036    let index_path = repository_index_path(git_dir);
3037    let untracked_cache_setting = config.get("core", None, "untrackedCache");
3038    match untracked_cache_setting {
3039        Some("keep") | None => {
3040            if !repository_index_has_extension(git_dir, format, b"UNTR")? {
3041                return Ok(());
3042            }
3043        }
3044        Some("false" | "no" | "off" | "0") | Some("true" | "yes" | "on" | "1") => {}
3045        Some(_) => {
3046            if !repository_index_has_extension(git_dir, format, b"UNTR")? {
3047                return Ok(());
3048            }
3049        }
3050    }
3051    let mut index = if index_path.exists() {
3052        Index::parse(&fs::read(&index_path)?, format)?
3053    } else {
3054        empty_index()
3055    };
3056    match untracked_cache_setting {
3057        Some("false") | Some("no") | Some("off") | Some("0") => {
3058            index.set_untracked_cache(format, None)?;
3059            write_repository_index_ref(git_dir, format, &index)?;
3060            return Ok(());
3061        }
3062        Some("true") | Some("yes") | Some("on") | Some("1") => {}
3063        Some("keep") | None => {
3064            if index.untracked_cache(format)?.is_none() {
3065                return Ok(());
3066            }
3067        }
3068        Some(_) => {
3069            if index.untracked_cache(format)?.is_none() {
3070                return Ok(());
3071            }
3072        }
3073    }
3074    let old_cache = index.untracked_cache(format).ok().flatten();
3075    let ident = untracked_cache_ident(worktree_root);
3076    if old_cache.as_ref().is_some_and(|cache| cache.ident != ident) {
3077        eprintln!("warning: untracked cache is disabled on this system or location");
3078        emit_untracked_cache_bypass_trace();
3079        return Ok(());
3080    }
3081    let cache = build_untracked_cache(worktree_root, git_dir, format, &index, untracked_mode)?;
3082    emit_untracked_cache_trace(old_cache.as_ref(), &cache);
3083    index.set_untracked_cache(format, Some(&cache))?;
3084    write_repository_index_ref(git_dir, format, &index)?;
3085    Ok(())
3086}
3087
3088fn repository_index_has_extension(
3089    git_dir: &Path,
3090    format: ObjectFormat,
3091    signature: &[u8; 4],
3092) -> Result<bool> {
3093    let index_path = repository_index_path(git_dir);
3094    if !index_path.exists() {
3095        return Ok(false);
3096    }
3097    let bytes = read_borrowed_index_bytes(&index_path)?;
3098    sley_index::Index::bytes_have_extension(bytes.as_ref(), format, signature)
3099}
3100
3101pub fn emit_untracked_cache_bypass_trace() {
3102    sley_core::trace2::perf_read_directory_data("path", "");
3103}
3104
/// Returns a copy of the raw index extension area with every `TREE` chunk
/// removed.
///
/// `extensions` is read as a sequence of chunks: a 4-byte signature, a 4-byte
/// big-endian body length, then the body. All other chunks are copied through
/// unchanged and in order.
///
/// Malformed input (a truncated header, or a body length that runs past the
/// end of the buffer) yields an empty vector, dropping all extensions. The end
/// offset uses checked arithmetic so an oversized length cannot wrap on
/// targets with a narrow `usize`.
fn index_extensions_without_cache_tree(extensions: &[u8]) -> Vec<u8> {
    const HEADER_LEN: usize = 8;
    let mut filtered = Vec::new();
    let mut offset = 0usize;
    while offset < extensions.len() {
        let Some(header) = extensions[offset..].get(..HEADER_LEN) else {
            return Vec::new();
        };
        let (signature, size_bytes) = header.split_at(4);
        let size =
            u32::from_be_bytes([size_bytes[0], size_bytes[1], size_bytes[2], size_bytes[3]])
                as usize;
        let end = offset
            .checked_add(HEADER_LEN)
            .and_then(|body_start| body_start.checked_add(size))
            .filter(|end| *end <= extensions.len());
        let Some(end) = end else {
            return Vec::new();
        };
        if signature != b"TREE" {
            filtered.extend_from_slice(&extensions[offset..end]);
        }
        offset = end;
    }
    filtered
}
3130
/// One record of the index's `REUC` (resolve-undo) extension: the conflict
/// stages a path had before it was resolved.
#[derive(Clone)]
struct ResolveUndoRecord {
    /// Repository-relative path as raw bytes.
    path: Vec<u8>,
    /// `(mode, oid)` for stages Base, Ours and Theirs, in that order; `None`
    /// when the path had no entry at that stage.
    stages: [Option<(u32, ObjectId)>; 3],
}
3136
3137fn record_resolve_undo_for_path(
3138    index: &mut Index,
3139    format: ObjectFormat,
3140    path: &[u8],
3141    entries: &[IndexEntry],
3142) -> Result<()> {
3143    let mut stages = [None, None, None];
3144    for entry in entries {
3145        match entry.stage() {
3146            Stage::Base => stages[0] = Some((entry.mode, entry.oid)),
3147            Stage::Ours => stages[1] = Some((entry.mode, entry.oid)),
3148            Stage::Theirs => stages[2] = Some((entry.mode, entry.oid)),
3149            Stage::Normal => {}
3150        }
3151    }
3152    if stages.iter().all(Option::is_none) {
3153        return Ok(());
3154    }
3155    let mut records = parse_resolve_undo_records(index.extension(b"REUC")?, format)?;
3156    records.retain(|record| record.path.as_slice() != path);
3157    records.push(ResolveUndoRecord {
3158        path: path.to_vec(),
3159        stages,
3160    });
3161    records.sort_by(|left, right| left.path.cmp(&right.path));
3162    set_resolve_undo_extension(index, &records)
3163}
3164
3165fn record_resolve_undo_for_range(
3166    index: &mut Index,
3167    format: ObjectFormat,
3168    path: &[u8],
3169    range: Range<usize>,
3170) -> Result<()> {
3171    if range.is_empty() {
3172        return Ok(());
3173    }
3174    let entries = index.entries[range].to_vec();
3175    record_resolve_undo_for_path(index, format, path, &entries)
3176}
3177
3178fn parse_resolve_undo_records(
3179    body: Option<&[u8]>,
3180    format: ObjectFormat,
3181) -> Result<Vec<ResolveUndoRecord>> {
3182    let Some(body) = body else {
3183        return Ok(Vec::new());
3184    };
3185    let mut records = Vec::new();
3186    let mut offset = 0usize;
3187    while offset < body.len() {
3188        let path_end = body[offset..]
3189            .iter()
3190            .position(|byte| *byte == 0)
3191            .ok_or_else(|| GitError::InvalidFormat("truncated REUC path".into()))?
3192            + offset;
3193        let path = body[offset..path_end].to_vec();
3194        offset = path_end + 1;
3195
3196        let mut modes = [0u32; 3];
3197        for mode in &mut modes {
3198            let mode_end = body[offset..]
3199                .iter()
3200                .position(|byte| *byte == 0)
3201                .ok_or_else(|| GitError::InvalidFormat("truncated REUC mode".into()))?
3202                + offset;
3203            let text = std::str::from_utf8(&body[offset..mode_end])
3204                .map_err(|_| GitError::InvalidFormat("invalid REUC mode".into()))?;
3205            *mode = u32::from_str_radix(text, 8)
3206                .map_err(|_| GitError::InvalidFormat("invalid REUC mode".into()))?;
3207            offset = mode_end + 1;
3208        }
3209
3210        let mut stages = [None, None, None];
3211        for (idx, mode) in modes.into_iter().enumerate() {
3212            if mode == 0 {
3213                continue;
3214            }
3215            let end = offset
3216                .checked_add(format.raw_len())
3217                .ok_or_else(|| GitError::InvalidFormat("REUC oid length overflow".into()))?;
3218            if end > body.len() {
3219                return Err(GitError::InvalidFormat("truncated REUC oid".into()));
3220            }
3221            stages[idx] = Some((mode, ObjectId::from_raw(format, &body[offset..end])?));
3222            offset = end;
3223        }
3224        records.push(ResolveUndoRecord { path, stages });
3225    }
3226    Ok(records)
3227}
3228
3229fn set_resolve_undo_extension(index: &mut Index, records: &[ResolveUndoRecord]) -> Result<()> {
3230    let mut body = Vec::new();
3231    for record in records {
3232        body.extend_from_slice(&record.path);
3233        body.push(0);
3234        for stage in record.stages {
3235            match stage {
3236                Some((mode, _)) => body.extend_from_slice(format!("{mode:o}").as_bytes()),
3237                None => body.push(b'0'),
3238            }
3239            body.push(0);
3240        }
3241        for (_, oid) in record.stages.into_iter().flatten() {
3242            body.extend_from_slice(oid.as_bytes());
3243        }
3244    }
3245
3246    let chunks = index.extension_chunks()?;
3247    let mut rebuilt = Vec::with_capacity(index.extensions.len() + body.len() + 8);
3248    let mut replaced = false;
3249    for (signature, chunk_body) in chunks {
3250        if &signature == b"REUC" {
3251            if !body.is_empty() {
3252                append_index_extension(&mut rebuilt, b"REUC", &body)?;
3253            }
3254            replaced = true;
3255        } else {
3256            append_index_extension(&mut rebuilt, &signature, chunk_body)?;
3257        }
3258    }
3259    if !replaced && !body.is_empty() {
3260        append_index_extension(&mut rebuilt, b"REUC", &body)?;
3261    }
3262    index.extensions = rebuilt;
3263    Ok(())
3264}
3265
3266pub fn clear_resolve_undo(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Result<()> {
3267    let git_dir = git_dir.as_ref();
3268    let index_path = repository_index_path(git_dir);
3269    match fs::read(&index_path) {
3270        Ok(bytes) => {
3271            let mut index = Index::parse(&bytes, format)?;
3272            set_resolve_undo_extension(&mut index, &[])?;
3273            write_repository_index_ref(git_dir, format, &index)
3274        }
3275        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
3276        Err(err) => Err(err.into()),
3277    }
3278}
3279
3280fn append_index_extension(out: &mut Vec<u8>, signature: &[u8; 4], body: &[u8]) -> Result<()> {
3281    let len = u32::try_from(body.len())
3282        .map_err(|_| GitError::InvalidFormat("index extension body too large".into()))?;
3283    out.extend_from_slice(signature);
3284    out.extend_from_slice(&len.to_be_bytes());
3285    out.extend_from_slice(body);
3286    Ok(())
3287}
3288
/// Returns a copy of the raw extension area with every `link` chunk removed.
///
/// Chunks are an 8-byte header (4-byte signature plus big-endian `u32` body
/// length) followed by the body. Anything that cannot be read as a complete
/// chunk (a trailing fragment shorter than a header, or a chunk whose declared
/// length runs past the end) is copied through verbatim and ends the scan.
fn index_extensions_without_split_index_link(extensions: &[u8]) -> Vec<u8> {
    const HEADER_LEN: usize = 8;

    let mut kept = Vec::new();
    let mut rest = extensions;
    while !rest.is_empty() {
        let Some(header) = rest.get(..HEADER_LEN) else {
            kept.extend_from_slice(rest);
            break;
        };
        let body_len = u32::from_be_bytes([header[4], header[5], header[6], header[7]]) as usize;
        let chunk_len = HEADER_LEN.saturating_add(body_len);
        if chunk_len > rest.len() {
            kept.extend_from_slice(rest);
            break;
        }
        let (chunk, tail) = rest.split_at(chunk_len);
        if !chunk.starts_with(b"link") {
            kept.extend_from_slice(chunk);
        }
        rest = tail;
    }
    kept
}
3316
3317fn preserved_index_extensions(git_dir: &Path, format: ObjectFormat) -> Result<Vec<u8>> {
3318    let index_path = repository_index_path(git_dir);
3319    match fs::read(&index_path) {
3320        Ok(bytes) => {
3321            let index = Index::parse(&bytes, format)?;
3322            Ok(index_extensions_without_cache_tree_or_resolve_undo(
3323                &index.extensions,
3324            ))
3325        }
3326        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(Vec::new()),
3327        Err(err) => Err(err.into()),
3328    }
3329}
3330
/// Returns a copy of the raw extension area with every `TREE` and `REUC`
/// chunk removed.
///
/// Chunks are an 8-byte header (4-byte signature plus big-endian `u32` body
/// length) followed by the body. A chunk whose declared length runs past the
/// end of the buffer is copied through verbatim and ends the scan; a trailing
/// fragment shorter than a header is dropped.
///
/// The chunk end is computed with saturating arithmetic so that a crafted
/// length cannot overflow `usize` on 32-bit targets.
fn index_extensions_without_cache_tree_or_resolve_undo(extensions: &[u8]) -> Vec<u8> {
    const HEADER_LEN: usize = 8;

    let mut filtered = Vec::new();
    let mut offset = 0usize;
    // `offset` never exceeds `extensions.len()`, so the subtraction is safe.
    while extensions.len() - offset >= HEADER_LEN {
        let signature = &extensions[offset..offset + 4];
        let len = u32::from_be_bytes([
            extensions[offset + 4],
            extensions[offset + 5],
            extensions[offset + 6],
            extensions[offset + 7],
        ]) as usize;
        let end = offset.saturating_add(HEADER_LEN).saturating_add(len);
        if end > extensions.len() {
            filtered.extend_from_slice(&extensions[offset..]);
            break;
        }
        if signature != b"TREE" && signature != b"REUC" {
            filtered.extend_from_slice(&extensions[offset..end]);
        }
        offset = end;
    }
    filtered
}
3354
3355fn repository_index_is_split(git_dir: &Path, format: ObjectFormat) -> Result<bool> {
3356    let index_path = repository_index_path(git_dir);
3357    match fs::read(index_path) {
3358        Ok(bytes) => Ok(Index::parse(&bytes, format)?
3359            .split_index_link(format)?
3360            .is_some()),
3361        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
3362        Err(err) => Err(err.into()),
3363    }
3364}
3365
/// Reports whether the `GIT_TEST_SPLIT_INDEX` environment variable requests
/// split-index mode.
///
/// The value is read as a git-style boolean: an unset variable, an empty
/// value, `0`, and the words `false`, `no`, and `off` (compared
/// case-insensitively) disable it. Any other value enables it. A value that is
/// not valid Unicode is treated as unset.
fn git_test_split_index_enabled() -> bool {
    const FALSE_WORDS: [&str; 4] = ["0", "false", "no", "off"];

    match env::var("GIT_TEST_SPLIT_INDEX") {
        Ok(value) => {
            let disabled = value.is_empty()
                || FALSE_WORDS
                    .iter()
                    .any(|word| value.eq_ignore_ascii_case(word));
            !disabled
        }
        Err(_) => false,
    }
}
3371
3372pub fn write_repository_index(git_dir: &Path, format: ObjectFormat, index: Index) -> Result<()> {
3373    let split = index.split_index_link(format)?.is_some()
3374        || repository_index_is_split(git_dir, format)?
3375        || git_test_split_index_enabled();
3376    write_repository_index_ref_with_split(git_dir, format, &index, split)
3377}
3378
3379pub fn write_repository_index_ref(
3380    git_dir: &Path,
3381    format: ObjectFormat,
3382    index: &Index,
3383) -> Result<()> {
3384    let split = index.split_index_link(format)?.is_some()
3385        || repository_index_is_split(git_dir, format)?
3386        || git_test_split_index_enabled();
3387    write_repository_index_ref_with_split(git_dir, format, index, split)
3388}
3389
/// Writes `index` to the repository index path, optionally in split-index form.
///
/// There are three write paths, tried in order:
///
/// 1. **Unified write**: taken when `split` is false or when
///    `alternate_index_output_path` reports an alternate index file. The whole
///    index is written as one file, with any `link` extension removed and the
///    size of racily clean entries written as zero.
/// 2. **Delta write**: taken when the index has a link with a non-null base
///    oid, the shared base can be read, and the delta does not exceed the
///    configured threshold. Only the delta entries plus an updated link are
///    written to the primary index.
/// 3. **New shared base**: otherwise the full index is written as a
///    `sharedindex.<oid>` file and the primary index becomes an empty index
///    that links to it.
///
/// Every file written has its permissions adjusted through
/// `apply_index_shared_file_mode`.
fn write_repository_index_ref_with_split(
    git_dir: &Path,
    format: ObjectFormat,
    index: &Index,
    split: bool,
) -> Result<()> {
    let index_path = repository_index_path(git_dir);
    if !split || alternate_index_output_path(git_dir, &index_path) {
        // Path 1: write a single self-contained index file.
        let smudged_entries = racily_clean_entry_indexes_before_write(git_dir, format, index)?;
        // Only allocate a filtered copy of the extensions when a link is present.
        let extensions = if index.split_index_link(format)?.is_some() {
            Cow::Owned(index_extensions_without_split_index_link(&index.extensions))
        } else {
            Cow::Borrowed(index.extensions.as_slice())
        };
        // The index's own writer is used only when nothing needs overriding.
        let bytes = if smudged_entries.is_empty() && matches!(extensions, Cow::Borrowed(_)) {
            index.write(format)?
        } else {
            write_index_with_entry_size_overrides(format, index, &smudged_entries, &extensions)?
        };
        fs::write(&index_path, bytes)?;
        apply_index_shared_file_mode(git_dir, &index_path, None)?;
        return Ok(());
    }

    // Path 2: reuse the existing shared base and write only the delta.
    if let Some(link) = index.split_index_link(format)?
        && !link.base_oid.is_null()
        && let Some(base) = read_shared_index_for_link(git_dir, &index_path, format, &link)?
        && !split_index_delta_exceeds_threshold(git_dir, index, &base)
    {
        let (entries, link) = split_index_delta_entries(index, &base, &link)?;
        // The primary index gets neither the cache tree nor the stale link
        // chunk; the fresh link is set below.
        let extensions =
            index_extensions_without_split_index_link(&index_extensions_without_cache_tree(
                &index.extensions,
            ));
        let mut primary = Index {
            version: index.version,
            entries,
            extensions,
            checksum: None,
        };
        primary.set_split_index_link(Some(&link))?;
        fs::write(&index_path, primary.write(format)?)?;
        apply_index_shared_file_mode(git_dir, &index_path, None)?;
        return Ok(());
    }

    // Path 3: write a new shared base. The permissions of the current index
    // file (if any) are captured first and used as the mode source for both
    // files written below.
    let mode_source = fs::metadata(&index_path)
        .ok()
        .map(|metadata| metadata.permissions());
    let mut shared = index.clone();
    smudge_racily_clean_entries_before_write(git_dir, format, &mut shared)?;
    shared.clear_split_index_link()?;
    shared.extensions = index_extensions_without_cache_tree(&shared.extensions);
    let shared_bytes = shared.write(format)?;
    // The shared file is named after the checksum at the end of its own bytes.
    let shared_oid = index_checksum_from_bytes(format, &shared_bytes)?;
    let shared_path = git_dir.join(format!("sharedindex.{shared_oid}"));
    if !shared_path.exists() {
        fs::write(&shared_path, &shared_bytes)?;
    }
    apply_index_shared_file_mode(git_dir, &shared_path, mode_source.as_ref())?;
    clean_shared_index_files(git_dir, shared_oid)?;

    // The primary index holds no entries of its own, only the link to the base.
    let mut primary = Index {
        version: index.version,
        entries: Vec::new(),
        extensions: Vec::new(),
        checksum: None,
    };
    primary.set_split_index_link(Some(&SplitIndexLink::new(shared_oid)))?;
    fs::write(&index_path, primary.write(format)?)?;
    apply_index_shared_file_mode(git_dir, &index_path, mode_source.as_ref())?;
    Ok(())
}
3463
/// Returns `true` when `GIT_INDEX_FILE` is set in the environment and
/// `index_path` differs from `<git_dir>/index`.
fn alternate_index_output_path(git_dir: &Path, index_path: &Path) -> bool {
    if env::var_os("GIT_INDEX_FILE").is_none() {
        return false;
    }
    let default_index = git_dir.join("index");
    index_path != default_index.as_path()
}
3467
3468fn clean_shared_index_files(git_dir: &Path, current_oid: ObjectId) -> Result<()> {
3469    let Some(expire_before) = shared_index_expire_before(git_dir) else {
3470        return Ok(());
3471    };
3472    let current_name = format!("sharedindex.{current_oid}");
3473    let mut expired = Vec::new();
3474    for entry in fs::read_dir(git_dir)? {
3475        let entry = entry?;
3476        let name = entry.file_name();
3477        let Some(name) = name.to_str() else {
3478            continue;
3479        };
3480        if !name.starts_with("sharedindex.") || name == current_name {
3481            continue;
3482        }
3483        let metadata = entry.metadata()?;
3484        let Ok(modified) = metadata.modified() else {
3485            continue;
3486        };
3487        if modified <= expire_before {
3488            expired.push((modified, entry.path()));
3489        }
3490    }
3491    expired.sort_by_key(|(modified, _)| *modified);
3492    let delete_count = expired.len().saturating_sub(1);
3493    for (_, path) in expired.into_iter().take(delete_count) {
3494        let _ = fs::remove_file(path);
3495    }
3496    Ok(())
3497}
3498
3499fn shared_index_expire_before(git_dir: &Path) -> Option<SystemTime> {
3500    let value = sley_config::read_repo_config(git_dir, None)
3501        .ok()
3502        .and_then(|config| {
3503            config
3504                .get("splitIndex", None, "sharedIndexExpire")
3505                .map(str::to_string)
3506        })
3507        .unwrap_or_else(|| "2.weeks.ago".to_string());
3508    let value = value.trim();
3509    if value.eq_ignore_ascii_case("never") {
3510        return None;
3511    }
3512    if value.eq_ignore_ascii_case("now") {
3513        return Some(SystemTime::now());
3514    }
3515    if let Some(days) = value
3516        .strip_suffix(".days.ago")
3517        .or_else(|| value.strip_suffix(".day.ago"))
3518        .and_then(|days| days.parse::<u64>().ok())
3519    {
3520        return SystemTime::now().checked_sub(Duration::from_secs(days * 24 * 60 * 60));
3521    }
3522    if let Some(weeks) = value
3523        .strip_suffix(".weeks.ago")
3524        .or_else(|| value.strip_suffix(".week.ago"))
3525        .and_then(|weeks| weeks.parse::<u64>().ok())
3526    {
3527        return SystemTime::now().checked_sub(Duration::from_secs(weeks * 7 * 24 * 60 * 60));
3528    }
3529    SystemTime::now().checked_sub(Duration::from_secs(14 * 24 * 60 * 60))
3530}
3531
3532fn apply_index_shared_file_mode(
3533    git_dir: &Path,
3534    path: &Path,
3535    mode_source: Option<&fs::Permissions>,
3536) -> Result<()> {
3537    #[cfg(unix)]
3538    {
3539        use std::os::unix::fs::PermissionsExt;
3540
3541        let current = fs::metadata(path)?.permissions();
3542        let source_mode = mode_source
3543            .map(fs::Permissions::mode)
3544            .unwrap_or_else(|| current.mode());
3545        let mode = sley_config::read_repo_config(git_dir, None)
3546            .ok()
3547            .and_then(|config| {
3548                config
3549                    .get("core", None, "sharedRepository")
3550                    .and_then(|value| shared_repository_file_mode(value, source_mode))
3551            })
3552            .unwrap_or(source_mode & 0o7777);
3553        fs::set_permissions(path, fs::Permissions::from_mode(mode))?;
3554    }
3555    #[cfg(not(unix))]
3556    {
3557        let _ = git_dir;
3558        let _ = path;
3559        let _ = mode_source;
3560    }
3561    Ok(())
3562}
3563
/// Maps a `core.sharedRepository` value to the file mode it requests.
///
/// * `umask`, `0`, and the boolean words `false`/`no`/`off`: `None`, meaning
///   no mode is forced.
/// * `group`, `1`, and the boolean words `true`/`yes`/`on`: `source_mode` with
///   the `0o660` bits added.
/// * `all`, `world`, `everybody`, `2`, `3`: `source_mode` with the `0o664`
///   bits added.
/// * Any other octal number: that number masked to `0o666`, but only when it
///   grants the owner read and write (`0o600`); otherwise `None`.
///
/// Boolean words are matched case-insensitively, as git configuration
/// booleans are; the named keywords are matched exactly. Unparseable values
/// yield `None`.
#[cfg(unix)]
fn shared_repository_file_mode(value: &str, source_mode: u32) -> Option<u32> {
    const GROUP_BITS: u32 = 0o660;
    const WORLD_BITS: u32 = 0o664;
    const PERMISSION_MASK: u32 = 0o7777;

    /// Case-insensitive membership test for boolean spellings.
    fn is_one_of(value: &str, words: &[&str]) -> bool {
        words.iter().any(|word| value.eq_ignore_ascii_case(word))
    }

    match value {
        "umask" | "0" => None,
        "group" | "1" => Some((source_mode | GROUP_BITS) & PERMISSION_MASK),
        "all" | "world" | "everybody" | "2" | "3" => {
            Some((source_mode | WORLD_BITS) & PERMISSION_MASK)
        }
        _ if is_one_of(value, &["false", "no", "off"]) => None,
        _ if is_one_of(value, &["true", "yes", "on"]) => {
            Some((source_mode | GROUP_BITS) & PERMISSION_MASK)
        }
        _ => {
            let parsed = u32::from_str_radix(value, 8).ok()?;
            // An explicit mode must at least leave the file owner-read/writable.
            (parsed & 0o600 == 0o600).then_some(parsed & 0o666)
        }
    }
}
3576
3577fn read_shared_index_for_link(
3578    git_dir: &Path,
3579    index_path: &Path,
3580    format: ObjectFormat,
3581    link: &SplitIndexLink,
3582) -> Result<Option<Index>> {
3583    let name = format!("sharedindex.{}", link.base_oid);
3584    let bytes = match fs::read(git_dir.join(&name)) {
3585        Ok(bytes) => bytes,
3586        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
3587            let alternate = index_path
3588                .parent()
3589                .unwrap_or_else(|| Path::new("."))
3590                .join(&name);
3591            match fs::read(alternate) {
3592                Ok(bytes) => bytes,
3593                Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
3594                Err(err) => return Err(err.into()),
3595            }
3596        }
3597        Err(err) => return Err(err.into()),
3598    };
3599    let base = Index::parse(&bytes, format)?;
3600    if base.checksum != Some(link.base_oid) {
3601        return Ok(None);
3602    }
3603    Ok(Some(base))
3604}
3605
3606fn split_index_delta_exceeds_threshold(git_dir: &Path, index: &Index, base: &Index) -> bool {
3607    let max_percent = sley_config::read_repo_config(git_dir, None)
3608        .ok()
3609        .and_then(|config| {
3610            config
3611                .get("splitIndex", None, "maxPercentChange")
3612                .and_then(|value| value.parse::<i64>().ok())
3613        })
3614        .unwrap_or(20);
3615    match max_percent {
3616        0 => return true,
3617        100.. => return false,
3618        value if value < 0 => {}
3619        _ => {}
3620    }
3621    let not_shared = count_entries_not_shared_with_base(index, base);
3622    (index.entries.len() as i64) * max_percent < (not_shared as i64) * 100
3623}
3624
3625fn count_entries_not_shared_with_base(index: &Index, base: &Index) -> usize {
3626    index
3627        .entries
3628        .iter()
3629        .filter(|entry| {
3630            base.entries
3631                .binary_search_by(|base_entry| compare_index_key(base_entry, entry))
3632                .is_err()
3633        })
3634        .count()
3635}
3636
3637fn split_index_delta_entries(
3638    index: &Index,
3639    base: &Index,
3640    previous_link: &SplitIndexLink,
3641) -> Result<(Vec<IndexEntry>, SplitIndexLink)> {
3642    let mut delete_positions = Vec::new();
3643    let mut replace_positions = Vec::new();
3644    let mut replacements = Vec::new();
3645    let mut additions = Vec::new();
3646    let mut base_pos = 0usize;
3647    let mut index_pos = 0usize;
3648    while base_pos < base.entries.len() && index_pos < index.entries.len() {
3649        match compare_index_key(&base.entries[base_pos], &index.entries[index_pos]) {
3650            Ordering::Equal => {
3651                if previous_link
3652                    .delete_positions
3653                    .binary_search(&(base_pos as u32))
3654                    .is_ok()
3655                {
3656                    delete_positions.push(base_pos as u32);
3657                    additions.push(index.entries[index_pos].clone());
3658                } else if !index_entry_content_eq(&base.entries[base_pos], &index.entries[index_pos])
3659                {
3660                    replace_positions.push(base_pos as u32);
3661                    let mut replacement = index.entries[index_pos].clone();
3662                    replacement.path = BString::from(Vec::<u8>::new());
3663                    replacement.refresh_name_length();
3664                    replacements.push(replacement);
3665                }
3666                base_pos += 1;
3667                index_pos += 1;
3668            }
3669            Ordering::Less => {
3670                delete_positions.push(base_pos as u32);
3671                base_pos += 1;
3672            }
3673            Ordering::Greater => {
3674                additions.push(index.entries[index_pos].clone());
3675                index_pos += 1;
3676            }
3677        }
3678    }
3679    while base_pos < base.entries.len() {
3680        delete_positions.push(base_pos as u32);
3681        base_pos += 1;
3682    }
3683    while index_pos < index.entries.len() {
3684        additions.push(index.entries[index_pos].clone());
3685        index_pos += 1;
3686    }
3687    replacements.extend(additions);
3688    Ok((
3689        replacements,
3690        SplitIndexLink {
3691            base_oid: previous_link.base_oid,
3692            delete_positions,
3693            replace_positions,
3694        },
3695    ))
3696}
3697
3698fn compare_index_key(left: &IndexEntry, right: &IndexEntry) -> Ordering {
3699    left.path
3700        .as_bytes()
3701        .cmp(right.path.as_bytes())
3702        .then_with(|| left.stage().as_u16().cmp(&right.stage().as_u16()))
3703}
3704
3705fn index_entry_content_eq(left: &IndexEntry, right: &IndexEntry) -> bool {
3706    const ONDISK_FLAGS: u16 = sley_index::INDEX_FLAG_STAGE_MASK
3707        | sley_index::INDEX_FLAG_VALID
3708        | sley_index::INDEX_FLAG_EXTENDED;
3709    left.ctime_seconds == right.ctime_seconds
3710        && left.ctime_nanoseconds == right.ctime_nanoseconds
3711        && left.mtime_seconds == right.mtime_seconds
3712        && left.mtime_nanoseconds == right.mtime_nanoseconds
3713        && left.dev == right.dev
3714        && left.ino == right.ino
3715        && left.mode == right.mode
3716        && left.uid == right.uid
3717        && left.gid == right.gid
3718        && left.size == right.size
3719        && left.oid == right.oid
3720        && (left.flags & ONDISK_FLAGS) == (right.flags & ONDISK_FLAGS)
3721        && left.flags_extended == right.flags_extended
3722}
3723
3724fn write_index_with_entry_size_overrides(
3725    format: ObjectFormat,
3726    index: &Index,
3727    zero_size_entries: &[usize],
3728    extensions: &[u8],
3729) -> Result<Vec<u8>> {
3730    if !(2..=4).contains(&index.version) {
3731        return Err(GitError::Unsupported(
3732            "canonical writer currently emits index v2/v3/v4".into(),
3733        ));
3734    }
3735    let mut out = Vec::new();
3736    out.extend_from_slice(b"DIRC");
3737    out.extend_from_slice(&index.version.to_be_bytes());
3738    out.extend_from_slice(&(index.entries.len() as u32).to_be_bytes());
3739    let mut previous_path = Vec::new();
3740    for (position, entry) in index.entries.iter().enumerate() {
3741        let start = out.len();
3742        out.extend_from_slice(&entry.ctime_seconds.to_be_bytes());
3743        out.extend_from_slice(&entry.ctime_nanoseconds.to_be_bytes());
3744        out.extend_from_slice(&entry.mtime_seconds.to_be_bytes());
3745        out.extend_from_slice(&entry.mtime_nanoseconds.to_be_bytes());
3746        out.extend_from_slice(&entry.dev.to_be_bytes());
3747        out.extend_from_slice(&entry.ino.to_be_bytes());
3748        out.extend_from_slice(&entry.mode.to_be_bytes());
3749        out.extend_from_slice(&entry.uid.to_be_bytes());
3750        out.extend_from_slice(&entry.gid.to_be_bytes());
3751        let size = if zero_size_entries.binary_search(&position).is_ok() {
3752            0
3753        } else {
3754            entry.size
3755        };
3756        out.extend_from_slice(&size.to_be_bytes());
3757        if entry.oid.format() != format {
3758            return Err(GitError::Unsupported(format!(
3759                "index writer expects {} ids",
3760                format.name()
3761            )));
3762        }
3763        out.extend_from_slice(entry.oid.as_bytes());
3764        let has_extended_flags =
3765            entry.flags & INDEX_FLAG_EXTENDED != 0 || entry.flags_extended != 0;
3766        if has_extended_flags && index.version < 3 {
3767            return Err(GitError::Unsupported(
3768                "index extended flags require version 3".into(),
3769            ));
3770        }
3771        let flags = if has_extended_flags {
3772            entry.flags | INDEX_FLAG_EXTENDED
3773        } else {
3774            entry.flags & !INDEX_FLAG_EXTENDED
3775        };
3776        out.extend_from_slice(&flags.to_be_bytes());
3777        if has_extended_flags {
3778            out.extend_from_slice(&entry.flags_extended.to_be_bytes());
3779        }
3780        if index.version == 4 {
3781            let common_prefix_len = common_prefix_len(&previous_path, entry.path.as_bytes());
3782            let strip_len = previous_path.len() - common_prefix_len;
3783            encode_index_v4_path_strip_len(strip_len, &mut out);
3784            out.extend_from_slice(&entry.path.as_bytes()[common_prefix_len..]);
3785            out.push(0);
3786            previous_path = entry.path.as_bytes().to_vec();
3787        } else {
3788            out.extend_from_slice(entry.path.as_bytes());
3789            out.push(0);
3790            while (out.len() - start) % 8 != 0 {
3791                out.push(0);
3792            }
3793        }
3794    }
3795    out.extend_from_slice(extensions);
3796    let checksum = sley_core::digest_bytes(format, &out)?;
3797    out.extend_from_slice(checksum.as_bytes());
3798    Ok(out)
3799}
3800
/// Appends `strip_len` to `out` as a variable-length integer.
///
/// The last byte holds the low seven bits with the high bit clear. Each
/// preceding byte has the high bit set and holds seven bits of the remaining
/// value, which is decremented by one before each of those bytes is emitted.
fn encode_index_v4_path_strip_len(strip_len: usize, out: &mut Vec<u8>) {
    // Filled from the back; sixteen bytes is more than a 64-bit value needs.
    let mut buffer = [0u8; 16];
    let mut first = buffer.len() - 1;
    buffer[first] = (strip_len & 0x7f) as u8;

    let mut remaining = strip_len >> 7;
    while remaining != 0 {
        remaining -= 1;
        first -= 1;
        buffer[first] = 0x80 | (remaining & 0x7f) as u8;
        remaining >>= 7;
    }
    out.extend_from_slice(&buffer[first..]);
}
3814
/// Returns the number of leading bytes that `left` and `right` have in common.
fn common_prefix_len(left: &[u8], right: &[u8]) -> usize {
    let limit = left.len().min(right.len());
    (0..limit)
        .find(|&position| left[position] != right[position])
        .unwrap_or(limit)
}
3821
3822fn index_checksum_from_bytes(format: ObjectFormat, bytes: &[u8]) -> Result<ObjectId> {
3823    let hash_len = format.raw_len();
3824    if bytes.len() < hash_len {
3825        return Err(GitError::InvalidFormat(
3826            "index too short for checksum".into(),
3827        ));
3828    }
3829    ObjectId::from_raw(format, &bytes[bytes.len() - hash_len..])
3830}
3831
3832pub fn enable_split_index(
3833    git_dir: impl AsRef<Path>,
3834    format: ObjectFormat,
3835) -> Result<UpdateIndexResult> {
3836    let git_dir = git_dir.as_ref();
3837    let mut index = read_repository_index(git_dir, format)?.unwrap_or_else(empty_index);
3838    normalize_index_version_for_extended_flags(&mut index);
3839    write_repository_index_ref_with_split(git_dir, format, &index, true)?;
3840    Ok(UpdateIndexResult {
3841        entries: index.entries.len(),
3842        updated: Vec::new(),
3843    })
3844}
3845
3846pub fn disable_split_index(
3847    git_dir: impl AsRef<Path>,
3848    format: ObjectFormat,
3849) -> Result<UpdateIndexResult> {
3850    let git_dir = git_dir.as_ref();
3851    if !repository_index_path(git_dir).exists() {
3852        return Ok(UpdateIndexResult {
3853            entries: 0,
3854            updated: Vec::new(),
3855        });
3856    }
3857    let mut index = read_repository_index(git_dir, format)?.unwrap_or_else(empty_index);
3858    normalize_index_version_for_extended_flags(&mut index);
3859    write_repository_index_ref_with_split(git_dir, format, &index, false)?;
3860    Ok(UpdateIndexResult {
3861        entries: index.entries.len(),
3862        updated: Vec::new(),
3863    })
3864}
3865
3866fn smudge_racily_clean_entries_before_write(
3867    git_dir: &Path,
3868    format: ObjectFormat,
3869    index: &mut Index,
3870) -> Result<()> {
3871    for position in racily_clean_entry_indexes_before_write(git_dir, format, index)? {
3872        index.entries[position].size = 0;
3873    }
3874    Ok(())
3875}
3876
3877fn racily_clean_entry_indexes_before_write(
3878    git_dir: &Path,
3879    format: ObjectFormat,
3880    index: &Index,
3881) -> Result<Vec<usize>> {
3882    let index_path = repository_index_path(git_dir);
3883    let Some(index_mtime) = fs::metadata(&index_path)
3884        .ok()
3885        .and_then(|metadata| sley_index::file_mtime_parts(&metadata))
3886    else {
3887        return Ok(Vec::new());
3888    };
3889    if index_mtime == (0, 0) {
3890        return Ok(Vec::new());
3891    }
3892    let Some(worktree_root) = (match worktree_root_for_git_dir(git_dir) {
3893        Ok(worktree_root) => worktree_root,
3894        Err(_) => return Ok(Vec::new()),
3895    }) else {
3896        return Ok(Vec::new());
3897    };
3898    let mut smudged = Vec::new();
3899    for (position, entry) in index.entries.iter().enumerate() {
3900        if index_entry_stage(entry) != 0 || sley_index::is_gitlink(entry.mode) {
3901            continue;
3902        }
3903        let entry_mtime = (
3904            u64::from(entry.mtime_seconds),
3905            u64::from(entry.mtime_nanoseconds),
3906        );
3907        if entry_mtime == (0, 0) || index_mtime > entry_mtime {
3908            continue;
3909        }
3910        let absolute = worktree_root.join(repo_path_to_os_path(entry.path.as_bytes())?);
3911        let Ok(metadata) = fs::symlink_metadata(&absolute) else {
3912            continue;
3913        };
3914        if entry.mode != worktree_entry_mode(&metadata)
3915            || !worktree_entry_is_uptodate(entry, &metadata)
3916        {
3917            continue;
3918        }
3919        let body = if metadata.file_type().is_symlink() {
3920            symlink_target_bytes(&absolute)?
3921        } else if metadata.is_file() {
3922            fs::read(&absolute)?
3923        } else {
3924            continue;
3925        };
3926        let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
3927        if oid != entry.oid {
3928            smudged.push(position);
3929        }
3930    }
3931    Ok(smudged)
3932}
3933
3934fn invalidate_untracked_cache_for_git_paths(
3935    index: &mut Index,
3936    format: ObjectFormat,
3937    paths: &[Vec<u8>],
3938) -> Result<()> {
3939    if paths.is_empty() {
3940        return Ok(());
3941    }
3942    let Some(mut cache) = index.untracked_cache(format)? else {
3943        return Ok(());
3944    };
3945    let Some(root) = cache.root.as_mut() else {
3946        return Ok(());
3947    };
3948    for path in paths {
3949        invalidate_untracked_cache_dir_for_path(root, path);
3950    }
3951    index.set_untracked_cache(format, Some(&cache))
3952}
3953
/// Invalidates the cached directory nodes that lie along `path`.
///
/// The root node is always invalidated. The path is then split on `/` and the
/// tree is descended one component at a time, invalidating each child
/// directory found. The final component is never looked up, and the walk
/// stops early at an empty component or at a component with no matching
/// cached child.
fn invalidate_untracked_cache_dir_for_path(root: &mut UntrackedCacheDir, path: &[u8]) {
    invalidate_untracked_cache_node(root);
    let mut current = root;
    let mut components = path.split(|byte| *byte == b'/').peekable();
    while let Some(component) = components.next() {
        // Stop on an empty component, or on the last component of the path.
        if component.is_empty() || components.peek().is_none() {
            break;
        }
        let Some(child) = current.dirs.iter_mut().find(|dir| dir.name == component) else {
            break;
        };
        invalidate_untracked_cache_node(child);
        current = child;
    }
}
3969
3970fn invalidate_untracked_cache_node(node: &mut UntrackedCacheDir) {
3971    node.valid = false;
3972    node.untracked.clear();
3973}
3974
3975pub fn update_index_cacheinfo(
3976    git_dir: impl AsRef<Path>,
3977    format: ObjectFormat,
3978    entries: &[CacheInfoEntry],
3979    add: bool,
3980    verbose: bool,
3981) -> Result<UpdateIndexResult> {
3982    let git_dir = git_dir.as_ref();
3983    let index_path = repository_index_path(git_dir);
3984    let mut index = if index_path.exists() {
3985        Index::parse(&fs::read(&index_path)?, format)?
3986    } else {
3987        Index {
3988            version: 2,
3989            entries: Vec::new(),
3990            extensions: Vec::new(),
3991            checksum: None,
3992        }
3993    };
3994    let mut updated = Vec::new();
3995    let mut reports: Vec<String> = Vec::new();
3996    let mut untracked_cache_invalidation_paths = Vec::new();
3997    for cacheinfo in entries {
3998        if !add
3999            && !index
4000                .entries
4001                .iter()
4002                .any(|existing| existing.path == cacheinfo.path)
4003        {
4004            let path = String::from_utf8_lossy(&cacheinfo.path);
4005            eprintln!("error: {path}: cannot add to the index - missing --add option?");
4006            eprintln!("fatal: git update-index: --cacheinfo cannot add {path}");
4007            return Err(GitError::Exit(128));
4008        }
4009        let flags = index_flags(cacheinfo.path.len(), cacheinfo.stage);
4010        let entry = IndexEntry {
4011            ctime_seconds: 0,
4012            ctime_nanoseconds: 0,
4013            mtime_seconds: 0,
4014            mtime_nanoseconds: 0,
4015            dev: 0,
4016            ino: 0,
4017            mode: cacheinfo.mode,
4018            uid: 0,
4019            gid: 0,
4020            size: 0,
4021            oid: cacheinfo.oid,
4022            flags,
4023            flags_extended: 0,
4024            path: BString::from(cacheinfo.path.as_slice()),
4025        };
4026        index.entries.retain(|existing| {
4027            existing.path != cacheinfo.path || index_entry_stage(existing) != cacheinfo.stage
4028        });
4029        index.entries.push(entry);
4030        untracked_cache_invalidation_paths.push(cacheinfo.path.clone());
4031        updated.push(cacheinfo.oid);
4032        // git's add_cacheinfo() calls report("add '%s'") *after* the entry is
4033        // staged, regardless of whether the subsequent index write succeeds.
4034        reports.push(format!(
4035            "add '{}'",
4036            String::from_utf8_lossy(&cacheinfo.path)
4037        ));
4038    }
4039    index
4040        .entries
4041        .sort_by(|left, right| left.path.cmp(&right.path));
4042    // git refuses to write an index entry whose object id is the null oid:
4043    // do_write_index() emits `error: cache entry has null sha1: <path>` and
4044    // returns nonzero, leaving the on-disk index untouched. The verbose `add`
4045    // line has already been printed by then.
4046    let null_entry = index.entries.iter().find(|entry| entry.oid.is_null());
4047    if let Some(entry) = null_entry {
4048        if verbose {
4049            flush_update_index_reports(&reports)?;
4050        }
4051        eprintln!(
4052            "error: cache entry has null sha1: {}",
4053            String::from_utf8_lossy(&entry.path)
4054        );
4055        return Err(GitError::Exit(128));
4056    }
4057    invalidate_untracked_cache_for_git_paths(
4058        &mut index,
4059        format,
4060        &untracked_cache_invalidation_paths,
4061    )?;
4062    write_repository_index_ref(git_dir, format, &index)?;
4063    if verbose {
4064        flush_update_index_reports(&reports)?;
4065    }
4066    Ok(UpdateIndexResult {
4067        entries: index.entries.len(),
4068        updated,
4069    })
4070}
4071
4072fn flush_update_index_reports(reports: &[String]) -> Result<()> {
4073    let mut stdout = std::io::stdout().lock();
4074    for line in reports {
4075        writeln!(stdout, "{line}")?;
4076    }
4077    stdout.flush()?;
4078    Ok(())
4079}
4080
4081pub fn update_index_index_info(
4082    git_dir: impl AsRef<Path>,
4083    format: ObjectFormat,
4084    records: &[IndexInfoRecord],
4085) -> Result<UpdateIndexResult> {
4086    let git_dir = git_dir.as_ref();
4087    let index_path = repository_index_path(git_dir);
4088    let mut index = if index_path.exists() {
4089        Index::parse(&fs::read(&index_path)?, format)?
4090    } else {
4091        Index {
4092            version: 2,
4093            entries: Vec::new(),
4094            extensions: Vec::new(),
4095            checksum: None,
4096        }
4097    };
4098    let mut updated = Vec::new();
4099    let mut untracked_cache_invalidation_paths = Vec::new();
4100    for record in records {
4101        match record {
4102            IndexInfoRecord::Remove { path } => {
4103                index.entries.retain(|existing| existing.path != *path);
4104                untracked_cache_invalidation_paths.push(path.clone());
4105            }
4106            IndexInfoRecord::Add(cacheinfo) => {
4107                let flags = index_flags(cacheinfo.path.len(), cacheinfo.stage);
4108                let entry = IndexEntry {
4109                    ctime_seconds: 0,
4110                    ctime_nanoseconds: 0,
4111                    mtime_seconds: 0,
4112                    mtime_nanoseconds: 0,
4113                    dev: 0,
4114                    ino: 0,
4115                    mode: cacheinfo.mode,
4116                    uid: 0,
4117                    gid: 0,
4118                    size: 0,
4119                    oid: cacheinfo.oid,
4120                    flags,
4121                    flags_extended: 0,
4122                    path: BString::from(cacheinfo.path.as_slice()),
4123                };
4124                if cacheinfo.stage == 0 {
4125                    index
4126                        .entries
4127                        .retain(|existing| existing.path != cacheinfo.path);
4128                } else {
4129                    index.entries.retain(|existing| {
4130                        existing.path != cacheinfo.path
4131                            || index_entry_stage(existing) != cacheinfo.stage
4132                    });
4133                }
4134                index.entries.push(entry);
4135                untracked_cache_invalidation_paths.push(cacheinfo.path.clone());
4136                updated.push(cacheinfo.oid);
4137            }
4138        }
4139    }
4140    index.entries.sort_by(|left, right| {
4141        left.path
4142            .cmp(&right.path)
4143            .then_with(|| index_entry_stage(left).cmp(&index_entry_stage(right)))
4144    });
4145    invalidate_untracked_cache_for_git_paths(
4146        &mut index,
4147        format,
4148        &untracked_cache_invalidation_paths,
4149    )?;
4150    write_repository_index_ref(git_dir, format, &index)?;
4151    Ok(UpdateIndexResult {
4152        entries: index.entries.len(),
4153        updated,
4154    })
4155}
4156
/// Packs the 16-bit index entry `flags` word from a path length and a stage.
///
/// The low 12 bits hold the path length, capped at `0xfff` for longer paths.
/// Bits 12-13 hold the low two bits of `stage`; any higher stage bits are
/// discarded.
fn index_flags(path_len: usize, stage: u16) -> u16 {
    let name_len = u16::try_from(path_len).map_or(0x0fff, |len| len.min(0x0fff));
    let stage_bits = (stage & 0b11) << 12;
    stage_bits | name_len
}
4160
// Bit in `IndexEntry::flags`.
// NOTE(review): not referenced in this part of the file; by name this is the
// "assume unchanged" bit — confirm against its users.
const INDEX_FLAG_ASSUME_UNCHANGED: u16 = 0x8000;
// Bit in `IndexEntry::flags` that is tested together with `flags_extended`;
// an entry carrying it makes the index require version 3 or later.
const INDEX_FLAG_EXTENDED: u16 = 0x4000;
// Bit in `IndexEntry::flags_extended` checked by `index_entry_skip_worktree`.
const INDEX_EXTENDED_FLAG_SKIP_WORKTREE: u16 = 0x4000;
4164
4165fn normalize_index_version_for_extended_flags(index: &mut Index) {
4166    let has_extended_flags = index
4167        .entries
4168        .iter()
4169        .any(|entry| entry.flags & INDEX_FLAG_EXTENDED != 0 || entry.flags_extended != 0);
4170    if has_extended_flags && index.version < 3 {
4171        index.version = 3;
4172    } else if !has_extended_flags && index.version == 3 {
4173        index.version = 2;
4174    }
4175}
4176
4177fn index_entry_stage(entry: &IndexEntry) -> u16 {
4178    (entry.flags >> 12) & 0x3
4179}
4180
4181/// The oid of the stage-0 entry in `range` (the path's currently-tracked blob),
4182/// if any. Used by the safecrlf check to fetch `has_crlf_in_index`.
4183fn stage0_oid_in_range(entries: &[IndexEntry], range: std::ops::Range<usize>) -> Option<ObjectId> {
4184    entries[range]
4185        .iter()
4186        .find(|entry| index_entry_stage(entry) == 0)
4187        .map(|entry| entry.oid)
4188}
4189
4190fn index_entry_skip_worktree(entry: &IndexEntry) -> bool {
4191    entry.flags & INDEX_FLAG_EXTENDED != 0
4192        && entry.flags_extended & INDEX_EXTENDED_FLAG_SKIP_WORKTREE != 0
4193}
4194
/// Writes the two-line `error:` / `fatal:` diagnostic for a path that could
/// not be processed to stderr. Non-UTF-8 path bytes are rendered lossily.
fn print_update_index_path_error(path: &[u8], message: &str) {
    let shown = String::from_utf8_lossy(path);
    eprintln!("error: {}: {}", shown, message);
    eprintln!("fatal: Unable to process path {}", shown);
}
4200
/// Prints `<path>: needs update` to stdout. Non-UTF-8 path bytes are rendered
/// lossily.
fn print_update_index_needs_update(path: &[u8]) {
    println!("{}: needs update", String::from_utf8_lossy(path));
}
4205
4206pub fn write_tree_from_index(git_dir: impl AsRef<Path>, format: ObjectFormat) -> Result<ObjectId> {
4207    write_tree_from_index_with_options(git_dir, format, WriteTreeOptions::default())
4208}
4209
4210pub fn write_tree_from_index_with_odb(
4211    git_dir: impl AsRef<Path>,
4212    format: ObjectFormat,
4213    odb: &FileObjectDatabase,
4214) -> Result<ObjectId> {
4215    write_tree_from_index_with_options_and_odb(
4216        git_dir.as_ref(),
4217        format,
4218        WriteTreeOptions::default(),
4219        odb,
4220    )
4221}
4222
4223pub fn write_tree_from_index_with_options(
4224    git_dir: impl AsRef<Path>,
4225    format: ObjectFormat,
4226    options: WriteTreeOptions,
4227) -> Result<ObjectId> {
4228    let git_dir = git_dir.as_ref();
4229    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
4230    write_tree_from_index_with_options_and_odb(git_dir, format, options, &odb)
4231}
4232
4233fn write_tree_from_index_with_options_and_odb(
4234    git_dir: &Path,
4235    format: ObjectFormat,
4236    options: WriteTreeOptions,
4237    odb: &FileObjectDatabase,
4238) -> Result<ObjectId> {
4239    let index_path = repository_index_path(git_dir);
4240    // A repository with no index file yet (fresh init, nothing staged) is an
4241    // empty index: `git write-tree` / `git commit --allow-empty` produce the
4242    // empty tree rather than erroring.
4243    let index_bytes = match fs::read(&index_path) {
4244        Ok(bytes) => bytes,
4245        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
4246            let mut checker = odb.presence_checker();
4247            let empty: &[WriteTreeEntry<'_>] = &[];
4248            return write_tree_entries_stream(
4249                empty,
4250                b"",
4251                None,
4252                odb,
4253                &mut checker,
4254                options.missing_ok,
4255            );
4256        }
4257        Err(err) => return Err(err.into()),
4258    };
4259    let mut checker = odb.presence_checker();
4260    if Index::bytes_have_extension(&index_bytes, format, b"link")? {
4261        let index = sley_index::read_repository_index(git_dir, format)?;
4262        return write_tree_from_owned_index(&index, format, &options, odb, &mut checker);
4263    }
4264    match BorrowedIndex::parse(&index_bytes, format) {
4265        Ok(index) => write_tree_from_borrowed_index(&index, format, &options, odb, &mut checker),
4266        Err(GitError::Unsupported(_)) => {
4267            let index = Index::parse(&index_bytes, format)?;
4268            write_tree_from_owned_index(&index, format, &options, odb, &mut checker)
4269        }
4270        Err(err) => Err(err),
4271    }
4272}
4273
4274fn write_tree_from_borrowed_index(
4275    index: &BorrowedIndex<'_>,
4276    format: ObjectFormat,
4277    options: &WriteTreeOptions,
4278    odb: &FileObjectDatabase,
4279    checker: &mut ObjectPresenceChecker,
4280) -> Result<ObjectId> {
4281    let cache_tree = if options.prefix.is_none() {
4282        index.cache_tree(format).ok().flatten()
4283    } else {
4284        None
4285    };
4286    if options.prefix.is_none() && !index.entries.iter().any(|entry| entry.is_intent_to_add()) {
4287        return write_tree_entries_stream(
4288            &index.entries,
4289            b"",
4290            cache_tree.as_ref(),
4291            odb,
4292            checker,
4293            options.missing_ok,
4294        );
4295    }
4296    // intent-to-add entries (`git add -N`, `git reset -N`) are placeholders that do
4297    // NOT belong in a written tree — git's cache_tree_update skips CE_INTENT_TO_ADD.
4298    // Drop them before building, so `write-tree` succeeds and the tree omits them
4299    // (their empty-blob oid is also typically absent from the odb).
4300    let entries = write_tree_entries_for_prefix(
4301        index
4302            .entries
4303            .iter()
4304            .filter(|entry| !entry.is_intent_to_add()),
4305        options.prefix.as_deref(),
4306    )?;
4307    write_tree_entries_stream(
4308        &entries,
4309        b"",
4310        cache_tree.as_ref(),
4311        odb,
4312        checker,
4313        options.missing_ok,
4314    )
4315}
4316
4317fn write_tree_from_owned_index(
4318    index: &Index,
4319    format: ObjectFormat,
4320    options: &WriteTreeOptions,
4321    odb: &FileObjectDatabase,
4322    checker: &mut ObjectPresenceChecker,
4323) -> Result<ObjectId> {
4324    let cache_tree = if options.prefix.is_none() {
4325        index.cache_tree(format).ok().flatten()
4326    } else {
4327        None
4328    };
4329    if options.prefix.is_none() && !index.entries.iter().any(|entry| entry.is_intent_to_add()) {
4330        return write_tree_entries_stream(
4331            &index.entries,
4332            b"",
4333            cache_tree.as_ref(),
4334            odb,
4335            checker,
4336            options.missing_ok,
4337        );
4338    }
4339    let entries = write_tree_entries_for_prefix(
4340        index
4341            .entries
4342            .iter()
4343            .filter(|entry| !entry.is_intent_to_add()),
4344        options.prefix.as_deref(),
4345    )?;
4346    write_tree_entries_stream(
4347        &entries,
4348        b"",
4349        cache_tree.as_ref(),
4350        odb,
4351        checker,
4352        options.missing_ok,
4353    )
4354}
4355
/// Borrowed `(path, mode, oid)` view of one index entry, as consumed by the
/// tree writer. `write_tree_entries_for_prefix` produces these, with `path`
/// either the entry's full path or the part left after stripping a prefix.
#[derive(Clone, Copy)]
struct WriteTreeEntry<'a> {
    // Path bytes borrowed from the source entry.
    path: &'a [u8],
    // File mode copied from the source entry.
    mode: u32,
    // Object id copied from the source entry.
    oid: ObjectId,
}
4362
/// Minimal read-only view of an index entry that the tree writer needs. It
/// lets the same code run over owned entries, borrowed entries, and
/// prefix-stripped [`WriteTreeEntry`] values.
trait WriteTreeIndexEntry {
    /// The entry's path as raw bytes.
    fn write_tree_path(&self) -> &[u8];
    /// The entry's mode.
    fn write_tree_mode(&self) -> u32;
    /// The entry's object id.
    fn write_tree_oid(&self) -> ObjectId;
}
4368
/// Exposes an owned [`IndexEntry`] to the tree writer by forwarding to its
/// `path`, `mode` and `oid` fields.
impl WriteTreeIndexEntry for IndexEntry {
    fn write_tree_path(&self) -> &[u8] {
        self.path.as_bytes()
    }

    fn write_tree_mode(&self) -> u32 {
        self.mode
    }

    fn write_tree_oid(&self) -> ObjectId {
        self.oid
    }
}
4382
/// Exposes a borrowed [`IndexEntryRef`] to the tree writer by forwarding to
/// its `path`, `mode` and `oid` fields.
impl WriteTreeIndexEntry for IndexEntryRef<'_> {
    fn write_tree_path(&self) -> &[u8] {
        self.path
    }

    fn write_tree_mode(&self) -> u32 {
        self.mode
    }

    fn write_tree_oid(&self) -> ObjectId {
        self.oid
    }
}
4396
/// Lets a [`WriteTreeEntry`] (for example one with a prefix-stripped path) be
/// fed back into the tree writer.
impl WriteTreeIndexEntry for WriteTreeEntry<'_> {
    fn write_tree_path(&self) -> &[u8] {
        self.path
    }

    fn write_tree_mode(&self) -> u32 {
        self.mode
    }

    fn write_tree_oid(&self) -> ObjectId {
        self.oid
    }
}
4410
4411fn write_tree_entries_for_prefix<'a, E>(
4412    entries: impl IntoIterator<Item = &'a E>,
4413    prefix: Option<&[u8]>,
4414) -> Result<Vec<WriteTreeEntry<'a>>>
4415where
4416    E: WriteTreeIndexEntry + 'a,
4417{
4418    let Some(prefix) = prefix else {
4419        return Ok(entries
4420            .into_iter()
4421            .map(|entry| WriteTreeEntry {
4422                path: entry.write_tree_path(),
4423                mode: entry.write_tree_mode(),
4424                oid: entry.write_tree_oid(),
4425            })
4426            .collect());
4427    };
4428    let trimmed_len = prefix
4429        .iter()
4430        .rposition(|byte| *byte != b'/')
4431        .map(|idx| idx + 1)
4432        .unwrap_or(0);
4433    let trimmed = &prefix[..trimmed_len];
4434    if trimmed.is_empty() {
4435        return Ok(entries
4436            .into_iter()
4437            .map(|entry| WriteTreeEntry {
4438                path: entry.write_tree_path(),
4439                mode: entry.write_tree_mode(),
4440                oid: entry.write_tree_oid(),
4441            })
4442            .collect());
4443    }
4444    let mut prefixed = Vec::new();
4445    for entry in entries {
4446        let Some(remainder) = entry.write_tree_path().strip_prefix(trimmed) else {
4447            continue;
4448        };
4449        let Some(stripped) = remainder.strip_prefix(b"/") else {
4450            continue;
4451        };
4452        if stripped.is_empty() {
4453            continue;
4454        }
4455        prefixed.push(WriteTreeEntry {
4456            path: stripped,
4457            mode: entry.write_tree_mode(),
4458            oid: entry.write_tree_oid(),
4459        });
4460    }
4461    if prefixed.is_empty() {
4462        eprintln!(
4463            "fatal: git-write-tree: prefix {} not found",
4464            String::from_utf8_lossy(prefix)
4465        );
4466        return Err(GitError::Exit(128));
4467    }
4468    Ok(prefixed)
4469}
4470
/// Builds and writes the tree object for `entries`, all of which must live
/// under `prefix`, recursing once per immediate subdirectory.
///
/// * `entries` — index-like entries for this directory level. Only consecutive
///   entries sharing a first path component are grouped into one subtree, so
///   the slice is presumably expected in index (path-sorted) order — confirm
///   against callers.
/// * `prefix` — directory prefix (empty at the root, otherwise ending in `/`)
///   that every entry path must start with.
/// * `cache_tree` — optional cache-tree node for this directory. When it is
///   valid and covers exactly `entries.len()` entries, its oid is returned
///   without writing anything.
/// * `checker` — used to verify that referenced objects exist unless
///   `missing_ok` is set; gitlink entries are never checked.
///
/// # Errors
///
/// Returns `GitError::InvalidPath` for an entry whose path does not start with
/// `prefix`, has nothing after it, or has an empty component. Prints an
/// `invalid object` diagnostic and returns `GitError::Exit(128)` when a
/// required object is missing.
fn write_tree_entries_stream<E>(
    entries: &[E],
    prefix: &[u8],
    cache_tree: Option<&CacheTree>,
    odb: &FileObjectDatabase,
    checker: &mut ObjectPresenceChecker,
    missing_ok: bool,
) -> Result<ObjectId>
where
    E: WriteTreeIndexEntry,
{
    // Whole-directory cache hit: reuse the recorded oid.
    if let Some(oid) = valid_cache_tree_oid(cache_tree, entries.len()) {
        return Ok(oid);
    }

    let mut tree_entries = Vec::new();
    let mut index = 0usize;
    while index < entries.len() {
        let entry = &entries[index];
        let path = entry.write_tree_path();
        let Some(remainder) = path.strip_prefix(prefix) else {
            return Err(GitError::InvalidPath(format!(
                "invalid index path {}",
                String::from_utf8_lossy(path)
            )));
        };
        if remainder.is_empty() || remainder[0] == b'/' {
            return Err(GitError::InvalidPath(format!(
                "invalid index path {}",
                String::from_utf8_lossy(path)
            )));
        }

        // An entry with the sparse-directory mode named `<name>/` directly at
        // this level becomes a tree entry pointing at the recorded oid.
        if entry.write_tree_mode() == SPARSE_DIR_MODE
            && let Some(name) = remainder.strip_suffix(b"/")
            && !name.is_empty()
            && !name.contains(&b'/')
        {
            let oid = entry.write_tree_oid();
            if !missing_ok && !checker.contains(&oid)? {
                eprintln!(
                    "error: invalid object {:o} {} for '{}'",
                    SPARSE_DIR_MODE,
                    oid,
                    String::from_utf8_lossy(path)
                );
                eprintln!("fatal: git-write-tree: error building trees");
                return Err(GitError::Exit(128));
            }
            tree_entries.push(TreeEntry {
                mode: SPARSE_DIR_MODE,
                name: BString::from(name),
                oid,
            });
            index += 1;
            continue;
        }

        // The entry lives in a subdirectory: gather the run of consecutive
        // entries sharing the first component `name` and emit one subtree.
        if let Some(slash) = remainder.iter().position(|byte| *byte == b'/') {
            let name = &remainder[..slash];
            if name.is_empty() {
                return Err(GitError::InvalidPath(format!(
                    "invalid index path {}",
                    String::from_utf8_lossy(path)
                )));
            }
            let start = index;
            let child_cache = cache_tree.and_then(|tree| {
                tree.subtrees
                    .iter()
                    .find(|child| child.name.as_slice() == name)
                    .map(|child| &child.tree)
            });
            if let Some(cached_count) = valid_cache_tree_entry_count(child_cache) {
                // Try to jump over the whole run using the cached entry count.
                // The jump is accepted only if the last skipped entry is still
                // in `name/` and the entry after the run (if any) is not.
                let end = start.saturating_add(cached_count);
                if cached_count > 0
                    && end <= entries.len()
                    && same_tree_component(entries[end - 1].write_tree_path(), prefix, name)?
                    && (end == entries.len()
                        || !same_tree_component(entries[end].write_tree_path(), prefix, name)?)
                {
                    index = end;
                } else {
                    // The cached count does not line up: scan linearly.
                    index += 1;
                    while index < entries.len()
                        && same_tree_component(entries[index].write_tree_path(), prefix, name)?
                    {
                        index += 1;
                    }
                }
            } else {
                // No usable cache node: scan linearly to the end of the run.
                index += 1;
                while index < entries.len()
                    && same_tree_component(entries[index].write_tree_path(), prefix, name)?
                {
                    index += 1;
                }
            }
            // Subtree cache hit: the run length matches the cached node, so
            // its oid is reused without recursing.
            if let Some(oid) = valid_cache_tree_oid(child_cache, index - start) {
                tree_entries.push(TreeEntry {
                    mode: 0o040000,
                    name: BString::from(name),
                    oid,
                });
                continue;
            }
            let mut child_prefix = Vec::with_capacity(prefix.len() + name.len() + 1);
            child_prefix.extend_from_slice(prefix);
            child_prefix.extend_from_slice(name);
            child_prefix.push(b'/');
            let oid = write_tree_entries_stream(
                &entries[start..index],
                &child_prefix,
                child_cache,
                odb,
                checker,
                missing_ok,
            )?;
            tree_entries.push(TreeEntry {
                mode: 0o040000,
                name: BString::from(name),
                oid,
            });
            continue;
        }

        // Plain entry directly in this directory. Gitlinks are exempt from the
        // presence check.
        let mode = entry.write_tree_mode();
        let oid = entry.write_tree_oid();
        if !missing_ok && !sley_index::is_gitlink(mode) && !checker.contains(&oid)? {
            eprintln!(
                "error: invalid object {:o} {} for '{}'",
                mode,
                oid,
                String::from_utf8_lossy(path)
            );
            eprintln!("fatal: git-write-tree: error building trees");
            return Err(GitError::Exit(128));
        }
        tree_entries.push(TreeEntry {
            mode,
            name: BString::from(remainder),
            oid,
        });
        index += 1;
    }

    // Order the entries with `git_tree_entry_cmp` before serializing the tree.
    tree_entries.sort_by(|left, right| {
        git_tree_entry_cmp(
            left.name.as_bytes(),
            left.mode,
            right.name.as_bytes(),
            right.mode,
        )
    });
    odb.write_object(EncodedObject::new(
        ObjectType::Tree,
        Tree {
            entries: tree_entries,
        }
        .write(),
    ))
}
4633
4634fn valid_cache_tree_oid(tree: Option<&CacheTree>, entry_count: usize) -> Option<ObjectId> {
4635    let tree = tree?;
4636    if valid_cache_tree_entry_count(Some(tree))? != entry_count {
4637        return None;
4638    }
4639    tree.oid
4640}
4641
4642fn valid_cache_tree_entry_count(tree: Option<&CacheTree>) -> Option<usize> {
4643    let tree = tree?;
4644    if tree.entry_count < 0 || tree.oid.is_none() {
4645        return None;
4646    }
4647    Some(tree.entry_count as usize)
4648}
4649
4650fn same_tree_component(path: &[u8], prefix: &[u8], name: &[u8]) -> Result<bool> {
4651    let Some(remainder) = path.strip_prefix(prefix) else {
4652        return Err(GitError::InvalidPath(format!(
4653            "invalid index path {}",
4654            String::from_utf8_lossy(path)
4655        )));
4656    };
4657    Ok(remainder.starts_with(name) && remainder.get(name.len()) == Some(&b'/'))
4658}
4659
4660pub fn stream_short_status<F>(
4661    worktree_root: impl AsRef<Path>,
4662    git_dir: impl AsRef<Path>,
4663    format: ObjectFormat,
4664    emit: F,
4665) -> Result<()>
4666where
4667    F: for<'a> FnMut(ShortStatusRow<'a>) -> Result<StreamControl>,
4668{
4669    stream_short_status_with_options(
4670        worktree_root,
4671        git_dir,
4672        format,
4673        ShortStatusOptions::default(),
4674        emit,
4675    )
4676}
4677
4678pub fn short_status_count(
4679    worktree_root: impl AsRef<Path>,
4680    git_dir: impl AsRef<Path>,
4681    format: ObjectFormat,
4682) -> Result<usize> {
4683    short_status_count_with_options(
4684        worktree_root,
4685        git_dir,
4686        format,
4687        ShortStatusOptions::default(),
4688    )
4689}
4690
4691pub fn short_status_count_with_options(
4692    worktree_root: impl AsRef<Path>,
4693    git_dir: impl AsRef<Path>,
4694    format: ObjectFormat,
4695    options: ShortStatusOptions,
4696) -> Result<usize> {
4697    let worktree_root = worktree_root.as_ref();
4698    let git_dir = git_dir.as_ref();
4699    let db = FileObjectDatabase::from_git_dir(git_dir, format);
4700    if !options.include_ignored
4701        && let Some(count) = short_status_borrowed_head_matches_index_count_if_possible(
4702            worktree_root,
4703            git_dir,
4704            format,
4705            &db,
4706            options.untracked_mode,
4707        )?
4708    {
4709        return Ok(count);
4710    }
4711    let mut count = 0usize;
4712    stream_short_status_with_options(worktree_root, git_dir, format, options, |_| {
4713        count += 1;
4714        Ok(StreamControl::Continue)
4715    })?;
4716    Ok(count)
4717}
4718
/// Counters and timings gathered while computing status. `emit` prints them
/// to stderr as a single `sley.status.profile.v1` JSON line.
///
/// `merge_untracked` folds another instance into this one. It sums the
/// directory-walk and ignore counters, the `*_cap_bytes` totals,
/// `untracked_rows` and `untracked_elapsed_us`, takes the maximum of the
/// `*_max_len` / `*_max_cap` fields, and leaves the remaining fields alone.
#[derive(Debug, Clone, Default)]
struct StatusProfileCounters {
    fast_path_borrowed: bool,
    // Counters summed by `merge_untracked`.
    read_dir_calls: u64,
    dir_entries_seen: u64,
    file_type_calls: u64,
    ignore_checks: u64,
    ignore_pattern_tests: u64,
    ignore_glob_fallback_tests: u64,
    tracked_exact_hits: u64,
    tracked_dir_prefix_hits: u64,
    tracked_skip_worktree_prefix_hits: u64,
    // Vector statistics: `*_cap_bytes` are summed on merge, `*_max_*` keep the
    // largest value seen.
    read_dir_entry_vec_cap_bytes: u64,
    read_dir_entry_vec_max_len: u64,
    read_dir_entry_vec_max_cap: u64,
    read_dir_name_vec_cap_bytes: u64,
    read_dir_name_vec_max_len: u64,
    read_dir_name_vec_max_cap: u64,
    untracked_rows: u64,
    // Elapsed times, in microseconds going by the `_us` suffix.
    tracked_elapsed_us: u128,
    untracked_elapsed_us: u128,
    render_elapsed_us: u128,
    overlap_enabled: bool,
}
4743
// NOTE(review): not referenced in this part of the file; by name, the minimum
// number of stage-0 entries before the borrowed overlap path is used — confirm
// at the use site.
const STATUS_BORROWED_OVERLAP_MIN_STAGE0: usize = 1024;
// Stack size in bytes (32 KiB) requested for threads created by
// `spawn_status_worker`.
const STATUS_WORKER_STACK_SIZE: usize = 32 * 1024;
4746
4747fn spawn_status_worker<'scope, 'env, F, T>(
4748    scope: &'scope std::thread::Scope<'scope, 'env>,
4749    name: &str,
4750    f: F,
4751) -> Result<std::thread::ScopedJoinHandle<'scope, Result<T>>>
4752where
4753    F: FnOnce() -> Result<T> + Send + 'scope,
4754    T: Send + 'scope,
4755{
4756    std::thread::Builder::new()
4757        .name(name.to_string())
4758        .stack_size(STATUS_WORKER_STACK_SIZE)
4759        .spawn_scoped(scope, f)
4760        .map_err(|err| GitError::Command(format!("failed to spawn status worker `{name}`: {err}")))
4761}
4762
/// Backing storage for raw index bytes: either a heap buffer read from the
/// file or a `sley_mmap::MappedFile`. Both are exposed as `&[u8]` through
/// `AsRef`.
enum BorrowedIndexBytes {
    /// Bytes read into memory with `fs::read`.
    Owned(Vec<u8>),
    /// Bytes served from a mapped file.
    Mapped(sley_mmap::MappedFile),
}
4767
4768impl AsRef<[u8]> for BorrowedIndexBytes {
4769    fn as_ref(&self) -> &[u8] {
4770        match self {
4771            Self::Owned(bytes) => bytes,
4772            Self::Mapped(bytes) => bytes.as_bytes(),
4773        }
4774    }
4775}
4776
4777fn read_borrowed_index_bytes(index_path: &Path) -> Result<BorrowedIndexBytes> {
4778    match sley_mmap::MappedFile::open_index(index_path) {
4779        Ok(mapped) => Ok(BorrowedIndexBytes::Mapped(mapped)),
4780        Err(_) => Ok(BorrowedIndexBytes::Owned(fs::read(index_path)?)),
4781    }
4782}
4783
4784impl StatusProfileCounters {
4785    fn enabled() -> bool {
4786        std::env::var_os("SLEY_STATUS_PROFILE").is_some_and(|value| value != "0")
4787    }
4788
4789    fn memory_enabled() -> bool {
4790        std::env::var_os("SLEY_STATUS_PROFILE")
4791            .and_then(|value| value.into_string().ok())
4792            .is_some_and(|value| value == "mem" || value == "memory")
4793    }
4794
4795    fn merge_untracked(&mut self, other: StatusProfileCounters) {
4796        self.read_dir_calls += other.read_dir_calls;
4797        self.dir_entries_seen += other.dir_entries_seen;
4798        self.file_type_calls += other.file_type_calls;
4799        self.ignore_checks += other.ignore_checks;
4800        self.ignore_pattern_tests += other.ignore_pattern_tests;
4801        self.ignore_glob_fallback_tests += other.ignore_glob_fallback_tests;
4802        self.tracked_exact_hits += other.tracked_exact_hits;
4803        self.tracked_dir_prefix_hits += other.tracked_dir_prefix_hits;
4804        self.tracked_skip_worktree_prefix_hits += other.tracked_skip_worktree_prefix_hits;
4805        self.read_dir_entry_vec_cap_bytes += other.read_dir_entry_vec_cap_bytes;
4806        self.read_dir_entry_vec_max_len = self
4807            .read_dir_entry_vec_max_len
4808            .max(other.read_dir_entry_vec_max_len);
4809        self.read_dir_entry_vec_max_cap = self
4810            .read_dir_entry_vec_max_cap
4811            .max(other.read_dir_entry_vec_max_cap);
4812        self.read_dir_name_vec_cap_bytes += other.read_dir_name_vec_cap_bytes;
4813        self.read_dir_name_vec_max_len = self
4814            .read_dir_name_vec_max_len
4815            .max(other.read_dir_name_vec_max_len);
4816        self.read_dir_name_vec_max_cap = self
4817            .read_dir_name_vec_max_cap
4818            .max(other.read_dir_name_vec_max_cap);
4819        self.untracked_rows += other.untracked_rows;
4820        self.untracked_elapsed_us += other.untracked_elapsed_us;
4821    }
4822
4823    fn emit(&self) {
4824        eprintln!(
4825            "{{\"schema\":\"sley.status.profile.v1\",\
4826             \"fast_path_borrowed\":{},\
4827             \"read_dir_calls\":{},\
4828             \"dir_entries_seen\":{},\
4829             \"file_type_calls\":{},\
4830             \"ignore_checks\":{},\
4831             \"ignore_pattern_tests\":{},\
4832             \"ignore_glob_fallback_tests\":{},\
4833             \"tracked_exact_hits\":{},\
4834             \"tracked_dir_prefix_hits\":{},\
4835             \"tracked_skip_worktree_prefix_hits\":{},\
4836             \"read_dir_entry_size\":{},\
4837             \"read_dir_entry_vec_cap_bytes\":{},\
4838             \"read_dir_entry_vec_max_len\":{},\
4839             \"read_dir_entry_vec_max_cap\":{},\
4840             \"read_dir_name_size\":{},\
4841             \"read_dir_name_vec_cap_bytes\":{},\
4842             \"read_dir_name_vec_max_len\":{},\
4843             \"read_dir_name_vec_max_cap\":{},\
4844             \"untracked_rows\":{},\
4845             \"tracked_elapsed_us\":{},\
4846             \"untracked_elapsed_us\":{},\
4847             \"render_elapsed_us\":{},\
4848             \"overlap_enabled\":{}}}",
4849            self.fast_path_borrowed,
4850            self.read_dir_calls,
4851            self.dir_entries_seen,
4852            self.file_type_calls,
4853            self.ignore_checks,
4854            self.ignore_pattern_tests,
4855            self.ignore_glob_fallback_tests,
4856            self.tracked_exact_hits,
4857            self.tracked_dir_prefix_hits,
4858            self.tracked_skip_worktree_prefix_hits,
4859            std::mem::size_of::<fs::DirEntry>(),
4860            self.read_dir_entry_vec_cap_bytes,
4861            self.read_dir_entry_vec_max_len,
4862            self.read_dir_entry_vec_max_cap,
4863            std::mem::size_of::<std::ffi::OsString>(),
4864            self.read_dir_name_vec_cap_bytes,
4865            self.read_dir_name_vec_max_len,
4866            self.read_dir_name_vec_max_cap,
4867            self.untracked_rows,
4868            self.tracked_elapsed_us,
4869            self.untracked_elapsed_us,
4870            self.render_elapsed_us,
4871            self.overlap_enabled
4872        );
4873    }
4874}
4875
/// Asks `ps` for this process's `rss` and `vsz` columns and returns them as
/// `(rss_bytes, vsz_bytes)`, each being the reported number times 1024.
///
/// Returns `None` when `ps` cannot be run, exits unsuccessfully, or its output
/// is not two whitespace-separated unsigned integers.
fn status_profile_rss_vsz_bytes() -> Option<(u64, u64)> {
    let pid = std::process::id().to_string();
    let ps = Command::new("ps")
        .args(["-o", "rss=", "-o", "vsz=", "-p", &pid])
        .output()
        .ok()
        .filter(|output| output.status.success())?;
    let text = String::from_utf8(ps.stdout).ok()?;
    let mut columns = text
        .split_whitespace()
        .map(|column| column.parse::<u64>().ok());
    let rss_kib = columns.next()??;
    let vsz_kib = columns.next()??;
    Some((rss_kib * 1024, vsz_kib * 1024))
}
4891
/// Sleeps at a named profiling checkpoint when asked to via the environment.
///
/// The pause happens only if `SLEY_STATUS_PROFILE_PAUSE_AT` equals `label` or
/// `*`. Its length comes from `SLEY_STATUS_PROFILE_PAUSE_SECS`, defaulting to
/// 30 seconds when that variable is unset or not a valid unsigned integer. A
/// `sley.status.mem.pause.v1` JSON line is printed to stderr before sleeping.
fn status_profile_pause(label: &str) {
    let requested = std::env::var_os("SLEY_STATUS_PROFILE_PAUSE_AT")
        .and_then(|value| value.into_string().ok());
    let should_pause = requested
        .as_deref()
        .is_some_and(|target| target == label || target == "*");
    if !should_pause {
        return;
    }
    let seconds = match std::env::var("SLEY_STATUS_PROFILE_PAUSE_SECS") {
        Ok(raw) => raw.parse::<u64>().unwrap_or(30),
        Err(_) => 30,
    };
    eprintln!(
        "{{\"schema\":\"sley.status.mem.pause.v1\",\"label\":\"{}\",\"pid\":{},\"seconds\":{}}}",
        label,
        std::process::id(),
        seconds
    );
    std::thread::sleep(std::time::Duration::from_secs(seconds));
}
4913
4914fn status_profile_mem(label: &str, details: &[(&str, usize)]) {
4915    if !StatusProfileCounters::memory_enabled() {
4916        return;
4917    }
4918    let (rss_bytes, vsz_bytes) = status_profile_rss_vsz_bytes().unwrap_or((0, 0));
4919    eprint!(
4920        "{{\"schema\":\"sley.status.mem.v1\",\"label\":\"{}\",\"pid\":{},\"rss_bytes\":{},\"vsz_bytes\":{}",
4921        label,
4922        std::process::id(),
4923        rss_bytes,
4924        vsz_bytes
4925    );
4926    for (key, value) in details {
4927        eprint!(",\"{}\":{}", key, value);
4928    }
4929    eprintln!("}}");
4930    status_profile_pause(label);
4931}
4932
4933/// Compare one expected tracked entry to the worktree path named by `path`.
4934///
4935/// `path` is repository-relative and uses the platform path representation. For
4936/// callers that already carry git's byte path form, use
4937/// [`worktree_entry_state_by_git_path`].
4938pub fn worktree_entry_state(
4939    worktree_root: impl AsRef<Path>,
4940    git_dir: impl AsRef<Path>,
4941    format: ObjectFormat,
4942    path: impl AsRef<Path>,
4943    expected_oid: &ObjectId,
4944    expected_mode: u32,
4945    index_probe: Option<&IndexStatProbe>,
4946) -> Result<WorktreeEntryState> {
4947    let path = path.as_ref();
4948    if path.is_absolute() {
4949        return Err(GitError::InvalidPath(format!(
4950            "worktree entry path {} is absolute",
4951            path.display()
4952        )));
4953    }
4954    let git_path = git_path_bytes(path)?;
4955    worktree_entry_state_by_git_path(
4956        worktree_root,
4957        git_dir,
4958        format,
4959        &git_path,
4960        expected_oid,
4961        expected_mode,
4962        index_probe,
4963    )
4964}
4965
4966/// Compare one expected tracked entry to the worktree path named by a
4967/// repository-relative git path (`/` separators, raw bytes).
4968///
4969/// The comparison uses the same clean-filter, symlink-target, gitlink, and
4970/// racy-clean stat shortcut rules as [`stream_short_status_with_options`].
4971pub fn worktree_entry_state_by_git_path(
4972    worktree_root: impl AsRef<Path>,
4973    git_dir: impl AsRef<Path>,
4974    format: ObjectFormat,
4975    git_path: &[u8],
4976    expected_oid: &ObjectId,
4977    expected_mode: u32,
4978    index_probe: Option<&IndexStatProbe>,
4979) -> Result<WorktreeEntryState> {
4980    let worktree_root = worktree_root.as_ref();
4981    let git_dir = git_dir.as_ref();
4982    let stat_cache =
4983        index_probe.and_then(|probe| probe.stat_cache_for(git_path, expected_oid, expected_mode));
4984    let Some(worktree_entry) = worktree_entry_for_git_path(
4985        worktree_root,
4986        git_dir,
4987        format,
4988        git_path,
4989        expected_oid,
4990        expected_mode,
4991        stat_cache.as_ref(),
4992    )?
4993    else {
4994        return Ok(WorktreeEntryState::Deleted);
4995    };
4996    if worktree_entry.mode == expected_mode && worktree_entry.oid == *expected_oid {
4997        Ok(WorktreeEntryState::Clean)
4998    } else {
4999        Ok(WorktreeEntryState::Modified)
5000    }
5001}
5002
5003pub fn stream_short_status_with_options<F>(
5004    worktree_root: impl AsRef<Path>,
5005    git_dir: impl AsRef<Path>,
5006    format: ObjectFormat,
5007    options: ShortStatusOptions,
5008    mut emit: F,
5009) -> Result<()>
5010where
5011    F: for<'a> FnMut(ShortStatusRow<'a>) -> Result<StreamControl>,
5012{
5013    let worktree_root = worktree_root.as_ref();
5014    let git_dir = git_dir.as_ref();
5015    let db = FileObjectDatabase::from_git_dir(git_dir, format);
5016    if !options.include_ignored
5017        && let Some(()) = stream_short_status_borrowed_head_matches_index_if_possible(
5018            worktree_root,
5019            git_dir,
5020            format,
5021            &db,
5022            options.untracked_mode,
5023            &mut emit,
5024        )?
5025    {
5026        return Ok(());
5027    }
5028    for entry in collect_short_status_with_options(worktree_root, git_dir, format, options)? {
5029        if emit(entry.as_row())?.is_stop() {
5030            break;
5031        }
5032    }
5033    Ok(())
5034}
5035
5036fn collect_short_status_with_options(
5037    worktree_root: impl AsRef<Path>,
5038    git_dir: impl AsRef<Path>,
5039    format: ObjectFormat,
5040    options: ShortStatusOptions,
5041) -> Result<Vec<ShortStatusEntry>> {
5042    let worktree_root = worktree_root.as_ref();
5043    let git_dir = git_dir.as_ref();
5044    let db = FileObjectDatabase::from_git_dir(git_dir, format);
5045    if !options.include_ignored
5046        && let Some(entries) = short_status_borrowed_head_matches_index_if_possible(
5047            worktree_root,
5048            git_dir,
5049            format,
5050            &db,
5051            options.untracked_mode,
5052        )?
5053    {
5054        return Ok(entries);
5055    }
5056    // Parse the index once: the stat cache lets the worktree walk skip
5057    // re-hashing files whose stat proves they are unchanged since staging
5058    // (git's racy-git shortcut). When HEAD matches the index, the status
5059    // comparison can stream directly from the parsed index and avoid building a
5060    // second path-sorted copy of every tracked entry.
5061    let (mut parsed_index, mut stat_cache, mut head_matches_index) =
5062        read_index_with_stat_cache(git_dir, format, &db)?;
5063    let sparse_checkout_active = sparse_checkout_active_for_status(git_dir, &parsed_index);
5064    if sparse_checkout_active && parsed_index.entries.iter().any(IndexEntry::is_sparse_dir) {
5065        expand_sparse_index(&mut parsed_index, &db, format)?;
5066        stat_cache = IndexStatCache::from_index_mtime(&parsed_index, stat_cache.index_mtime);
5067        head_matches_index = false;
5068    }
5069    let mut unmerged_entries = short_status_unmerged_entries(&parsed_index);
5070    let unmerged_paths = unmerged_entries
5071        .iter()
5072        .map(|entry| entry.path.clone())
5073        .collect::<BTreeSet<_>>();
5074    if head_matches_index && !options.include_ignored {
5075        let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
5076        let entries = short_status_tracked_only(
5077            worktree_root,
5078            git_dir,
5079            format,
5080            &db,
5081            &parsed_index,
5082            &stat_cache,
5083            true,
5084            sparse_checkout_active,
5085            options.untracked_mode,
5086        );
5087        let mut entries = entries?;
5088        entries.retain(|entry| !unmerged_paths.contains(&entry.path));
5089        let untracked_paths = status_untracked_paths_from_index(
5090            worktree_root,
5091            git_dir,
5092            &parsed_index,
5093            &stat_cache,
5094            &mut ignores,
5095            options.untracked_mode,
5096            None,
5097        )?;
5098        for path in untracked_paths {
5099            entries.push(ShortStatusEntry {
5100                index: b'?',
5101                worktree: b'?',
5102                path,
5103                head_mode: None,
5104                index_mode: None,
5105                worktree_mode: None,
5106                head_oid: None,
5107                index_oid: None,
5108                submodule: None,
5109            });
5110        }
5111        entries.append(&mut unmerged_entries);
5112        entries.sort_by(|left, right| {
5113            status_sort_category(left)
5114                .cmp(&status_sort_category(right))
5115                .then_with(|| left.path.cmp(&right.path))
5116        });
5117        return Ok(entries);
5118    }
5119    let index = index_entries_from_index(parsed_index);
5120    let head = if head_matches_index {
5121        None
5122    } else {
5123        Some(head_tree_entries(git_dir, format, &db)?)
5124    };
5125    let known_tracked_paths = index.keys().cloned().collect::<BTreeSet<_>>();
5126    let tracked_paths = if options.untracked_mode == StatusUntrackedMode::None {
5127        Some(&known_tracked_paths)
5128    } else {
5129        None
5130    };
5131    let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
5132    let (worktree, submodule_dirt_map, tracked_presence) =
5133        status_worktree_entries_with_submodule_dirt(
5134            worktree_root,
5135            git_dir,
5136            format,
5137            &stat_cache,
5138            Some(&known_tracked_paths),
5139            tracked_paths,
5140            Some(&mut ignores),
5141        )?;
5142    let mut entries = Vec::new();
5143    if head_matches_index {
5144        collect_status_entries_head_matches_index(
5145            &index,
5146            &worktree,
5147            &tracked_presence,
5148            &stat_cache,
5149            sparse_checkout_active,
5150            &submodule_dirt_map,
5151            options.untracked_mode,
5152            &mut entries,
5153        );
5154    } else if let Some(head) = head.as_ref() {
5155        collect_status_entries_with_head(
5156            StatusComparisonInputs {
5157                head,
5158                index: &index,
5159                worktree: &worktree,
5160                tracked_presence: &tracked_presence,
5161                stat_cache: &stat_cache,
5162                sparse_checkout_active,
5163                submodule_dirt_map: &submodule_dirt_map,
5164                ignores: &ignores,
5165            },
5166            options.untracked_mode,
5167            &mut entries,
5168        );
5169    }
5170    entries.retain(|entry| !unmerged_paths.contains(&entry.path));
5171    entries.append(&mut unmerged_entries);
5172    if options.include_ignored {
5173        let ignored_directory_rows = !matches!(options.untracked_mode, StatusUntrackedMode::All);
5174        let ignored_paths = ignored_untracked_paths(
5175            worktree_root,
5176            git_dir,
5177            &index,
5178            &ignores,
5179            ignored_directory_rows,
5180        )?;
5181        let ignored_paths: Vec<Vec<u8>> = match options.ignored_mode {
5182            StatusIgnoredMode::Matching => ignored_paths,
5183            StatusIgnoredMode::Traditional
5184                if matches!(options.untracked_mode, StatusUntrackedMode::All) =>
5185            {
5186                ignored_paths
5187            }
5188            StatusIgnoredMode::Traditional => {
5189                let mut rolled = BTreeSet::new();
5190                for path in ignored_paths {
5191                    let path = ignored_traditional_rollup_path(
5192                        worktree_root,
5193                        git_dir,
5194                        &path,
5195                        &index,
5196                        &ignores,
5197                    )?;
5198                    if ignored_traditional_path_is_empty_directory(worktree_root, &path)? {
5199                        continue;
5200                    }
5201                    rolled.insert(path);
5202                }
5203                rolled.into_iter().collect()
5204            }
5205        };
5206        for path in ignored_paths {
5207            entries.push(ShortStatusEntry {
5208                index: b'!',
5209                worktree: b'!',
5210                path,
5211                head_mode: None,
5212                index_mode: None,
5213                worktree_mode: None,
5214                head_oid: None,
5215                index_oid: None,
5216                submodule: None,
5217            });
5218        }
5219    }
5220    let untracked_paths: Vec<Vec<u8>> = match options.untracked_mode {
5221        StatusUntrackedMode::All => worktree
5222            .iter()
5223            .filter_map(|(path, entry)| {
5224                let is_directory = entry.mode == 0o040000 && entry.oid.is_null();
5225                if index.contains_key(path)
5226                    || path_or_parent_is_ignored(&ignores, path, is_directory)
5227                {
5228                    return None;
5229                }
5230                if is_directory {
5231                    let mut directory = path.clone();
5232                    directory.push(b'/');
5233                    Some(directory)
5234                } else {
5235                    Some(path.clone())
5236                }
5237            })
5238            .collect(),
5239        StatusUntrackedMode::Normal => {
5240            normal_untracked_paths_from_worktree(&worktree, &index, &ignores)
5241        }
5242        StatusUntrackedMode::None => Vec::new(),
5243    };
5244    for path in untracked_paths {
5245        entries.push(ShortStatusEntry {
5246            index: b'?',
5247            worktree: b'?',
5248            path,
5249            head_mode: None,
5250            index_mode: None,
5251            worktree_mode: None,
5252            head_oid: None,
5253            index_oid: None,
5254            submodule: None,
5255        });
5256    }
5257    entries.sort_by(|left, right| {
5258        status_sort_category(left)
5259            .cmp(&status_sort_category(right))
5260            .then_with(|| left.path.cmp(&right.path))
5261    });
5262    Ok(entries)
5263}
5264
5265fn short_status_unmerged_entries(index: &Index) -> Vec<ShortStatusEntry> {
5266    let mut by_path: BTreeMap<Vec<u8>, BTreeSet<u16>> = BTreeMap::new();
5267    for entry in &index.entries {
5268        let stage = entry.stage().as_u16();
5269        if stage > 0 {
5270            by_path
5271                .entry(entry.path.as_bytes().to_vec())
5272                .or_default()
5273                .insert(stage);
5274        }
5275    }
5276    by_path
5277        .into_iter()
5278        .map(|(path, stages)| {
5279            let (index, worktree) = short_status_unmerged_codes(&stages);
5280            ShortStatusEntry {
5281                index,
5282                worktree,
5283                path,
5284                head_mode: None,
5285                index_mode: None,
5286                worktree_mode: None,
5287                head_oid: None,
5288                index_oid: None,
5289                submodule: None,
5290            }
5291        })
5292        .collect()
5293}
5294
/// Map the set of index stages present for an unmerged path to its two status
/// letters `(index, worktree)`.
///
/// Only stages 1, 2 and 3 are considered; other values in the set are ignored.
///
/// | stages present | letters |
/// |----------------|---------|
/// | 1              | `DD`    |
/// | 2              | `AU`    |
/// | 1, 2           | `UD`    |
/// | 3              | `UA`    |
/// | 1, 3           | `DU`    |
/// | 2, 3           | `AA`    |
/// | 1, 2, 3        | `UU`    |
/// | none of them   | `UU`    |
fn short_status_unmerged_codes(stages: &BTreeSet<u16>) -> (u8, u8) {
    // Bit 0 = stage 1, bit 1 = stage 2, bit 2 = stage 3.
    let bit = |stage: u16| u8::from(stages.contains(&stage));
    let mask = bit(1) | (bit(2) << 1) | (bit(3) << 2);
    match mask {
        0b001 => (b'D', b'D'),
        0b010 => (b'A', b'U'),
        0b011 => (b'U', b'D'),
        0b100 => (b'U', b'A'),
        0b101 => (b'D', b'U'),
        0b110 => (b'A', b'A'),
        // All three stages, or none of them.
        _ => (b'U', b'U'),
    }
}
5311
5312fn sparse_checkout_active_for_status(git_dir: &Path, index: &Index) -> bool {
5313    index.is_sparse()
5314        || index.entries.iter().any(IndexEntry::is_sparse_dir)
5315        || sparse_checkout_config_enabled(git_dir)
5316}
5317
5318fn sparse_checkout_active_for_borrowed_status(git_dir: &Path, index: &BorrowedIndex<'_>) -> bool {
5319    index
5320        .entries
5321        .iter()
5322        .any(|entry| entry.mode == SPARSE_DIR_MODE && entry.is_skip_worktree())
5323        || sparse_checkout_config_enabled(git_dir)
5324}
5325
5326fn sparse_checkout_config_enabled(git_dir: &Path) -> bool {
5327    GitConfig::read(git_dir.join("config"))
5328        .ok()
5329        .and_then(|config| config.get_bool("core", None, "sparseCheckout"))
5330        == Some(true)
5331        || GitConfig::read(git_dir.join("config.worktree"))
5332            .ok()
5333            .and_then(|config| config.get_bool("core", None, "sparseCheckout"))
5334            == Some(true)
5335}
5336
5337fn collect_status_entries_head_matches_index(
5338    index: &BTreeMap<Vec<u8>, TrackedEntry>,
5339    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
5340    tracked_presence: &HashSet<Vec<u8>>,
5341    stat_cache: &IndexStatCache,
5342    sparse_checkout_active: bool,
5343    submodule_dirt_map: &BTreeMap<Vec<u8>, u8>,
5344    untracked_mode: StatusUntrackedMode,
5345    entries: &mut Vec<ShortStatusEntry>,
5346) {
5347    for (path, index_entry) in index {
5348        let intent_to_add = stat_cache
5349            .index_entry(path)
5350            .is_some_and(IndexEntry::is_intent_to_add);
5351        let visible_index_entry = (!intent_to_add).then_some(index_entry);
5352        let worktree_entry = worktree.get(path);
5353        let worktree_present =
5354            worktree_entry.is_some() || tracked_presence.contains(path.as_slice());
5355        let skip_worktree = sparse_checkout_active
5356            && stat_cache
5357                .index_entry(path)
5358                .is_some_and(index_entry_skip_worktree);
5359        let submodule = status_submodule_from_entries(
5360            path,
5361            index_entry,
5362            worktree_entry,
5363            submodule_dirt_map,
5364            untracked_mode,
5365        );
5366        let worktree_code = match worktree_entry {
5367            None if intent_to_add => b' ',
5368            None if !worktree_present && skip_worktree => b' ',
5369            None if !worktree_present => b'D',
5370            Some(_) if intent_to_add => b'A',
5371            Some(worktree_entry) if Some(worktree_entry) != visible_index_entry => b'M',
5372            _ if submodule.is_some_and(|sub| sub.any()) => b'M',
5373            _ => b' ',
5374        };
5375        if worktree_code != b' ' {
5376            entries.push(ShortStatusEntry {
5377                index: b' ',
5378                worktree: worktree_code,
5379                path: path.clone(),
5380                head_mode: visible_index_entry.map(|entry| entry.mode),
5381                index_mode: visible_index_entry.map(|entry| entry.mode),
5382                worktree_mode: status_worktree_mode(
5383                    visible_index_entry,
5384                    worktree_entry,
5385                    worktree_present,
5386                ),
5387                head_oid: visible_index_entry.map(|entry| entry.oid),
5388                index_oid: visible_index_entry.map(|entry| entry.oid),
5389                submodule: submodule.filter(|sub| sub.any()),
5390            });
5391        }
5392    }
5393}
5394
5395struct StatusComparisonInputs<'a> {
5396    head: &'a BTreeMap<Vec<u8>, TrackedEntry>,
5397    index: &'a BTreeMap<Vec<u8>, TrackedEntry>,
5398    worktree: &'a BTreeMap<Vec<u8>, TrackedEntry>,
5399    tracked_presence: &'a HashSet<Vec<u8>>,
5400    stat_cache: &'a IndexStatCache,
5401    sparse_checkout_active: bool,
5402    submodule_dirt_map: &'a BTreeMap<Vec<u8>, u8>,
5403    ignores: &'a IgnoreMatcher,
5404}
5405
5406fn collect_status_entries_with_head(
5407    inputs: StatusComparisonInputs<'_>,
5408    untracked_mode: StatusUntrackedMode,
5409    entries: &mut Vec<ShortStatusEntry>,
5410) {
5411    let mut paths = BTreeSet::new();
5412    paths.extend(inputs.head.keys().cloned());
5413    paths.extend(inputs.index.keys().cloned());
5414    paths.extend(
5415        inputs
5416            .worktree
5417            .keys()
5418            .filter(|path| inputs.index.contains_key(*path))
5419            .cloned(),
5420    );
5421
5422    for path in paths {
5423        let head_entry = inputs.head.get(&path);
5424        let index_entry = inputs.index.get(&path);
5425        let intent_to_add = inputs
5426            .stat_cache
5427            .index_entry(&path)
5428            .is_some_and(IndexEntry::is_intent_to_add);
5429        let visible_index_entry = index_entry.filter(|_| !intent_to_add);
5430        let worktree_entry = inputs.worktree.get(&path);
5431        let worktree_present =
5432            worktree_entry.is_some() || inputs.tracked_presence.contains(path.as_slice());
5433        if head_entry.is_none()
5434            && index_entry.is_none()
5435            && worktree_entry.is_some()
5436            && inputs.ignores.is_ignored(&path, false)
5437        {
5438            continue;
5439        }
5440        let submodule = match visible_index_entry {
5441            Some(index_entry) => status_submodule_from_entries(
5442                &path,
5443                index_entry,
5444                worktree_entry,
5445                inputs.submodule_dirt_map,
5446                untracked_mode,
5447            ),
5448            None => None,
5449        };
5450        let skip_worktree = inputs.sparse_checkout_active
5451            && visible_index_entry.is_some_and(|_| {
5452                inputs
5453                    .stat_cache
5454                    .index_entry(&path)
5455                    .is_some_and(index_entry_skip_worktree)
5456            });
5457        let (index_code, worktree_code) =
5458            if head_entry.is_none() && index_entry.is_none() && worktree_entry.is_some() {
5459                (b'?', b'?')
5460            } else {
5461                let index_code = match (head_entry, visible_index_entry) {
5462                    (None, Some(_)) => b'A',
5463                    (Some(_), None) => b'D',
5464                    (Some(left), Some(right)) if left != right => b'M',
5465                    _ => b' ',
5466                };
5467                let worktree_code = match (visible_index_entry, worktree_entry) {
5468                    (None, Some(_)) if intent_to_add => b'A',
5469                    (None, Some(_)) => b'?',
5470                    (None, None) if intent_to_add => b' ',
5471                    (Some(_), None) if !worktree_present && skip_worktree => b' ',
5472                    (Some(_), None) if !worktree_present => b'D',
5473                    (Some(left), Some(right)) if left != right => b'M',
5474                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
5475                    _ => b' ',
5476                };
5477                (index_code, worktree_code)
5478            };
5479        if index_code != b' ' || worktree_code != b' ' {
5480            let worktree_mode = if skip_worktree && !worktree_present && worktree_entry.is_none() {
5481                visible_index_entry.map(|entry| entry.mode)
5482            } else {
5483                status_worktree_mode(visible_index_entry, worktree_entry, worktree_present)
5484            };
5485            entries.push(ShortStatusEntry {
5486                index: index_code,
5487                worktree: worktree_code,
5488                path,
5489                head_mode: head_entry.map(|entry| entry.mode),
5490                index_mode: visible_index_entry.map(|entry| entry.mode),
5491                worktree_mode,
5492                head_oid: head_entry.map(|entry| entry.oid),
5493                index_oid: visible_index_entry.map(|entry| entry.oid),
5494                submodule: submodule.filter(|sub| sub.any()),
5495            });
5496        }
5497    }
5498}
5499
5500fn status_worktree_mode(
5501    index_entry: Option<&TrackedEntry>,
5502    worktree_entry: Option<&TrackedEntry>,
5503    worktree_present: bool,
5504) -> Option<u32> {
5505    worktree_entry.map(|entry| entry.mode).or_else(|| {
5506        worktree_present
5507            .then(|| index_entry.map(|entry| entry.mode))
5508            .flatten()
5509    })
5510}
5511
5512fn status_submodule_from_entries(
5513    path: &[u8],
5514    index_entry: &TrackedEntry,
5515    worktree_entry: Option<&TrackedEntry>,
5516    submodule_dirt_map: &BTreeMap<Vec<u8>, u8>,
5517    _untracked_mode: StatusUntrackedMode,
5518) -> Option<SubmoduleStatus> {
5519    let worktree_entry = worktree_entry?;
5520    if !sley_index::is_gitlink(index_entry.mode) || !sley_index::is_gitlink(worktree_entry.mode) {
5521        return None;
5522    }
5523    let dirt = submodule_dirt_map.get(path).copied().unwrap_or(0);
5524    Some(SubmoduleStatus {
5525        new_commits: index_entry.oid != worktree_entry.oid,
5526        modified_content: dirt & DIRTY_SUBMODULE_MODIFIED != 0,
5527        untracked_content: dirt & DIRTY_SUBMODULE_UNTRACKED != 0,
5528    })
5529}
5530
5531fn short_status_tracked_only(
5532    worktree_root: &Path,
5533    git_dir: &Path,
5534    format: ObjectFormat,
5535    db: &FileObjectDatabase,
5536    index: &Index,
5537    stat_cache: &IndexStatCache,
5538    head_matches_index: bool,
5539    sparse_checkout_active: bool,
5540    untracked_mode: StatusUntrackedMode,
5541) -> Result<Vec<ShortStatusEntry>> {
5542    let normal_entry_count = index
5543        .entries
5544        .iter()
5545        .filter(|entry| entry.stage() == Stage::Normal)
5546        .count();
5547    if head_matches_index && normal_entry_count >= 512 {
5548        return short_status_tracked_only_head_matches_index_parallel(
5549            worktree_root,
5550            git_dir,
5551            format,
5552            index,
5553            stat_cache,
5554            sparse_checkout_active,
5555            untracked_mode,
5556        );
5557    }
5558    let head = if head_matches_index {
5559        None
5560    } else {
5561        Some(head_tree_entries(git_dir, format, db)?)
5562    };
5563    if !head_matches_index && normal_entry_count >= 512 {
5564        if let Some(head) = head.as_ref() {
5565            return short_status_tracked_only_with_head_parallel(
5566                worktree_root,
5567                git_dir,
5568                format,
5569                index,
5570                stat_cache,
5571                head,
5572                sparse_checkout_active,
5573                untracked_mode,
5574            );
5575        }
5576    }
5577    let mut clean_filter = None;
5578    let mut entries = Vec::new();
5579    for entry in index
5580        .entries
5581        .iter()
5582        .filter(|entry| entry.stage() == Stage::Normal)
5583    {
5584        let path = entry.path.as_bytes();
5585        let index_entry = TrackedEntry {
5586            mode: entry.mode,
5587            oid: entry.oid,
5588        };
5589        let head_entry = if head_matches_index {
5590            (!entry.is_intent_to_add()).then_some(&index_entry)
5591        } else {
5592            head.as_ref().and_then(|head| head.get(path))
5593        };
5594        let worktree_entry = worktree_entry_for_index_entry_with_attributes(
5595            worktree_root,
5596            git_dir,
5597            format,
5598            entry,
5599            stat_cache,
5600            &mut clean_filter,
5601        )?;
5602        let submodule = tracked_only_submodule_status(
5603            worktree_root,
5604            path,
5605            &index_entry,
5606            worktree_entry.as_ref(),
5607            untracked_mode,
5608        )?;
5609        let visible_index_entry = (!entry.is_intent_to_add()).then_some(&index_entry);
5610        let index_code = match (head_entry, visible_index_entry) {
5611            (None, Some(_)) => b'A',
5612            (Some(_), None) => b'D',
5613            (Some(head_entry), Some(index_entry)) if *head_entry != *index_entry => b'M',
5614            _ => b' ',
5615        };
5616        let worktree_code = match worktree_entry.as_ref() {
5617            None if entry.is_intent_to_add() => b' ',
5618            None if sparse_checkout_active && entry.is_skip_worktree() => b' ',
5619            None => b'D',
5620            Some(_) if entry.is_intent_to_add() => b'A',
5621            Some(worktree_entry) if Some(worktree_entry) != visible_index_entry => b'M',
5622            _ if submodule.is_some_and(|sub| sub.any()) => b'M',
5623            _ => b' ',
5624        };
5625        if index_code != b' ' || worktree_code != b' ' {
5626            entries.push(ShortStatusEntry {
5627                index: index_code,
5628                worktree: worktree_code,
5629                path: path.to_vec(),
5630                head_mode: head_entry.map(|entry| entry.mode),
5631                index_mode: visible_index_entry.map(|entry| entry.mode),
5632                worktree_mode: worktree_entry.as_ref().map(|entry| entry.mode),
5633                head_oid: head_entry.map(|entry| entry.oid),
5634                index_oid: visible_index_entry.map(|entry| entry.oid),
5635                submodule: submodule.filter(|sub| sub.any()),
5636            });
5637        }
5638    }
5639    if let Some(head) = head.as_ref() {
5640        let index_paths = index
5641            .entries
5642            .iter()
5643            .filter(|entry| entry.stage() == Stage::Normal)
5644            .map(|entry| entry.path.as_bytes().to_vec())
5645            .collect::<HashSet<_>>();
5646        for (path, head_entry) in head {
5647            if index_paths.contains(path.as_slice()) {
5648                continue;
5649            }
5650            entries.push(ShortStatusEntry {
5651                index: b'D',
5652                worktree: b' ',
5653                path: path.clone(),
5654                head_mode: Some(head_entry.mode),
5655                index_mode: None,
5656                worktree_mode: None,
5657                head_oid: Some(head_entry.oid),
5658                index_oid: None,
5659                submodule: None,
5660            });
5661        }
5662    }
5663    entries.sort_by(|left, right| {
5664        status_sort_category(left)
5665            .cmp(&status_sort_category(right))
5666            .then_with(|| left.path.cmp(&right.path))
5667    });
5668    Ok(entries)
5669}
5670
5671fn short_status_borrowed_head_matches_index_if_possible(
5672    worktree_root: &Path,
5673    git_dir: &Path,
5674    format: ObjectFormat,
5675    db: &FileObjectDatabase,
5676    untracked_mode: StatusUntrackedMode,
5677) -> Result<Option<Vec<ShortStatusEntry>>> {
5678    let index_path = repository_index_path(git_dir);
5679    let index_metadata = match fs::metadata(&index_path) {
5680        Ok(metadata) => metadata,
5681        Err(err)
5682            if err.kind() == std::io::ErrorKind::NotFound
5683                && matches!(untracked_mode, StatusUntrackedMode::None) =>
5684        {
5685            return Ok(Some(Vec::new()));
5686        }
5687        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
5688        Err(err) => return Err(err.into()),
5689    };
5690    let index_bytes = read_borrowed_index_bytes(&index_path)?;
5691    status_profile_mem(
5692        "after_index_bytes",
5693        &[
5694            ("index_file_bytes", index_metadata.len() as usize),
5695            ("index_bytes_len", index_bytes.as_ref().len()),
5696            (
5697                "index_bytes_mapped",
5698                usize::from(matches!(index_bytes, BorrowedIndexBytes::Mapped(_))),
5699            ),
5700        ],
5701    );
5702    let borrowed = match BorrowedIndex::parse(index_bytes.as_ref(), format) {
5703        Ok(index) => index,
5704        Err(GitError::Unsupported(_)) => return Ok(None),
5705        Err(err) => return Err(err),
5706    };
5707    status_profile_mem(
5708        "after_borrowed_parse",
5709        &[
5710            ("index_file_bytes", index_metadata.len() as usize),
5711            ("index_bytes_len", index_bytes.as_ref().len()),
5712            (
5713                "index_bytes_mapped",
5714                usize::from(matches!(index_bytes, BorrowedIndexBytes::Mapped(_))),
5715            ),
5716            ("borrowed_entries_len", borrowed.entries.len()),
5717            ("borrowed_entries_cap", borrowed.entries.capacity()),
5718            (
5719                "borrowed_entry_size",
5720                std::mem::size_of::<IndexEntryRef<'_>>(),
5721            ),
5722            (
5723                "borrowed_entries_cap_bytes",
5724                borrowed.entries.capacity() * std::mem::size_of::<IndexEntryRef<'_>>(),
5725            ),
5726            ("borrowed_extensions_len", borrowed.extensions.len()),
5727        ],
5728    );
5729    let sparse_checkout_active = sparse_checkout_active_for_borrowed_status(git_dir, &borrowed);
5730    if borrowed
5731        .entries
5732        .iter()
5733        .any(|entry| entry.mode == SPARSE_DIR_MODE && entry.is_skip_worktree())
5734    {
5735        return Ok(None);
5736    }
5737    if borrowed
5738        .entries
5739        .iter()
5740        .any(|entry| entry.stage() != Stage::Normal)
5741    {
5742        return Ok(None);
5743    }
5744    status_profile_mem(
5745        "after_sparse_scan",
5746        &[
5747            ("borrowed_entries_len", borrowed.entries.len()),
5748            (
5749                "borrowed_entries_cap_bytes",
5750                borrowed.entries.capacity() * std::mem::size_of::<IndexEntryRef<'_>>(),
5751            ),
5752            (
5753                "sparse_checkout_active",
5754                usize::from(sparse_checkout_active),
5755            ),
5756        ],
5757    );
5758    let Some(head_tree_oid) = resolve_head_tree_oid(git_dir, format, db)? else {
5759        return Ok(None);
5760    };
5761    status_profile_mem(
5762        "after_head_tree_oid",
5763        &[
5764            ("borrowed_entries_len", borrowed.entries.len()),
5765            (
5766                "borrowed_entries_cap_bytes",
5767                borrowed.entries.capacity() * std::mem::size_of::<IndexEntryRef<'_>>(),
5768            ),
5769        ],
5770    );
5771    let stage0_entry_count = borrowed
5772        .entries
5773        .iter()
5774        .filter(|entry| entry.stage() == Stage::Normal)
5775        .count();
5776    status_profile_mem(
5777        "after_stage0_count",
5778        &[
5779            ("stage0_entry_count", stage0_entry_count),
5780            ("borrowed_entries_len", borrowed.entries.len()),
5781        ],
5782    );
5783    if !head_matches_borrowed_index_from_cache_tree(
5784        &borrowed,
5785        format,
5786        &head_tree_oid,
5787        stage0_entry_count,
5788    )? {
5789        return Ok(None);
5790    }
5791    status_profile_mem(
5792        "after_head_matches_index",
5793        &[
5794            ("stage0_entry_count", stage0_entry_count),
5795            ("borrowed_entries_len", borrowed.entries.len()),
5796            (
5797                "borrowed_entries_cap_bytes",
5798                borrowed.entries.capacity() * std::mem::size_of::<IndexEntryRef<'_>>(),
5799            ),
5800        ],
5801    );
5802
5803    let index_mtime = file_mtime_parts(&index_metadata);
5804    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
5805    let profile_enabled = StatusProfileCounters::enabled();
5806    let mut profile = profile_enabled.then(|| StatusProfileCounters {
5807        fast_path_borrowed: true,
5808        ..StatusProfileCounters::default()
5809    });
5810
5811    if matches!(untracked_mode, StatusUntrackedMode::None) {
5812        let tracked_start = Instant::now();
5813        let entries = short_status_borrowed_tracked_only_head_matches_index_parallel(
5814            worktree_root,
5815            git_dir,
5816            format,
5817            &borrowed,
5818            &stat_cache,
5819            sparse_checkout_active,
5820            untracked_mode,
5821        )?;
5822        if let Some(profile) = profile.as_mut() {
5823            profile.tracked_elapsed_us = tracked_start.elapsed().as_micros();
5824            profile.emit();
5825        }
5826        return Ok(Some(entries));
5827    }
5828
5829    if stage0_entry_count < STATUS_BORROWED_OVERLAP_MIN_STAGE0 {
5830        let tracked_start = Instant::now();
5831        let mut entries = short_status_borrowed_tracked_only_head_matches_index_parallel(
5832            worktree_root,
5833            git_dir,
5834            format,
5835            &borrowed,
5836            &stat_cache,
5837            sparse_checkout_active,
5838            untracked_mode,
5839        )?;
5840        if let Some(profile) = profile.as_mut() {
5841            profile.tracked_elapsed_us = tracked_start.elapsed().as_micros();
5842        }
5843        let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
5844        let untracked_start = Instant::now();
5845        let untracked_paths = status_untracked_paths_from_borrowed_index(
5846            worktree_root,
5847            git_dir,
5848            &borrowed,
5849            &mut ignores,
5850            untracked_mode,
5851            profile.as_mut(),
5852        )?;
5853        if let Some(profile) = profile.as_mut() {
5854            profile.untracked_elapsed_us = untracked_start.elapsed().as_micros();
5855            profile.untracked_rows = untracked_paths.len() as u64;
5856        }
5857        let render_start = Instant::now();
5858        append_untracked_status_entries(&mut entries, untracked_paths);
5859        if let Some(profile) = profile.as_mut() {
5860            profile.render_elapsed_us = render_start.elapsed().as_micros();
5861            profile.emit();
5862        }
5863        return Ok(Some(entries));
5864    }
5865
5866    if let Some(profile) = profile.as_mut() {
5867        profile.overlap_enabled = true;
5868    }
5869    if profile_enabled {
5870        let (mut entries, untracked_paths, untracked_profile) =
5871            std::thread::scope(|scope| -> Result<_> {
5872                let tracked = spawn_status_worker(scope, "status-tracked", || {
5873                    let start = Instant::now();
5874                    short_status_borrowed_tracked_only_head_matches_index_parallel(
5875                        worktree_root,
5876                        git_dir,
5877                        format,
5878                        &borrowed,
5879                        &stat_cache,
5880                        sparse_checkout_active,
5881                        untracked_mode,
5882                    )
5883                    .map(|entries| (entries, start.elapsed().as_micros()))
5884                })?;
5885                let untracked = spawn_status_worker(
5886                    scope,
5887                    "status-untracked",
5888                    || -> Result<(Vec<Vec<u8>>, StatusProfileCounters)> {
5889                        let mut local_profile = StatusProfileCounters::default();
5890                        let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
5891                        let start = Instant::now();
5892                        let paths = status_untracked_paths_from_borrowed_index(
5893                            worktree_root,
5894                            git_dir,
5895                            &borrowed,
5896                            &mut ignores,
5897                            untracked_mode,
5898                            Some(&mut local_profile),
5899                        )?;
5900                        local_profile.untracked_elapsed_us = start.elapsed().as_micros();
5901                        local_profile.untracked_rows = paths.len() as u64;
5902                        Ok((paths, local_profile))
5903                    },
5904                )?;
5905                let (entries, tracked_elapsed_us) = tracked
5906                    .join()
5907                    .map_err(|_| GitError::Command("status worker panicked".into()))??;
5908                let (untracked_paths, untracked_profile) = untracked
5909                    .join()
5910                    .map_err(|_| GitError::Command("status worker panicked".into()))??;
5911                if let Some(profile) = profile.as_mut() {
5912                    profile.tracked_elapsed_us = tracked_elapsed_us;
5913                }
5914                Ok((entries, untracked_paths, Some(untracked_profile)))
5915            })?;
5916        if let Some(profile) = profile.as_mut() {
5917            if let Some(untracked_profile) = untracked_profile {
5918                profile.merge_untracked(untracked_profile);
5919            }
5920        }
5921        let render_start = Instant::now();
5922        append_untracked_status_entries(&mut entries, untracked_paths);
5923        if let Some(profile) = profile.as_mut() {
5924            profile.render_elapsed_us = render_start.elapsed().as_micros();
5925            profile.emit();
5926        }
5927        return Ok(Some(entries));
5928    }
5929    let (mut entries, untracked_paths) = std::thread::scope(|scope| -> Result<_> {
5930        let tracked = spawn_status_worker(scope, "status-tracked", || {
5931            short_status_borrowed_tracked_only_head_matches_index_parallel(
5932                worktree_root,
5933                git_dir,
5934                format,
5935                &borrowed,
5936                &stat_cache,
5937                sparse_checkout_active,
5938                untracked_mode,
5939            )
5940        })?;
5941        let untracked =
5942            spawn_status_worker(scope, "status-untracked", || -> Result<Vec<Vec<u8>>> {
5943                let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
5944                status_untracked_paths_from_borrowed_index(
5945                    worktree_root,
5946                    git_dir,
5947                    &borrowed,
5948                    &mut ignores,
5949                    untracked_mode,
5950                    None,
5951                )
5952            })?;
5953        let entries = tracked
5954            .join()
5955            .map_err(|_| GitError::Command("status worker panicked".into()))??;
5956        let untracked_paths = untracked
5957            .join()
5958            .map_err(|_| GitError::Command("status worker panicked".into()))??;
5959        Ok((entries, untracked_paths))
5960    })?;
5961    let render_start = Instant::now();
5962    append_untracked_status_entries(&mut entries, untracked_paths);
5963    if let Some(profile) = profile.as_mut() {
5964        profile.render_elapsed_us = render_start.elapsed().as_micros();
5965        profile.emit();
5966    }
5967    Ok(Some(entries))
5968}
5969
5970fn stream_short_status_borrowed_head_matches_index_if_possible<F>(
5971    worktree_root: &Path,
5972    git_dir: &Path,
5973    format: ObjectFormat,
5974    db: &FileObjectDatabase,
5975    untracked_mode: StatusUntrackedMode,
5976    emit: &mut F,
5977) -> Result<Option<()>>
5978where
5979    F: for<'a> FnMut(ShortStatusRow<'a>) -> Result<StreamControl>,
5980{
5981    let index_path = repository_index_path(git_dir);
5982    let index_metadata = match fs::metadata(&index_path) {
5983        Ok(metadata) => metadata,
5984        Err(err)
5985            if err.kind() == std::io::ErrorKind::NotFound
5986                && matches!(untracked_mode, StatusUntrackedMode::None) =>
5987        {
5988            return Ok(Some(()));
5989        }
5990        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
5991        Err(err) => return Err(err.into()),
5992    };
5993    let index_bytes = read_borrowed_index_bytes(&index_path)?;
5994    status_profile_mem(
5995        "after_index_bytes",
5996        &[
5997            ("index_file_bytes", index_metadata.len() as usize),
5998            ("index_bytes_len", index_bytes.as_ref().len()),
5999            (
6000                "index_bytes_mapped",
6001                usize::from(matches!(index_bytes, BorrowedIndexBytes::Mapped(_))),
6002            ),
6003        ],
6004    );
6005    let borrowed = match BorrowedIndex::parse(index_bytes.as_ref(), format) {
6006        Ok(index) => index,
6007        Err(GitError::Unsupported(_)) => return Ok(None),
6008        Err(err) => return Err(err),
6009    };
6010    status_profile_mem(
6011        "after_borrowed_parse",
6012        &[
6013            ("index_file_bytes", index_metadata.len() as usize),
6014            ("index_bytes_len", index_bytes.as_ref().len()),
6015            (
6016                "index_bytes_mapped",
6017                usize::from(matches!(index_bytes, BorrowedIndexBytes::Mapped(_))),
6018            ),
6019            ("borrowed_entries_len", borrowed.entries.len()),
6020            ("borrowed_entries_cap", borrowed.entries.capacity()),
6021            (
6022                "borrowed_entry_size",
6023                std::mem::size_of::<IndexEntryRef<'_>>(),
6024            ),
6025            (
6026                "borrowed_entries_cap_bytes",
6027                borrowed.entries.capacity() * std::mem::size_of::<IndexEntryRef<'_>>(),
6028            ),
6029            ("borrowed_extensions_len", borrowed.extensions.len()),
6030        ],
6031    );
6032    let sparse_checkout_active = sparse_checkout_active_for_borrowed_status(git_dir, &borrowed);
6033    if borrowed
6034        .entries
6035        .iter()
6036        .any(|entry| entry.mode == SPARSE_DIR_MODE && entry.is_skip_worktree())
6037    {
6038        return Ok(None);
6039    }
6040    if borrowed
6041        .entries
6042        .iter()
6043        .any(|entry| entry.stage() != Stage::Normal)
6044    {
6045        return Ok(None);
6046    }
6047    status_profile_mem(
6048        "after_sparse_scan",
6049        &[
6050            ("borrowed_entries_len", borrowed.entries.len()),
6051            (
6052                "borrowed_entries_cap_bytes",
6053                borrowed.entries.capacity() * std::mem::size_of::<IndexEntryRef<'_>>(),
6054            ),
6055            (
6056                "sparse_checkout_active",
6057                usize::from(sparse_checkout_active),
6058            ),
6059        ],
6060    );
6061    let Some(head_tree_oid) = resolve_head_tree_oid(git_dir, format, db)? else {
6062        return Ok(None);
6063    };
6064    status_profile_mem(
6065        "after_head_tree_oid",
6066        &[
6067            ("borrowed_entries_len", borrowed.entries.len()),
6068            (
6069                "borrowed_entries_cap_bytes",
6070                borrowed.entries.capacity() * std::mem::size_of::<IndexEntryRef<'_>>(),
6071            ),
6072        ],
6073    );
6074    let stage0_entry_count = borrowed
6075        .entries
6076        .iter()
6077        .filter(|entry| entry.stage() == Stage::Normal)
6078        .count();
6079    status_profile_mem(
6080        "after_stage0_count",
6081        &[
6082            ("stage0_entry_count", stage0_entry_count),
6083            ("borrowed_entries_len", borrowed.entries.len()),
6084        ],
6085    );
6086    if !head_matches_borrowed_index_from_cache_tree(
6087        &borrowed,
6088        format,
6089        &head_tree_oid,
6090        stage0_entry_count,
6091    )? {
6092        return Ok(None);
6093    }
6094    status_profile_mem(
6095        "after_head_matches_index",
6096        &[
6097            ("stage0_entry_count", stage0_entry_count),
6098            ("borrowed_entries_len", borrowed.entries.len()),
6099            (
6100                "borrowed_entries_cap_bytes",
6101                borrowed.entries.capacity() * std::mem::size_of::<IndexEntryRef<'_>>(),
6102            ),
6103        ],
6104    );
6105
6106    let index_mtime = file_mtime_parts(&index_metadata);
6107    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
6108    let profile_enabled = StatusProfileCounters::enabled();
6109    let mut profile = profile_enabled.then(|| StatusProfileCounters {
6110        fast_path_borrowed: true,
6111        ..StatusProfileCounters::default()
6112    });
6113
6114    if matches!(untracked_mode, StatusUntrackedMode::None) {
6115        let tracked_start = Instant::now();
6116        let tracked_control =
6117            stream_short_status_borrowed_tracked_only_head_matches_index_parallel(
6118                worktree_root,
6119                git_dir,
6120                format,
6121                &borrowed,
6122                &stat_cache,
6123                sparse_checkout_active,
6124                untracked_mode,
6125                emit,
6126            )?;
6127        if let Some(profile) = profile.as_mut() {
6128            profile.tracked_elapsed_us = tracked_start.elapsed().as_micros();
6129        }
6130        if let Some(profile) = profile.as_ref() {
6131            profile.emit();
6132        }
6133        if tracked_control.is_stop() {
6134            return Ok(Some(()));
6135        }
6136        return Ok(Some(()));
6137    }
6138
6139    if stage0_entry_count < STATUS_BORROWED_OVERLAP_MIN_STAGE0 {
6140        let tracked_start = Instant::now();
6141        let tracked_control =
6142            stream_short_status_borrowed_tracked_only_head_matches_index_parallel(
6143                worktree_root,
6144                git_dir,
6145                format,
6146                &borrowed,
6147                &stat_cache,
6148                sparse_checkout_active,
6149                untracked_mode,
6150                emit,
6151            )?;
6152        if let Some(profile) = profile.as_mut() {
6153            profile.tracked_elapsed_us = tracked_start.elapsed().as_micros();
6154        }
6155        if tracked_control.is_stop() {
6156            if let Some(profile) = profile.as_ref() {
6157                profile.emit();
6158            }
6159            return Ok(Some(()));
6160        }
6161        let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
6162        let untracked_start = Instant::now();
6163        stream_status_untracked_paths_from_borrowed_index(
6164            worktree_root,
6165            git_dir,
6166            &borrowed,
6167            &mut ignores,
6168            untracked_mode,
6169            profile.as_mut(),
6170            emit_untracked_status_entry(emit),
6171        )?;
6172        if let Some(profile) = profile.as_mut() {
6173            profile.untracked_elapsed_us = untracked_start.elapsed().as_micros();
6174            profile.emit();
6175        }
6176        return Ok(Some(()));
6177    }
6178
6179    if let Some(profile) = profile.as_mut() {
6180        profile.overlap_enabled = true;
6181    }
6182    let (tracked_control, untracked_paths, untracked_profile) =
6183        std::thread::scope(|scope| -> Result<_> {
6184            let untracked = spawn_status_worker(
6185                scope,
6186                "status-untracked",
6187                || -> Result<(Vec<Vec<u8>>, StatusProfileCounters)> {
6188                    let mut local_profile = StatusProfileCounters::default();
6189                    let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
6190                    ignores.emit_memory_profile("after_untracked_ignore");
6191                    let start = Instant::now();
6192                    let paths = status_untracked_paths_from_borrowed_index(
6193                        worktree_root,
6194                        git_dir,
6195                        &borrowed,
6196                        &mut ignores,
6197                        untracked_mode,
6198                        profile_enabled.then_some(&mut local_profile),
6199                    )?;
6200                    status_profile_mem(
6201                        "after_untracked_collect",
6202                        &[
6203                            ("untracked_paths_len", paths.len()),
6204                            ("untracked_paths_cap", paths.capacity()),
6205                            (
6206                                "untracked_paths_cap_bytes",
6207                                paths.capacity() * std::mem::size_of::<Vec<u8>>(),
6208                            ),
6209                            (
6210                                "untracked_path_payload_bytes",
6211                                paths.iter().map(Vec::capacity).sum(),
6212                            ),
6213                        ],
6214                    );
6215                    local_profile.untracked_elapsed_us = start.elapsed().as_micros();
6216                    local_profile.untracked_rows = paths.len() as u64;
6217                    Ok((paths, local_profile))
6218                },
6219            )?;
6220            let tracked_start = Instant::now();
6221            let tracked_control =
6222                stream_short_status_borrowed_tracked_only_head_matches_index_parallel(
6223                    worktree_root,
6224                    git_dir,
6225                    format,
6226                    &borrowed,
6227                    &stat_cache,
6228                    sparse_checkout_active,
6229                    untracked_mode,
6230                    emit,
6231                )?;
6232            let tracked_elapsed_us = tracked_start.elapsed().as_micros();
6233            let (untracked_paths, untracked_profile) = untracked
6234                .join()
6235                .map_err(|_| GitError::Command("status worker panicked".into()))??;
6236            if let Some(profile) = profile.as_mut() {
6237                profile.tracked_elapsed_us = tracked_elapsed_us;
6238            }
6239            Ok((
6240                tracked_control,
6241                untracked_paths,
6242                profile_enabled.then_some(untracked_profile),
6243            ))
6244        })?;
6245    status_profile_mem(
6246        "after_join",
6247        &[
6248            ("untracked_paths_len", untracked_paths.len()),
6249            ("untracked_paths_cap", untracked_paths.capacity()),
6250            (
6251                "untracked_paths_cap_bytes",
6252                untracked_paths.capacity() * std::mem::size_of::<Vec<u8>>(),
6253            ),
6254            (
6255                "untracked_path_payload_bytes",
6256                untracked_paths.iter().map(Vec::capacity).sum(),
6257            ),
6258        ],
6259    );
6260    if tracked_control.is_stop() {
6261        if let Some(profile) = profile.as_mut()
6262            && let Some(untracked_profile) = untracked_profile
6263        {
6264            profile.merge_untracked(untracked_profile);
6265            profile.emit();
6266        }
6267        return Ok(Some(()));
6268    }
6269    if let Some(profile) = profile.as_mut()
6270        && let Some(untracked_profile) = untracked_profile
6271    {
6272        profile.merge_untracked(untracked_profile);
6273    }
6274    let render_start = Instant::now();
6275    for path in untracked_paths {
6276        let row = untracked_status_row(&path);
6277        if emit(row)?.is_stop() {
6278            break;
6279        }
6280    }
6281    if let Some(profile) = profile.as_mut() {
6282        profile.render_elapsed_us = render_start.elapsed().as_micros();
6283        profile.emit();
6284    }
6285    status_profile_mem("after_render", &[]);
6286    Ok(Some(()))
6287}
6288
6289fn short_status_borrowed_head_matches_index_count_if_possible(
6290    worktree_root: &Path,
6291    git_dir: &Path,
6292    format: ObjectFormat,
6293    db: &FileObjectDatabase,
6294    untracked_mode: StatusUntrackedMode,
6295) -> Result<Option<usize>> {
6296    let index_path = repository_index_path(git_dir);
6297    let index_metadata = match fs::metadata(&index_path) {
6298        Ok(metadata) => metadata,
6299        Err(err)
6300            if err.kind() == std::io::ErrorKind::NotFound
6301                && matches!(untracked_mode, StatusUntrackedMode::None) =>
6302        {
6303            return Ok(Some(0));
6304        }
6305        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None),
6306        Err(err) => return Err(err.into()),
6307    };
6308    let index_bytes = read_borrowed_index_bytes(&index_path)?;
6309    let borrowed = match BorrowedIndex::parse(index_bytes.as_ref(), format) {
6310        Ok(index) => index,
6311        Err(GitError::Unsupported(_)) => return Ok(None),
6312        Err(err) => return Err(err),
6313    };
6314    let sparse_checkout_active = sparse_checkout_active_for_borrowed_status(git_dir, &borrowed);
6315    if borrowed
6316        .entries
6317        .iter()
6318        .any(|entry| entry.mode == SPARSE_DIR_MODE && entry.is_skip_worktree())
6319    {
6320        return Ok(None);
6321    }
6322    let Some(head_tree_oid) = resolve_head_tree_oid(git_dir, format, db)? else {
6323        return Ok(None);
6324    };
6325    let stage0_entry_count = borrowed
6326        .entries
6327        .iter()
6328        .filter(|entry| entry.stage() == Stage::Normal)
6329        .count();
6330    if !head_matches_borrowed_index_from_cache_tree(
6331        &borrowed,
6332        format,
6333        &head_tree_oid,
6334        stage0_entry_count,
6335    )? {
6336        return Ok(None);
6337    }
6338
6339    let index_mtime = file_mtime_parts(&index_metadata);
6340    let stat_cache = IndexStatCache::from_index_mtime_only(index_mtime);
6341    let profile_enabled = StatusProfileCounters::enabled();
6342    let mut profile = profile_enabled.then(|| StatusProfileCounters {
6343        fast_path_borrowed: true,
6344        ..StatusProfileCounters::default()
6345    });
6346
6347    if matches!(untracked_mode, StatusUntrackedMode::None) {
6348        let tracked_start = Instant::now();
6349        let count = short_status_borrowed_tracked_only_head_matches_index_count_parallel(
6350            worktree_root,
6351            git_dir,
6352            format,
6353            &borrowed,
6354            &stat_cache,
6355            sparse_checkout_active,
6356            untracked_mode,
6357        )?;
6358        if let Some(profile) = profile.as_mut() {
6359            profile.tracked_elapsed_us = tracked_start.elapsed().as_micros();
6360            profile.emit();
6361        }
6362        return Ok(Some(count));
6363    }
6364
6365    if stage0_entry_count < STATUS_BORROWED_OVERLAP_MIN_STAGE0 {
6366        let tracked_start = Instant::now();
6367        let tracked_count = short_status_borrowed_tracked_only_head_matches_index_count_parallel(
6368            worktree_root,
6369            git_dir,
6370            format,
6371            &borrowed,
6372            &stat_cache,
6373            sparse_checkout_active,
6374            untracked_mode,
6375        )?;
6376        if let Some(profile) = profile.as_mut() {
6377            profile.tracked_elapsed_us = tracked_start.elapsed().as_micros();
6378        }
6379        let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
6380        let untracked_start = Instant::now();
6381        let untracked_count = status_untracked_count_from_borrowed_index(
6382            worktree_root,
6383            git_dir,
6384            &borrowed,
6385            &mut ignores,
6386            untracked_mode,
6387            profile.as_mut(),
6388        )?;
6389        if let Some(profile) = profile.as_mut() {
6390            profile.untracked_elapsed_us = untracked_start.elapsed().as_micros();
6391            profile.untracked_rows = untracked_count as u64;
6392            profile.emit();
6393        }
6394        return Ok(Some(tracked_count + untracked_count));
6395    }
6396
6397    if let Some(profile) = profile.as_mut() {
6398        profile.overlap_enabled = true;
6399    }
6400    let (tracked_count, untracked_count, untracked_profile) =
6401        std::thread::scope(|scope| -> Result<_> {
6402            let tracked = spawn_status_worker(scope, "status-tracked", || {
6403                let start = Instant::now();
6404                short_status_borrowed_tracked_only_head_matches_index_count_parallel(
6405                    worktree_root,
6406                    git_dir,
6407                    format,
6408                    &borrowed,
6409                    &stat_cache,
6410                    sparse_checkout_active,
6411                    untracked_mode,
6412                )
6413                .map(|count| (count, start.elapsed().as_micros()))
6414            })?;
6415            let untracked = spawn_status_worker(
6416                scope,
6417                "status-untracked",
6418                || -> Result<(usize, StatusProfileCounters)> {
6419                    let mut local_profile = StatusProfileCounters::default();
6420                    let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
6421                    let start = Instant::now();
6422                    let count = status_untracked_count_from_borrowed_index(
6423                        worktree_root,
6424                        git_dir,
6425                        &borrowed,
6426                        &mut ignores,
6427                        untracked_mode,
6428                        profile_enabled.then_some(&mut local_profile),
6429                    )?;
6430                    local_profile.untracked_elapsed_us = start.elapsed().as_micros();
6431                    local_profile.untracked_rows = count as u64;
6432                    Ok((count, local_profile))
6433                },
6434            )?;
6435            let (tracked_count, tracked_elapsed_us) = tracked
6436                .join()
6437                .map_err(|_| GitError::Command("status worker panicked".into()))??;
6438            let (untracked_count, untracked_profile) = untracked
6439                .join()
6440                .map_err(|_| GitError::Command("status worker panicked".into()))??;
6441            if let Some(profile) = profile.as_mut() {
6442                profile.tracked_elapsed_us = tracked_elapsed_us;
6443            }
6444            Ok((
6445                tracked_count,
6446                untracked_count,
6447                profile_enabled.then_some(untracked_profile),
6448            ))
6449        })?;
6450    if let Some(profile) = profile.as_mut() {
6451        if let Some(untracked_profile) = untracked_profile {
6452            profile.merge_untracked(untracked_profile);
6453        }
6454        profile.emit();
6455    }
6456    Ok(Some(tracked_count + untracked_count))
6457}
6458
6459fn emit_untracked_status_entry<'a, F>(
6460    emit: &'a mut F,
6461) -> impl FnMut(&[u8]) -> Result<StreamControl> + 'a
6462where
6463    F: for<'row> FnMut(ShortStatusRow<'row>) -> Result<StreamControl>,
6464{
6465    |path| emit(untracked_status_row(path))
6466}
6467
6468fn untracked_status_entry(path: Vec<u8>) -> ShortStatusEntry {
6469    ShortStatusEntry {
6470        index: b'?',
6471        worktree: b'?',
6472        path,
6473        head_mode: None,
6474        index_mode: None,
6475        worktree_mode: None,
6476        head_oid: None,
6477        index_oid: None,
6478        submodule: None,
6479    }
6480}
6481
6482fn untracked_status_row(path: &[u8]) -> ShortStatusRow<'_> {
6483    ShortStatusRow {
6484        index: b'?',
6485        worktree: b'?',
6486        path,
6487        head_mode: None,
6488        index_mode: None,
6489        worktree_mode: None,
6490        head_oid: None,
6491        index_oid: None,
6492        submodule: None,
6493    }
6494}
6495
6496fn append_untracked_status_entries(
6497    entries: &mut Vec<ShortStatusEntry>,
6498    untracked_paths: Vec<Vec<u8>>,
6499) {
6500    for path in untracked_paths {
6501        entries.push(untracked_status_entry(path));
6502    }
6503}
6504
/// A tracked index entry that the precheck pass did not classify as clean.
///
/// The payload of each variant is used as a position into the index's `entries` list.
#[derive(Clone, Copy, Debug)]
enum TrackedOnlyPrecheck {
    /// The entry is reported as deleted from the worktree (`D`), unless it is intent-to-add.
    Deleted(usize),
    /// The entry needs the full worktree comparison before a status can be decided.
    Slow(usize),
}
6510
/// Payload-free classification of a tracked entry by the precheck pass.
///
/// NOTE(review): the variants appear to mirror [`TrackedOnlyPrecheck`] plus a `Clean` case for
/// entries that need no further work — confirm against the code that produces this value.
///
/// The enum has no fields, so it is cheap to copy and compare; the derives match the `Copy`
/// sibling enum and allow outcomes to be compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TrackedOnlyPrecheckOutcome {
    /// The entry needs no status row.
    Clean,
    /// The entry is classified as deleted.
    Deleted,
    /// The entry needs the slower, full comparison.
    Slow,
}
6517
6518fn short_status_tracked_only_head_matches_index_parallel(
6519    worktree_root: &Path,
6520    git_dir: &Path,
6521    format: ObjectFormat,
6522    index: &Index,
6523    stat_cache: &IndexStatCache,
6524    sparse_checkout_active: bool,
6525    untracked_mode: StatusUntrackedMode,
6526) -> Result<Vec<ShortStatusEntry>> {
6527    let prechecks = tracked_only_non_clean_prechecks_parallel(
6528        worktree_root,
6529        index,
6530        stat_cache,
6531        sparse_checkout_active,
6532    )?;
6533
6534    let mut clean_filter = None;
6535    let mut entries = Vec::new();
6536    for precheck in prechecks {
6537        match precheck {
6538            TrackedOnlyPrecheck::Deleted(idx) => {
6539                let entry = &index.entries[idx];
6540                if entry.is_intent_to_add() {
6541                    continue;
6542                }
6543                let path = entry.path.as_bytes();
6544                entries.push(ShortStatusEntry {
6545                    index: b' ',
6546                    worktree: b'D',
6547                    path: path.to_vec(),
6548                    head_mode: Some(entry.mode),
6549                    index_mode: Some(entry.mode),
6550                    worktree_mode: None,
6551                    head_oid: Some(entry.oid),
6552                    index_oid: Some(entry.oid),
6553                    submodule: None,
6554                });
6555            }
6556            TrackedOnlyPrecheck::Slow(idx) => {
6557                let entry = &index.entries[idx];
6558                let path = entry.path.as_bytes();
6559                let index_entry = TrackedEntry {
6560                    mode: entry.mode,
6561                    oid: entry.oid,
6562                };
6563                let worktree_entry = worktree_entry_for_index_entry_with_attributes(
6564                    worktree_root,
6565                    git_dir,
6566                    format,
6567                    entry,
6568                    stat_cache,
6569                    &mut clean_filter,
6570                )?;
6571                let submodule = tracked_only_submodule_status(
6572                    worktree_root,
6573                    path,
6574                    &index_entry,
6575                    worktree_entry.as_ref(),
6576                    untracked_mode,
6577                )?;
6578                let worktree_code = match worktree_entry.as_ref() {
6579                    None if entry.is_intent_to_add() => b' ',
6580                    None => b'D',
6581                    Some(_) if entry.is_intent_to_add() => b'A',
6582                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
6583                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
6584                    _ => b' ',
6585                };
6586                if worktree_code != b' ' {
6587                    entries.push(ShortStatusEntry {
6588                        index: b' ',
6589                        worktree: worktree_code,
6590                        path: path.to_vec(),
6591                        head_mode: (!entry.is_intent_to_add()).then_some(index_entry.mode),
6592                        index_mode: (!entry.is_intent_to_add()).then_some(index_entry.mode),
6593                        worktree_mode: worktree_entry.as_ref().map(|entry| entry.mode),
6594                        head_oid: (!entry.is_intent_to_add()).then_some(index_entry.oid),
6595                        index_oid: (!entry.is_intent_to_add()).then_some(index_entry.oid),
6596                        submodule: submodule.filter(|sub| sub.any()),
6597                    });
6598                }
6599            }
6600        }
6601    }
6602    entries.sort_by(|left, right| {
6603        status_sort_category(left)
6604            .cmp(&status_sort_category(right))
6605            .then_with(|| left.path.cmp(&right.path))
6606    });
6607    Ok(entries)
6608}
6609
6610fn short_status_borrowed_tracked_only_head_matches_index_parallel(
6611    worktree_root: &Path,
6612    git_dir: &Path,
6613    format: ObjectFormat,
6614    index: &BorrowedIndex<'_>,
6615    stat_cache: &IndexStatCache,
6616    sparse_checkout_active: bool,
6617    untracked_mode: StatusUntrackedMode,
6618) -> Result<Vec<ShortStatusEntry>> {
6619    let prechecks = tracked_only_borrowed_non_clean_prechecks_parallel(
6620        worktree_root,
6621        index,
6622        stat_cache,
6623        sparse_checkout_active,
6624    )?;
6625
6626    let mut clean_filter = None;
6627    let mut entries = Vec::new();
6628    for precheck in prechecks {
6629        match precheck {
6630            TrackedOnlyPrecheck::Deleted(idx) => {
6631                let entry = &index.entries[idx];
6632                if entry.is_intent_to_add() {
6633                    continue;
6634                }
6635                entries.push(ShortStatusEntry {
6636                    index: b' ',
6637                    worktree: b'D',
6638                    path: entry.path.to_vec(),
6639                    head_mode: Some(entry.mode),
6640                    index_mode: Some(entry.mode),
6641                    worktree_mode: None,
6642                    head_oid: Some(entry.oid),
6643                    index_oid: Some(entry.oid),
6644                    submodule: None,
6645                });
6646            }
6647            TrackedOnlyPrecheck::Slow(idx) => {
6648                let entry = &index.entries[idx];
6649                let index_entry = TrackedEntry {
6650                    mode: entry.mode,
6651                    oid: entry.oid,
6652                };
6653                let worktree_entry = worktree_entry_for_index_entry_ref_with_attributes(
6654                    worktree_root,
6655                    git_dir,
6656                    format,
6657                    entry,
6658                    stat_cache,
6659                    &mut clean_filter,
6660                )?;
6661                let submodule = tracked_only_submodule_status(
6662                    worktree_root,
6663                    entry.path,
6664                    &index_entry,
6665                    worktree_entry.as_ref(),
6666                    untracked_mode,
6667                )?;
6668                let worktree_code = match worktree_entry.as_ref() {
6669                    None if entry.is_intent_to_add() => b' ',
6670                    None => b'D',
6671                    Some(_) if entry.is_intent_to_add() => b'A',
6672                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
6673                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
6674                    _ => b' ',
6675                };
6676                if worktree_code != b' ' {
6677                    entries.push(ShortStatusEntry {
6678                        index: b' ',
6679                        worktree: worktree_code,
6680                        path: entry.path.to_vec(),
6681                        head_mode: (!entry.is_intent_to_add()).then_some(index_entry.mode),
6682                        index_mode: (!entry.is_intent_to_add()).then_some(index_entry.mode),
6683                        worktree_mode: worktree_entry.as_ref().map(|entry| entry.mode),
6684                        head_oid: (!entry.is_intent_to_add()).then_some(index_entry.oid),
6685                        index_oid: (!entry.is_intent_to_add()).then_some(index_entry.oid),
6686                        submodule: submodule.filter(|sub| sub.any()),
6687                    });
6688                }
6689            }
6690        }
6691    }
6692    entries.sort_by(|left, right| {
6693        status_sort_category(left)
6694            .cmp(&status_sort_category(right))
6695            .then_with(|| left.path.cmp(&right.path))
6696    });
6697    Ok(entries)
6698}
6699
6700fn stream_short_status_borrowed_tracked_only_head_matches_index_parallel<F>(
6701    worktree_root: &Path,
6702    git_dir: &Path,
6703    format: ObjectFormat,
6704    index: &BorrowedIndex<'_>,
6705    stat_cache: &IndexStatCache,
6706    sparse_checkout_active: bool,
6707    untracked_mode: StatusUntrackedMode,
6708    emit: &mut F,
6709) -> Result<StreamControl>
6710where
6711    F: for<'a> FnMut(ShortStatusRow<'a>) -> Result<StreamControl>,
6712{
6713    let prechecks = tracked_only_borrowed_non_clean_prechecks_parallel(
6714        worktree_root,
6715        index,
6716        stat_cache,
6717        sparse_checkout_active,
6718    )?;
6719
6720    let mut clean_filter = None;
6721    for precheck in prechecks {
6722        match precheck {
6723            TrackedOnlyPrecheck::Deleted(idx) => {
6724                let entry = &index.entries[idx];
6725                if entry.is_intent_to_add() {
6726                    continue;
6727                }
6728                if emit(ShortStatusRow {
6729                    index: b' ',
6730                    worktree: b'D',
6731                    path: entry.path,
6732                    head_mode: Some(entry.mode),
6733                    index_mode: Some(entry.mode),
6734                    worktree_mode: None,
6735                    head_oid: Some(entry.oid),
6736                    index_oid: Some(entry.oid),
6737                    submodule: None,
6738                })?
6739                .is_stop()
6740                {
6741                    return Ok(StreamControl::Stop);
6742                }
6743            }
6744            TrackedOnlyPrecheck::Slow(idx) => {
6745                let entry = &index.entries[idx];
6746                let index_entry = TrackedEntry {
6747                    mode: entry.mode,
6748                    oid: entry.oid,
6749                };
6750                let worktree_entry = worktree_entry_for_index_entry_ref_with_attributes(
6751                    worktree_root,
6752                    git_dir,
6753                    format,
6754                    entry,
6755                    stat_cache,
6756                    &mut clean_filter,
6757                )?;
6758                let submodule = tracked_only_submodule_status(
6759                    worktree_root,
6760                    entry.path,
6761                    &index_entry,
6762                    worktree_entry.as_ref(),
6763                    untracked_mode,
6764                )?;
6765                let worktree_code = match worktree_entry.as_ref() {
6766                    None if entry.is_intent_to_add() => b' ',
6767                    None => b'D',
6768                    Some(_) if entry.is_intent_to_add() => b'A',
6769                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
6770                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
6771                    _ => b' ',
6772                };
6773                if worktree_code != b' ' {
6774                    if emit(ShortStatusRow {
6775                        index: b' ',
6776                        worktree: worktree_code,
6777                        path: entry.path,
6778                        head_mode: (!entry.is_intent_to_add()).then_some(index_entry.mode),
6779                        index_mode: (!entry.is_intent_to_add()).then_some(index_entry.mode),
6780                        worktree_mode: worktree_entry.as_ref().map(|entry| entry.mode),
6781                        head_oid: (!entry.is_intent_to_add()).then_some(index_entry.oid),
6782                        index_oid: (!entry.is_intent_to_add()).then_some(index_entry.oid),
6783                        submodule: submodule.filter(|sub| sub.any()),
6784                    })?
6785                    .is_stop()
6786                    {
6787                        return Ok(StreamControl::Stop);
6788                    }
6789                }
6790            }
6791        }
6792    }
6793    Ok(StreamControl::Continue)
6794}
6795
6796fn short_status_borrowed_tracked_only_head_matches_index_count_parallel(
6797    worktree_root: &Path,
6798    git_dir: &Path,
6799    format: ObjectFormat,
6800    index: &BorrowedIndex<'_>,
6801    stat_cache: &IndexStatCache,
6802    sparse_checkout_active: bool,
6803    untracked_mode: StatusUntrackedMode,
6804) -> Result<usize> {
6805    let prechecks = tracked_only_borrowed_non_clean_prechecks_parallel(
6806        worktree_root,
6807        index,
6808        stat_cache,
6809        sparse_checkout_active,
6810    )?;
6811
6812    let mut clean_filter = None;
6813    let mut count = 0usize;
6814    for precheck in prechecks {
6815        match precheck {
6816            TrackedOnlyPrecheck::Deleted(_) => count += 1,
6817            TrackedOnlyPrecheck::Slow(idx) => {
6818                let entry = &index.entries[idx];
6819                let index_entry = TrackedEntry {
6820                    mode: entry.mode,
6821                    oid: entry.oid,
6822                };
6823                let worktree_entry = worktree_entry_for_index_entry_ref_with_attributes(
6824                    worktree_root,
6825                    git_dir,
6826                    format,
6827                    entry,
6828                    stat_cache,
6829                    &mut clean_filter,
6830                )?;
6831                let submodule = tracked_only_submodule_status(
6832                    worktree_root,
6833                    entry.path,
6834                    &index_entry,
6835                    worktree_entry.as_ref(),
6836                    untracked_mode,
6837                )?;
6838                let worktree_code = match worktree_entry.as_ref() {
6839                    None => b'D',
6840                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
6841                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
6842                    _ => b' ',
6843                };
6844                if worktree_code != b' ' {
6845                    count += 1;
6846                }
6847            }
6848        }
6849    }
6850    Ok(count)
6851}
6852
6853fn short_status_tracked_only_with_head_parallel(
6854    worktree_root: &Path,
6855    git_dir: &Path,
6856    format: ObjectFormat,
6857    index: &Index,
6858    stat_cache: &IndexStatCache,
6859    head: &BTreeMap<Vec<u8>, TrackedEntry>,
6860    sparse_checkout_active: bool,
6861    untracked_mode: StatusUntrackedMode,
6862) -> Result<Vec<ShortStatusEntry>> {
6863    let prechecks = tracked_only_non_clean_prechecks_parallel(
6864        worktree_root,
6865        index,
6866        stat_cache,
6867        sparse_checkout_active,
6868    )?;
6869    let mut precheck_cursor = 0usize;
6870    let mut clean_filter = None;
6871    let mut entries = Vec::new();
6872
6873    for (idx, entry) in index.entries.iter().enumerate() {
6874        if entry.stage() != Stage::Normal {
6875            continue;
6876        }
6877        let path = entry.path.as_bytes();
6878        let index_entry = TrackedEntry {
6879            mode: entry.mode,
6880            oid: entry.oid,
6881        };
6882        let head_entry = head.get(path);
6883        let visible_index_entry = (!entry.is_intent_to_add()).then_some(&index_entry);
6884        let index_code = match (head_entry, visible_index_entry) {
6885            (None, Some(_)) => b'A',
6886            (Some(_), None) => b'D',
6887            (Some(head_entry), Some(index_entry)) if *head_entry != *index_entry => b'M',
6888            _ => b' ',
6889        };
6890        let precheck = prechecks
6891            .get(precheck_cursor)
6892            .copied()
6893            .and_then(|precheck| {
6894                if tracked_only_precheck_index(precheck) == idx {
6895                    precheck_cursor += 1;
6896                    Some(precheck)
6897                } else {
6898                    None
6899                }
6900            });
6901        let (worktree_code, worktree_mode, submodule) = match precheck {
6902            None if entry.is_intent_to_add() => (b' ', None, None),
6903            None => (b' ', Some(index_entry.mode), None),
6904            Some(TrackedOnlyPrecheck::Deleted(_)) if entry.is_intent_to_add() => {
6905                (b' ', None, None)
6906            }
6907            Some(TrackedOnlyPrecheck::Deleted(_)) => (b'D', None, None),
6908            Some(TrackedOnlyPrecheck::Slow(_)) => {
6909                let worktree_entry = worktree_entry_for_index_entry_with_attributes(
6910                    worktree_root,
6911                    git_dir,
6912                    format,
6913                    entry,
6914                    stat_cache,
6915                    &mut clean_filter,
6916                )?;
6917                let submodule = tracked_only_submodule_status(
6918                    worktree_root,
6919                    path,
6920                    &index_entry,
6921                    worktree_entry.as_ref(),
6922                    untracked_mode,
6923                )?;
6924                let worktree_code = match worktree_entry.as_ref() {
6925                    None if entry.is_intent_to_add() => b' ',
6926                    None => b'D',
6927                    Some(_) if entry.is_intent_to_add() => b'A',
6928                    Some(worktree_entry) if *worktree_entry != index_entry => b'M',
6929                    _ if submodule.is_some_and(|sub| sub.any()) => b'M',
6930                    _ => b' ',
6931                };
6932                (
6933                    worktree_code,
6934                    worktree_entry.as_ref().map(|entry| entry.mode),
6935                    submodule.filter(|sub| sub.any()),
6936                )
6937            }
6938        };
6939        if index_code != b' ' || worktree_code != b' ' {
6940            entries.push(ShortStatusEntry {
6941                index: index_code,
6942                worktree: worktree_code,
6943                path: path.to_vec(),
6944                head_mode: head_entry.map(|entry| entry.mode),
6945                index_mode: visible_index_entry.map(|entry| entry.mode),
6946                worktree_mode,
6947                head_oid: head_entry.map(|entry| entry.oid),
6948                index_oid: visible_index_entry.map(|entry| entry.oid),
6949                submodule,
6950            });
6951        }
6952    }
6953
6954    let index_paths = index
6955        .entries
6956        .iter()
6957        .filter(|entry| entry.stage() == Stage::Normal)
6958        .map(|entry| entry.path.as_bytes().to_vec())
6959        .collect::<HashSet<_>>();
6960    for (path, head_entry) in head {
6961        if index_paths.contains(path.as_slice()) {
6962            continue;
6963        }
6964        entries.push(ShortStatusEntry {
6965            index: b'D',
6966            worktree: b' ',
6967            path: path.clone(),
6968            head_mode: Some(head_entry.mode),
6969            index_mode: None,
6970            worktree_mode: None,
6971            head_oid: Some(head_entry.oid),
6972            index_oid: None,
6973            submodule: None,
6974        });
6975    }
6976    entries.sort_by(|left, right| {
6977        status_sort_category(left)
6978            .cmp(&status_sort_category(right))
6979            .then_with(|| left.path.cmp(&right.path))
6980    });
6981    Ok(entries)
6982}
6983
6984fn tracked_only_precheck_index(precheck: TrackedOnlyPrecheck) -> usize {
6985    match precheck {
6986        TrackedOnlyPrecheck::Deleted(idx) | TrackedOnlyPrecheck::Slow(idx) => idx,
6987    }
6988}
6989
6990fn stage0_index_entry_count<E>(entries: &[E], mut stage: impl FnMut(&E) -> Stage) -> usize {
6991    entries
6992        .iter()
6993        .filter(|entry| stage(entry) == Stage::Normal)
6994        .count()
6995}
6996
6997fn stage0_index_chunk_ranges<E>(
6998    entries: &[E],
6999    chunk_size: usize,
7000    mut stage: impl FnMut(&E) -> Stage,
7001) -> Vec<std::ops::Range<usize>> {
7002    debug_assert!(chunk_size > 0);
7003    let mut ranges = Vec::new();
7004    let mut start = None;
7005    let mut end = 0usize;
7006    let mut normals_in_chunk = 0usize;
7007    for (idx, entry) in entries.iter().enumerate() {
7008        if stage(entry) != Stage::Normal {
7009            continue;
7010        }
7011        if start.is_none() {
7012            start = Some(idx);
7013        }
7014        end = idx + 1;
7015        normals_in_chunk += 1;
7016        if normals_in_chunk == chunk_size {
7017            ranges.push(start.expect("chunk start must exist")..end);
7018            start = None;
7019            normals_in_chunk = 0;
7020        }
7021    }
7022    if let Some(start) = start {
7023        ranges.push(start..end);
7024    }
7025    ranges
7026}
7027
7028fn tracked_only_non_clean_prechecks_parallel(
7029    worktree_root: &Path,
7030    index: &Index,
7031    stat_cache: &IndexStatCache,
7032    sparse_checkout_active: bool,
7033) -> Result<Vec<TrackedOnlyPrecheck>> {
7034    let normal_count = stage0_index_entry_count(&index.entries, IndexEntry::stage);
7035    if normal_count == 0 {
7036        return Ok(Vec::new());
7037    }
7038    let max_workers = std::thread::available_parallelism()
7039        .map(|count| count.get())
7040        .unwrap_or(1)
7041        .min(4);
7042    let worker_count = max_workers.min(normal_count.div_ceil(512)).max(1);
7043    if worker_count == 1 {
7044        let mut prechecks = Vec::new();
7045        let mut absolute = PathBuf::new();
7046        for (idx, entry) in index.entries.iter().enumerate() {
7047            if entry.stage() != Stage::Normal {
7048                continue;
7049            }
7050            match tracked_only_stat_precheck(
7051                worktree_root,
7052                entry,
7053                stat_cache,
7054                sparse_checkout_active,
7055                &mut absolute,
7056            )? {
7057                TrackedOnlyPrecheckOutcome::Clean => {}
7058                TrackedOnlyPrecheckOutcome::Deleted => {
7059                    prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
7060                }
7061                TrackedOnlyPrecheckOutcome::Slow => {
7062                    prechecks.push(TrackedOnlyPrecheck::Slow(idx));
7063                }
7064            }
7065        }
7066        return Ok(prechecks);
7067    }
7068    let chunk_size = normal_count.div_ceil(worker_count);
7069    let chunk_ranges = stage0_index_chunk_ranges(&index.entries, chunk_size, IndexEntry::stage);
7070    let mut prechecks = std::thread::scope(|scope| -> Result<Vec<TrackedOnlyPrecheck>> {
7071        let mut handles = Vec::new();
7072        for range in chunk_ranges {
7073            handles.push(spawn_status_worker(
7074                scope,
7075                "status-precheck",
7076                move || -> Result<Vec<TrackedOnlyPrecheck>> {
7077                    let mut prechecks = Vec::new();
7078                    let mut absolute = PathBuf::new();
7079                    for idx in range {
7080                        let entry = &index.entries[idx];
7081                        if entry.stage() != Stage::Normal {
7082                            continue;
7083                        }
7084                        match tracked_only_stat_precheck(
7085                            worktree_root,
7086                            entry,
7087                            stat_cache,
7088                            sparse_checkout_active,
7089                            &mut absolute,
7090                        )? {
7091                            TrackedOnlyPrecheckOutcome::Clean => {}
7092                            TrackedOnlyPrecheckOutcome::Deleted => {
7093                                prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
7094                            }
7095                            TrackedOnlyPrecheckOutcome::Slow => {
7096                                prechecks.push(TrackedOnlyPrecheck::Slow(idx));
7097                            }
7098                        }
7099                    }
7100                    Ok(prechecks)
7101                },
7102            )?);
7103        }
7104        let mut prechecks = Vec::new();
7105        for handle in handles {
7106            let mut chunk = handle
7107                .join()
7108                .map_err(|_| GitError::Command("status worker panicked".into()))??;
7109            prechecks.append(&mut chunk);
7110        }
7111        Ok(prechecks)
7112    })?;
7113    prechecks.sort_by_key(|precheck| tracked_only_precheck_index(*precheck));
7114    Ok(prechecks)
7115}
7116
7117fn tracked_only_borrowed_non_clean_prechecks_parallel(
7118    worktree_root: &Path,
7119    index: &BorrowedIndex<'_>,
7120    stat_cache: &IndexStatCache,
7121    sparse_checkout_active: bool,
7122) -> Result<Vec<TrackedOnlyPrecheck>> {
7123    let normal_count = stage0_index_entry_count(&index.entries, IndexEntryRef::stage);
7124    if normal_count == 0 {
7125        return Ok(Vec::new());
7126    }
7127    let max_workers = std::thread::available_parallelism()
7128        .map(|count| count.get())
7129        .unwrap_or(1)
7130        .min(4);
7131    let worker_count = max_workers.min(normal_count.div_ceil(512)).max(1);
7132    if worker_count == 1 {
7133        let mut prechecks = Vec::new();
7134        let mut absolute = PathBuf::new();
7135        for (idx, entry) in index.entries.iter().enumerate() {
7136            if entry.stage() != Stage::Normal {
7137                continue;
7138            }
7139            match tracked_only_borrowed_stat_precheck(
7140                worktree_root,
7141                entry,
7142                stat_cache,
7143                sparse_checkout_active,
7144                &mut absolute,
7145            )? {
7146                TrackedOnlyPrecheckOutcome::Clean => {}
7147                TrackedOnlyPrecheckOutcome::Deleted => {
7148                    prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
7149                }
7150                TrackedOnlyPrecheckOutcome::Slow => {
7151                    prechecks.push(TrackedOnlyPrecheck::Slow(idx));
7152                }
7153            }
7154        }
7155        return Ok(prechecks);
7156    }
7157    let chunk_size = normal_count.div_ceil(worker_count);
7158    let chunk_ranges = stage0_index_chunk_ranges(&index.entries, chunk_size, IndexEntryRef::stage);
7159    let mut prechecks = std::thread::scope(|scope| -> Result<Vec<TrackedOnlyPrecheck>> {
7160        let mut handles = Vec::new();
7161        for range in chunk_ranges {
7162            handles.push(spawn_status_worker(
7163                scope,
7164                "status-precheck",
7165                move || -> Result<Vec<TrackedOnlyPrecheck>> {
7166                    let mut prechecks = Vec::new();
7167                    let mut absolute = PathBuf::new();
7168                    for idx in range {
7169                        let entry = &index.entries[idx];
7170                        if entry.stage() != Stage::Normal {
7171                            continue;
7172                        }
7173                        match tracked_only_borrowed_stat_precheck(
7174                            worktree_root,
7175                            entry,
7176                            stat_cache,
7177                            sparse_checkout_active,
7178                            &mut absolute,
7179                        )? {
7180                            TrackedOnlyPrecheckOutcome::Clean => {}
7181                            TrackedOnlyPrecheckOutcome::Deleted => {
7182                                prechecks.push(TrackedOnlyPrecheck::Deleted(idx));
7183                            }
7184                            TrackedOnlyPrecheckOutcome::Slow => {
7185                                prechecks.push(TrackedOnlyPrecheck::Slow(idx));
7186                            }
7187                        }
7188                    }
7189                    Ok(prechecks)
7190                },
7191            )?);
7192        }
7193        let mut prechecks = Vec::new();
7194        for handle in handles {
7195            let mut chunk = handle
7196                .join()
7197                .map_err(|_| GitError::Command("status worker panicked".into()))??;
7198            prechecks.append(&mut chunk);
7199        }
7200        Ok(prechecks)
7201    })?;
7202    prechecks.sort_by_key(|precheck| tracked_only_precheck_index(*precheck));
7203    Ok(prechecks)
7204}
7205
7206fn tracked_only_stat_precheck(
7207    worktree_root: &Path,
7208    index_entry: &IndexEntry,
7209    stat_cache: &IndexStatCache,
7210    sparse_checkout_active: bool,
7211    absolute: &mut PathBuf,
7212) -> Result<TrackedOnlyPrecheckOutcome> {
7213    if sley_index::is_gitlink(index_entry.mode) {
7214        return Ok(TrackedOnlyPrecheckOutcome::Slow);
7215    }
7216    let git_path = index_entry.path.as_bytes();
7217    set_worktree_path_from_repo_path(worktree_root, git_path, absolute)?;
7218    let metadata = match fs::symlink_metadata(&absolute) {
7219        Ok(metadata) => metadata,
7220        Err(err)
7221            if matches!(
7222                err.kind(),
7223                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
7224            ) =>
7225        {
7226            if sparse_checkout_active && index_entry.is_skip_worktree() {
7227                return Ok(TrackedOnlyPrecheckOutcome::Clean);
7228            }
7229            return Ok(TrackedOnlyPrecheckOutcome::Deleted);
7230        }
7231        Err(err) => return Err(err.into()),
7232    };
7233    let file_type = metadata.file_type();
7234    if file_type.is_dir() || !(file_type.is_file() || file_type.is_symlink()) {
7235        return Ok(TrackedOnlyPrecheckOutcome::Slow);
7236    }
7237    if stat_cache
7238        .reuse_index_entry(index_entry, &metadata)
7239        .is_some()
7240    {
7241        Ok(TrackedOnlyPrecheckOutcome::Clean)
7242    } else {
7243        Ok(TrackedOnlyPrecheckOutcome::Slow)
7244    }
7245}
7246
7247fn tracked_only_borrowed_stat_precheck(
7248    worktree_root: &Path,
7249    index_entry: &IndexEntryRef<'_>,
7250    stat_cache: &IndexStatCache,
7251    sparse_checkout_active: bool,
7252    absolute: &mut PathBuf,
7253) -> Result<TrackedOnlyPrecheckOutcome> {
7254    if sley_index::is_gitlink(index_entry.mode) {
7255        return Ok(TrackedOnlyPrecheckOutcome::Slow);
7256    }
7257    set_worktree_path_from_repo_path(worktree_root, index_entry.path, absolute)?;
7258    let metadata = match fs::symlink_metadata(&absolute) {
7259        Ok(metadata) => metadata,
7260        Err(err)
7261            if matches!(
7262                err.kind(),
7263                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
7264            ) =>
7265        {
7266            if sparse_checkout_active && index_entry.is_skip_worktree() {
7267                return Ok(TrackedOnlyPrecheckOutcome::Clean);
7268            }
7269            return Ok(TrackedOnlyPrecheckOutcome::Deleted);
7270        }
7271        Err(err) => return Err(err.into()),
7272    };
7273    let file_type = metadata.file_type();
7274    if file_type.is_dir() || !(file_type.is_file() || file_type.is_symlink()) {
7275        return Ok(TrackedOnlyPrecheckOutcome::Slow);
7276    }
7277    if stat_cache
7278        .reuse_index_entry_ref(index_entry, &metadata)
7279        .is_some()
7280    {
7281        Ok(TrackedOnlyPrecheckOutcome::Clean)
7282    } else {
7283        Ok(TrackedOnlyPrecheckOutcome::Slow)
7284    }
7285}
7286
7287fn set_worktree_path_from_repo_path(
7288    worktree_root: &Path,
7289    git_path: &[u8],
7290    out: &mut PathBuf,
7291) -> Result<()> {
7292    out.clear();
7293    out.push(worktree_root);
7294    push_repo_path(out, git_path)
7295}
7296
7297#[cfg(unix)]
7298fn push_repo_path(out: &mut PathBuf, path: &[u8]) -> Result<()> {
7299    use std::os::unix::ffi::OsStrExt;
7300
7301    out.push(Path::new(std::ffi::OsStr::from_bytes(path)));
7302    Ok(())
7303}
7304
/// Appends the repository path `path` to `out` (non-Unix).
///
/// The path is decoded as UTF-8 and pushed one `/`-separated component at a
/// time.
///
/// # Errors
///
/// Returns `GitError::InvalidPath` when `path` is not valid UTF-8.
#[cfg(not(unix))]
fn push_repo_path(out: &mut PathBuf, path: &[u8]) -> Result<()> {
    let decoded = std::str::from_utf8(path)
        .map_err(|_| GitError::InvalidPath("index path is not utf8".into()))?;
    // `PathBuf::extend` pushes each yielded component in turn.
    out.extend(decoded.split('/'));
    Ok(())
}
7314
7315fn tracked_only_submodule_status(
7316    worktree_root: &Path,
7317    path: &[u8],
7318    index_entry: &TrackedEntry,
7319    worktree_entry: Option<&TrackedEntry>,
7320    _untracked_mode: StatusUntrackedMode,
7321) -> Result<Option<SubmoduleStatus>> {
7322    let Some(worktree_entry) = worktree_entry else {
7323        return Ok(None);
7324    };
7325    if !sley_index::is_gitlink(index_entry.mode) || !sley_index::is_gitlink(worktree_entry.mode) {
7326        return Ok(None);
7327    }
7328    let absolute = worktree_root.join(repo_path_to_os_path(path)?);
7329    let dirt = if absolute.is_dir() {
7330        submodule_dirt(&absolute)
7331    } else {
7332        0
7333    };
7334    Ok(Some(SubmoduleStatus {
7335        new_commits: index_entry.oid != worktree_entry.oid,
7336        modified_content: dirt & DIRTY_SUBMODULE_MODIFIED != 0,
7337        untracked_content: dirt & DIRTY_SUBMODULE_UNTRACKED != 0,
7338    }))
7339}
7340
7341fn status_sort_category(entry: &ShortStatusEntry) -> u8 {
7342    match (entry.index, entry.worktree) {
7343        (b'?', b'?') => 1,
7344        (b'!', b'!') => 2,
7345        _ => 0,
7346    }
7347}
7348
7349pub fn untracked_paths(
7350    worktree_root: impl AsRef<Path>,
7351    git_dir: impl AsRef<Path>,
7352    format: ObjectFormat,
7353) -> Result<Vec<Vec<u8>>> {
7354    untracked_paths_with_options(
7355        worktree_root,
7356        git_dir,
7357        format,
7358        UntrackedPathOptions::default(),
7359    )
7360}
7361
/// Pathspec filter for untracked collection. Mirrors git `ls-files` pathspec
/// semantics: literal paths, recursive directory prefixes, and fnmatch globs.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UntrackedPathspecFilter {
    /// Pathspec bytes compared against repository-relative paths. An empty
    /// path matches everything (see `untracked_pathspec_matches`).
    pub path: Vec<u8>,
    /// When set, the pathspec also matches any path strictly below `path/`.
    pub recursive: bool,
    /// When set, `path` is additionally tried as a wildmatch pattern.
    pub is_glob: bool,
}
7370
/// Options consumed by `untracked_paths_with_options`. The derived `Default`
/// leaves every flag off and every list empty.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct UntrackedPathOptions {
    /// Use the directory walk, which can report a wholly-untracked directory
    /// as a single `dir/` entry instead of listing its files.
    pub directory: bool,
    /// During the directory walk, only roll a directory up when
    /// `directory_has_file` reports a file in it.
    pub no_empty_directory: bool,
    /// During the directory walk, descend into (rather than roll up) a
    /// directory for which `directory_has_ignored` reports ignored content.
    pub preserve_ignored_directories: bool,
    /// Forwarded to `IgnoreMatcher::from_sources`; presumably enables git's
    /// standard exclude sources — confirm against that constructor.
    pub exclude_standard: bool,
    /// Return the result of `ignored_untracked_paths` instead of the
    /// untracked listing.
    pub ignored_only: bool,
    /// Extra exclude patterns forwarded to `IgnoreMatcher::from_sources`.
    pub exclude_patterns: Vec<Vec<u8>>,
    /// Per-directory exclude file names forwarded to
    /// `IgnoreMatcher::from_sources`.
    pub exclude_per_directory: Vec<String>,
    /// Pathspec filters consulted by the directory walk.
    pub pathspecs: Vec<UntrackedPathspecFilter>,
}
7382
7383// The wildmatch engine and the single-item pathspec matcher now live in the
7384// shared `sley-pathspec` crate. Re-export them so existing `sley-worktree`
7385// callers (and the t3070 `ls-files` path) keep their public surface unchanged.
7386pub use sley_pathspec::{
7387    PathspecMatchMagic, WM_CASEFOLD, WM_PATHNAME, pathspec_is_glob, pathspec_item_matches,
7388    wildmatch,
7389};
7390
7391/// Whether `path` matches an `ls-files` pathspec (literal, directory prefix, or glob).
7392pub fn untracked_pathspec_matches(spec: &UntrackedPathspecFilter, path: &[u8]) -> bool {
7393    if spec.path.is_empty() {
7394        return true;
7395    }
7396    let path_no_slash = path.strip_suffix(b"/").unwrap_or(path);
7397    if path == spec.path.as_slice() || path_no_slash == spec.path.as_slice() {
7398        return true;
7399    }
7400    if spec.recursive
7401        && let Some(rest) = path
7402            .strip_prefix(spec.path.as_slice())
7403            .and_then(|rest| rest.strip_prefix(b"/"))
7404        && !rest.is_empty()
7405    {
7406        return true;
7407    }
7408    if spec.is_glob {
7409        return untracked_wildmatch(&spec.path, path)
7410            || untracked_wildmatch(&spec.path, path_no_slash);
7411    }
7412    false
7413}
7414
7415/// Whether a directory walk must descend into `parent` to satisfy active pathspecs.
7416pub fn untracked_pathspec_needs_descent(parent: &[u8], specs: &[UntrackedPathspecFilter]) -> bool {
7417    if specs.is_empty() {
7418        return false;
7419    }
7420    let parent_prefix = if parent.is_empty() {
7421        Vec::new()
7422    } else {
7423        let mut prefix = parent.to_vec();
7424        prefix.push(b'/');
7425        prefix
7426    };
7427    for spec in specs {
7428        if !parent.is_empty()
7429            && spec.path.starts_with(&parent_prefix)
7430            && spec.path.as_slice() != parent
7431        {
7432            return true;
7433        }
7434        if spec.is_glob && glob_pathspec_may_match_under(&spec.path, parent) {
7435            return true;
7436        }
7437        if spec.recursive
7438            && !parent.is_empty()
7439            && parent.starts_with(spec.path.as_slice())
7440            && parent != spec.path.as_slice()
7441        {
7442            return true;
7443        }
7444    }
7445    false
7446}
7447
7448/// Whether some pathspec selects the directory `git_path` *as a whole* (so an
7449/// untracked directory can roll up to `dir/` under `--directory`), as opposed to
7450/// only matching something strictly below it (which forces descent). A
7451/// directory-prefix pathspec covering the directory, an exact directory match, or
7452/// a glob matching the directory's own name all count; a deeper glob such as
7453/// `dir/*.c` or an exact file path inside the directory does not.
7454fn untracked_pathspec_selects_directory(
7455    specs: &[UntrackedPathspecFilter],
7456    git_path: &[u8],
7457) -> bool {
7458    specs
7459        .iter()
7460        .any(|spec| untracked_pathspec_matches(spec, git_path))
7461}
7462
7463fn glob_pathspec_may_match_under(pattern: &[u8], dir: &[u8]) -> bool {
7464    let literal_prefix = literal_prefix_before_glob(pattern);
7465    if literal_prefix.is_empty() {
7466        return true;
7467    }
7468    if dir.is_empty() {
7469        return true;
7470    }
7471    let mut dir_prefix = dir.to_vec();
7472    dir_prefix.push(b'/');
7473    if literal_prefix.starts_with(&dir_prefix) {
7474        return true;
7475    }
7476    if dir_prefix.starts_with(&literal_prefix) {
7477        return true;
7478    }
7479    literal_prefix
7480        .strip_suffix(b"/")
7481        .is_some_and(|prefix| prefix == dir)
7482}
7483
/// Returns the bytes of `pattern` that precede its first glob metacharacter
/// (`*`, `?` or `[`), or the whole pattern when it contains none.
fn literal_prefix_before_glob(pattern: &[u8]) -> Vec<u8> {
    let literal_len = pattern
        .iter()
        .position(|byte| matches!(byte, b'*' | b'?' | b'['))
        .unwrap_or(pattern.len());
    pattern[..literal_len].to_vec()
}
7494
/// Inserts `git_path` into `paths` as a directory entry, i.e. with exactly one
/// trailing `/` appended when the path does not already end in one.
fn insert_untracked_directory(paths: &mut BTreeSet<Vec<u8>>, git_path: &[u8]) {
    let needs_slash = !git_path.ends_with(b"/");
    let mut directory = Vec::with_capacity(git_path.len() + usize::from(needs_slash));
    directory.extend_from_slice(git_path);
    if needs_slash {
        directory.push(b'/');
    }
    paths.insert(directory);
}
7502
7503/// fnmatch-style glob where `*` and `?` match any byte including `/`.
7504fn untracked_wildmatch(pattern: &[u8], text: &[u8]) -> bool {
7505    // Untracked-walk pathspec globs match with PATHMATCH semantics (`*` crosses
7506    // `/`), matching git's default (non-GLOB-magic) pathspec behavior.
7507    wildmatch(pattern, text, 0)
7508}
7509
/// Description of the ignore pattern that decided a path, as returned by
/// `standard_ignore_match`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IgnoreMatch {
    /// Where the pattern came from — presumably the exclude file it was read
    /// from; confirm against `IgnorePattern::to_match`.
    pub source: Vec<u8>,
    /// Line of the pattern within `source` (numbering base not visible here).
    pub line_number: usize,
    /// The pattern text.
    pub pattern: Vec<u8>,
    /// Presumably `false` for a negated (`!`) pattern that un-ignores the
    /// path — TODO confirm.
    pub ignored: bool,
}
7517
/// State of one attribute for a path. The variants presumably mirror
/// gitattributes' `attr`, `-attr` and `attr=value` forms — confirm against the
/// attribute parser.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AttributeState {
    Set,
    Unset,
    /// Attribute carrying an explicit value (raw bytes).
    Value(Vec<u8>),
}
7524
/// Result of looking up one attribute for a path, as produced by the
/// `standard_attributes_for_path*` functions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AttributeCheck {
    /// Attribute name.
    pub attribute: Vec<u8>,
    /// State found for the attribute; `None` presumably means unspecified —
    /// TODO confirm against `AttributeMatcher::attributes_for_path`.
    pub state: Option<AttributeState>,
}
7530
7531pub fn untracked_paths_with_options(
7532    worktree_root: impl AsRef<Path>,
7533    git_dir: impl AsRef<Path>,
7534    format: ObjectFormat,
7535    options: UntrackedPathOptions,
7536) -> Result<Vec<Vec<u8>>> {
7537    let worktree_root = worktree_root.as_ref();
7538    let git_dir = git_dir.as_ref();
7539    let db = FileObjectDatabase::from_git_dir(git_dir, format);
7540    let (index, stat_cache, _) = read_index_entries_with_stat_cache(git_dir, format, &db)?;
7541    let all_index_paths = read_all_index_paths(git_dir, format)?;
7542    let ignores = IgnoreMatcher::from_sources(
7543        worktree_root,
7544        options.exclude_standard,
7545        &options.exclude_patterns,
7546        &options.exclude_per_directory,
7547    )?;
7548    if options.ignored_only {
7549        return ignored_untracked_paths(
7550            worktree_root,
7551            git_dir,
7552            &index,
7553            &ignores,
7554            options.directory,
7555        );
7556    }
7557    if options.directory {
7558        let mut paths = BTreeSet::new();
7559        collect_untracked_directory_paths(
7560            worktree_root,
7561            git_dir,
7562            worktree_root,
7563            &index,
7564            &ignores,
7565            &options,
7566            &mut paths,
7567        )?;
7568        return Ok(paths.into_iter().collect());
7569    }
7570    let worktree = worktree_entries_with_stat_cache(
7571        worktree_root,
7572        git_dir,
7573        format,
7574        Some(&stat_cache),
7575        None,
7576        None,
7577    )?;
7578    Ok(ls_files_untracked_paths_from_worktree(
7579        &worktree,
7580        &index,
7581        &all_index_paths,
7582        &ignores,
7583    ))
7584}
7585
7586/// Untracked paths for `ls-files --others` (without `--directory`): every
7587/// untracked file is listed individually, except embedded-repository boundaries
7588/// which are emitted as `dir/` to match git's non-submodule `.git` handling.
7589fn ls_files_untracked_paths_from_worktree(
7590    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
7591    index: &BTreeMap<Vec<u8>, TrackedEntry>,
7592    all_index_paths: &BTreeSet<Vec<u8>>,
7593    ignores: &IgnoreMatcher,
7594) -> Vec<Vec<u8>> {
7595    let mut paths = BTreeSet::new();
7596    for (path, entry) in worktree {
7597        if index.contains_key(path)
7598            || all_index_paths.contains(path)
7599            || ignores.is_ignored(path, false)
7600        {
7601            continue;
7602        }
7603        if entry.mode == 0o040000 && entry.oid.is_null() {
7604            insert_untracked_directory(&mut paths, path);
7605            continue;
7606        }
7607        paths.insert(path.clone());
7608    }
7609    paths.into_iter().collect()
7610}
7611
7612pub fn path_matches_standard_ignore(
7613    worktree_root: impl AsRef<Path>,
7614    path: &[u8],
7615    is_dir: bool,
7616) -> Result<bool> {
7617    path_matches_ignore(worktree_root, path, is_dir, true, &[])
7618}
7619
7620pub fn standard_ignore_match(
7621    worktree_root: impl AsRef<Path>,
7622    path: &[u8],
7623    is_dir: bool,
7624) -> Result<Option<IgnoreMatch>> {
7625    let ignores = IgnoreMatcher::from_worktree_root(worktree_root.as_ref())?;
7626    Ok(ignores.match_for(path, is_dir).map(IgnorePattern::to_match))
7627}
7628
7629pub fn standard_attributes_for_path(
7630    worktree_root: impl AsRef<Path>,
7631    path: &[u8],
7632    requested: &[Vec<u8>],
7633    all: bool,
7634) -> Result<Vec<AttributeCheck>> {
7635    let matcher = AttributeMatcher::from_worktree_root(worktree_root.as_ref())?;
7636    Ok(matcher.attributes_for_path(path, requested, all))
7637}
7638
/// A reusable matcher for standard worktree attributes (global or
/// `core.attributesFile`, every in-tree `.gitattributes`, and
/// `$GIT_DIR/info/attributes`).
///
/// This is behaviourally identical to [`standard_attributes_for_path`] except
/// the attribute sources are read once and reused for each path.
pub struct StandardAttributeMatcher {
    /// Matcher built once by `AttributeMatcher::from_worktree_root` and reused
    /// for every lookup.
    matcher: AttributeMatcher,
}
7648
7649impl StandardAttributeMatcher {
7650    pub fn from_worktree_root(worktree_root: impl AsRef<Path>) -> Result<Self> {
7651        Ok(Self {
7652            matcher: AttributeMatcher::from_worktree_root(worktree_root.as_ref())?,
7653        })
7654    }
7655
7656    pub fn attributes_for_path(
7657        &self,
7658        path: &[u8],
7659        requested: &[Vec<u8>],
7660        all: bool,
7661    ) -> Vec<AttributeCheck> {
7662        self.matcher.attributes_for_path(path, requested, all)
7663    }
7664}
7665
7666pub fn standard_attributes_for_path_in_repo(
7667    attr_root: impl AsRef<Path>,
7668    git_dir: impl AsRef<Path>,
7669    path: &[u8],
7670    requested: &[Vec<u8>],
7671    all: bool,
7672    include_worktree_attributes: bool,
7673    ignore_case: bool,
7674) -> Result<Vec<AttributeCheck>> {
7675    let attr_root = attr_root.as_ref();
7676    let git_dir = git_dir.as_ref();
7677    let mut matcher = AttributeMatcher::default();
7678    matcher.configure_case_sensitivity(git_dir);
7679    matcher.ignore_case = ignore_case;
7680    if !matcher.read_configured_attributes(attr_root, git_dir) {
7681        matcher.read_default_global_attributes();
7682    }
7683    if include_worktree_attributes {
7684        collect_attribute_patterns(attr_root, attr_root, &mut matcher)?;
7685    }
7686    read_attribute_patterns(
7687        git_dir.join("info").join("attributes"),
7688        &mut matcher,
7689        &[],
7690        b"info/attributes",
7691        false,
7692    );
7693    Ok(matcher.attributes_for_path(path, requested, all))
7694}
7695
7696pub fn standard_attributes_for_path_from_tree(
7697    worktree_root: impl AsRef<Path>,
7698    git_dir: impl AsRef<Path>,
7699    db: &FileObjectDatabase,
7700    format: ObjectFormat,
7701    tree_oid: &ObjectId,
7702    path: &[u8],
7703    requested: &[Vec<u8>],
7704    all: bool,
7705) -> Result<Vec<AttributeCheck>> {
7706    let mut matcher = AttributeMatcher::default();
7707    let worktree_root = worktree_root.as_ref();
7708    let git_dir = git_dir.as_ref();
7709    matcher.configure_case_sensitivity(git_dir);
7710    if !matcher.read_configured_attributes(worktree_root, git_dir) {
7711        matcher.read_default_global_attributes();
7712    }
7713    collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
7714    read_attribute_patterns(
7715        git_dir.join("info").join("attributes"),
7716        &mut matcher,
7717        &[],
7718        b"info/attributes",
7719        false,
7720    );
7721    Ok(matcher.attributes_for_path(path, requested, all))
7722}
7723
7724pub fn standard_attributes_for_path_from_index(
7725    worktree_root: impl AsRef<Path>,
7726    git_dir: impl AsRef<Path>,
7727    format: ObjectFormat,
7728    path: &[u8],
7729    requested: &[Vec<u8>],
7730    all: bool,
7731) -> Result<Vec<AttributeCheck>> {
7732    let worktree_root = worktree_root.as_ref();
7733    let git_dir = git_dir.as_ref();
7734    let mut matcher = AttributeMatcher::default();
7735    matcher.configure_case_sensitivity(git_dir);
7736    if !matcher.read_configured_attributes(worktree_root, git_dir) {
7737        matcher.read_default_global_attributes();
7738    }
7739    let db = FileObjectDatabase::from_git_dir(git_dir, format);
7740    collect_attribute_patterns_from_index(git_dir, format, &db, &mut matcher)?;
7741    read_attribute_patterns(
7742        git_dir.join("info").join("attributes"),
7743        &mut matcher,
7744        &[],
7745        b"info/attributes",
7746        false,
7747    );
7748    Ok(matcher.attributes_for_path(path, requested, all))
7749}
7750
7751pub fn path_matches_ignore(
7752    worktree_root: impl AsRef<Path>,
7753    path: &[u8],
7754    is_dir: bool,
7755    exclude_standard: bool,
7756    exclude_patterns: &[Vec<u8>],
7757) -> Result<bool> {
7758    path_matches_ignore_with_per_directory(
7759        worktree_root,
7760        path,
7761        is_dir,
7762        exclude_standard,
7763        exclude_patterns,
7764        &[],
7765    )
7766}
7767
7768pub fn path_matches_ignore_with_per_directory(
7769    worktree_root: impl AsRef<Path>,
7770    path: &[u8],
7771    is_dir: bool,
7772    exclude_standard: bool,
7773    exclude_patterns: &[Vec<u8>],
7774    exclude_per_directory: &[String],
7775) -> Result<bool> {
7776    let ignores = IgnoreMatcher::from_sources(
7777        worktree_root.as_ref(),
7778        exclude_standard,
7779        exclude_patterns,
7780        exclude_per_directory,
7781    )?;
7782    Ok(ignores.is_ignored(path, is_dir))
7783}
7784
7785pub fn ignored_index_entries<'a>(
7786    worktree_root: impl AsRef<Path>,
7787    entries: &'a [IndexEntry],
7788    exclude_standard: bool,
7789    exclude_patterns: &[Vec<u8>],
7790    exclude_per_directory: &[String],
7791) -> Result<Vec<&'a IndexEntry>> {
7792    let ignores = IgnoreMatcher::from_sources(
7793        worktree_root.as_ref(),
7794        exclude_standard,
7795        exclude_patterns,
7796        exclude_per_directory,
7797    )?;
7798    Ok(entries
7799        .iter()
7800        .filter(|entry| ignores.is_ignored(entry.path.as_bytes(), false))
7801        .collect())
7802}
7803
/// Recursive walk behind the `directory` mode of untracked listing.
///
/// Visits the entries of `dir` (a directory at or below `root`) in file-name
/// order and records untracked paths in `paths`. A wholly-untracked directory
/// is rolled up to a single `dir/` entry unless one of the branches below
/// requires descending into it.
///
/// Skipped entirely: `dir` itself when it is `git_dir`, entries rejected by
/// `is_dot_git_entry` / `is_embedded_git_internals`, the active `git_dir`,
/// paths recorded as gitlinks in `index`, and paths `ignores` rejects.
///
/// # Errors
///
/// Returns I/O errors from reading directories or metadata, and
/// `GitError::InvalidPath` for an entry that is not below `root`.
fn collect_untracked_directory_paths(
    root: &Path,
    git_dir: &Path,
    dir: &Path,
    index: &BTreeMap<Vec<u8>, TrackedEntry>,
    ignores: &IgnoreMatcher,
    options: &UntrackedPathOptions,
    paths: &mut BTreeSet<Vec<u8>>,
) -> Result<()> {
    if is_same_path(dir, git_dir) {
        return Ok(());
    }
    // Sort by file name so the walk order does not depend on the filesystem.
    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
    entries.sort_by_key(|entry| entry.file_name());
    for entry in entries {
        let path = entry.path();
        if is_dot_git_entry(&path) {
            continue;
        }
        if is_embedded_git_internals(root, &path) {
            continue;
        }
        if is_same_path(&path, git_dir) {
            continue;
        }
        let metadata = entry.metadata()?;
        let relative = path.strip_prefix(root).map_err(|_| {
            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
        })?;
        let git_path = git_path_bytes(relative)?;
        // A path recorded as a gitlink in the index is tracked; never list it.
        if index
            .get(&git_path)
            .is_some_and(|entry| sley_index::is_gitlink(entry.mode))
        {
            continue;
        }
        if ignores.is_ignored(&git_path, metadata.is_dir()) {
            continue;
        }
        if metadata.is_dir() {
            // A nested repository is reported as one `dir/` entry and not entered.
            if is_nested_repository_boundary(&path, git_dir) {
                insert_untracked_directory(paths, &git_path);
                continue;
            }
            let has_tracked_below = index_has_path_under(index, &git_path);
            let needs_descent = untracked_pathspec_needs_descent(&git_path, &options.pathspecs);
            if has_tracked_below {
                // Mixed tracked/untracked content: the directory cannot be
                // rolled up, so look inside.
                collect_untracked_directory_paths(
                    root, git_dir, &path, index, ignores, options, paths,
                )?;
            } else if active_repository_worktree_dir(&path, git_dir) {
                insert_untracked_directory(paths, &git_path);
            } else if needs_descent {
                // A pathspec reaches into this wholly-untracked directory. Git's
                // `--directory` still rolls it up to `dir/` when a pathspec selects
                // the directory *as a whole* (a directory-prefix that covers it, or
                // a glob matching its name). It descends only when a pathspec
                // targets something strictly below it that does not select the
                // directory itself (e.g. a deeper glob like `dir/*.c` or an exact
                // file path).
                if untracked_pathspec_selects_directory(&options.pathspecs, &git_path) {
                    insert_untracked_directory(paths, &git_path);
                    continue;
                }
                collect_untracked_directory_paths(
                    root, git_dir, &path, index, ignores, options, paths,
                )?;
            } else if options.preserve_ignored_directories
                && directory_has_ignored(&path, root, git_dir, ignores)?
            {
                collect_untracked_directory_paths(
                    root, git_dir, &path, index, ignores, options, paths,
                )?;
            } else if !options.no_empty_directory
                || directory_has_file(&path, root, git_dir, ignores)?
            {
                // Roll the directory up; with `no_empty_directory` this happens
                // only when `directory_has_file` finds a file in it.
                insert_untracked_directory(paths, &git_path);
            }
        } else if !index.contains_key(&git_path)
            && (metadata.is_file() || metadata.file_type().is_symlink())
            && (options.pathspecs.is_empty()
                || options
                    .pathspecs
                    .iter()
                    .any(|spec| untracked_pathspec_matches(spec, &git_path)))
        {
            // A file reached here was found by descending into its parent
            // directory, which happens only when that directory is not eligible
            // for rollup (it contains tracked content, has ignored entries `-d`
            // must preserve, or a pathspec selects something strictly below it).
            // Git's `--directory` rollup is a directory-level decision made when
            // the whole directory matches; an individually-reached file is always
            // listed individually.
            paths.insert(git_path);
        }
    }
    Ok(())
}
7902
7903fn index_has_path_under(index: &BTreeMap<Vec<u8>, TrackedEntry>, directory: &[u8]) -> bool {
7904    // The index map is sorted, so a single range query finds whether any tracked
7905    // path lives under `directory/` in O(log n) — scanning every key was O(n) per
7906    // untracked directory (quadratic over a deep untracked tree).
7907    let mut prefix = directory.to_vec();
7908    prefix.push(b'/');
7909    index
7910        .range::<[u8], _>((
7911            std::ops::Bound::Included(prefix.as_slice()),
7912            std::ops::Bound::Unbounded,
7913        ))
7914        .next()
7915        .is_some_and(|(path, _)| path.starts_with(&prefix))
7916}
7917
7918/// Derives normal-mode untracked paths (directory rollup) from the worktree map
7919/// produced by the single status walk, avoiding a third filesystem traversal.
7920fn normal_untracked_paths_from_worktree(
7921    worktree: &BTreeMap<Vec<u8>, TrackedEntry>,
7922    index: &BTreeMap<Vec<u8>, TrackedEntry>,
7923    ignores: &IgnoreMatcher,
7924) -> Vec<Vec<u8>> {
7925    let mut paths = BTreeSet::new();
7926    for (path, entry) in worktree {
7927        if index.contains_key(path) || path_or_parent_is_ignored(ignores, path, false) {
7928            continue;
7929        }
7930        if entry.mode == 0o040000 && entry.oid.is_null() {
7931            insert_untracked_directory(&mut paths, path);
7932            continue;
7933        }
7934        paths.insert(untracked_normal_rollup_path(path, index, ignores));
7935    }
7936    paths.into_iter().collect()
7937}
7938
7939fn path_or_parent_is_ignored(ignores: &IgnoreMatcher, path: &[u8], is_dir: bool) -> bool {
7940    if ignores.is_ignored(path, is_dir) {
7941        return true;
7942    }
7943    for (index, byte) in path.iter().enumerate() {
7944        if *byte == b'/' && index > 0 && ignores.is_ignored(&path[..index], true) {
7945            return true;
7946        }
7947    }
7948    false
7949}
7950
7951fn status_untracked_paths_from_index(
7952    root: &Path,
7953    git_dir: &Path,
7954    index: &Index,
7955    stat_cache: &IndexStatCache,
7956    ignores: &mut IgnoreMatcher,
7957    untracked_mode: StatusUntrackedMode,
7958    profile: Option<&mut StatusProfileCounters>,
7959) -> Result<Vec<Vec<u8>>> {
7960    if matches!(untracked_mode, StatusUntrackedMode::None) {
7961        return Ok(Vec::new());
7962    }
7963    let mut paths = Vec::new();
7964    let tracked_dirs = stage0_tracked_directories(index);
7965    let tracked = IndexStatusLookup {
7966        stat_cache,
7967        tracked_dirs: &tracked_dirs,
7968    };
7969    let mut context = StatusUntrackedWalk {
7970        git_dir,
7971        tracked: &tracked,
7972        ignores,
7973        untracked_mode,
7974        profile,
7975    };
7976    collect_status_untracked_paths(&mut context, root, &[], &mut paths)?;
7977    paths.sort();
7978    paths.dedup();
7979    Ok(paths)
7980}
7981
7982fn status_untracked_paths_from_borrowed_index(
7983    root: &Path,
7984    git_dir: &Path,
7985    index: &BorrowedIndex<'_>,
7986    ignores: &mut IgnoreMatcher,
7987    untracked_mode: StatusUntrackedMode,
7988    profile: Option<&mut StatusProfileCounters>,
7989) -> Result<Vec<Vec<u8>>> {
7990    if matches!(untracked_mode, StatusUntrackedMode::None) {
7991        return Ok(Vec::new());
7992    }
7993    let mut paths = Vec::new();
7994    let tracked = BorrowedIndexLookup::new(&index.entries);
7995    let mut context = StatusUntrackedWalk {
7996        git_dir,
7997        tracked: &tracked,
7998        ignores,
7999        untracked_mode,
8000        profile,
8001    };
8002    collect_status_untracked_paths(&mut context, root, &[], &mut paths)?;
8003    paths.sort();
8004    paths.dedup();
8005    Ok(paths)
8006}
8007
8008fn stream_status_untracked_paths_from_borrowed_index<F>(
8009    root: &Path,
8010    git_dir: &Path,
8011    index: &BorrowedIndex<'_>,
8012    ignores: &mut IgnoreMatcher,
8013    untracked_mode: StatusUntrackedMode,
8014    profile: Option<&mut StatusProfileCounters>,
8015    mut emit: F,
8016) -> Result<()>
8017where
8018    F: for<'a> FnMut(&'a [u8]) -> Result<StreamControl>,
8019{
8020    if matches!(untracked_mode, StatusUntrackedMode::None) {
8021        return Ok(());
8022    }
8023    let tracked = BorrowedIndexLookup::new(&index.entries);
8024    let mut context = StatusUntrackedWalk {
8025        git_dir,
8026        tracked: &tracked,
8027        ignores,
8028        untracked_mode,
8029        profile,
8030    };
8031    stream_status_untracked_paths(&mut context, root, &[], &mut emit).map(|_| ())
8032}
8033
8034fn status_untracked_count_from_borrowed_index(
8035    root: &Path,
8036    git_dir: &Path,
8037    index: &BorrowedIndex<'_>,
8038    ignores: &mut IgnoreMatcher,
8039    untracked_mode: StatusUntrackedMode,
8040    profile: Option<&mut StatusProfileCounters>,
8041) -> Result<usize> {
8042    if matches!(untracked_mode, StatusUntrackedMode::None) {
8043        return Ok(0);
8044    }
8045    let tracked = BorrowedIndexLookup::new(&index.entries);
8046    let mut context = StatusUntrackedWalk {
8047        git_dir,
8048        tracked: &tracked,
8049        ignores,
8050        untracked_mode,
8051        profile,
8052    };
8053    let mut count = 0usize;
8054    count_status_untracked_paths(&mut context, root, &[], &mut count)?;
8055    Ok(count)
8056}
8057
/// Answers "is this path tracked?" for the status untracked walk, independent
/// of how the index is stored.
trait StatusTrackedLookup {
    /// Kind of tracked entry recorded at exactly `git_path`, or `None` when no
    /// such entry is found.
    fn tracked_kind(&self, git_path: &[u8]) -> Option<StatusTrackedKind>;
    /// How the directory `git_path` relates to tracked entries, or `None` when
    /// the implementation finds nothing tracked for it.
    fn tracked_directory_kind(&self, git_path: &[u8]) -> Option<StatusTrackedDirectoryKind>;
}
8062
/// Classification of a tracked index entry, as produced by
/// `StatusTrackedKind::from_mode_and_skip`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StatusTrackedKind {
    /// Neither a gitlink nor flagged skip-worktree.
    File,
    /// The entry's mode is a gitlink (takes precedence over skip-worktree).
    Gitlink,
    /// A non-gitlink entry flagged skip-worktree.
    SkipWorktree,
}
8069
8070impl StatusTrackedKind {
8071    fn from_mode_and_skip(mode: u32, skip_worktree: bool) -> Self {
8072        if sley_index::is_gitlink(mode) {
8073            Self::Gitlink
8074        } else if skip_worktree {
8075            Self::SkipWorktree
8076        } else {
8077            Self::File
8078        }
8079    }
8080}
8081
/// How a directory relates to the tracked entries below it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StatusTrackedDirectoryKind {
    /// The lookup found tracked content for the directory. For
    /// `BorrowedIndexLookup` this means at least one stage-0 entry below it
    /// that is not skip-worktree.
    ContainsTracked,
    /// Every stage-0 entry below the directory is flagged skip-worktree (only
    /// produced by `BorrowedIndexLookup`).
    TrackedExcluded,
}
8087
/// `StatusTrackedLookup` backed by an index stat cache plus a precomputed set
/// of tracked directories.
struct IndexStatusLookup<'a> {
    /// Per-path entries consulted for exact-path lookups.
    stat_cache: &'a IndexStatCache,
    /// Directory paths considered to contain tracked content; built by the
    /// caller with `stage0_tracked_directories`.
    tracked_dirs: &'a HashSet<&'a [u8]>,
}
8092
8093impl StatusTrackedLookup for IndexStatusLookup<'_> {
8094    fn tracked_kind(&self, git_path: &[u8]) -> Option<StatusTrackedKind> {
8095        self.stat_cache.entries.get(git_path).map(|entry| {
8096            StatusTrackedKind::from_mode_and_skip(entry.mode, entry.is_skip_worktree())
8097        })
8098    }
8099
8100    fn tracked_directory_kind(&self, git_path: &[u8]) -> Option<StatusTrackedDirectoryKind> {
8101        self.tracked_dirs
8102            .contains(git_path)
8103            .then_some(StatusTrackedDirectoryKind::ContainsTracked)
8104    }
8105}
8106
/// `StatusTrackedLookup` that searches the entry slice of a borrowed index
/// directly. The searches use `partition_point`, so the entries are assumed to
/// be sorted by path.
struct BorrowedIndexLookup<'a> {
    /// Index entries being searched.
    entries: &'a [IndexEntryRef<'a>],
    /// Position where the previous exact-path lookup ended, used as the
    /// starting point of the next one.
    exact_cursor: Cell<usize>,
    /// Scratch buffer reused to build the `dir/` prefix for directory lookups.
    directory_prefix: RefCell<Vec<u8>>,
}
8112
8113impl<'a> BorrowedIndexLookup<'a> {
8114    fn new(entries: &'a [IndexEntryRef<'a>]) -> Self {
8115        Self {
8116            entries,
8117            exact_cursor: Cell::new(0),
8118            directory_prefix: RefCell::new(Vec::new()),
8119        }
8120    }
8121}
8122
8123impl StatusTrackedLookup for BorrowedIndexLookup<'_> {
8124    fn tracked_kind(&self, git_path: &[u8]) -> Option<StatusTrackedKind> {
8125        let mut start = self.exact_cursor.get().min(self.entries.len());
8126        if start == self.entries.len() || self.entries[start].path > git_path {
8127            start = self.entries.partition_point(|entry| entry.path < git_path);
8128        } else {
8129            while start < self.entries.len() && self.entries[start].path < git_path {
8130                start += 1;
8131            }
8132        }
8133        self.exact_cursor.set(start);
8134        self.entries[start..]
8135            .iter()
8136            .take_while(|entry| entry.path == git_path)
8137            .find(|entry| entry.stage() == Stage::Normal)
8138            .map(|entry| {
8139                StatusTrackedKind::from_mode_and_skip(entry.mode, entry.is_skip_worktree())
8140            })
8141    }
8142
8143    fn tracked_directory_kind(&self, git_path: &[u8]) -> Option<StatusTrackedDirectoryKind> {
8144        let mut prefix_buf = self.directory_prefix.borrow_mut();
8145        prefix_buf.clear();
8146        prefix_buf.extend_from_slice(git_path);
8147        prefix_buf.push(b'/');
8148        let prefix = prefix_buf.as_slice();
8149        let start = self.entries.partition_point(|entry| entry.path < prefix);
8150        let mut saw_normal = false;
8151        for entry in self.entries[start..]
8152            .iter()
8153            .take_while(|entry| entry.path.starts_with(prefix))
8154        {
8155            if entry.stage() != Stage::Normal {
8156                continue;
8157            }
8158            saw_normal = true;
8159            if !entry.is_skip_worktree() {
8160                return Some(StatusTrackedDirectoryKind::ContainsTracked);
8161            }
8162        }
8163        saw_normal.then_some(StatusTrackedDirectoryKind::TrackedExcluded)
8164    }
8165}
8166
/// Shared state threaded through the status untracked walk
/// (`collect_status_untracked_paths` and its streaming/counting siblings).
struct StatusUntrackedWalk<'a, T: StatusTrackedLookup + ?Sized> {
    /// The repository's git directory, which the walk never enters.
    git_dir: &'a Path,
    /// Source of truth for which paths and directories are tracked.
    tracked: &'a T,
    /// Ignore rules; held mutably because the walk passes the matcher to
    /// `read_dir_entries_with_ignore_patterns` as it visits each directory.
    ignores: &'a mut IgnoreMatcher,
    /// Requested untracked reporting mode.
    untracked_mode: StatusUntrackedMode,
    /// Optional profiling counters updated during the walk.
    profile: Option<&'a mut StatusProfileCounters>,
}
8174
8175fn collect_status_untracked_paths<T: StatusTrackedLookup + ?Sized>(
8176    context: &mut StatusUntrackedWalk<'_, T>,
8177    dir: &Path,
8178    dir_git_path: &[u8],
8179    paths: &mut Vec<Vec<u8>>,
8180) -> Result<()> {
8181    if is_same_path(dir, context.git_dir) {
8182        return Ok(());
8183    }
8184    let ignore_len = context.ignores.patterns.len();
8185    let mut entries = read_dir_entries_with_ignore_patterns(
8186        dir,
8187        dir_git_path,
8188        context.ignores,
8189        context.profile.as_deref_mut(),
8190    )?;
8191    entries.sort_by_key(|entry| entry.file_name());
8192    let result = (|| -> Result<()> {
8193        let mut git_path = dir_git_path.to_vec();
8194        for entry in entries {
8195            let file_name = entry.file_name();
8196            if file_name == std::ffi::OsStr::new(".git") {
8197                continue;
8198            }
8199            let path_len = git_path_push_component(&mut git_path, &file_name);
8200            let entry_result = (|| -> Result<()> {
8201                if let Some(tracked_kind) = context.tracked.tracked_kind(&git_path) {
8202                    if let Some(profile) = context.profile.as_deref_mut() {
8203                        profile.tracked_exact_hits += 1;
8204                    }
8205                    if !matches!(context.untracked_mode, StatusUntrackedMode::All)
8206                        || tracked_kind == StatusTrackedKind::Gitlink
8207                    {
8208                        return Ok(());
8209                    }
8210                    if let Some(profile) = context.profile.as_deref_mut() {
8211                        profile.file_type_calls += 1;
8212                    }
8213                    let file_type = entry.file_type()?;
8214                    if file_type.is_dir() {
8215                        let path = entry.path();
8216                        if !is_same_path(&path, context.git_dir) {
8217                            collect_status_untracked_paths(context, &path, &git_path, paths)?;
8218                        }
8219                    }
8220                    return Ok(());
8221                }
8222                if let Some(profile) = context.profile.as_deref_mut() {
8223                    profile.file_type_calls += 1;
8224                }
8225                let file_type = entry.file_type()?;
8226                let is_dir = file_type.is_dir();
8227                if file_type.is_file() || file_type.is_symlink() {
8228                    if !context.ignores.is_ignored_profiled(
8229                        &git_path,
8230                        false,
8231                        context.profile.as_deref_mut(),
8232                    ) {
8233                        paths.push(git_path.clone());
8234                    }
8235                    return Ok(());
8236                } else if is_dir {
8237                    let path = entry.path();
8238                    if context.ignores.is_ignored_profiled(
8239                        &git_path,
8240                        true,
8241                        context.profile.as_deref_mut(),
8242                    ) {
8243                        return Ok(());
8244                    }
8245                    if is_same_path(&path, context.git_dir) {
8246                        return Ok(());
8247                    }
8248                    let tracked_directory = context.tracked.tracked_directory_kind(&git_path);
8249                    if let Some(directory_kind) = tracked_directory {
8250                        if let Some(profile) = context.profile.as_deref_mut() {
8251                            profile.tracked_dir_prefix_hits += 1;
8252                            if directory_kind == StatusTrackedDirectoryKind::TrackedExcluded {
8253                                profile.tracked_skip_worktree_prefix_hits += 1;
8254                            }
8255                        }
8256                    }
8257                    match context.untracked_mode {
8258                        StatusUntrackedMode::All => {
8259                            if tracked_directory.is_none()
8260                                && is_nested_repository_boundary(&path, context.git_dir)
8261                            {
8262                                push_untracked_directory(paths, &git_path);
8263                            } else {
8264                                collect_status_untracked_paths(context, &path, &git_path, paths)?;
8265                            }
8266                        }
8267                        StatusUntrackedMode::Normal => {
8268                            if tracked_directory.is_some() {
8269                                collect_status_untracked_paths(context, &path, &git_path, paths)?;
8270                            } else if is_nested_repository_boundary(&path, context.git_dir) {
8271                                push_untracked_directory(paths, &git_path);
8272                            } else if status_untracked_directory_has_file(
8273                                context, &path, &git_path,
8274                            )? {
8275                                push_untracked_directory(paths, &git_path);
8276                            }
8277                        }
8278                        StatusUntrackedMode::None => {}
8279                    }
8280                }
8281                Ok(())
8282            })();
8283            git_path.truncate(path_len);
8284            entry_result?;
8285        }
8286        Ok(())
8287    })();
8288    context.ignores.truncate(ignore_len);
8289    result
8290}
8291
8292fn stream_status_untracked_paths<T, F>(
8293    context: &mut StatusUntrackedWalk<'_, T>,
8294    dir: &Path,
8295    dir_git_path: &[u8],
8296    emit: &mut F,
8297) -> Result<StreamControl>
8298where
8299    T: StatusTrackedLookup + ?Sized,
8300    F: for<'a> FnMut(&'a [u8]) -> Result<StreamControl>,
8301{
8302    if is_same_path(dir, context.git_dir) {
8303        return Ok(StreamControl::Continue);
8304    }
8305    let ignore_len = context.ignores.patterns.len();
8306    let mut entries = read_dir_entries_with_ignore_patterns(
8307        dir,
8308        dir_git_path,
8309        context.ignores,
8310        context.profile.as_deref_mut(),
8311    )?;
8312    entries.sort_by_key(|entry| entry.file_name());
8313    let result = (|| -> Result<StreamControl> {
8314        let mut git_path = dir_git_path.to_vec();
8315        for entry in entries {
8316            let file_name = entry.file_name();
8317            if file_name == std::ffi::OsStr::new(".git") {
8318                continue;
8319            }
8320            let path_len = git_path_push_component(&mut git_path, &file_name);
8321            let entry_result = (|| -> Result<StreamControl> {
8322                if let Some(tracked_kind) = context.tracked.tracked_kind(&git_path) {
8323                    if let Some(profile) = context.profile.as_deref_mut() {
8324                        profile.tracked_exact_hits += 1;
8325                    }
8326                    if !matches!(context.untracked_mode, StatusUntrackedMode::All)
8327                        || tracked_kind == StatusTrackedKind::Gitlink
8328                    {
8329                        return Ok(StreamControl::Continue);
8330                    }
8331                    if let Some(profile) = context.profile.as_deref_mut() {
8332                        profile.file_type_calls += 1;
8333                    }
8334                    let file_type = entry.file_type()?;
8335                    if file_type.is_dir() {
8336                        let path = entry.path();
8337                        if !is_same_path(&path, context.git_dir) {
8338                            if stream_status_untracked_paths(context, &path, &git_path, emit)?
8339                                .is_stop()
8340                            {
8341                                return Ok(StreamControl::Stop);
8342                            }
8343                        }
8344                    }
8345                    return Ok(StreamControl::Continue);
8346                }
8347                if let Some(profile) = context.profile.as_deref_mut() {
8348                    profile.file_type_calls += 1;
8349                }
8350                let file_type = entry.file_type()?;
8351                let is_dir = file_type.is_dir();
8352                if file_type.is_file() || file_type.is_symlink() {
8353                    if !context.ignores.is_ignored_profiled(
8354                        &git_path,
8355                        false,
8356                        context.profile.as_deref_mut(),
8357                    ) {
8358                        if emit_status_untracked_path(context, &git_path, emit)?.is_stop() {
8359                            return Ok(StreamControl::Stop);
8360                        }
8361                    }
8362                    return Ok(StreamControl::Continue);
8363                } else if is_dir {
8364                    if context.ignores.is_ignored_profiled(
8365                        &git_path,
8366                        true,
8367                        context.profile.as_deref_mut(),
8368                    ) {
8369                        return Ok(StreamControl::Continue);
8370                    }
8371                    let path = entry.path();
8372                    if is_same_path(&path, context.git_dir) {
8373                        return Ok(StreamControl::Continue);
8374                    }
8375                    let tracked_directory = context.tracked.tracked_directory_kind(&git_path);
8376                    if let Some(directory_kind) = tracked_directory {
8377                        if let Some(profile) = context.profile.as_deref_mut() {
8378                            profile.tracked_dir_prefix_hits += 1;
8379                            if directory_kind == StatusTrackedDirectoryKind::TrackedExcluded {
8380                                profile.tracked_skip_worktree_prefix_hits += 1;
8381                            }
8382                        }
8383                    }
8384                    match context.untracked_mode {
8385                        StatusUntrackedMode::All => {
8386                            if tracked_directory.is_none()
8387                                && is_nested_repository_boundary(&path, context.git_dir)
8388                            {
8389                                let directory_len = git_path.len();
8390                                if git_path.last() != Some(&b'/') {
8391                                    git_path.push(b'/');
8392                                }
8393                                let control = emit_status_untracked_path(context, &git_path, emit)?;
8394                                git_path.truncate(directory_len);
8395                                if control.is_stop() {
8396                                    return Ok(StreamControl::Stop);
8397                                }
8398                            } else {
8399                                if stream_status_untracked_paths(context, &path, &git_path, emit)?
8400                                    .is_stop()
8401                                {
8402                                    return Ok(StreamControl::Stop);
8403                                }
8404                            }
8405                        }
8406                        StatusUntrackedMode::Normal => {
8407                            if tracked_directory.is_some() {
8408                                if stream_status_untracked_paths(context, &path, &git_path, emit)?
8409                                    .is_stop()
8410                                {
8411                                    return Ok(StreamControl::Stop);
8412                                }
8413                            } else if is_nested_repository_boundary(&path, context.git_dir)
8414                                || status_untracked_directory_has_file(context, &path, &git_path)?
8415                            {
8416                                let directory_len = git_path.len();
8417                                if git_path.last() != Some(&b'/') {
8418                                    git_path.push(b'/');
8419                                }
8420                                let control = emit_status_untracked_path(context, &git_path, emit)?;
8421                                git_path.truncate(directory_len);
8422                                if control.is_stop() {
8423                                    return Ok(StreamControl::Stop);
8424                                }
8425                            }
8426                        }
8427                        StatusUntrackedMode::None => {}
8428                    }
8429                }
8430                Ok(StreamControl::Continue)
8431            })();
8432            git_path.truncate(path_len);
8433            if entry_result?.is_stop() {
8434                return Ok(StreamControl::Stop);
8435            }
8436        }
8437        Ok(StreamControl::Continue)
8438    })();
8439    context.ignores.truncate(ignore_len);
8440    result
8441}
8442
8443fn count_status_untracked_paths<T: StatusTrackedLookup + ?Sized>(
8444    context: &mut StatusUntrackedWalk<'_, T>,
8445    dir: &Path,
8446    dir_git_path: &[u8],
8447    count: &mut usize,
8448) -> Result<()> {
8449    if is_same_path(dir, context.git_dir) {
8450        return Ok(());
8451    }
8452    let ignore_len = context.ignores.patterns.len();
8453    let mut entries = read_dir_entries_with_ignore_patterns(
8454        dir,
8455        dir_git_path,
8456        context.ignores,
8457        context.profile.as_deref_mut(),
8458    )?;
8459    entries.sort_by_key(|entry| entry.file_name());
8460    let result = (|| -> Result<()> {
8461        let mut git_path = dir_git_path.to_vec();
8462        for entry in entries {
8463            let file_name = entry.file_name();
8464            if file_name == std::ffi::OsStr::new(".git") {
8465                continue;
8466            }
8467            let path_len = git_path_push_component(&mut git_path, &file_name);
8468            let entry_result = (|| -> Result<()> {
8469                if let Some(tracked_kind) = context.tracked.tracked_kind(&git_path) {
8470                    if let Some(profile) = context.profile.as_deref_mut() {
8471                        profile.tracked_exact_hits += 1;
8472                    }
8473                    if !matches!(context.untracked_mode, StatusUntrackedMode::All)
8474                        || tracked_kind == StatusTrackedKind::Gitlink
8475                    {
8476                        return Ok(());
8477                    }
8478                    if let Some(profile) = context.profile.as_deref_mut() {
8479                        profile.file_type_calls += 1;
8480                    }
8481                    let file_type = entry.file_type()?;
8482                    if file_type.is_dir() {
8483                        let path = entry.path();
8484                        if !is_same_path(&path, context.git_dir) {
8485                            count_status_untracked_paths(context, &path, &git_path, count)?;
8486                        }
8487                    }
8488                    return Ok(());
8489                }
8490                if let Some(profile) = context.profile.as_deref_mut() {
8491                    profile.file_type_calls += 1;
8492                }
8493                let file_type = entry.file_type()?;
8494                let is_dir = file_type.is_dir();
8495                if file_type.is_file() || file_type.is_symlink() {
8496                    if !context.ignores.is_ignored_profiled(
8497                        &git_path,
8498                        false,
8499                        context.profile.as_deref_mut(),
8500                    ) {
8501                        *count += 1;
8502                    }
8503                    return Ok(());
8504                } else if is_dir {
8505                    let path = entry.path();
8506                    if context.ignores.is_ignored_profiled(
8507                        &git_path,
8508                        true,
8509                        context.profile.as_deref_mut(),
8510                    ) {
8511                        return Ok(());
8512                    }
8513                    if is_same_path(&path, context.git_dir) {
8514                        return Ok(());
8515                    }
8516                    let tracked_directory = context.tracked.tracked_directory_kind(&git_path);
8517                    if let Some(directory_kind) = tracked_directory {
8518                        if let Some(profile) = context.profile.as_deref_mut() {
8519                            profile.tracked_dir_prefix_hits += 1;
8520                            if directory_kind == StatusTrackedDirectoryKind::TrackedExcluded {
8521                                profile.tracked_skip_worktree_prefix_hits += 1;
8522                            }
8523                        }
8524                    }
8525                    match context.untracked_mode {
8526                        StatusUntrackedMode::All => {
8527                            if tracked_directory.is_none()
8528                                && is_nested_repository_boundary(&path, context.git_dir)
8529                            {
8530                                *count += 1;
8531                            } else {
8532                                count_status_untracked_paths(context, &path, &git_path, count)?;
8533                            }
8534                        }
8535                        StatusUntrackedMode::Normal => {
8536                            if tracked_directory.is_some() {
8537                                count_status_untracked_paths(context, &path, &git_path, count)?;
8538                            } else if is_nested_repository_boundary(&path, context.git_dir)
8539                                || status_untracked_directory_has_file(context, &path, &git_path)?
8540                            {
8541                                *count += 1;
8542                            }
8543                        }
8544                        StatusUntrackedMode::None => {}
8545                    }
8546                }
8547                Ok(())
8548            })();
8549            git_path.truncate(path_len);
8550            entry_result?;
8551        }
8552        Ok(())
8553    })();
8554    context.ignores.truncate(ignore_len);
8555    result
8556}
8557
8558fn emit_status_untracked_path<T, F>(
8559    context: &mut StatusUntrackedWalk<'_, T>,
8560    path: &[u8],
8561    emit: &mut F,
8562) -> Result<StreamControl>
8563where
8564    T: StatusTrackedLookup + ?Sized,
8565    F: for<'a> FnMut(&'a [u8]) -> Result<StreamControl>,
8566{
8567    if let Some(profile) = context.profile.as_deref_mut() {
8568        profile.untracked_rows += 1;
8569    }
8570    emit(path)
8571}
8572
8573fn stage0_tracked_directories(index: &Index) -> HashSet<&[u8]> {
8574    let mut directories = HashSet::new();
8575    for entry in index
8576        .entries
8577        .iter()
8578        .filter(|entry| entry.stage() == Stage::Normal)
8579    {
8580        let path = entry.path.as_bytes();
8581        for (idx, byte) in path.iter().enumerate() {
8582            if *byte == b'/' && idx > 0 {
8583                directories.insert(&path[..idx]);
8584            }
8585        }
8586    }
8587    directories
8588}
8589
8590fn status_untracked_directory_has_file<T: StatusTrackedLookup + ?Sized>(
8591    context: &mut StatusUntrackedWalk<'_, T>,
8592    dir: &Path,
8593    dir_git_path: &[u8],
8594) -> Result<bool> {
8595    if is_same_path(dir, context.git_dir) {
8596        return Ok(false);
8597    }
8598    let ignore_len = context.ignores.patterns.len();
8599    let mut entries = read_dir_entries_with_ignore_patterns(
8600        dir,
8601        dir_git_path,
8602        context.ignores,
8603        context.profile.as_deref_mut(),
8604    )?;
8605    entries.sort_by_key(|entry| entry.file_name());
8606    let result = (|| -> Result<bool> {
8607        let mut git_path = dir_git_path.to_vec();
8608        for entry in entries {
8609            let file_name = entry.file_name();
8610            if file_name == std::ffi::OsStr::new(".git") {
8611                continue;
8612            }
8613            let path_len = git_path_push_component(&mut git_path, &file_name);
8614            let entry_result = (|| -> Result<Option<bool>> {
8615                if let Some(profile) = context.profile.as_deref_mut() {
8616                    profile.file_type_calls += 1;
8617                }
8618                let file_type = entry.file_type()?;
8619                let is_dir = file_type.is_dir();
8620                if context.ignores.is_ignored_profiled(
8621                    &git_path,
8622                    is_dir,
8623                    context.profile.as_deref_mut(),
8624                ) {
8625                    return Ok(None);
8626                }
8627                if file_type.is_file() || file_type.is_symlink() {
8628                    return Ok(Some(true));
8629                }
8630                if is_dir {
8631                    let path = entry.path();
8632                    if is_same_path(&path, context.git_dir) {
8633                        return Ok(None);
8634                    }
8635                    if is_nested_repository_boundary(&path, context.git_dir) {
8636                        return Ok(Some(true));
8637                    }
8638                    if status_untracked_directory_has_file(context, &path, &git_path)? {
8639                        return Ok(Some(true));
8640                    }
8641                }
8642                Ok(None)
8643            })();
8644            git_path.truncate(path_len);
8645            if let Some(has_file) = entry_result? {
8646                return Ok(has_file);
8647            }
8648        }
8649        Ok(false)
8650    })();
8651    context.ignores.truncate(ignore_len);
8652    result
8653}
8654
8655fn read_dir_entries_with_ignore_patterns(
8656    dir: &Path,
8657    base: &[u8],
8658    matcher: &mut IgnoreMatcher,
8659    mut profile: Option<&mut StatusProfileCounters>,
8660) -> Result<Vec<fs::DirEntry>> {
8661    let mut entries = Vec::new();
8662    let mut ignore_path = None;
8663    if let Some(profile) = profile.as_deref_mut() {
8664        profile.read_dir_calls += 1;
8665    }
8666    for entry in fs::read_dir(dir)? {
8667        let entry = entry?;
8668        if let Some(profile) = profile.as_deref_mut() {
8669            profile.dir_entries_seen += 1;
8670        }
8671        if entry.file_name() == std::ffi::OsStr::new(".gitignore") {
8672            ignore_path = Some(entry.path());
8673        }
8674        entries.push(entry);
8675    }
8676    if let Some(profile) = profile {
8677        profile.read_dir_entry_vec_cap_bytes +=
8678            (entries.capacity() * std::mem::size_of::<fs::DirEntry>()) as u64;
8679        profile.read_dir_entry_vec_max_len =
8680            profile.read_dir_entry_vec_max_len.max(entries.len() as u64);
8681        profile.read_dir_entry_vec_max_cap = profile
8682            .read_dir_entry_vec_max_cap
8683            .max(entries.capacity() as u64);
8684    }
8685    if let Some(path) = ignore_path {
8686        let mut source = base.to_vec();
8687        if !source.is_empty() {
8688            source.push(b'/');
8689        }
8690        source.extend_from_slice(b".gitignore");
8691        read_per_directory_ignore_patterns_into_matcher(path, matcher, base, &source)?;
8692    }
8693    Ok(entries)
8694}
8695
8696fn build_untracked_cache(
8697    worktree_root: &Path,
8698    git_dir: &Path,
8699    format: ObjectFormat,
8700    index: &Index,
8701    untracked_mode: StatusUntrackedMode,
8702) -> Result<UntrackedCache> {
8703    let stat_cache = IndexStatCache::from_index(index, &repository_index_path(git_dir));
8704    let tracked_dirs = stage0_tracked_directories(index);
8705    let tracked = IndexStatusLookup {
8706        stat_cache: &stat_cache,
8707        tracked_dirs: &tracked_dirs,
8708    };
8709    let mut ignores = IgnoreMatcher::from_worktree_base(worktree_root)?;
8710    let mut cache = UntrackedCache::new(
8711        format,
8712        untracked_cache_ident(worktree_root),
8713        untracked_cache_dir_flags(untracked_mode),
8714    );
8715    cache.info_exclude = untracked_cache_oid_stat(&git_dir.join("info").join("exclude"), format)?;
8716    cache.excludes_file = UntrackedCacheOidStat::new(format);
8717    cache.root = Some(build_untracked_cache_dir(
8718        worktree_root,
8719        git_dir,
8720        worktree_root,
8721        &[],
8722        b"",
8723        &tracked,
8724        &mut ignores,
8725        untracked_mode,
8726        format,
8727        false,
8728    )?);
8729    Ok(cache)
8730}
8731
8732fn emit_untracked_cache_trace(old: Option<&UntrackedCache>, new: &UntrackedCache) {
8733    sley_core::trace2::perf_read_directory_data("path", "");
8734    let dir_count = new
8735        .root
8736        .as_ref()
8737        .map(count_untracked_cache_dirs)
8738        .unwrap_or(0);
8739    let Some(old) = old else {
8740        sley_core::trace2::perf_read_directory_data("node-creation", dir_count.saturating_sub(1));
8741        sley_core::trace2::perf_read_directory_data("gitignore-invalidation", 1);
8742        sley_core::trace2::perf_read_directory_data("directory-invalidation", 0);
8743        sley_core::trace2::perf_read_directory_data("opendir", dir_count);
8744        return;
8745    };
8746    let Some(old_root) = old.root.as_ref() else {
8747        sley_core::trace2::perf_read_directory_data("node-creation", dir_count.saturating_sub(1));
8748        sley_core::trace2::perf_read_directory_data("gitignore-invalidation", 1);
8749        sley_core::trace2::perf_read_directory_data("directory-invalidation", 0);
8750        sley_core::trace2::perf_read_directory_data("opendir", dir_count);
8751        return;
8752    };
8753    let Some(new_root) = new.root.as_ref() else {
8754        return;
8755    };
8756    if old.ident != new.ident || old.dir_flags != new.dir_flags {
8757        sley_core::trace2::perf_read_directory_data("node-creation", dir_count.saturating_sub(1));
8758        sley_core::trace2::perf_read_directory_data("gitignore-invalidation", 1);
8759        sley_core::trace2::perf_read_directory_data("directory-invalidation", 0);
8760        sley_core::trace2::perf_read_directory_data("opendir", dir_count);
8761        return;
8762    }
8763    if old.info_exclude.oid != new.info_exclude.oid
8764        || old.excludes_file.oid != new.excludes_file.oid
8765    {
8766        sley_core::trace2::perf_read_directory_data("node-creation", 0);
8767        sley_core::trace2::perf_read_directory_data("gitignore-invalidation", 1);
8768        sley_core::trace2::perf_read_directory_data("directory-invalidation", 0);
8769        sley_core::trace2::perf_read_directory_data("opendir", dir_count);
8770        return;
8771    }
8772    if old_root.exclude_oid != new_root.exclude_oid {
8773        sley_core::trace2::perf_read_directory_data("node-creation", 0);
8774        sley_core::trace2::perf_read_directory_data("gitignore-invalidation", 1);
8775        sley_core::trace2::perf_read_directory_data("directory-invalidation", 1);
8776        sley_core::trace2::perf_read_directory_data("opendir", dir_count);
8777        return;
8778    }
8779    let invalid_dir_count = count_invalid_untracked_cache_dirs(old_root);
8780    if invalid_dir_count > 0 {
8781        sley_core::trace2::perf_read_directory_data("node-creation", 0);
8782        sley_core::trace2::perf_read_directory_data("gitignore-invalidation", 0);
8783        sley_core::trace2::perf_read_directory_data("directory-invalidation", 0);
8784        sley_core::trace2::perf_read_directory_data("opendir", invalid_dir_count);
8785        return;
8786    }
8787    if old_root.stat != new_root.stat {
8788        sley_core::trace2::perf_read_directory_data("node-creation", 0);
8789        sley_core::trace2::perf_read_directory_data("gitignore-invalidation", 0);
8790        sley_core::trace2::perf_read_directory_data("directory-invalidation", 1);
8791        sley_core::trace2::perf_read_directory_data("opendir", 1);
8792        return;
8793    }
8794    if old.root == new.root {
8795        sley_core::trace2::perf_read_directory_data("node-creation", 0);
8796        sley_core::trace2::perf_read_directory_data("gitignore-invalidation", 0);
8797        sley_core::trace2::perf_read_directory_data("directory-invalidation", 0);
8798        sley_core::trace2::perf_read_directory_data("opendir", 0);
8799        return;
8800    }
8801    sley_core::trace2::perf_read_directory_data("node-creation", 0);
8802    sley_core::trace2::perf_read_directory_data("gitignore-invalidation", 0);
8803    sley_core::trace2::perf_read_directory_data("directory-invalidation", 1);
8804    sley_core::trace2::perf_read_directory_data("opendir", dir_count);
8805}
8806
8807fn count_untracked_cache_dirs(dir: &UntrackedCacheDir) -> usize {
8808    1 + dir
8809        .dirs
8810        .iter()
8811        .map(count_untracked_cache_dirs)
8812        .sum::<usize>()
8813}
8814
8815fn count_invalid_untracked_cache_dirs(dir: &UntrackedCacheDir) -> usize {
8816    usize::from(!dir.valid)
8817        + dir
8818            .dirs
8819            .iter()
8820            .map(count_invalid_untracked_cache_dirs)
8821            .sum::<usize>()
8822}
8823
8824#[allow(clippy::too_many_arguments)]
8825fn build_untracked_cache_dir<T: StatusTrackedLookup + ?Sized>(
8826    worktree_root: &Path,
8827    git_dir: &Path,
8828    dir: &Path,
8829    dir_git_path: &[u8],
8830    name: &[u8],
8831    tracked: &T,
8832    ignores: &mut IgnoreMatcher,
8833    untracked_mode: StatusUntrackedMode,
8834    format: ObjectFormat,
8835    check_only: bool,
8836) -> Result<UntrackedCacheDir> {
8837    let ignore_len = ignores.patterns.len();
8838    let mut entries = read_dir_entries_with_ignore_patterns(dir, dir_git_path, ignores, None)?;
8839    entries.sort_by_key(|entry| entry.file_name());
8840    let exclude_path = if dir_git_path.is_empty() {
8841        b".gitignore".to_vec()
8842    } else {
8843        let mut path = dir_git_path.to_vec();
8844        path.push(b'/');
8845        path.extend_from_slice(b".gitignore");
8846        path
8847    };
8848    let exclude_oid = if tracked.tracked_kind(&exclude_path).is_some() {
8849        None
8850    } else {
8851        per_directory_ignore_oid(dir, format)?
8852    };
8853    let mut node = UntrackedCacheDir {
8854        name: name.to_vec(),
8855        stat: fs::symlink_metadata(dir)
8856            .map(|metadata| untracked_cache_stat_data(&metadata))
8857            .unwrap_or_default(),
8858        exclude_oid,
8859        valid: true,
8860        check_only,
8861        recurse: true,
8862        ..UntrackedCacheDir::default()
8863    };
8864    let result = (|| -> Result<()> {
8865        let mut git_path = dir_git_path.to_vec();
8866        for entry in entries {
8867            let file_name = entry.file_name();
8868            if file_name == std::ffi::OsStr::new(".git") {
8869                continue;
8870            }
8871            let path_len = git_path_push_component(&mut git_path, &file_name);
8872            let entry_result = (|| -> Result<()> {
8873                if tracked.tracked_kind(&git_path).is_some() {
8874                    return Ok(());
8875                }
8876                let file_type = entry.file_type()?;
8877                let is_dir = file_type.is_dir();
8878                if ignores.is_ignored(&git_path, is_dir) {
8879                    return Ok(());
8880                }
8881                if file_type.is_file() || file_type.is_symlink() {
8882                    node.untracked.push(component_name_bytes(&file_name));
8883                    return Ok(());
8884                }
8885                if !is_dir {
8886                    return Ok(());
8887                }
8888                let path = entry.path();
8889                if is_same_path(&path, git_dir) {
8890                    return Ok(());
8891                }
8892                let component = component_name_bytes(&file_name);
8893                let tracked_directory = tracked.tracked_directory_kind(&git_path);
8894                let child_check_only = matches!(untracked_mode, StatusUntrackedMode::Normal)
8895                    && tracked_directory.is_none();
8896                let child = build_untracked_cache_dir(
8897                    worktree_root,
8898                    git_dir,
8899                    &path,
8900                    &git_path,
8901                    &component,
8902                    tracked,
8903                    ignores,
8904                    untracked_mode,
8905                    format,
8906                    child_check_only,
8907                )?;
8908                let child_has_untracked = !child.untracked.is_empty()
8909                    || child
8910                        .dirs
8911                        .iter()
8912                        .any(|dir| !dir.untracked.is_empty() || !dir.dirs.is_empty());
8913                match untracked_mode {
8914                    StatusUntrackedMode::All => {
8915                        node.dirs.push(child);
8916                    }
8917                    StatusUntrackedMode::Normal => {
8918                        if tracked_directory.is_some() {
8919                            node.dirs.push(child);
8920                        } else {
8921                            if child_has_untracked {
8922                                let mut directory = component.clone();
8923                                directory.push(b'/');
8924                                node.untracked.push(directory);
8925                            }
8926                            node.dirs.push(child);
8927                        }
8928                    }
8929                    StatusUntrackedMode::None => {}
8930                }
8931                Ok(())
8932            })();
8933            git_path.truncate(path_len);
8934            entry_result?;
8935        }
8936        Ok(())
8937    })();
8938    ignores.truncate(ignore_len);
8939    result?;
8940    if worktree_root == dir {
8941        node.name.clear();
8942    }
8943    Ok(node)
8944}
8945
/// Converts a single directory-entry name into the byte form used for repository paths.
///
/// On Unix the bytes of `name` are copied verbatim; on every other platform the name
/// is first converted lossily to UTF-8.
fn component_name_bytes(name: &std::ffi::OsStr) -> Vec<u8> {
    #[cfg(unix)]
    let bytes = {
        use std::os::unix::ffi::OsStrExt;
        Vec::from(name.as_bytes())
    };
    #[cfg(not(unix))]
    let bytes = name.to_string_lossy().into_owned().into_bytes();
    bytes
}
8957
8958fn per_directory_ignore_oid(dir: &Path, format: ObjectFormat) -> Result<Option<ObjectId>> {
8959    let path = dir.join(".gitignore");
8960    match fs::read(&path) {
8961        Ok(bytes) => Ok(Some(untracked_cache_exclude_oid(bytes, format)?)),
8962        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None),
8963        Err(err) => Err(err.into()),
8964    }
8965}
8966
8967fn untracked_cache_oid_stat(path: &Path, format: ObjectFormat) -> Result<UntrackedCacheOidStat> {
8968    let stat = fs::symlink_metadata(path)
8969        .map(|metadata| untracked_cache_stat_data(&metadata))
8970        .unwrap_or_default();
8971    let oid = match fs::read(path) {
8972        Ok(bytes) => untracked_cache_exclude_oid(bytes, format)?,
8973        Err(err) if err.kind() == std::io::ErrorKind::NotFound => ObjectId::null(format),
8974        Err(err) => return Err(err.into()),
8975    };
8976    Ok(UntrackedCacheOidStat { stat, oid })
8977}
8978
8979fn untracked_cache_exclude_oid(mut bytes: Vec<u8>, format: ObjectFormat) -> Result<ObjectId> {
8980    if !bytes.is_empty() {
8981        bytes.push(b'\n');
8982    }
8983    EncodedObject::new(ObjectType::Blob, bytes).object_id(format)
8984}
8985
8986#[cfg(unix)]
8987fn untracked_cache_stat_data(metadata: &fs::Metadata) -> UntrackedCacheStatData {
8988    use std::os::unix::fs::MetadataExt;
8989    UntrackedCacheStatData {
8990        ctime_seconds: metadata.ctime().min(u32::MAX as i64).max(0) as u32,
8991        ctime_nanoseconds: metadata.ctime_nsec().min(u32::MAX as i64).max(0) as u32,
8992        mtime_seconds: metadata.mtime().min(u32::MAX as i64).max(0) as u32,
8993        mtime_nanoseconds: metadata.mtime_nsec().min(u32::MAX as i64).max(0) as u32,
8994        dev: metadata.dev() as u32,
8995        ino: metadata.ino() as u32,
8996        uid: metadata.uid(),
8997        gid: metadata.gid(),
8998        size: metadata.size().min(u32::MAX as u64) as u32,
8999    }
9000}
9001
/// Converts file metadata into the untracked-cache stat record on non-Unix platforms.
///
/// Only the modification time and the size are filled in (each saturating at
/// `u32::MAX`); every other field keeps its default value. A modification time that
/// cannot be obtained is recorded as zero.
#[cfg(not(unix))]
fn untracked_cache_stat_data(metadata: &fs::Metadata) -> UntrackedCacheStatData {
    /// Narrows to `u32`, saturating at `u32::MAX`.
    fn saturate(value: u64) -> u32 {
        u32::try_from(value).unwrap_or(u32::MAX)
    }

    let (seconds, nanoseconds) = file_mtime_parts(metadata).unwrap_or((0, 0));
    let mut stat = UntrackedCacheStatData::default();
    stat.mtime_seconds = saturate(seconds);
    stat.mtime_nanoseconds = saturate(nanoseconds);
    stat.size = saturate(metadata.len());
    stat
}
9012
9013fn untracked_cache_dir_flags(untracked_mode: StatusUntrackedMode) -> u32 {
9014    match untracked_mode {
9015        StatusUntrackedMode::All => 0,
9016        StatusUntrackedMode::Normal | StatusUntrackedMode::None => {
9017            sley_index::untracked_cache_normal_flags()
9018        }
9019    }
9020}
9021
9022fn untracked_cache_ident(worktree_root: &Path) -> Vec<u8> {
9023    let mut ident = format!(
9024        "Location {}, system {}",
9025        worktree_root.display(),
9026        untracked_cache_system_name()
9027    )
9028    .into_bytes();
9029    ident.push(0);
9030    ident
9031}
9032
/// Returns the operating-system name used in the untracked-cache ident.
///
/// The trimmed contents of `/proc/sys/kernel/ostype` are preferred when that file is
/// readable and non-blank. Otherwise the compile-time `std::env::consts::OS` value is
/// returned with its first character upper-cased, or `"Unknown"` if it is empty.
fn untracked_cache_system_name() -> String {
    if let Ok(contents) = fs::read_to_string("/proc/sys/kernel/ostype") {
        let trimmed = contents.trim();
        if !trimmed.is_empty() {
            return trimmed.to_string();
        }
    }

    let os = std::env::consts::OS;
    let mut rest = os.chars();
    let Some(initial) = rest.next() else {
        return "Unknown".to_string();
    };
    let mut name = String::with_capacity(os.len());
    name.extend(initial.to_uppercase());
    name.push_str(rest.as_str());
    name
}
9047
9048fn push_untracked_directory(paths: &mut Vec<Vec<u8>>, git_path: &[u8]) {
9049    paths.push(untracked_directory_path(git_path));
9050}
9051
/// Returns `git_path` as an owned directory path, i.e. guaranteed to end in `/`.
///
/// A path that already ends in `/` is copied unchanged. An empty path becomes `/`.
fn untracked_directory_path(git_path: &[u8]) -> Vec<u8> {
    if git_path.ends_with(b"/") {
        return git_path.to_vec();
    }
    let mut directory = Vec::with_capacity(git_path.len() + 1);
    directory.extend_from_slice(git_path);
    directory.push(b'/');
    directory
}
9059
9060fn untracked_normal_rollup_path(
9061    file_path: &[u8],
9062    index: &BTreeMap<Vec<u8>, TrackedEntry>,
9063    ignores: &IgnoreMatcher,
9064) -> Vec<u8> {
9065    let segments = file_path
9066        .split(|byte| *byte == b'/')
9067        .filter(|segment| !segment.is_empty())
9068        .collect::<Vec<_>>();
9069    if segments.len() <= 1 {
9070        return file_path.to_vec();
9071    }
9072    let mut prefix = Vec::new();
9073    for segment in &segments[..segments.len() - 1] {
9074        if !prefix.is_empty() {
9075            prefix.push(b'/');
9076        }
9077        prefix.extend_from_slice(segment);
9078        if index_has_path_under(index, &prefix) {
9079            break;
9080        }
9081        if !ignores.is_ignored(&prefix, true) {
9082            let mut directory = prefix;
9083            directory.push(b'/');
9084            return directory;
9085        }
9086    }
9087    file_path.to_vec()
9088}
9089
9090fn ignored_traditional_rollup_path(
9091    root: &Path,
9092    git_dir: &Path,
9093    path: &[u8],
9094    index: &BTreeMap<Vec<u8>, TrackedEntry>,
9095    ignores: &IgnoreMatcher,
9096) -> Result<Vec<u8>> {
9097    let rolled = untracked_normal_rollup_path(path, index, ignores);
9098    if rolled == path {
9099        return Ok(rolled);
9100    }
9101    let Some(directory_path) = rolled.strip_suffix(b"/") else {
9102        return Ok(rolled);
9103    };
9104    if ignores.is_ignored(directory_path, true) {
9105        return Ok(rolled);
9106    }
9107    let mut absolute = PathBuf::new();
9108    set_worktree_path_from_repo_path(root, directory_path, &mut absolute)?;
9109    if directory_has_file(&absolute, root, git_dir, ignores)? {
9110        return Ok(path.to_vec());
9111    }
9112    Ok(rolled)
9113}
9114
9115fn directory_has_file(
9116    dir: &Path,
9117    root: &Path,
9118    git_dir: &Path,
9119    ignores: &IgnoreMatcher,
9120) -> Result<bool> {
9121    if is_same_path(dir, git_dir) {
9122        return Ok(false);
9123    }
9124    for entry in fs::read_dir(dir)? {
9125        let entry = entry?;
9126        let path = entry.path();
9127        if is_dot_git_entry(&path) {
9128            continue;
9129        }
9130        if is_embedded_git_internals(root, &path) {
9131            continue;
9132        }
9133        if is_same_path(&path, git_dir) {
9134            continue;
9135        }
9136        let metadata = entry.metadata()?;
9137        let relative = path.strip_prefix(root).map_err(|_| {
9138            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
9139        })?;
9140        let git_path = git_path_bytes(relative)?;
9141        if ignores.is_ignored(&git_path, metadata.is_dir()) {
9142            continue;
9143        }
9144        if metadata.is_file() || metadata.file_type().is_symlink() {
9145            return Ok(true);
9146        }
9147        if metadata.is_dir() {
9148            if is_nested_repository_boundary(&path, git_dir) {
9149                continue;
9150            }
9151            if directory_has_file(&path, root, git_dir, ignores)? {
9152                return Ok(true);
9153            }
9154        }
9155    }
9156    Ok(false)
9157}
9158
9159fn directory_has_ignored(
9160    dir: &Path,
9161    root: &Path,
9162    git_dir: &Path,
9163    ignores: &IgnoreMatcher,
9164) -> Result<bool> {
9165    if is_same_path(dir, git_dir) {
9166        return Ok(false);
9167    }
9168    for entry in fs::read_dir(dir)? {
9169        let entry = entry?;
9170        let path = entry.path();
9171        if is_dot_git_entry(&path) {
9172            continue;
9173        }
9174        if is_same_path(&path, git_dir) {
9175            continue;
9176        }
9177        let metadata = entry.metadata()?;
9178        let relative = path.strip_prefix(root).map_err(|_| {
9179            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
9180        })?;
9181        let git_path = git_path_bytes(relative)?;
9182        if ignores.is_ignored(&git_path, metadata.is_dir()) {
9183            return Ok(true);
9184        }
9185        if metadata.is_dir() && directory_has_ignored(&path, root, git_dir, ignores)? {
9186            return Ok(true);
9187        }
9188    }
9189    Ok(false)
9190}
9191
9192fn ignored_untracked_paths(
9193    root: &Path,
9194    git_dir: &Path,
9195    index: &BTreeMap<Vec<u8>, TrackedEntry>,
9196    ignores: &IgnoreMatcher,
9197    directory: bool,
9198) -> Result<Vec<Vec<u8>>> {
9199    let mut paths = BTreeSet::new();
9200    let context = IgnoredUntrackedContext {
9201        root,
9202        git_dir,
9203        index,
9204        ignores,
9205        directory,
9206    };
9207    collect_ignored_untracked_paths(&context, root, false, &mut paths)?;
9208    Ok(paths.into_iter().collect())
9209}
9210
9211fn ignored_traditional_path_is_empty_directory(root: &Path, path: &[u8]) -> Result<bool> {
9212    let Some(path) = path.strip_suffix(b"/") else {
9213        return Ok(false);
9214    };
9215    let mut absolute = PathBuf::new();
9216    set_worktree_path_from_repo_path(root, path, &mut absolute)?;
9217    match fs::read_dir(&absolute) {
9218        Ok(mut entries) => Ok(entries.next().is_none()),
9219        Err(err) if err.kind() == std::io::ErrorKind::NotADirectory => Ok(false),
9220        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(false),
9221        Err(err) => Err(err.into()),
9222    }
9223}
9224
9225struct IgnoredUntrackedContext<'a> {
9226    root: &'a Path,
9227    git_dir: &'a Path,
9228    index: &'a BTreeMap<Vec<u8>, TrackedEntry>,
9229    ignores: &'a IgnoreMatcher,
9230    directory: bool,
9231}
9232
9233fn collect_ignored_untracked_paths(
9234    context: &IgnoredUntrackedContext<'_>,
9235    dir: &Path,
9236    parent_ignored: bool,
9237    paths: &mut BTreeSet<Vec<u8>>,
9238) -> Result<()> {
9239    if is_same_path(dir, context.git_dir) {
9240        return Ok(());
9241    }
9242    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
9243    entries.sort_by_key(|entry| entry.file_name());
9244    for entry in entries {
9245        let path = entry.path();
9246        if is_dot_git_entry(&path) {
9247            continue;
9248        }
9249        if is_same_path(&path, context.git_dir) {
9250            continue;
9251        }
9252        let metadata = entry.metadata()?;
9253        let relative = path.strip_prefix(context.root).map_err(|_| {
9254            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
9255        })?;
9256        let git_path = git_path_bytes(relative)?;
9257        if metadata.is_dir() {
9258            let ignored = parent_ignored || context.ignores.is_ignored(&git_path, true);
9259            if ignored && !index_has_path_under(context.index, &git_path) {
9260                if context.directory || is_nested_repository_boundary(&path, context.git_dir) {
9261                    let mut directory_path = git_path;
9262                    directory_path.push(b'/');
9263                    paths.insert(directory_path);
9264                } else {
9265                    collect_ignored_untracked_paths(context, &path, true, paths)?;
9266                }
9267            } else {
9268                if is_nested_repository_boundary(&path, context.git_dir) {
9269                    continue;
9270                }
9271                collect_ignored_untracked_paths(context, &path, ignored, paths)?;
9272            }
9273        } else if !context.index.contains_key(&git_path)
9274            && (metadata.is_file() || metadata.file_type().is_symlink())
9275            && (parent_ignored || context.ignores.is_ignored(&git_path, false))
9276        {
9277            paths.insert(git_path);
9278        }
9279    }
9280    Ok(())
9281}
9282
9283#[derive(Debug, Default)]
9284struct IgnoreMatcher {
9285    patterns: Vec<IgnorePattern>,
9286    buckets: IgnorePatternBuckets,
9287}
9288
/// Pattern indices grouped by [`IgnoreBucketKind`].
///
/// Every `Vec<usize>` holds positions into `IgnoreMatcher::patterns`. The `HashMap`
/// buckets are keyed by a basename (or, for the last two maps, by one byte of the
/// literal); the plain vectors hold shapes that have no usable key.
#[derive(Debug, Default)]
struct IgnorePatternBuckets {
    // Keyed by the complete pattern text.
    literal_basename: HashMap<Vec<u8>, Vec<usize>>,
    directory_literal_basename: HashMap<Vec<u8>, Vec<usize>>,
    // Keyed by the basename of the pattern text.
    literal_path_basename: HashMap<Vec<u8>, Vec<usize>>,
    directory_literal_path_basename: HashMap<Vec<u8>, Vec<usize>>,
    // Keyed by the basename of the pattern text after a leading `**/` is removed.
    path_suffix_basename: HashMap<Vec<u8>, Vec<usize>>,
    directory_path_suffix_basename: HashMap<Vec<u8>, Vec<usize>>,
    // Keyed by the basename of the pattern text.
    glob_path_literal_basename: HashMap<Vec<u8>, Vec<usize>>,
    glob_directory_literal_basename: HashMap<Vec<u8>, Vec<usize>>,
    // Unkeyed lists.
    glob_path_suffix_basename: Vec<usize>,
    glob_path_prefix_basename: Vec<usize>,
    glob_directory_suffix_basename: Vec<usize>,
    glob_directory_prefix_basename: Vec<usize>,
    // Keyed by the last byte of the pattern text.
    suffix_basename: HashMap<u8, Vec<usize>>,
    // Keyed by the first byte of the pattern text.
    prefix_basename: HashMap<u8, Vec<usize>>,
    // Everything that fits no other bucket.
    other: Vec<usize>,
}
9307
9308impl IgnorePatternBuckets {
9309    fn push(&mut self, index: usize, pattern: &IgnorePattern) {
9310        match pattern.bucket_kind() {
9311            IgnoreBucketKind::LiteralBasename => self
9312                .literal_basename
9313                .entry(pattern.pattern.clone())
9314                .or_default()
9315                .push(index),
9316            IgnoreBucketKind::DirectoryLiteralBasename => self
9317                .directory_literal_basename
9318                .entry(pattern.pattern.clone())
9319                .or_default()
9320                .push(index),
9321            IgnoreBucketKind::LiteralPathBasename => self
9322                .literal_path_basename
9323                .entry(path_basename(&pattern.pattern).to_vec())
9324                .or_default()
9325                .push(index),
9326            IgnoreBucketKind::DirectoryLiteralPathBasename => self
9327                .directory_literal_path_basename
9328                .entry(path_basename(&pattern.pattern).to_vec())
9329                .or_default()
9330                .push(index),
9331            IgnoreBucketKind::PathSuffixBasename => {
9332                let suffix = pattern
9333                    .pattern
9334                    .strip_prefix(b"**/")
9335                    .unwrap_or(&pattern.pattern);
9336                self.path_suffix_basename
9337                    .entry(path_basename(suffix).to_vec())
9338                    .or_default()
9339                    .push(index);
9340            }
9341            IgnoreBucketKind::DirectoryPathSuffixBasename => {
9342                let suffix = pattern
9343                    .pattern
9344                    .strip_prefix(b"**/")
9345                    .unwrap_or(&pattern.pattern);
9346                self.directory_path_suffix_basename
9347                    .entry(path_basename(suffix).to_vec())
9348                    .or_default()
9349                    .push(index);
9350            }
9351            IgnoreBucketKind::GlobPathLiteralBasename => self
9352                .glob_path_literal_basename
9353                .entry(path_basename(&pattern.pattern).to_vec())
9354                .or_default()
9355                .push(index),
9356            IgnoreBucketKind::GlobDirectoryLiteralBasename => self
9357                .glob_directory_literal_basename
9358                .entry(path_basename(&pattern.pattern).to_vec())
9359                .or_default()
9360                .push(index),
9361            IgnoreBucketKind::GlobPathSuffixBasename => self.glob_path_suffix_basename.push(index),
9362            IgnoreBucketKind::GlobPathPrefixBasename => self.glob_path_prefix_basename.push(index),
9363            IgnoreBucketKind::GlobDirectorySuffixBasename => {
9364                self.glob_directory_suffix_basename.push(index)
9365            }
9366            IgnoreBucketKind::GlobDirectoryPrefixBasename => {
9367                self.glob_directory_prefix_basename.push(index)
9368            }
9369            IgnoreBucketKind::SuffixBasename => self
9370                .suffix_basename
9371                .entry(*pattern.pattern.last().expect("suffix literal is non-empty"))
9372                .or_default()
9373                .push(index),
9374            IgnoreBucketKind::PrefixBasename => self
9375                .prefix_basename
9376                .entry(pattern.pattern[0])
9377                .or_default()
9378                .push(index),
9379            IgnoreBucketKind::Other => self.other.push(index),
9380        }
9381    }
9382
9383    fn truncate(&mut self, len: usize) {
9384        fn truncate_indices(indices: &mut Vec<usize>, len: usize) {
9385            let keep = indices.partition_point(|index| *index < len);
9386            indices.truncate(keep);
9387        }
9388        for indices in self.literal_basename.values_mut() {
9389            truncate_indices(indices, len);
9390        }
9391        for indices in self.directory_literal_basename.values_mut() {
9392            truncate_indices(indices, len);
9393        }
9394        for indices in self.literal_path_basename.values_mut() {
9395            truncate_indices(indices, len);
9396        }
9397        for indices in self.directory_literal_path_basename.values_mut() {
9398            truncate_indices(indices, len);
9399        }
9400        for indices in self.path_suffix_basename.values_mut() {
9401            truncate_indices(indices, len);
9402        }
9403        for indices in self.directory_path_suffix_basename.values_mut() {
9404            truncate_indices(indices, len);
9405        }
9406        for indices in self.glob_path_literal_basename.values_mut() {
9407            truncate_indices(indices, len);
9408        }
9409        for indices in self.glob_directory_literal_basename.values_mut() {
9410            truncate_indices(indices, len);
9411        }
9412        truncate_indices(&mut self.glob_path_suffix_basename, len);
9413        truncate_indices(&mut self.glob_path_prefix_basename, len);
9414        truncate_indices(&mut self.glob_directory_suffix_basename, len);
9415        truncate_indices(&mut self.glob_directory_prefix_basename, len);
9416        for indices in self.suffix_basename.values_mut() {
9417            truncate_indices(indices, len);
9418        }
9419        for indices in self.prefix_basename.values_mut() {
9420            truncate_indices(indices, len);
9421        }
9422        truncate_indices(&mut self.other, len);
9423    }
9424
9425    fn profile_map_count(&self) -> usize {
9426        self.literal_basename.len()
9427            + self.directory_literal_basename.len()
9428            + self.literal_path_basename.len()
9429            + self.directory_literal_path_basename.len()
9430            + self.path_suffix_basename.len()
9431            + self.directory_path_suffix_basename.len()
9432            + self.glob_path_literal_basename.len()
9433            + self.glob_directory_literal_basename.len()
9434            + self.suffix_basename.len()
9435            + self.prefix_basename.len()
9436    }
9437
9438    fn profile_index_count(&self) -> usize {
9439        fn map_indices<K>(map: &HashMap<K, Vec<usize>>) -> usize {
9440            map.values().map(Vec::len).sum()
9441        }
9442        map_indices(&self.literal_basename)
9443            + map_indices(&self.directory_literal_basename)
9444            + map_indices(&self.literal_path_basename)
9445            + map_indices(&self.directory_literal_path_basename)
9446            + map_indices(&self.path_suffix_basename)
9447            + map_indices(&self.directory_path_suffix_basename)
9448            + map_indices(&self.glob_path_literal_basename)
9449            + map_indices(&self.glob_directory_literal_basename)
9450            + self.glob_path_suffix_basename.len()
9451            + self.glob_path_prefix_basename.len()
9452            + self.glob_directory_suffix_basename.len()
9453            + self.glob_directory_prefix_basename.len()
9454            + map_indices(&self.suffix_basename)
9455            + map_indices(&self.prefix_basename)
9456            + self.other.len()
9457    }
9458
9459    fn profile_index_vec_bytes(&self) -> usize {
9460        fn map_bytes<K>(map: &HashMap<K, Vec<usize>>) -> usize {
9461            map.values()
9462                .map(|indices| indices.capacity() * std::mem::size_of::<usize>())
9463                .sum()
9464        }
9465        map_bytes(&self.literal_basename)
9466            + map_bytes(&self.directory_literal_basename)
9467            + map_bytes(&self.literal_path_basename)
9468            + map_bytes(&self.directory_literal_path_basename)
9469            + map_bytes(&self.path_suffix_basename)
9470            + map_bytes(&self.directory_path_suffix_basename)
9471            + map_bytes(&self.glob_path_literal_basename)
9472            + map_bytes(&self.glob_directory_literal_basename)
9473            + self.glob_path_suffix_basename.capacity() * std::mem::size_of::<usize>()
9474            + self.glob_path_prefix_basename.capacity() * std::mem::size_of::<usize>()
9475            + self.glob_directory_suffix_basename.capacity() * std::mem::size_of::<usize>()
9476            + self.glob_directory_prefix_basename.capacity() * std::mem::size_of::<usize>()
9477            + map_bytes(&self.suffix_basename)
9478            + map_bytes(&self.prefix_basename)
9479            + self.other.capacity() * std::mem::size_of::<usize>()
9480    }
9481}
9482
9483#[derive(Debug)]
9484struct IgnorePattern {
9485    base: Vec<u8>,
9486    pattern: Vec<u8>,
9487    original: Vec<u8>,
9488    source: Vec<u8>,
9489    line_number: usize,
9490    negated: bool,
9491    directory_only: bool,
9492    anchored: bool,
9493    has_slash: bool,
9494    /// How `pattern` should be matched against a slash-free segment. Most
9495    /// `.gitignore` entries are literals or simple `*.ext` / `prefix*` globs, all
9496    /// of which match without the allocating wildcard DP engine; only genuinely
9497    /// complex globs fall through to [`wildcard_path_matches`].
9498    match_kind: MatchKind,
9499    glob_literal_prefix_len: usize,
9500}
9501
/// Shape of an [`IgnorePattern`], used to bypass the general wildcard matcher for
/// common patterns.
///
/// `Literal`, `Suffix` and `Prefix` apply to a single slash-free segment.
/// [`MatchKind::PathSuffix`] covers the frequent `**/literal/path` form. Everything
/// else is handled by the full engine.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MatchKind {
    /// Contains no metacharacters; compared byte for byte.
    Literal,
    /// `*X` where `X` is literal; the segment must end with `X`.
    Suffix,
    /// `X*` where `X` is literal; the segment must start with `X`.
    Prefix,
    /// `**/X/Y` with a literal tail; the path must end at `X/Y`.
    PathSuffix,
    /// Any other shape; handed to [`wildcard_path_matches`].
    Glob,
}
9519
/// Returns the part of `path` after its last `/`, or all of `path` if it has none.
///
/// A path ending in `/` therefore has an empty basename.
fn path_basename(path: &[u8]) -> &[u8] {
    match path.iter().rposition(|byte| *byte == b'/') {
        Some(last_slash) => &path[last_slash + 1..],
        None => path,
    }
}
9523
/// Reports whether `component` contains any of the glob metacharacters `*`, `?`, `[`
/// or `\`.
fn path_component_has_glob_meta(component: &[u8]) -> bool {
    const GLOB_META: &[u8] = b"*?[\\";
    component.iter().any(|byte| GLOB_META.contains(byte))
}
9529
9530fn final_component_match_kind(pattern: &[u8]) -> MatchKind {
9531    classify_ignore_pattern(path_basename(pattern))
9532}
9533
/// Calls `visit` for every component of `path` that names a directory.
///
/// Each non-empty component followed by a `/` is visited, in order. The final
/// component is visited only when `is_dir` is set and it is non-empty.
fn visit_directory_match_components(path: &[u8], is_dir: bool, mut visit: impl FnMut(&[u8])) {
    let mut components = path.split(|byte| *byte == b'/').peekable();
    while let Some(component) = components.next() {
        let is_final = components.peek().is_none();
        if component.is_empty() {
            continue;
        }
        if is_dir || !is_final {
            visit(component);
        }
    }
}
9548
/// Selects which field of `IgnorePatternBuckets` a pattern index is stored in.
///
/// Each variant corresponds to the bucket field with the same name in snake case.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum IgnoreBucketKind {
    // Stored under the complete pattern text.
    LiteralBasename,
    DirectoryLiteralBasename,
    // Stored under the basename of the pattern text.
    LiteralPathBasename,
    DirectoryLiteralPathBasename,
    // Stored under the basename of the pattern text without its leading `**/`.
    PathSuffixBasename,
    DirectoryPathSuffixBasename,
    // Stored under the basename of the pattern text.
    GlobPathLiteralBasename,
    GlobDirectoryLiteralBasename,
    // Stored in unkeyed lists.
    GlobPathSuffixBasename,
    GlobPathPrefixBasename,
    GlobDirectorySuffixBasename,
    GlobDirectoryPrefixBasename,
    // Stored under the last byte of the pattern text.
    SuffixBasename,
    // Stored under the first byte of the pattern text.
    PrefixBasename,
    // Stored in the catch-all list.
    Other,
}
9567
9568/// Classify `pattern` for [`MatchKind`]. `*X`/`X*` fast paths require the literal
9569/// part to be slash-free so that `ends_with`/`starts_with` on a single segment is
9570/// exactly equivalent to the glob (`*` never crosses `/`).
9571fn classify_ignore_pattern(pattern: &[u8]) -> MatchKind {
9572    if let Some(suffix) = pattern.strip_prefix(b"**/")
9573        && !suffix.is_empty()
9574        && !suffix
9575            .iter()
9576            .any(|byte| matches!(byte, b'*' | b'?' | b'[' | b'\\'))
9577    {
9578        return MatchKind::PathSuffix;
9579    }
9580    let stars = pattern.iter().filter(|byte| **byte == b'*').count();
9581    let other_meta = pattern
9582        .iter()
9583        .any(|byte| matches!(byte, b'?' | b'[' | b'\\'));
9584    if stars == 0 && !other_meta {
9585        return MatchKind::Literal;
9586    }
9587    if stars == 1 && !other_meta {
9588        let literal = if pattern.first() == Some(&b'*') {
9589            Some((&pattern[1..], MatchKind::Suffix))
9590        } else if pattern.last() == Some(&b'*') {
9591            Some((&pattern[..pattern.len() - 1], MatchKind::Prefix))
9592        } else {
9593            None
9594        };
9595        if let Some((literal, kind)) = literal
9596            && !literal.is_empty()
9597            && !literal.contains(&b'/')
9598        {
9599            return kind;
9600        }
9601    }
9602    MatchKind::Glob
9603}
9604
9605impl IgnoreMatcher {
9606    fn emit_memory_profile(&self, label: &str) {
9607        let pattern_payload_bytes = self
9608            .patterns
9609            .iter()
9610            .map(|pattern| {
9611                pattern.base.capacity()
9612                    + pattern.pattern.capacity()
9613                    + pattern.original.capacity()
9614                    + pattern.source.capacity()
9615            })
9616            .sum();
9617        status_profile_mem(
9618            label,
9619            &[
9620                ("ignore_patterns_len", self.patterns.len()),
9621                ("ignore_patterns_cap", self.patterns.capacity()),
9622                (
9623                    "ignore_pattern_struct_bytes",
9624                    self.patterns.capacity() * std::mem::size_of::<IgnorePattern>(),
9625                ),
9626                ("ignore_pattern_payload_bytes", pattern_payload_bytes),
9627                ("ignore_bucket_map_count", self.buckets.profile_map_count()),
9628                (
9629                    "ignore_bucket_index_count",
9630                    self.buckets.profile_index_count(),
9631                ),
9632                (
9633                    "ignore_bucket_index_vec_bytes",
9634                    self.buckets.profile_index_vec_bytes(),
9635                ),
9636            ],
9637        );
9638    }
9639
9640    fn from_sources(
9641        root: &Path,
9642        exclude_standard: bool,
9643        patterns: &[Vec<u8>],
9644        per_directory: &[String],
9645    ) -> Result<Self> {
9646        let mut matcher = if exclude_standard {
9647            Self::from_worktree_root(root)?
9648        } else {
9649            Self::default()
9650        };
9651        matcher.extend_patterns(patterns);
9652        matcher.extend_per_directory_patterns(root, per_directory)?;
9653        Ok(matcher)
9654    }
9655
9656    /// Builds only the repository-wide ignore sources — `core.excludesFile` (or the
9657    /// default global) and `$GIT_DIR/info/exclude` — *without* walking the worktree
9658    /// for `.gitignore`. The caller folds each directory's `.gitignore` into the
9659    /// matcher as it descends (see [`read_dir_ignore_patterns`]), so status reads
9660    /// the tree exactly once instead of doing a separate full-tree ignore pass.
9661    fn from_worktree_base(root: &Path) -> Result<Self> {
9662        let mut matcher = Self::default();
9663        if !read_core_excludes_file(root, &mut matcher.patterns) {
9664            read_default_global_excludes_file(&mut matcher.patterns);
9665        }
9666        read_ignore_patterns(
9667            root.join(".git").join("info").join("exclude"),
9668            &mut matcher.patterns,
9669            &[],
9670            b".git/info/exclude",
9671        );
9672        matcher.rebuild_buckets();
9673        Ok(matcher)
9674    }
9675
9676    fn from_worktree_root(root: &Path) -> Result<Self> {
9677        let mut matcher = Self::default();
9678        if !read_core_excludes_file(root, &mut matcher.patterns) {
9679            read_default_global_excludes_file(&mut matcher.patterns);
9680        }
9681        read_ignore_patterns(
9682            root.join(".git").join("info").join("exclude"),
9683            &mut matcher.patterns,
9684            &[],
9685            b".git/info/exclude",
9686        );
9687        matcher.rebuild_buckets();
9688        collect_per_directory_patterns_into_matcher(
9689            root,
9690            root,
9691            &[String::from(".gitignore")],
9692            &mut matcher,
9693        )?;
9694        Ok(matcher)
9695    }
9696
9697    fn extend_patterns(&mut self, patterns: &[Vec<u8>]) {
9698        for pattern in patterns {
9699            self.push_raw_pattern(pattern, &[], &[], 0);
9700        }
9701    }
9702
9703    fn extend_per_directory_patterns(&mut self, root: &Path, names: &[String]) -> Result<()> {
9704        if names.is_empty() {
9705            return Ok(());
9706        }
9707        collect_per_directory_patterns_into_matcher(root, root, names, self)?;
9708        Ok(())
9709    }
9710
9711    fn is_ignored(&self, path: &[u8], is_dir: bool) -> bool {
9712        self.is_ignored_profiled(path, is_dir, None)
9713    }
9714
9715    fn match_for(&self, path: &[u8], is_dir: bool) -> Option<&IgnorePattern> {
9716        self.match_index_for(path, is_dir, None)
9717            .and_then(|index| self.patterns.get(index))
9718    }
9719
9720    fn is_ignored_profiled(
9721        &self,
9722        path: &[u8],
9723        is_dir: bool,
9724        mut profile: Option<&mut StatusProfileCounters>,
9725    ) -> bool {
9726        if let Some(profile) = profile.as_deref_mut() {
9727            profile.ignore_checks += 1;
9728        }
9729        self.match_index_for(path, is_dir, profile)
9730            .is_some_and(|index| !self.patterns[index].negated)
9731    }
9732
    /// Finds the index (into `self.patterns`) of the pattern that decides
    /// `path`, or `None` when nothing matches.
    ///
    /// Instead of testing every pattern, the lookup consults each pattern
    /// bucket that could apply to `path`. Every bucket helper only ever
    /// replaces `best` with a *larger* index, so after all buckets have been
    /// consulted `best` holds the highest matching pattern index.
    ///
    /// `profile`, when supplied, is forwarded to the helpers so they can count
    /// the pattern tests they perform.
    fn match_index_for(
        &self,
        path: &[u8],
        is_dir: bool,
        mut profile: Option<&mut StatusProfileCounters>,
    ) -> Option<usize> {
        let basename = path_basename(path);
        let mut best = None;
        // Buckets keyed by the exact basename of the path.
        if let Some(indices) = self.buckets.literal_basename.get(basename) {
            self.match_bucket_candidates(indices, path, basename, is_dir, &mut best, &mut profile);
        }
        if let Some(indices) = self.buckets.literal_path_basename.get(basename) {
            self.match_bucket_candidates(indices, path, basename, is_dir, &mut best, &mut profile);
        }
        if let Some(indices) = self.buckets.path_suffix_basename.get(basename) {
            self.match_bucket_candidates(indices, path, basename, is_dir, &mut best, &mut profile);
        }
        if let Some(indices) = self.buckets.glob_path_literal_basename.get(basename) {
            self.match_bucket_candidates(indices, path, basename, is_dir, &mut best, &mut profile);
        }
        // Glob path patterns whose final component is a simple suffix/prefix
        // form; candidates are pre-filtered against the path's basename.
        self.match_final_component_candidates(
            &self.buckets.glob_path_suffix_basename,
            MatchKind::Suffix,
            basename,
            path,
            basename,
            is_dir,
            &mut best,
            &mut profile,
        );
        self.match_final_component_candidates(
            &self.buckets.glob_path_prefix_basename,
            MatchKind::Prefix,
            basename,
            path,
            basename,
            is_dir,
            &mut best,
            &mut profile,
        );
        // Directory-only buckets are looked up per component handed to the
        // closure by `visit_directory_match_components` (presumably each path
        // component that can act as a directory — confirm against that helper).
        visit_directory_match_components(path, is_dir, |component| {
            if let Some(indices) = self.buckets.directory_literal_basename.get(component) {
                self.match_bucket_candidates(
                    indices,
                    path,
                    basename,
                    is_dir,
                    &mut best,
                    &mut profile,
                );
            }
            if let Some(indices) = self.buckets.directory_literal_path_basename.get(component) {
                self.match_bucket_candidates(
                    indices,
                    path,
                    basename,
                    is_dir,
                    &mut best,
                    &mut profile,
                );
            }
            if let Some(indices) = self.buckets.directory_path_suffix_basename.get(component) {
                self.match_bucket_candidates(
                    indices,
                    path,
                    basename,
                    is_dir,
                    &mut best,
                    &mut profile,
                );
            }
            if let Some(indices) = self.buckets.glob_directory_literal_basename.get(component) {
                self.match_bucket_candidates(
                    indices,
                    path,
                    basename,
                    is_dir,
                    &mut best,
                    &mut profile,
                );
            }
            self.match_final_component_candidates(
                &self.buckets.glob_directory_suffix_basename,
                MatchKind::Suffix,
                component,
                path,
                basename,
                is_dir,
                &mut best,
                &mut profile,
            );
            self.match_final_component_candidates(
                &self.buckets.glob_directory_prefix_basename,
                MatchKind::Prefix,
                component,
                path,
                basename,
                is_dir,
                &mut best,
                &mut profile,
            );
        });
        // Slash-free suffix patterns are bucketed by the basename's last byte.
        if let Some(last) = basename.last()
            && let Some(indices) = self.buckets.suffix_basename.get(last)
        {
            self.match_bucket_candidates(indices, path, basename, is_dir, &mut best, &mut profile);
        }
        // Slash-free prefix patterns are bucketed by the basename's first byte.
        if let Some(first) = basename.first()
            && let Some(indices) = self.buckets.prefix_basename.get(first)
        {
            self.match_bucket_candidates(indices, path, basename, is_dir, &mut best, &mut profile);
        }
        // Everything that fits no specialised bucket is always consulted.
        self.match_bucket_candidates(
            &self.buckets.other,
            path,
            basename,
            is_dir,
            &mut best,
            &mut profile,
        );
        best
    }
9855
9856    fn match_bucket_candidates(
9857        &self,
9858        indices: &[usize],
9859        path: &[u8],
9860        basename: &[u8],
9861        is_dir: bool,
9862        best: &mut Option<usize>,
9863        profile: &mut Option<&mut StatusProfileCounters>,
9864    ) {
9865        for &index in indices.iter().rev() {
9866            if best.is_some_and(|best| index <= best) {
9867                break;
9868            }
9869            let pattern = &self.patterns[index];
9870            if !pattern.base_matches(path) {
9871                continue;
9872            }
9873            if !pattern.glob_literal_prefix_matches(path, basename, is_dir) {
9874                continue;
9875            }
9876            if let Some(profile) = profile.as_deref_mut() {
9877                profile.ignore_pattern_tests += 1;
9878                if pattern.match_kind == MatchKind::Glob {
9879                    profile.ignore_glob_fallback_tests += 1;
9880                }
9881            }
9882            if pattern.matches_with_basename(path, basename, is_dir) {
9883                *best = Some(index);
9884                break;
9885            }
9886        }
9887    }
9888
9889    fn match_final_component_candidates(
9890        &self,
9891        indices: &[usize],
9892        kind: MatchKind,
9893        component: &[u8],
9894        path: &[u8],
9895        basename: &[u8],
9896        is_dir: bool,
9897        best: &mut Option<usize>,
9898        profile: &mut Option<&mut StatusProfileCounters>,
9899    ) {
9900        for &index in indices.iter().rev() {
9901            if best.is_some_and(|best| index <= best) {
9902                break;
9903            }
9904            let pattern = &self.patterns[index];
9905            if !pattern.base_matches(path) {
9906                continue;
9907            }
9908            let final_component = path_basename(&pattern.pattern);
9909            let candidate = match kind {
9910                MatchKind::Suffix => component.ends_with(&final_component[1..]),
9911                MatchKind::Prefix => {
9912                    component.starts_with(&final_component[..final_component.len() - 1])
9913                }
9914                _ => false,
9915            };
9916            if !candidate {
9917                continue;
9918            }
9919            if !pattern.glob_literal_prefix_matches(path, basename, is_dir) {
9920                continue;
9921            }
9922            if let Some(profile) = profile.as_deref_mut() {
9923                profile.ignore_pattern_tests += 1;
9924                if pattern.match_kind == MatchKind::Glob {
9925                    profile.ignore_glob_fallback_tests += 1;
9926                }
9927            }
9928            if pattern.matches_with_basename(path, basename, is_dir) {
9929                *best = Some(index);
9930                break;
9931            }
9932        }
9933    }
9934
9935    fn push_pattern(&mut self, pattern: IgnorePattern) {
9936        let index = self.patterns.len();
9937        self.buckets.push(index, &pattern);
9938        self.patterns.push(pattern);
9939    }
9940
9941    fn push_raw_pattern(&mut self, raw: &[u8], base: &[u8], source: &[u8], line_number: usize) {
9942        if let Some(pattern) = parse_ignore_pattern(raw, base, source, line_number) {
9943            self.push_pattern(pattern);
9944        }
9945    }
9946
9947    fn truncate(&mut self, len: usize) {
9948        if self.patterns.len() == len {
9949            return;
9950        }
9951        self.patterns.truncate(len);
9952        self.buckets.truncate(len);
9953    }
9954
9955    fn rebuild_buckets(&mut self) {
9956        let mut buckets = IgnorePatternBuckets::default();
9957        for (index, pattern) in self.patterns.iter().enumerate() {
9958            buckets.push(index, pattern);
9959        }
9960        self.buckets = buckets;
9961    }
9962}
9963
/// Decides whether a worktree path is included by a [`SparseCheckout`].
///
/// In [`SparseCheckoutMode::Full`] the sparse patterns are compiled with the
/// same `.gitignore` grammar used elsewhere in this crate ([`IgnorePattern`]);
/// a path is *in cone* when the last matching pattern is positive. In
/// [`SparseCheckoutMode::Cone`] the patterns are reduced to a set of recursive
/// directory prefixes plus a flag for whether top-level files are kept, and
/// inclusion is decided by literal prefix containment.
#[derive(Debug)]
enum SparseMatcher {
    /// Full pattern matching: the sparse lines parsed into [`IgnorePattern`]s,
    /// kept in their original order so the last matching pattern decides.
    Full { patterns: Vec<IgnorePattern> },
    /// Cone matching backed by a compiled [`ConeMatcher`].
    Cone(ConeMatcher),
}
9977
/// Compiled form of a cone-shaped sparse-checkout pattern list.
///
/// Built by `ConeMatcher::compile` and queried with
/// `ConeMatcher::includes_file`; the default value includes nothing.
#[derive(Debug, Default)]
struct ConeMatcher {
    /// `true` when files directly at the repository root are in cone (`/*`).
    root_files: bool,
    /// Directory prefixes (without leading or trailing `/`) whose entire
    /// subtree is in cone, e.g. `dir1/dir2`.
    recursive_dirs: Vec<Vec<u8>>,
    /// Parent directories that are in cone only for their direct files
    /// (the `/dir/*` guard Git emits so intermediate directories keep their
    /// own files). Stored without leading or trailing `/`.
    parent_dirs: Vec<Vec<u8>>,
}
9990
9991impl SparseMatcher {
9992    fn new(sparse: &SparseCheckout, mode: SparseCheckoutMode) -> Self {
9993        let resolved = match mode {
9994            SparseCheckoutMode::Auto => {
9995                if patterns_are_cone(&sparse.patterns) {
9996                    SparseCheckoutMode::Cone
9997                } else {
9998                    SparseCheckoutMode::Full
9999                }
10000            }
10001            other => other,
10002        };
10003        match resolved {
10004            SparseCheckoutMode::Cone => SparseMatcher::Cone(ConeMatcher::compile(&sparse.patterns)),
10005            // `Auto` has been resolved above; everything else is full matching.
10006            _ => {
10007                let mut patterns = Vec::new();
10008                for pattern in &sparse.patterns {
10009                    push_ignore_pattern(&mut patterns, pattern, &[], b"sparse-checkout", 0);
10010                }
10011                SparseMatcher::Full { patterns }
10012            }
10013        }
10014    }
10015
10016    /// Returns `true` when the given file path should be present in the
10017    /// worktree under this sparse specification.
10018    fn includes_file(&self, path: &[u8]) -> bool {
10019        match self {
10020            SparseMatcher::Full { patterns } => {
10021                let mut end = path.len();
10022                let mut is_dir = false;
10023                while end > 0 {
10024                    let candidate = &path[..end];
10025                    let mut matched = None;
10026                    for pattern in patterns {
10027                        if pattern.matches(candidate, is_dir) {
10028                            matched = Some(!pattern.negated);
10029                        }
10030                    }
10031                    if let Some(included) = matched {
10032                        return included;
10033                    }
10034                    let Some(slash) = candidate.iter().rposition(|byte| *byte == b'/') else {
10035                        break;
10036                    };
10037                    end = slash;
10038                    is_dir = true;
10039                }
10040                false
10041            }
10042            SparseMatcher::Cone(cone) => cone.includes_file(path),
10043        }
10044    }
10045}
10046
10047impl ConeMatcher {
10048    fn compile(patterns: &[Vec<u8>]) -> Self {
10049        let mut matcher = ConeMatcher::default();
10050        let mut positive_dirs = Vec::new();
10051        let mut guarded_parent_dirs = BTreeSet::new();
10052        for raw in patterns {
10053            let line = sparse_clean_line(raw);
10054            if line.is_empty() || line.starts_with(b"#") {
10055                continue;
10056            }
10057            if line.starts_with(b"!") {
10058                if let Some(rest) = line.strip_prefix(b"!/")
10059                    && let Some(dir) = rest.strip_suffix(b"/*/")
10060                    && !dir.is_empty()
10061                {
10062                    guarded_parent_dirs.insert(unescape_sparse_cone_dir(dir));
10063                }
10064                continue;
10065            }
10066            if line == b"/*" {
10067                matcher.root_files = true;
10068                continue;
10069            }
10070            // `/dir/` -> recursive subtree.
10071            if let Some(rest) = line.strip_prefix(b"/")
10072                && let Some(dir) = rest.strip_suffix(b"/")
10073                && !dir.is_empty()
10074            {
10075                positive_dirs.push(unescape_sparse_cone_dir(dir));
10076                continue;
10077            }
10078            // `/dir/*` -> direct files of `dir` only (parent guard).
10079            if let Some(rest) = line.strip_prefix(b"/")
10080                && let Some(dir) = rest.strip_suffix(b"/*")
10081                && !dir.is_empty()
10082            {
10083                matcher.parent_dirs.push(unescape_sparse_cone_dir(dir));
10084                continue;
10085            }
10086        }
10087        for dir in positive_dirs {
10088            if guarded_parent_dirs.contains(&dir) {
10089                matcher.parent_dirs.push(dir);
10090            } else {
10091                matcher.recursive_dirs.push(dir);
10092            }
10093        }
10094        matcher
10095    }
10096
10097    fn includes_file(&self, path: &[u8]) -> bool {
10098        let parent = match path.iter().rposition(|byte| *byte == b'/') {
10099            Some(index) => &path[..index],
10100            None => {
10101                // A path with no slash is a top-level file.
10102                return self.root_files;
10103            }
10104        };
10105        if self
10106            .recursive_dirs
10107            .iter()
10108            .any(|dir| path_is_under_dir(path, dir))
10109        {
10110            return true;
10111        }
10112        self.parent_dirs.iter().any(|dir| dir.as_slice() == parent)
10113    }
10114}
10115
10116/// Strips a CR, leading/trailing whitespace, and an optional trailing slash is
10117/// preserved (cone patterns are slash sensitive) from a raw sparse line.
10118fn sparse_clean_line(raw: &[u8]) -> &[u8] {
10119    let line = raw.strip_suffix(b"\r").unwrap_or(raw);
10120    trim_ascii_whitespace(line)
10121}
10122
/// Returns `true` when `path` lives strictly beneath the directory `dir`,
/// i.e. `path` starts with `dir` immediately followed by `/`.
///
/// A `path` equal to `dir` is *not* considered to be under it. An empty `dir`
/// stands for the repository root and contains every path.
fn path_is_under_dir(path: &[u8], dir: &[u8]) -> bool {
    dir.is_empty()
        || (path.len() > dir.len() && path.starts_with(dir) && path[dir.len()] == b'/')
}
10132
10133/// Heuristic used by [`SparseCheckoutMode::Auto`]: the pattern set is cone
10134/// shaped when every (non-comment, non-blank) line is one of the restricted
10135/// cone forms Git emits.
10136fn patterns_are_cone(patterns: &[Vec<u8>]) -> bool {
10137    let mut saw_pattern = false;
10138    for raw in patterns {
10139        let line = sparse_clean_line(raw);
10140        if line.is_empty() || line.starts_with(b"#") {
10141            continue;
10142        }
10143        saw_pattern = true;
10144        let body = line.strip_prefix(b"!").unwrap_or(line);
10145        let is_cone_shaped = body == b"/*"
10146            || body == b"/*/"
10147            || (body.starts_with(b"/")
10148                && (body.ends_with(b"/") || body.ends_with(b"/*"))
10149                && !sparse_has_unescaped_glob_meta(body));
10150        if !is_cone_shaped {
10151            return false;
10152        }
10153    }
10154    saw_pattern
10155}
10156
/// Reports whether `body` contains a glob metacharacter that rules out cone
/// interpretation.
///
/// One trailing `/*` is removed first, since the caller permits that form.
/// Among the remaining bytes, `*`, `?`, `[`, `]` and `\` are metacharacters.
/// One is tolerated when the byte before it is a backslash, or when it is
/// itself a backslash directly followed by `*`, `?`, `[` or `\`.
fn sparse_has_unescaped_glob_meta(body: &[u8]) -> bool {
    let scanned = match body.strip_suffix(b"/*") {
        Some(head) => head,
        None => body,
    };
    scanned.iter().enumerate().any(|(at, &byte)| {
        if !matches!(byte, b'*' | b'?' | b'[' | b']' | b'\\') {
            return false;
        }
        let follows_backslash = at > 0 && scanned[at - 1] == b'\\';
        let begins_escape = byte == b'\\'
            && matches!(
                scanned.get(at + 1).copied(),
                Some(b'*' | b'?' | b'[' | b'\\')
            );
        !(follows_backslash || begins_escape)
    })
}
10177
/// Removes the escaping backslash from `\*`, `\?`, `\[` and `\\` in a cone
/// directory name, returning the literal directory path.
///
/// A backslash followed by any other byte, or a trailing backslash, is kept
/// verbatim together with the byte that follows it. The byte after the
/// backslash is only consumed when it is one of the escapable characters;
/// otherwise it is left in the stream and processed normally, so no input
/// byte is ever dropped.
fn unescape_sparse_cone_dir(path: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(path.len());
    let mut bytes = path.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        if byte == b'\\' {
            // `next_if` only advances when the following byte is escapable.
            let escaped = bytes.next_if(|next| matches!(next, b'*' | b'?' | b'[' | b'\\'));
            if let Some(literal) = escaped {
                out.push(literal);
                continue;
            }
        }
        out.push(byte);
    }
    out
}
10192
10193fn read_core_excludes_file(root: &Path, patterns: &mut Vec<IgnorePattern>) -> bool {
10194    let Ok(config) = sley_config::read_repo_config(&root.join(".git"), None) else {
10195        return false;
10196    };
10197    let Some(value) = config.get("core", None, "excludesFile") else {
10198        return false;
10199    };
10200    let path = expand_core_excludes_file(root, value);
10201    read_ignore_patterns(path, patterns, &[], value.as_bytes());
10202    true
10203}
10204
/// Resolves a `core.excludesFile` value to a filesystem path.
///
/// * An absolute value is used as is.
/// * A value starting with `~/` is joined onto `$HOME` when that variable is
///   set.
/// * Anything else (including `~/…` without `HOME`) is joined onto `root`.
fn expand_core_excludes_file(root: &Path, value: &str) -> PathBuf {
    let candidate = Path::new(value);
    if candidate.is_absolute() {
        return candidate.to_path_buf();
    }
    let home_relative = value.strip_prefix("~/").and_then(|rest| {
        let home = std::env::var_os("HOME")?;
        Some(PathBuf::from(home).join(rest))
    });
    match home_relative {
        Some(expanded) => expanded,
        None => root.join(candidate),
    }
}
10217
10218fn read_default_global_excludes_file(patterns: &mut Vec<IgnorePattern>) {
10219    if let Some(config_home) = std::env::var_os("XDG_CONFIG_HOME")
10220        && !config_home.is_empty()
10221    {
10222        let path = PathBuf::from(config_home).join("git").join("ignore");
10223        let source = path.to_string_lossy().into_owned();
10224        read_ignore_patterns(path, patterns, &[], source.as_bytes());
10225        return;
10226    }
10227    if let Some(home) = std::env::var_os("HOME") {
10228        let path = PathBuf::from(home)
10229            .join(".config")
10230            .join("git")
10231            .join("ignore");
10232        let source = path.to_string_lossy().into_owned();
10233        read_ignore_patterns(path, patterns, &[], source.as_bytes());
10234    }
10235}
10236
10237fn collect_per_directory_patterns_into_matcher(
10238    root: &Path,
10239    dir: &Path,
10240    names: &[String],
10241    matcher: &mut IgnoreMatcher,
10242) -> Result<()> {
10243    for name in names {
10244        let path = dir.join(name);
10245        let relative = dir.strip_prefix(root).map_err(|_| {
10246            GitError::InvalidPath(format!("path {} is outside worktree", dir.display()))
10247        })?;
10248        let base = git_path_bytes(relative)?;
10249        let mut source = base.clone();
10250        if !source.is_empty() {
10251            source.push(b'/');
10252        }
10253        source.extend_from_slice(name.as_bytes());
10254        read_per_directory_ignore_patterns_into_matcher(&path, matcher, &base, &source)?;
10255    }
10256
10257    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
10258    entries.sort_by_key(|entry| entry.file_name());
10259    for entry in entries {
10260        let path = entry.path();
10261        if path.file_name().and_then(|name| name.to_str()) == Some(".git") {
10262            continue;
10263        }
10264        let metadata = entry.file_type()?;
10265        if metadata.is_symlink() {
10266            continue;
10267        }
10268        if metadata.is_dir() {
10269            let relative = path.strip_prefix(root).map_err(|_| {
10270                GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
10271            })?;
10272            let git_path = git_path_bytes(relative)?;
10273            if !matcher.is_ignored(&git_path, true) {
10274                collect_per_directory_patterns_into_matcher(root, &path, names, matcher)?;
10275            }
10276        }
10277    }
10278    Ok(())
10279}
10280
10281fn read_ignore_patterns(
10282    path: impl AsRef<Path>,
10283    patterns: &mut Vec<IgnorePattern>,
10284    base: &[u8],
10285    source: &[u8],
10286) {
10287    let Ok(contents) = fs::read(path) else {
10288        return;
10289    };
10290    for (line, raw) in contents.split(|byte| *byte == b'\n').enumerate() {
10291        push_ignore_pattern(patterns, raw, base, source, line + 1);
10292    }
10293}
10294
10295fn read_per_directory_ignore_patterns_into_matcher(
10296    path: impl AsRef<Path>,
10297    matcher: &mut IgnoreMatcher,
10298    base: &[u8],
10299    source: &[u8],
10300) -> Result<()> {
10301    let path = path.as_ref();
10302    let metadata = match fs::symlink_metadata(path) {
10303        Ok(metadata) => metadata,
10304        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
10305        Err(err) => return Err(GitError::Io(err.to_string())),
10306    };
10307    if metadata.file_type().is_symlink() {
10308        return Err(GitError::Command(format!(
10309            "unable to access '{}'",
10310            path.display()
10311        )));
10312    }
10313    if !metadata.is_file() {
10314        return Ok(());
10315    }
10316    let contents = fs::read(path)?;
10317    for (line, raw) in contents.split(|byte| *byte == b'\n').enumerate() {
10318        matcher.push_raw_pattern(raw, base, source, line + 1);
10319    }
10320    Ok(())
10321}
10322
10323fn push_ignore_pattern(
10324    patterns: &mut Vec<IgnorePattern>,
10325    raw: &[u8],
10326    base: &[u8],
10327    source: &[u8],
10328    line_number: usize,
10329) {
10330    if let Some(pattern) = parse_ignore_pattern(raw, base, source, line_number) {
10331        patterns.push(pattern);
10332    }
10333}
10334
/// Parses one raw line of an ignore file into an [`IgnorePattern`].
///
/// * `raw` is the line without its `\n`; a trailing `\r` is removed here.
/// * `base` is stored as the pattern's base directory.
/// * `source` and `line_number` are stored for reporting; a UTF-8 BOM is
///   stripped only when `line_number` is 1.
///
/// Returns `None` for blank lines, `#` comments, and lines whose pattern is
/// empty once the `!`, leading `/` and trailing `/` markers are removed.
fn parse_ignore_pattern(
    raw: &[u8],
    base: &[u8],
    source: &[u8],
    line_number: usize,
) -> Option<IgnorePattern> {
    let raw = if line_number == 1 {
        raw.strip_prefix(b"\xEF\xBB\xBF").unwrap_or(raw)
    } else {
        raw
    };
    let mut line = raw.strip_suffix(b"\r").unwrap_or(raw).to_vec();
    normalize_ignore_trailing_spaces(&mut line);
    // `original` keeps the normalized line, including any `!`, `/` markers.
    let original = line.clone();
    let mut line = line.as_slice();
    if line.is_empty() || line.starts_with(b"#") {
        return None;
    }
    // `\#` and `\!` escape a literal leading `#`/`!`; a bare `!` negates.
    let negated = if line.starts_with(b"\\#") || line.starts_with(b"\\!") {
        line = &line[1..];
        false
    } else if let Some(pattern) = line.strip_prefix(b"!") {
        line = pattern;
        true
    } else {
        false
    };
    // A trailing `/` restricts the pattern to directories.
    let directory_only = line.ends_with(b"/");
    let pattern = if directory_only {
        line.strip_suffix(b"/").unwrap_or(line)
    } else {
        line
    };
    // A leading `/` anchors the pattern.
    let (anchored, pattern) = if let Some(pattern) = pattern.strip_prefix(b"/") {
        (true, pattern)
    } else {
        (false, pattern)
    };
    // A leading `**/` followed by a slash-free segment is, per gitignore,
    // identical to the bare segment ("match in all directories"): `**/Pods` ≡
    // `Pods`, `**/*.jks` ≡ `*.jks`. Collapse it so the pattern matches the
    // basename directly (a literal/suffix compare) instead of paying for the
    // `**` wildcard engine on the full path — verified against `git check-ignore`.
    let pattern = match pattern.strip_prefix(b"**/") {
        Some(rest) if !rest.is_empty() && !rest.contains(&b'/') => rest,
        _ => pattern,
    };
    if pattern.is_empty() {
        return None;
    }
    let match_kind = classify_ignore_pattern(pattern);
    // For glob patterns, record how many leading bytes precede the first
    // metacharacter; zero for every other match kind.
    let glob_literal_prefix_len = if match_kind == MatchKind::Glob {
        pattern
            .iter()
            .position(|byte| matches!(byte, b'*' | b'?' | b'[' | b'\\'))
            .unwrap_or(pattern.len())
    } else {
        0
    };
    Some(IgnorePattern {
        base: base.to_vec(),
        pattern: pattern.to_vec(),
        original,
        source: source.to_vec(),
        line_number,
        negated,
        directory_only,
        anchored,
        has_slash: pattern.contains(&b'/'),
        match_kind,
        glob_literal_prefix_len,
    })
}
10408
/// Removes unescaped trailing spaces from an ignore line, in place.
///
/// Spaces are popped from the end until the line no longer ends in a space.
/// If a trailing space is preceded by an odd number of backslashes it is
/// escaped: the escaping backslash is removed, the space is kept, and
/// trimming stops.
fn normalize_ignore_trailing_spaces(line: &mut Vec<u8>) {
    loop {
        let escaped = {
            let head = match line.split_last() {
                Some((&b' ', head)) => head,
                _ => return,
            };
            let mut run = 0usize;
            for byte in head.iter().rev() {
                if *byte != b'\\' {
                    break;
                }
                run += 1;
            }
            run % 2 == 1
        };
        if escaped {
            // The backslash sits directly before the final space.
            let backslash_at = line.len() - 2;
            line.remove(backslash_at);
            return;
        }
        line.pop();
    }
}
10424
10425impl IgnorePattern {
    /// Chooses the lookup bucket for this pattern.
    ///
    /// The checks run from the most specific shape to the most general and
    /// the first one that applies wins, so their order matters. Patterns that
    /// fit no specialised bucket fall into `IgnoreBucketKind::Other`.
    fn bucket_kind(&self) -> IgnoreBucketKind {
        // Path-suffix patterns, split by the directory-only flag.
        if self.match_kind == MatchKind::PathSuffix {
            return if self.directory_only {
                IgnoreBucketKind::DirectoryPathSuffixBasename
            } else {
                IgnoreBucketKind::PathSuffixBasename
            };
        }
        // Literal patterns tied to a path (anchored or containing `/`).
        if (self.anchored || self.has_slash) && self.match_kind == MatchKind::Literal {
            return if self.directory_only {
                IgnoreBucketKind::DirectoryLiteralPathBasename
            } else {
                IgnoreBucketKind::LiteralPathBasename
            };
        }
        // Glob path patterns whose final component has no glob metacharacters.
        if self.has_slash
            && self.match_kind == MatchKind::Glob
            && !self.directory_only
            && !path_component_has_glob_meta(path_basename(&self.pattern))
        {
            return IgnoreBucketKind::GlobPathLiteralBasename;
        }
        if self.has_slash
            && self.match_kind == MatchKind::Glob
            && self.directory_only
            && !path_component_has_glob_meta(path_basename(&self.pattern))
        {
            return IgnoreBucketKind::GlobDirectoryLiteralBasename;
        }
        // Remaining glob path patterns: bucket by the shape of the final
        // component when it is a simple suffix or prefix form.
        if self.has_slash && self.match_kind == MatchKind::Glob {
            return match (
                self.directory_only,
                final_component_match_kind(&self.pattern),
            ) {
                (false, MatchKind::Suffix) => IgnoreBucketKind::GlobPathSuffixBasename,
                (false, MatchKind::Prefix) => IgnoreBucketKind::GlobPathPrefixBasename,
                (true, MatchKind::Suffix) => IgnoreBucketKind::GlobDirectorySuffixBasename,
                (true, MatchKind::Prefix) => IgnoreBucketKind::GlobDirectoryPrefixBasename,
                _ => IgnoreBucketKind::Other,
            };
        }
        // Any other path-scoped pattern has no specialised bucket.
        if self.anchored || self.has_slash {
            return IgnoreBucketKind::Other;
        }
        // Slash-free, unanchored patterns: bucket by match kind.
        match (self.directory_only, self.match_kind) {
            (false, MatchKind::Literal) => IgnoreBucketKind::LiteralBasename,
            (true, MatchKind::Literal) => IgnoreBucketKind::DirectoryLiteralBasename,
            (false, MatchKind::Suffix) => IgnoreBucketKind::SuffixBasename,
            (false, MatchKind::Prefix) => IgnoreBucketKind::PrefixBasename,
            _ => IgnoreBucketKind::Other,
        }
    }
10478
10479    fn base_matches(&self, path: &[u8]) -> bool {
10480        if self.base.is_empty() {
10481            return true;
10482        }
10483        path.strip_prefix(self.base.as_slice())
10484            .is_some_and(|rest| rest.starts_with(b"/"))
10485    }
10486
10487    fn to_match(&self) -> IgnoreMatch {
10488        IgnoreMatch {
10489            source: self.source.clone(),
10490            line_number: self.line_number,
10491            pattern: self.original.clone(),
10492            ignored: !self.negated,
10493        }
10494    }
10495
10496    fn matches(&self, path: &[u8], is_dir: bool) -> bool {
10497        let basename = path_basename(path);
10498        self.matches_with_basename(path, basename, is_dir)
10499    }
10500
10501    fn glob_literal_prefix_matches(&self, path: &[u8], basename: &[u8], is_dir: bool) -> bool {
10502        if self.match_kind != MatchKind::Glob {
10503            return true;
10504        }
10505        if self.glob_literal_prefix_len == 0 {
10506            return true;
10507        }
10508        let prefix = &self.pattern[..self.glob_literal_prefix_len];
10509        let scoped_path = if self.base.is_empty() {
10510            path
10511        } else {
10512            let Some(rest) = path
10513                .strip_prefix(self.base.as_slice())
10514                .and_then(|rest| rest.strip_prefix(b"/"))
10515            else {
10516                return false;
10517            };
10518            rest
10519        };
10520        if self.anchored || self.has_slash {
10521            return scoped_path.starts_with(prefix);
10522        }
10523        if self.directory_only && !is_dir {
10524            return true;
10525        }
10526        basename.starts_with(prefix)
10527    }
10528
10529    fn matches_with_basename(&self, path: &[u8], basename: &[u8], is_dir: bool) -> bool {
10530        let path = if self.base.is_empty() {
10531            path
10532        } else {
10533            let Some(rest) = path
10534                .strip_prefix(self.base.as_slice())
10535                .and_then(|rest| rest.strip_prefix(b"/"))
10536            else {
10537                return false;
10538            };
10539            rest
10540        };
10541        if self.directory_only {
10542            return self.matches_directory(path, is_dir);
10543        }
10544        if self.anchored || self.has_slash {
10545            return self.match_segment(path);
10546        }
10547        self.match_segment(basename)
10548    }
10549
    /// Matches a directory-only pattern (one written with a trailing `/`)
    /// against `path`, which is already relative to the pattern's base.
    ///
    /// Path-scoped patterns match a directory `path` directly, or — unless
    /// negated — any proper ancestor prefix of `path`. Slash-free patterns
    /// match when any component of `path` matches and that component is a
    /// directory: either it is not the last component, or `is_dir` is set.
    fn matches_directory(&self, path: &[u8], is_dir: bool) -> bool {
        if self.anchored || self.has_slash {
            if is_dir && self.match_path(path) {
                return true;
            }
            // For a *file* path, a directory-only pattern can only apply
            // through an *ancestor* directory of the file: the leaf is matched
            // only because it lives inside a directory the pattern excludes
            // (e.g. `/tmp-*/` excludes `tmp-info-only`, so `tmp-info-only/x`
            // is excluded too). Upstream git models this through directory
            // traversal — `last_matching_pattern` skips a MUSTBEDIR pattern for
            // a non-directory leaf (`dtype != DT_DIR`), and a file is excluded
            // only when one of its parent directories is excluded.
            //
            // A *negated* directory-only pattern (`!data/**/`) re-includes a
            // directory but, per git, does NOT re-include the files inside it
            // (git's docs: "it is not possible to re-include a file if a parent
            // directory of that file is excluded" — re-including the dir with
            // `!dir/` still requires an explicit `!dir/*` to reach its files).
            // So a negated directory-only pattern must never match a file via
            // its ancestor, otherwise it wrongly wins the leaf scan and
            // un-ignores a file that an earlier positive pattern ignored
            // (t0008-ignores "directories and ** matches": `data/**` +
            // `!data/**/` must leave `data/data1/file1` ignored).
            if self.negated {
                return false;
            }
            // Try every prefix of `path` that ends just before a `/`.
            return path
                .iter()
                .enumerate()
                .any(|(idx, byte)| *byte == b'/' && self.match_path(&path[..idx]));
        }
        let mut components = path.split(|byte| *byte == b'/').peekable();
        while let Some(component) = components.next() {
            // A matching component counts only when it denotes a directory:
            // something follows it, or the whole path is a directory.
            if self.match_segment(component) && (is_dir || components.peek().is_some()) {
                return true;
            }
        }
        false
    }
10590
10591    fn match_path(&self, value: &[u8]) -> bool {
10592        match self.match_kind {
10593            MatchKind::Literal => self.pattern == value,
10594            MatchKind::Suffix => !value.contains(&b'/') && value.ends_with(&self.pattern[1..]),
10595            MatchKind::Prefix => {
10596                !value.contains(&b'/') && value.starts_with(&self.pattern[..self.pattern.len() - 1])
10597            }
10598            MatchKind::PathSuffix => {
10599                let suffix = &self.pattern[3..];
10600                value
10601                    .strip_suffix(suffix)
10602                    .is_some_and(|prefix| prefix.is_empty() || prefix.ends_with(b"/"))
10603            }
10604            MatchKind::Glob => wildcard_path_matches(&self.pattern, value),
10605        }
10606    }
10607
10608    /// Match a slash-free `value` (a basename or path component) against this
10609    /// pattern. Literal and simple `*X`/`X*` patterns resolve with a direct
10610    /// comparison; only complex globs pay for the allocating wildcard engine.
10611    fn match_segment(&self, value: &[u8]) -> bool {
10612        self.match_path(value)
10613    }
10614}
10615
10616thread_local! {
10617    /// Reused dynamic-programming scratch for [`wildcard_path_matches`]. Flat
10618    /// `(pattern.len()+1) * (value.len()+1)` grid of memoised results, kept across
10619    /// calls so the hot ignore/attribute matching loop reuses one allocation.
    ///
    /// Cell `p * (value.len()+1) + v` caches the answer for matching
    /// `pattern[p..]` against `value[v..]`; `None` means "not computed yet".
    /// Each call clears and resizes the vector, so the buffer only grows when a
    /// call needs a larger grid than the current capacity.
10620    static WILDCARD_MEMO: RefCell<Vec<Option<bool>>> = const { RefCell::new(Vec::new()) };
10621}
10622
10623fn wildcard_path_matches(pattern: &[u8], value: &[u8]) -> bool {
10624    let stride = value.len() + 1;
10625    let cells = (pattern.len() + 1) * stride;
10626    WILDCARD_MEMO.with_borrow_mut(|memo| {
10627        // One reused allocation; clearing then resizing fills the grid with `None`.
10628        memo.clear();
10629        memo.resize(cells, None);
10630        wildcard_path_matches_from(pattern, value, 0, 0, memo, stride)
10631    })
10632}
10633
10634fn wildcard_path_matches_from(
10635    pattern: &[u8],
10636    value: &[u8],
10637    pattern_index: usize,
10638    value_index: usize,
10639    memo: &mut [Option<bool>],
10640    stride: usize,
10641) -> bool {
10642    let cell = pattern_index * stride + value_index;
10643    if let Some(cached) = memo[cell] {
10644        return cached;
10645    }
10646    let matched = if pattern_index == pattern.len() {
10647        value_index == value.len()
10648    } else {
10649        match pattern[pattern_index] {
10650            b'*' if pattern.get(pattern_index + 1) == Some(&b'*') => wildcard_double_star_matches(
10651                pattern,
10652                value,
10653                pattern_index,
10654                value_index,
10655                memo,
10656                stride,
10657            ),
10658            b'*' => {
10659                if wildcard_path_matches_from(
10660                    pattern,
10661                    value,
10662                    pattern_index + 1,
10663                    value_index,
10664                    memo,
10665                    stride,
10666                ) {
10667                    true
10668                } else {
10669                    let mut next = value_index;
10670                    while next < value.len() && value[next] != b'/' {
10671                        next += 1;
10672                        if wildcard_path_matches_from(
10673                            pattern,
10674                            value,
10675                            pattern_index + 1,
10676                            next,
10677                            memo,
10678                            stride,
10679                        ) {
10680                            return true;
10681                        }
10682                    }
10683                    false
10684                }
10685            }
10686            b'?' => {
10687                value_index < value.len()
10688                    && value[value_index] != b'/'
10689                    && wildcard_path_matches_from(
10690                        pattern,
10691                        value,
10692                        pattern_index + 1,
10693                        value_index + 1,
10694                        memo,
10695                        stride,
10696                    )
10697            }
10698            b'[' => {
10699                if value_index < value.len() && value[value_index] != b'/' {
10700                    if let Some((class_matches, next_pattern_index)) =
10701                        wildcard_class_matches(pattern, pattern_index, value[value_index])
10702                    {
10703                        class_matches
10704                            && wildcard_path_matches_from(
10705                                pattern,
10706                                value,
10707                                next_pattern_index,
10708                                value_index + 1,
10709                                memo,
10710                                stride,
10711                            )
10712                    } else {
10713                        value[value_index] == b'['
10714                            && wildcard_path_matches_from(
10715                                pattern,
10716                                value,
10717                                pattern_index + 1,
10718                                value_index + 1,
10719                                memo,
10720                                stride,
10721                            )
10722                    }
10723                } else {
10724                    false
10725                }
10726            }
10727            b'\\' if pattern_index + 1 < pattern.len() => {
10728                value_index < value.len()
10729                    && pattern[pattern_index + 1] == value[value_index]
10730                    && wildcard_path_matches_from(
10731                        pattern,
10732                        value,
10733                        pattern_index + 2,
10734                        value_index + 1,
10735                        memo,
10736                        stride,
10737                    )
10738            }
10739            literal => {
10740                value_index < value.len()
10741                    && literal == value[value_index]
10742                    && wildcard_path_matches_from(
10743                        pattern,
10744                        value,
10745                        pattern_index + 1,
10746                        value_index + 1,
10747                        memo,
10748                        stride,
10749                    )
10750            }
10751        }
10752    };
10753    memo[cell] = Some(matched);
10754    matched
10755}
10756
10757fn wildcard_double_star_matches(
10758    pattern: &[u8],
10759    value: &[u8],
10760    pattern_index: usize,
10761    value_index: usize,
10762    memo: &mut [Option<bool>],
10763    stride: usize,
10764) -> bool {
10765    let after_stars = pattern_index + 2;
10766    if pattern.get(after_stars) == Some(&b'/') {
10767        if wildcard_path_matches_from(pattern, value, after_stars + 1, value_index, memo, stride) {
10768            return true;
10769        }
10770        for next in value_index..value.len() {
10771            if value[next] == b'/'
10772                && wildcard_path_matches_from(
10773                    pattern,
10774                    value,
10775                    after_stars + 1,
10776                    next + 1,
10777                    memo,
10778                    stride,
10779                )
10780            {
10781                return true;
10782            }
10783        }
10784        return false;
10785    }
10786    for next in value_index..=value.len() {
10787        if wildcard_path_matches_from(pattern, value, after_stars, next, memo, stride) {
10788            return true;
10789        }
10790    }
10791    false
10792}
10793
/// Evaluates the bracket expression that opens at `pattern[start]` (a `[`)
/// against the single byte `value`.
///
/// Returns `Some((matched, next))`, where `matched` already accounts for a
/// leading `!` / `^` negation and `next` is the pattern index just past the
/// closing `]`. Returns `None` when there is no closing `]`, in which case the
/// caller treats the `[` as an ordinary byte.
///
/// Members are single bytes or `a-z` style ranges; a range written backwards
/// covers the same span. As in POSIX bracket expressions and git's wildmatch, a
/// `]` placed first (right after `[`, `[!` or `[^`) is a literal member rather
/// than the terminator, so `[]a]` matches `]` or `a`.
fn wildcard_class_matches(pattern: &[u8], start: usize, value: u8) -> Option<(bool, usize)> {
    let mut index = start + 1;
    let negated = matches!(pattern.get(index), Some(b'!' | b'^'));
    if negated {
        index += 1;
    }
    let class_start = index;
    // The first member can never close the class, so the search for the
    // terminating `]` begins one byte later.
    let search_from = class_start + 1;
    let end = pattern
        .get(search_from..)?
        .iter()
        .position(|byte| *byte == b']')
        .map(|position| search_from + position)?;
    let mut matched = false;
    while index < end {
        if index + 2 < end && pattern[index + 1] == b'-' {
            let lower = pattern[index].min(pattern[index + 2]);
            let upper = pattern[index].max(pattern[index + 2]);
            matched |= lower <= value && value <= upper;
            index += 3;
        } else {
            matched |= pattern[index] == value;
            index += 1;
        }
    }
    Some((matched != negated, end + 1))
}
10822
/// Accumulated gitattributes rules together with the state needed to evaluate
/// them for a path.
10823#[derive(Debug, Default)]
10824struct AttributeMatcher {
    /// Rules in the order they were read. `attributes_for_path` applies them in
    /// this order, so an assignment from a later rule overwrites an earlier one.
10825    patterns: Vec<AttributePattern>,
    /// First-seen position of every attribute and macro name; used to order the
    /// "all attributes" listing.
10826    attribute_order: BTreeMap<Vec<u8>, usize>,
    /// Macro name mapped to the assignments it stands for (`[attr]name ...` lines).
10827    macros: BTreeMap<Vec<u8>, Vec<AttributeAssignment>>,
    /// Value of `core.ignorecase`; passed to every pattern match.
10828    ignore_case: bool,
10829}
10830
/// One parsed attributes line: a path pattern plus the assignments it carries.
10831#[derive(Debug)]
10832struct AttributePattern {
    /// Slash-separated directory the rule was read for; empty for sources that
    /// are not tied to a worktree directory.
10833    base: Vec<u8>,
    /// Pattern text with a leading `/` removed and a leading `\!` reduced to `!`.
10834    pattern: Vec<u8>,
    /// `ascii_lowercase` copy of `pattern`, built only when the matcher ignores case.
10835    ignore_case_pattern: Option<Vec<u8>>,
    /// Whether the pattern was written with a leading `/`.
10836    anchored: bool,
    /// Whether `pattern` contains a `/`.
10837    has_slash: bool,
    /// Assignments applied to every path the pattern matches.
10838    assignments: Vec<AttributeAssignment>,
10839}
10840
/// A single `attr`, `-attr`, `!attr` or `attr=value` entry from an attributes line.
10841#[derive(Debug, Clone, PartialEq, Eq)]
10842struct AttributeAssignment {
    /// Attribute name without any `-` / `!` prefix or `=value` suffix.
10843    attribute: Vec<u8>,
    /// `Some` for set / unset / valued entries; `None` for a `!attr` entry,
    /// which puts the attribute back into the unspecified state.
10844    state: Option<AttributeState>,
10845}
10846
10847impl AttributeMatcher {
10848    fn from_worktree_root(root: &Path) -> Result<Self> {
10849        let mut matcher = Self::default();
10850        let git_dir = root.join(".git");
10851        matcher.configure_case_sensitivity(&git_dir);
10852        if !matcher.read_configured_attributes(root, &git_dir) {
10853            matcher.read_default_global_attributes();
10854        }
10855        collect_attribute_patterns(root, root, &mut matcher)?;
10856        read_attribute_patterns(
10857            git_dir.join("info").join("attributes"),
10858            &mut matcher,
10859            &[],
10860            b".git/info/attributes",
10861            false,
10862        );
10863        Ok(matcher)
10864    }
10865
10866    /// Builds only the repository-wide attribute sources — `core.attributesFile`
10867    /// (or the default global) and `$GIT_DIR/info/attributes` — *without* walking
10868    /// the worktree for `.gitattributes`. The caller is expected to fold each
10869    /// directory's `.gitattributes` into the matcher as it descends (see
10870    /// [`read_dir_attribute_patterns`]), so status/diff read the tree exactly once
10871    /// instead of doing a separate full-tree attribute pass. Lower-priority sources
10872    /// are added first, so in-tree patterns added during the walk take precedence —
10873    /// matching git's lookup order.
10874    fn from_worktree_base(root: &Path) -> Self {
10875        let mut matcher = Self::default();
10876        let git_dir = root.join(".git");
10877        matcher.configure_case_sensitivity(&git_dir);
10878        if !matcher.read_configured_attributes(root, &git_dir) {
10879            matcher.read_default_global_attributes();
10880        }
10881        read_attribute_patterns(
10882            git_dir.join("info").join("attributes"),
10883            &mut matcher,
10884            &[],
10885            b".git/info/attributes",
10886            false,
10887        );
10888        matcher
10889    }
10890
10891    fn attributes_for_path(
10892        &self,
10893        path: &[u8],
10894        requested: &[Vec<u8>],
10895        all: bool,
10896    ) -> Vec<AttributeCheck> {
10897        let mut states = BTreeMap::<Vec<u8>, Option<AttributeState>>::new();
10898        for pattern in &self.patterns {
10899            if !pattern.matches(path, self.ignore_case) {
10900                continue;
10901            }
10902            for assignment in &pattern.assignments {
10903                self.apply_attribute_assignment(&mut states, assignment);
10904            }
10905        }
10906        if all {
10907            let mut checks = states
10908                .into_iter()
10909                .filter_map(|(attribute, state)| {
10910                    state.map(|state| AttributeCheck {
10911                        attribute,
10912                        state: Some(state),
10913                    })
10914                })
10915                .collect::<Vec<_>>();
10916            checks.sort_by(|left, right| {
10917                attribute_all_rank(&left.attribute, &self.attribute_order)
10918                    .cmp(&attribute_all_rank(&right.attribute, &self.attribute_order))
10919                    .then_with(|| left.attribute.cmp(&right.attribute))
10920            });
10921            return checks;
10922        }
10923        requested
10924            .iter()
10925            .map(|attribute| AttributeCheck {
10926                attribute: attribute.clone(),
10927                state: states.get(attribute).cloned().flatten(),
10928            })
10929            .collect()
10930    }
10931
10932    fn push_attribute_order(&mut self, attribute: &[u8]) {
10933        let next = self.attribute_order.len();
10934        self.attribute_order
10935            .entry(attribute.to_vec())
10936            .or_insert(next);
10937    }
10938
10939    fn apply_attribute_assignment(
10940        &self,
10941        states: &mut BTreeMap<Vec<u8>, Option<AttributeState>>,
10942        assignment: &AttributeAssignment,
10943    ) {
10944        let mut stack = vec![assignment.clone()];
10945        let mut expanded = 0usize;
10946        while let Some(assignment) = stack.pop() {
10947            states.insert(assignment.attribute.clone(), assignment.state.clone());
10948            if assignment.state != Some(AttributeState::Set) {
10949                continue;
10950            }
10951            let Some(macro_assignments) = self.macros.get(&assignment.attribute) else {
10952                continue;
10953            };
10954            expanded += 1;
10955            if expanded > 10000 {
10956                break;
10957            }
10958            for macro_assignment in macro_assignments.iter().rev() {
10959                stack.push(macro_assignment.clone());
10960            }
10961        }
10962    }
10963
10964    fn configure_case_sensitivity(&mut self, git_dir: &Path) {
10965        let Ok(config) = sley_config::read_repo_config(git_dir, None) else {
10966            return;
10967        };
10968        self.ignore_case = config.get_bool("core", None, "ignorecase").unwrap_or(false);
10969    }
10970
10971    fn read_configured_attributes(&mut self, root: &Path, git_dir: &Path) -> bool {
10972        let Ok(config) = sley_config::read_repo_config(git_dir, None) else {
10973            return false;
10974        };
10975        let Some(value) = config.get("core", None, "attributesFile") else {
10976            return false;
10977        };
10978        let path = expand_core_excludes_file(root, value);
10979        read_attribute_patterns(path, self, &[], value.as_bytes(), false);
10980        true
10981    }
10982
10983    fn read_default_global_attributes(&mut self) {
10984        if let Some(config_home) = std::env::var_os("XDG_CONFIG_HOME")
10985            && !config_home.is_empty()
10986        {
10987            let path = PathBuf::from(config_home).join("git").join("attributes");
10988            let source = path.to_string_lossy().into_owned();
10989            read_attribute_patterns(path, self, &[], source.as_bytes(), false);
10990            return;
10991        }
10992        if let Some(home) = std::env::var_os("HOME") {
10993            let path = PathBuf::from(home)
10994                .join(".config")
10995                .join("git")
10996                .join("attributes");
10997            let source = path.to_string_lossy().into_owned();
10998            read_attribute_patterns(path, self, &[], source.as_bytes(), false);
10999        }
11000    }
11001}
11002
11003fn read_dir_ignore_patterns_for_base(
11004    dir: &Path,
11005    base: &[u8],
11006    matcher: &mut IgnoreMatcher,
11007) -> Result<()> {
11008    let mut source = base.to_vec();
11009    if !source.is_empty() {
11010        source.push(b'/');
11011    }
11012    source.extend_from_slice(b".gitignore");
11013    read_per_directory_ignore_patterns_into_matcher(dir.join(".gitignore"), matcher, base, &source)
11014}
11015
11016/// Fold `dir`'s `.gitattributes` (if any) into `matcher`, scoped to `dir`'s path
11017/// within `root`. Used both by the eager full-tree pass and by the status/diff
11018/// worktree walk as it descends, so the tree is read for attributes exactly once.
11019fn read_dir_attribute_patterns(
11020    root: &Path,
11021    dir: &Path,
11022    matcher: &mut AttributeMatcher,
11023) -> Result<()> {
11024    let relative = dir.strip_prefix(root).map_err(|_| {
11025        GitError::InvalidPath(format!("path {} is outside worktree", dir.display()))
11026    })?;
11027    let base = git_path_bytes(relative)?;
11028    read_dir_attribute_patterns_for_base(dir, &base, matcher)
11029}
11030
11031fn read_dir_attribute_patterns_for_base(
11032    dir: &Path,
11033    base: &[u8],
11034    matcher: &mut AttributeMatcher,
11035) -> Result<()> {
11036    let mut source = base.to_vec();
11037    if !source.is_empty() {
11038        source.push(b'/');
11039    }
11040    source.extend_from_slice(b".gitattributes");
11041    read_attribute_patterns(dir.join(".gitattributes"), matcher, base, &source, true);
11042    Ok(())
11043}
11044
11045fn collect_attribute_patterns(
11046    root: &Path,
11047    dir: &Path,
11048    matcher: &mut AttributeMatcher,
11049) -> Result<()> {
11050    read_dir_attribute_patterns(root, dir, matcher)?;
11051
11052    let mut entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
11053    entries.sort_by_key(|entry| entry.file_name());
11054    for entry in entries {
11055        let path = entry.path();
11056        if path.file_name().and_then(|name| name.to_str()) == Some(".git") {
11057            continue;
11058        }
11059        if entry.metadata()?.is_dir() {
11060            collect_attribute_patterns(root, &path, matcher)?;
11061        }
11062    }
11063    Ok(())
11064}
11065
11066fn read_attribute_patterns(
11067    path: impl AsRef<Path>,
11068    matcher: &mut AttributeMatcher,
11069    base: &[u8],
11070    source: &[u8],
11071    nofollow: bool,
11072) {
11073    let path = path.as_ref();
11074    if nofollow
11075        && let Ok(metadata) = fs::symlink_metadata(path)
11076        && metadata.file_type().is_symlink()
11077    {
11078        eprintln!(
11079            "warning: unable to access '{}': Too many levels of symbolic links",
11080            String::from_utf8_lossy(source)
11081        );
11082        return;
11083    }
11084    let Ok(contents) = fs::read(path) else {
11085        return;
11086    };
11087    read_attribute_patterns_from_bytes(&contents, matcher, base, source);
11088}
11089
11090fn read_attribute_patterns_from_bytes(
11091    contents: &[u8],
11092    matcher: &mut AttributeMatcher,
11093    base: &[u8],
11094    source: &[u8],
11095) {
11096    for (index, raw) in contents.split(|byte| *byte == b'\n').enumerate() {
11097        if raw.len() >= 2048 {
11098            eprintln!(
11099                "warning: ignoring overly long attributes line {}",
11100                index + 1
11101            );
11102            continue;
11103        }
11104        push_attribute_pattern(matcher, raw, base, source, index + 1);
11105    }
11106}
11107
11108fn collect_attribute_patterns_from_tree(
11109    db: &FileObjectDatabase,
11110    format: ObjectFormat,
11111    tree_oid: &ObjectId,
11112    base: Vec<u8>,
11113    matcher: &mut AttributeMatcher,
11114) -> Result<()> {
11115    let object = read_expected_object(db, tree_oid, ObjectType::Tree)?;
11116    let mut entries = Tree::parse(format, &object.body)?.entries;
11117    entries.sort_by(|left, right| left.name.cmp(&right.name));
11118    for entry in &entries {
11119        if entry.name == b".gitattributes" && tree_entry_object_type(entry.mode) == ObjectType::Blob
11120        {
11121            let object = db.read_object(&entry.oid).map_err(|err| {
11122                expect_missing_object_kind(err, entry.oid, MissingObjectKind::Blob)
11123            })?;
11124            if object.object_type == ObjectType::Blob {
11125                let source = attribute_source_for_base(&base);
11126                read_attribute_patterns_from_bytes(&object.body, matcher, &base, &source);
11127            }
11128        }
11129    }
11130    for entry in entries {
11131        if tree_entry_object_type(entry.mode) != ObjectType::Tree {
11132            continue;
11133        }
11134        let mut child_base = base.clone();
11135        if !child_base.is_empty() {
11136            child_base.push(b'/');
11137        }
11138        child_base.extend_from_slice(entry.name.as_bytes());
11139        collect_attribute_patterns_from_tree(db, format, &entry.oid, child_base, matcher)?;
11140    }
11141    Ok(())
11142}
11143
11144fn collect_attribute_patterns_from_index(
11145    git_dir: &Path,
11146    format: ObjectFormat,
11147    db: &FileObjectDatabase,
11148    matcher: &mut AttributeMatcher,
11149) -> Result<()> {
11150    let index_path = repository_index_path(git_dir);
11151    if !index_path.exists() {
11152        return Ok(());
11153    }
11154    let mut entries = Index::parse(&fs::read(index_path)?, format)?.entries;
11155    entries.sort_by(|left, right| left.path.cmp(&right.path));
11156    for entry in entries {
11157        let is_attributes_file =
11158            entry.path == b".gitattributes" || entry.path.as_bytes().ends_with(b"/.gitattributes");
11159        if index_entry_stage(&entry) != 0
11160            || tree_entry_object_type(entry.mode) != ObjectType::Blob
11161            || !is_attributes_file
11162        {
11163            continue;
11164        }
11165        let base = match entry.path.as_bytes().strip_suffix(b".gitattributes") {
11166            Some(b"") => Vec::new(),
11167            Some(parent) => parent.strip_suffix(b"/").unwrap_or(parent).to_vec(),
11168            None => continue,
11169        };
11170        let object = db
11171            .read_object(&entry.oid)
11172            .map_err(|err| expect_missing_object_kind(err, entry.oid, MissingObjectKind::Blob))?;
11173        if object.object_type == ObjectType::Blob {
11174            read_attribute_patterns_from_bytes(&object.body, matcher, &base, entry.path.as_bytes());
11175        }
11176    }
11177    Ok(())
11178}
11179
/// Builds the diagnostic label of the `.gitattributes` file that belongs to
/// `base`: `.gitattributes` for the top level, `<base>/.gitattributes` otherwise.
fn attribute_source_for_base(base: &[u8]) -> Vec<u8> {
    const FILE_NAME: &[u8] = b".gitattributes";
    let mut label = Vec::with_capacity(base.len() + 1 + FILE_NAME.len());
    if !base.is_empty() {
        label.extend_from_slice(base);
        label.push(b'/');
    }
    label.extend_from_slice(FILE_NAME);
    label
}
11188
11189fn push_attribute_pattern(
11190    matcher: &mut AttributeMatcher,
11191    raw: &[u8],
11192    base: &[u8],
11193    source: &[u8],
11194    line_number: usize,
11195) {
11196    let line = raw.strip_suffix(b"\r").unwrap_or(raw);
11197    let line = trim_ascii_whitespace(line);
11198    if line.is_empty() || line.starts_with(b"#") {
11199        return;
11200    }
11201    let Some((raw_pattern, fields)) = split_attribute_line(line) else {
11202        return;
11203    };
11204    if let Some(macro_name) = raw_pattern.strip_prefix(b"[attr]") {
11205        if macro_name.is_empty() {
11206            return;
11207        }
11208        if is_reserved_attribute_name(macro_name) {
11209            report_invalid_attribute_name(macro_name, source, line_number);
11210            return;
11211        }
11212        let mut assignments = Vec::new();
11213        for field in fields {
11214            push_attribute_assignments(
11215                &mut assignments,
11216                &field,
11217                &matcher.macros,
11218                source,
11219                line_number,
11220            );
11221        }
11222        matcher.push_attribute_order(macro_name);
11223        for assignment in &assignments {
11224            matcher.push_attribute_order(&assignment.attribute);
11225        }
11226        matcher.macros.insert(macro_name.to_vec(), assignments);
11227        return;
11228    }
11229    let mut assignments = Vec::new();
11230    for field in fields {
11231        push_attribute_assignments(
11232            &mut assignments,
11233            &field,
11234            &matcher.macros,
11235            source,
11236            line_number,
11237        );
11238    }
11239    if assignments.is_empty() {
11240        return;
11241    }
11242    for assignment in &assignments {
11243        matcher.push_attribute_order(&assignment.attribute);
11244    }
11245    if raw_pattern.starts_with(b"!") {
11246        eprintln!(
11247            "warning: Negative patterns are ignored in git attributes\nUse '\\!' for literal leading exclamation."
11248        );
11249        return;
11250    }
11251    let raw_pattern = raw_pattern
11252        .strip_prefix(br"\!")
11253        .map(|pattern| {
11254            let mut literal = Vec::with_capacity(pattern.len() + 1);
11255            literal.push(b'!');
11256            literal.extend_from_slice(pattern);
11257            literal
11258        })
11259        .unwrap_or(raw_pattern);
11260    let (anchored, pattern) = if let Some(pattern) = raw_pattern.strip_prefix(b"/") {
11261        (true, pattern)
11262    } else {
11263        (false, raw_pattern.as_slice())
11264    };
11265    if pattern.is_empty() {
11266        return;
11267    }
11268    matcher.patterns.push(AttributePattern {
11269        base: base.to_vec(),
11270        pattern: pattern.to_vec(),
11271        ignore_case_pattern: matcher.ignore_case.then(|| ascii_lowercase(pattern)),
11272        anchored,
11273        has_slash: pattern.contains(&b'/'),
11274        assignments,
11275    });
11276}
11277
11278fn push_attribute_assignments(
11279    assignments: &mut Vec<AttributeAssignment>,
11280    field: &[u8],
11281    macros: &BTreeMap<Vec<u8>, Vec<AttributeAssignment>>,
11282    source: &[u8],
11283    line_number: usize,
11284) {
11285    if let Some(macro_assignments) = macros.get(field) {
11286        assignments.push(AttributeAssignment {
11287            attribute: field.to_vec(),
11288            state: Some(AttributeState::Set),
11289        });
11290        assignments.extend(macro_assignments.iter().cloned());
11291        return;
11292    }
11293    if field == b"binary" {
11294        assignments.push(AttributeAssignment {
11295            attribute: b"binary".to_vec(),
11296            state: Some(AttributeState::Set),
11297        });
11298        assignments.push(AttributeAssignment {
11299            attribute: b"diff".to_vec(),
11300            state: Some(AttributeState::Unset),
11301        });
11302        assignments.push(AttributeAssignment {
11303            attribute: b"merge".to_vec(),
11304            state: Some(AttributeState::Unset),
11305        });
11306        assignments.push(AttributeAssignment {
11307            attribute: b"text".to_vec(),
11308            state: Some(AttributeState::Unset),
11309        });
11310        return;
11311    }
11312    if let Some(attribute) = field.strip_prefix(b"-") {
11313        if !attribute.is_empty() {
11314            if is_reserved_attribute_name(attribute) {
11315                report_invalid_attribute_name(attribute, source, line_number);
11316                return;
11317            }
11318            assignments.push(AttributeAssignment {
11319                attribute: attribute.to_vec(),
11320                state: Some(AttributeState::Unset),
11321            });
11322        }
11323        return;
11324    }
11325    if let Some(attribute) = field.strip_prefix(b"!") {
11326        if !attribute.is_empty() {
11327            if is_reserved_attribute_name(attribute) {
11328                report_invalid_attribute_name(attribute, source, line_number);
11329                return;
11330            }
11331            assignments.push(AttributeAssignment {
11332                attribute: attribute.to_vec(),
11333                state: None,
11334            });
11335        }
11336        return;
11337    }
11338    if let Some(equal) = field.iter().position(|byte| *byte == b'=') {
11339        let attribute = &field[..equal];
11340        let value = &field[equal + 1..];
11341        if !attribute.is_empty() {
11342            if is_reserved_attribute_name(attribute) {
11343                report_invalid_attribute_name(attribute, source, line_number);
11344                return;
11345            }
11346            assignments.push(AttributeAssignment {
11347                attribute: attribute.to_vec(),
11348                state: Some(AttributeState::Value(value.to_vec())),
11349            });
11350        }
11351        return;
11352    }
11353    if is_reserved_attribute_name(field) {
11354        report_invalid_attribute_name(field, source, line_number);
11355        return;
11356    }
11357    assignments.push(AttributeAssignment {
11358        attribute: field.to_vec(),
11359        state: Some(AttributeState::Set),
11360    });
11361}
11362
11363fn split_attribute_line(line: &[u8]) -> Option<(Vec<u8>, Vec<Vec<u8>>)> {
11364    let mut index = 0;
11365    while line.get(index).is_some_and(u8::is_ascii_whitespace) {
11366        index += 1;
11367    }
11368    if index == line.len() || line[index] == b'#' {
11369        return None;
11370    }
11371    let pattern = if line[index] == b'"' {
11372        match c_unquote_prefix(&line[index..]) {
11373            Some((pattern, consumed)) => {
11374                index += consumed;
11375                pattern
11376            }
11377            None => {
11378                let start = index;
11379                while index < line.len() && !line[index].is_ascii_whitespace() {
11380                    index += 1;
11381                }
11382                line[start..index].to_vec()
11383            }
11384        }
11385    } else {
11386        let start = index;
11387        while index < line.len() && !line[index].is_ascii_whitespace() {
11388            index += 1;
11389        }
11390        line[start..index].to_vec()
11391    };
11392    let fields = line[index..]
11393        .split(|byte| byte.is_ascii_whitespace())
11394        .filter(|field| !field.is_empty())
11395        .map(Vec::from)
11396        .collect();
11397    Some((pattern, fields))
11398}
11399
/// Decode a C-style double-quoted string found at the start of `input`.
///
/// Returns the decoded bytes together with the number of input bytes consumed
/// (both quotes included). Returns `None` when `input` does not start with
/// `"`, when the closing quote is missing, or when an octal escape is
/// malformed.
///
/// Recognised escapes:
///   * `\a \b \f \n \r \t \v` — the usual C control characters;
///   * `\NNN` — exactly three octal digits with the first in `0..=3`, which is
///     how git's C-style quoting spells non-ASCII bytes (e.g. `\303\244`);
///   * any other escaped byte (including `\"` and `\\`) stands for itself.
fn c_unquote_prefix(input: &[u8]) -> Option<(Vec<u8>, usize)> {
    if input.first() != Some(&b'"') {
        return None;
    }
    let mut out = Vec::new();
    let mut index = 1;
    while index < input.len() {
        match input[index] {
            b'"' => return Some((out, index + 1)),
            b'\\' if index + 1 < input.len() => {
                index += 1;
                let byte = match input[index] {
                    b'a' => 0x07,
                    b'b' => 0x08,
                    b'f' => 0x0c,
                    b'n' => b'\n',
                    b'r' => b'\r',
                    b't' => b'\t',
                    b'v' => 0x0b,
                    first @ b'0'..=b'3' => {
                        // Octal escape: two more octal digits must follow,
                        // otherwise the quoting is invalid and the caller
                        // falls back to the raw token.
                        let digits = input.get(index + 1..index + 3)?;
                        if !digits.iter().all(|digit| (b'0'..=b'7').contains(digit)) {
                            return None;
                        }
                        index += 2;
                        ((first - b'0') << 6) | ((digits[0] - b'0') << 3) | (digits[1] - b'0')
                    }
                    other => other,
                };
                out.push(byte);
            }
            // Also reached by a backslash that is the very last byte; the loop
            // then ends without a closing quote and `None` is returned.
            byte => out.push(byte),
        }
        index += 1;
    }
    None
}
11429
/// Returns true when `attribute` begins with the `builtin_` prefix.
fn is_reserved_attribute_name(attribute: &[u8]) -> bool {
    const RESERVED_PREFIX: &[u8] = b"builtin_";
    attribute.strip_prefix(RESERVED_PREFIX).is_some()
}
11433
/// Print an "is not a valid attribute name" diagnostic to stderr, naming the
/// offending attribute and the `source:line_number` where it was found.
/// Non-UTF-8 bytes in either argument are rendered lossily.
fn report_invalid_attribute_name(attribute: &[u8], source: &[u8], line_number: usize) {
    let name = String::from_utf8_lossy(attribute);
    let origin = String::from_utf8_lossy(source);
    eprintln!(
        "{} is not a valid attribute name: {}:{}",
        name, origin, line_number
    );
}
11442
/// Build the sort key for an attribute: `(rank, order, name)`.
///
/// `rank` is a fixed priority for a handful of well-known attributes
/// (`binary`, `diff`, `merge`, `text` sort first, `eol` last, everything else
/// in between). `order` is the attribute's entry in the `order` map, or
/// `usize::MAX` when it has none. The name itself is the final tie-breaker.
fn attribute_all_rank(
    attribute: &[u8],
    order: &BTreeMap<Vec<u8>, usize>,
) -> (usize, usize, Vec<u8>) {
    const DEFAULT_RANK: usize = 4;
    const FIXED_RANKS: [(&[u8], usize); 5] = [
        (b"binary", 0),
        (b"diff", 1),
        (b"merge", 2),
        (b"text", 3),
        (b"eol", 5),
    ];
    let rank = FIXED_RANKS
        .iter()
        .find(|(name, _)| *name == attribute)
        .map_or(DEFAULT_RANK, |(_, rank)| *rank);
    let position = order.get(attribute).map_or(usize::MAX, |slot| *slot);
    (rank, position, attribute.to_owned())
}
11458
/// Return `value` with leading and trailing ASCII whitespace removed.
/// An all-whitespace (or empty) input yields an empty slice.
fn trim_ascii_whitespace(value: &[u8]) -> &[u8] {
    let is_kept = |byte: &u8| !byte.is_ascii_whitespace();
    match value.iter().position(is_kept) {
        None => &value[value.len()..],
        Some(first) => {
            // A kept byte exists, so the reverse search cannot fail.
            let last = value.iter().rposition(is_kept).unwrap_or(first);
            &value[first..=last]
        }
    }
}
11468
11469impl AttributePattern {
11470    fn matches(&self, path: &[u8], ignore_case: bool) -> bool {
11471        let path = if self.base.is_empty() {
11472            path
11473        } else {
11474            match strip_attribute_base(path, &self.base, ignore_case) {
11475                Some(rest) => rest,
11476                None => return false,
11477            }
11478        };
11479        let folded_pattern;
11480        let folded_path;
11481        let (pattern_ref, path_ref) = if ignore_case {
11482            folded_path = ascii_lowercase(path);
11483            let pattern_ref = if let Some(pattern) = self.ignore_case_pattern.as_deref() {
11484                pattern
11485            } else {
11486                folded_pattern = ascii_lowercase(&self.pattern);
11487                folded_pattern.as_slice()
11488            };
11489            (pattern_ref, folded_path.as_slice())
11490        } else {
11491            (self.pattern.as_slice(), path)
11492        };
11493        if self.anchored || self.has_slash {
11494            return wildcard_path_matches(pattern_ref, path_ref);
11495        }
11496        path_ref
11497            .rsplit(|byte| *byte == b'/')
11498            .next()
11499            .is_some_and(|basename| wildcard_path_matches(pattern_ref, basename))
11500    }
11501}
11502
/// Strip the directory `base` plus its trailing `/` from the front of `path`.
///
/// Returns the remainder after the separator, or `None` when `path` is not
/// strictly inside `base` (too short, no `/` right after the base, or the
/// prefix differs). With `ignore_case` the prefix comparison is ASCII
/// case-insensitive.
fn strip_attribute_base<'a>(path: &'a [u8], base: &[u8], ignore_case: bool) -> Option<&'a [u8]> {
    // A `/` at index `base.len()` also proves the path is longer than the base.
    if path.get(base.len()) != Some(&b'/') {
        return None;
    }
    let (prefix, rest) = path.split_at(base.len());
    let same_base = if ignore_case {
        prefix.eq_ignore_ascii_case(base)
    } else {
        prefix == base
    };
    if same_base {
        Some(&rest[1..])
    } else {
        None
    }
}
11515
/// Return a copy of `value` with every ASCII uppercase letter lowercased;
/// all other bytes are copied unchanged.
fn ascii_lowercase(value: &[u8]) -> Vec<u8> {
    value.to_ascii_lowercase()
}
11519
11520// ---------------------------------------------------------------------------
11521// Content filtering on the blob <-> worktree boundary
11522//
11523// Git runs two kinds of conversion when content crosses between the worktree
11524// and the object database:
11525//
11526//   * the line-ending / `core.autocrlf` conversion (driven by the `text`,
11527//     `eol` attributes and the `core.autocrlf` / `core.eol` config), and
//   * the `filter.<name>.clean` / `.smudge` driver commands, or the
//     long-running `filter.<name>.process` filter (selected by the
//     `filter=<name>` attribute and the configured commands).
11530//
11531// "clean" runs on the way *into* the object store (worktree -> blob), e.g. on
11532// `git add` / `git hash-object -w`. "smudge" runs on the way *out* (blob ->
11533// worktree), e.g. on checkout / restore. The driver filter, when present,
11534// wraps the EOL conversion: on clean git first runs the configured `clean`
11535// command and then applies CRLF->LF normalization; on smudge git first applies
11536// LF->CRLF and then runs the `smudge` command.
11537// ---------------------------------------------------------------------------
11538
/// The line-ending conversion that applies to a path, derived from its
/// attributes and the repository config.
///
/// This only records the *direction*; whether any conversion runs at all is
/// decided together with the path's [`TextDecision`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EolConversion {
    /// No conversion: binary content, or text with `core.autocrlf=false` and no
    /// `eol`/`text=auto` request to add carriage returns.
    None,
    /// Normalize to LF on clean; no carriage returns on smudge (`eol=lf`, or
    /// `core.autocrlf=input`).
    Lf,
    /// Normalize to LF on clean; emit CRLF on smudge (`eol=crlf`, or
    /// `core.autocrlf=true`).
    Crlf,
}
11553
/// How git should decide whether a path is text for the purpose of EOL
/// conversion.
///
/// Only `Text` and `Auto` can lead to an EOL conversion; `Binary` and
/// `Unspecified` always leave the content untouched.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TextDecision {
    /// `-text` / `binary`: never convert.
    Binary,
    /// `text` is set explicitly: always treat as text.
    Text,
    /// `text=auto` (or implied by `core.autocrlf`): treat as text unless the
    /// content looks binary.
    Auto,
    /// No opinion from attributes or config: leave content untouched.
    Unspecified,
}
11568
/// The fully resolved set of conversions that apply to a single path.
///
/// Built by `ContentFilterPlan::resolve` from the path's attribute checks and
/// the repository config.
#[derive(Debug, Clone, PartialEq, Eq)]
struct ContentFilterPlan {
    /// Whether (and how) the path is considered text for EOL conversion.
    text: TextDecision,
    /// The conversion to apply when `text` resolves to "this is text".
    eol: EolConversion,
    /// Whether `$Id$` keyword collapse/expansion applies to this path.
    ident: bool,
    /// `filter.<name>` driver, if assigned via attributes and configured.
    driver: Option<FilterDriver>,
}
11580
/// A `filter=<name>` driver resolved against the `filter.<name>.*` config.
#[derive(Debug, Clone, PartialEq, Eq)]
struct FilterDriver {
    /// The driver name, i.e. the value of the `filter` attribute.
    name: Vec<u8>,
    /// Non-empty `filter.<name>.process` command, if configured.
    process: Option<String>,
    /// Non-empty `filter.<name>.clean` command, if configured.
    clean: Option<String>,
    /// Non-empty `filter.<name>.smudge` command, if configured.
    smudge: Option<String>,
    /// `filter.<name>.required`; false when the key is absent.
    required: bool,
}
11589
11590/// Decode one crlf-family attribute (`text` or its legacy alias `crlf`) into a
11591/// text decision, plus whether the value form forced an EOL direction.
11592///
11593/// Mirrors git's `git_path_check_crlf` (convert.c): a *set* attribute is text,
11594/// an *unset* one is binary, `=auto` is auto, `=input` forces LF while still
11595/// counting as text, and any other value is "undefined" — i.e. no opinion, so
11596/// the caller falls through to the next source (the `crlf` alias, then config).
11597fn decode_crlf_family_attribute(state: Option<&AttributeState>) -> (TextDecision, EolConversion) {
11598    match state {
11599        Some(AttributeState::Set) => (TextDecision::Text, EolConversion::None),
11600        Some(AttributeState::Unset) => (TextDecision::Binary, EolConversion::None),
11601        Some(AttributeState::Value(value)) if value == b"auto" => {
11602            (TextDecision::Auto, EolConversion::None)
11603        }
11604        // `crlf=input` / `text=input`: text content normalized to LF (no CR on
11605        // smudge), exactly like `core.autocrlf=input`.
11606        Some(AttributeState::Value(value)) if value == b"input" => {
11607            (TextDecision::Text, EolConversion::Lf)
11608        }
11609        // `=<other>` is CRLF_UNDEFINED in git for the `crlf` alias: no opinion.
11610        _ => (TextDecision::Unspecified, EolConversion::None),
11611    }
11612}
11613
11614impl ContentFilterPlan {
11615    /// Build the plan for `path` from the parsed attributes and repo config.
11616    fn resolve(config: &GitConfig, checks: &[AttributeCheck]) -> Self {
11617        let text_attr = checks.iter().find(|check| check.attribute == b"text");
11618        let crlf_attr = checks.iter().find(|check| check.attribute == b"crlf");
11619        let ident_attr = checks.iter().find(|check| check.attribute == b"ident");
11620        let eol_attr = checks.iter().find(|check| check.attribute == b"eol");
11621        let filter_attr = checks.iter().find(|check| check.attribute == b"filter");
11622
11623        // Resolve the eol attribute first; `eol=crlf|lf` also forces text.
11624        let eol_value = eol_attr.and_then(|check| match &check.state {
11625            Some(AttributeState::Value(value)) => Some(value.clone()),
11626            _ => None,
11627        });
11628
11629        // The `text` attribute decides first; only when it is unspecified does
11630        // git consult the legacy `crlf` alias (convert.c `convert_attrs`).
11631        let mut forced_eol = EolConversion::None;
11632        let mut text = match text_attr.map(|check| &check.state) {
11633            Some(Some(AttributeState::Set)) => TextDecision::Text,
11634            Some(Some(AttributeState::Unset)) => TextDecision::Binary,
11635            Some(Some(AttributeState::Value(value))) if value == b"auto" => TextDecision::Auto,
11636            Some(Some(AttributeState::Value(value))) if value == b"input" => {
11637                forced_eol = EolConversion::Lf;
11638                TextDecision::Text
11639            }
11640            // `text=<other>` is treated by git as a set text attribute.
11641            Some(Some(AttributeState::Value(_))) => TextDecision::Text,
11642            // `!text` (unspecified) or no text attribute: fall through to `crlf`.
11643            _ => {
11644                let (decision, eol) =
11645                    decode_crlf_family_attribute(crlf_attr.and_then(|check| check.state.as_ref()));
11646                forced_eol = eol;
11647                decision
11648            }
11649        };
11650
11651        // A concrete `eol` attribute implies the path is text even when `text`
11652        // was left unspecified (git: `eol` without `text` is treated as
11653        // `text=auto`-ish; upstream forces conversion). We honour eol only when
11654        // text is not explicitly binary.
11655        let eol = match (&text, eol_value.as_deref()) {
11656            (TextDecision::Binary, _) => EolConversion::None,
11657            (_, Some(b"crlf")) => {
11658                if text == TextDecision::Unspecified {
11659                    text = TextDecision::Text;
11660                }
11661                EolConversion::Crlf
11662            }
11663            (_, Some(b"lf")) => {
11664                if text == TextDecision::Unspecified {
11665                    text = TextDecision::Text;
11666                }
11667                EolConversion::Lf
11668            }
11669            // No explicit `eol` attribute, but `text=input`/`crlf=input` already
11670            // forced the LF direction (git's CRLF_TEXT_INPUT). Honour it over the
11671            // config-derived default.
11672            _ if forced_eol == EolConversion::Lf => EolConversion::Lf,
11673            // No eol attribute: derive direction from config.
11674            _ => eol_from_config(config),
11675        };
11676
11677        // When the path is text but neither `eol` nor `core.autocrlf`/`core.eol`
11678        // asked for carriage returns, we still normalize to LF on clean. That is
11679        // modelled by `EolConversion::Lf` (clean strips CR, smudge adds none).
11680        let eol = match (&text, eol) {
11681            (TextDecision::Text | TextDecision::Auto, EolConversion::None) => EolConversion::Lf,
11682            (_, eol) => eol,
11683        };
11684
11685        // If config does not enable autocrlf and there is no eol/text opinion,
11686        // there is genuinely nothing to do.
11687        let text = match (text, eol_attr.is_some()) {
11688            (TextDecision::Unspecified, _) => {
11689                // Without any text/eol attribute, only `core.autocrlf` can make a
11690                // path eligible, and then it behaves like `text=auto`.
11691                if autocrlf_enabled(config) {
11692                    TextDecision::Auto
11693                } else {
11694                    TextDecision::Unspecified
11695                }
11696            }
11697            (text, _) => text,
11698        };
11699
11700        let driver = resolve_filter_driver(config, filter_attr);
11701        let ident = matches!(
11702            ident_attr.and_then(|check| check.state.as_ref()),
11703            Some(AttributeState::Set)
11704        );
11705
11706        ContentFilterPlan {
11707            text,
11708            eol,
11709            ident,
11710            driver,
11711        }
11712    }
11713
11714    /// Whether EOL conversion should run for the given content.
11715    fn convert_eol(&self, content: &[u8]) -> bool {
11716        match self.text {
11717            TextDecision::Binary | TextDecision::Unspecified => false,
11718            TextDecision::Text => self.eol != EolConversion::None,
11719            // `text=auto`: only when the blob does not look binary.
11720            TextDecision::Auto => self.eol != EolConversion::None && !looks_binary(content),
11721        }
11722    }
11723
11724    /// The smudge-side LF->CRLF safety check, mirroring convert.c
11725    /// `will_convert_lf_to_crlf`. Returns false (no conversion) when:
11726    ///   * there is no naked LF to convert, or
11727    ///   * the action is `text=auto`-derived (the "new safer autocrlf") AND the
11728    ///     content already contains a lone CR or a CRLF pair, or looks binary.
11729    ///
11730    /// An explicit `text`/`eol=crlf` (non-auto) path always converts naked LFs.
11731    fn will_convert_lf_to_crlf(&self, content: &[u8]) -> bool {
11732        self.will_convert_lf_to_crlf_stats(&gather_convert_stats(content))
11733    }
11734
11735    /// Stats-based variant of [`will_convert_lf_to_crlf`], mirroring convert.c
11736    /// `will_convert_lf_to_crlf(struct text_stat *, ...)`. Used by the safecrlf
11737    /// round-trip simulation, which mutates a copy of the stats rather than
11738    /// re-scanning the buffer.
11739    fn will_convert_lf_to_crlf_stats(&self, stats: &ConvertStats) -> bool {
11740        // `output_eol(crlf_action) != EOL_CRLF` short-circuits in git.
11741        if self.eol != EolConversion::Crlf {
11742            return false;
11743        }
11744        // No naked LF? Nothing to convert.
11745        if stats.lonelf == 0 {
11746            return false;
11747        }
11748        if self.text == TextDecision::Auto {
11749            // Any CR or CRLF already present: leave it untouched (irreversible).
11750            if stats.lonecr > 0 || stats.crlf > 0 {
11751                return false;
11752            }
11753            if convert_is_binary(stats) {
11754                return false;
11755            }
11756        }
11757        true
11758    }
11759
11760    /// Whether this path is a candidate for the `core.safecrlf` round-trip check
11761    /// at all: git only warns for non-`CRLF_BINARY` actions. `Binary` and
11762    /// `Unspecified` (with autocrlf off) correspond to git's `CRLF_BINARY`.
11763    fn safecrlf_applies(&self) -> bool {
11764        matches!(self.text, TextDecision::Text | TextDecision::Auto)
11765    }
11766
11767    /// Emit git's `core.safecrlf` round-trip warning for `path`, mirroring the
11768    /// stderr side-effect of convert.c `crlf_to_git` (the `CONV_EOL_RNDTRP_*`
11769    /// branch). `old_stats` are the stats of the *pre-conversion* worktree
11770    /// content (already gathered by the caller so the buffer is scanned once);
11771    /// `index_has_crlf` is whether the path's current index blob already has a
11772    /// CRLF (git's `has_crlf_in_index`, used only for the auto-crlf decision).
11773    ///
11774    /// This never inspects or alters the bytes written to the object store; it is
11775    /// purely the additive warning git prints alongside `git add`/`commit`.
11776    /// Returns `Err` only under `core.safecrlf=true` when the round-trip is
11777    /// irreversible (git `die`s).
11778    fn check_safe_crlf_stats(
11779        &self,
11780        old_stats: &ConvertStats,
11781        index_has_crlf: bool,
11782        flags: ConvFlags,
11783        path: &[u8],
11784    ) -> Result<()> {
11785        if flags == ConvFlags::Off || !self.safecrlf_applies() {
11786            return Ok(());
11787        }
11788
11789        // Replicate `crlf_to_git`'s `convert_crlf_into_lf` decision (the clean
11790        // direction). It starts as "there is a CRLF to collapse"; auto paths
11791        // suppress conversion for binary content or content whose index blob
11792        // already carries a CRLF (the "new safer autocrlf").
11793        let mut convert_crlf_into_lf = old_stats.crlf > 0;
11794        if self.text == TextDecision::Auto {
11795            if convert_is_binary(old_stats) {
11796                // git returns 0 here: no conversion *and* no warning.
11797                return Ok(());
11798            }
11799            if index_has_crlf {
11800                convert_crlf_into_lf = false;
11801            }
11802        }
11803
11804        // Simulate the round-trip on a copy of the stats.
11805        let mut new_stats = old_stats.clone();
11806        // Simulate "git add" (clean: CRLF -> LF).
11807        if convert_crlf_into_lf {
11808            new_stats.lonelf += new_stats.crlf;
11809            new_stats.crlf = 0;
11810        }
11811        // Simulate "git checkout" (smudge: LF -> CRLF).
11812        if self.will_convert_lf_to_crlf_stats(&new_stats) {
11813            new_stats.crlf += new_stats.lonelf;
11814            new_stats.lonelf = 0;
11815        }
11816        check_safe_crlf(old_stats, &new_stats, flags, path)
11817    }
11818}
11819
11820/// Derive the smudge-direction line ending from `core.autocrlf` / `core.eol`.
11821fn eol_from_config(config: &GitConfig) -> EolConversion {
11822    if let Some(value) = config.get("core", None, "autocrlf") {
11823        match value.to_ascii_lowercase().as_str() {
11824            "input" => return EolConversion::Lf,
11825            "true" | "yes" | "on" | "1" => return EolConversion::Crlf,
11826            _ => {}
11827        }
11828    }
11829    if config.get_bool("core", None, "autocrlf") == Some(true) {
11830        return EolConversion::Crlf;
11831    }
11832    match config
11833        .get("core", None, "eol")
11834        .map(|v| v.to_ascii_lowercase())
11835    {
11836        Some(ref v) if v == "crlf" => EolConversion::Crlf,
11837        Some(ref v) if v == "lf" => EolConversion::Lf,
11838        _ => EolConversion::None,
11839    }
11840}
11841
11842/// Whether `core.autocrlf` is set to anything that enables conversion
11843/// (`true` or `input`).
11844fn autocrlf_enabled(config: &GitConfig) -> bool {
11845    if let Some(value) = config.get("core", None, "autocrlf")
11846        && value.eq_ignore_ascii_case("input")
11847    {
11848        return true;
11849    }
11850    config.get_bool("core", None, "autocrlf") == Some(true)
11851}
11852
11853/// Resolve the `filter=<name>` attribute against `filter.<name>.*` config.
11854fn resolve_filter_driver(
11855    config: &GitConfig,
11856    filter_attr: Option<&AttributeCheck>,
11857) -> Option<FilterDriver> {
11858    let name = match filter_attr.map(|check| &check.state) {
11859        Some(Some(AttributeState::Value(value))) => value.clone(),
11860        // `filter` set/unset without a value selects no driver.
11861        _ => return None,
11862    };
11863    let subsection = String::from_utf8_lossy(&name).into_owned();
11864    let process = filter_config_value(config, &subsection, "process").filter(|cmd| !cmd.is_empty());
11865    let clean = filter_config_value(config, &subsection, "clean").filter(|cmd| !cmd.is_empty());
11866    let smudge = filter_config_value(config, &subsection, "smudge").filter(|cmd| !cmd.is_empty());
11867    let required = filter_config_bool(config, &subsection, "required").unwrap_or(false);
11868    // A filter with neither command and not required is a no-op.
11869    if process.is_none() && clean.is_none() && smudge.is_none() && !required {
11870        return None;
11871    }
11872    Some(FilterDriver {
11873        name,
11874        process,
11875        clean,
11876        smudge,
11877        required,
11878    })
11879}
11880
11881fn filter_config_value(config: &GitConfig, subsection: &str, key: &str) -> Option<String> {
11882    config
11883        .get("filter", Some(subsection), key)
11884        .map(str::to_owned)
11885        .or_else(|| global_filter_config_value(subsection, key))
11886}
11887
11888fn filter_config_bool(config: &GitConfig, subsection: &str, key: &str) -> Option<bool> {
11889    config
11890        .get_bool("filter", Some(subsection), key)
11891        .or_else(|| {
11892            global_filter_config_value(subsection, key)
11893                .as_deref()
11894                .and_then(sley_config::parse_config_bool)
11895        })
11896}
11897
11898fn global_filter_config_value(subsection: &str, key: &str) -> Option<String> {
11899    for (path, _) in sley_config::default_config_layer_paths().into_iter().rev() {
11900        let Ok(config) = GitConfig::read(path) else {
11901            continue;
11902        };
11903        if let Some(value) = config.get("filter", Some(subsection), key) {
11904            return Some(value.to_owned());
11905        }
11906    }
11907    None
11908}
11909
/// Binary sniffing in the style of git's `buffer_is_binary`: the content
/// counts as binary when any of its first 8000 bytes is NUL.
fn looks_binary(content: &[u8]) -> bool {
    const FIRST_FEW_BYTES: usize = 8000;
    content
        .iter()
        .take(FIRST_FEW_BYTES)
        .any(|byte| *byte == 0)
}
11917
/// Collapse every CRLF pair to a bare LF.
///
/// Only a CR that is immediately followed by LF is removed; a lone CR stays
/// in place. Content without any CRLF pair is handed back unchanged, without
/// allocating.
fn convert_crlf_to_lf_cow(content: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
    let has_crlf = content
        .windows(2)
        .any(|pair| pair[0] == b'\r' && pair[1] == b'\n');
    if !has_crlf {
        return content;
    }
    let mut out = Vec::with_capacity(content.len());
    let mut bytes = content.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        // Skip a CR only when the very next byte is LF.
        if byte == b'\r' && bytes.peek() == Some(&b'\n') {
            continue;
        }
        out.push(byte);
    }
    Cow::Owned(out)
}
11939
/// Turn every bare LF into CRLF.
///
/// An LF that already follows a CR is copied as-is, so existing CRLF pairs
/// are never doubled up.
fn convert_lf_to_crlf(content: &[u8]) -> Vec<u8> {
    let mut out = Vec::with_capacity(content.len() + content.len() / 16);
    for (position, &byte) in content.iter().enumerate() {
        let follows_cr = position > 0 && content[position - 1] == b'\r';
        if byte == b'\n' && !follows_cr {
            out.push(b'\r');
        }
        out.push(byte);
    }
    out
}
11954
11955/// Collapse git `$Id: ... $` keywords to `$Id$` on the clean path.
11956fn ident_to_git_cow(content: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
11957    let input = content.as_ref();
11958    if !has_git_ident(input) {
11959        return content;
11960    }
11961    let mut out = Vec::with_capacity(input.len());
11962    let mut pos = 0;
11963    while let Some(relative) = input[pos..].iter().position(|byte| *byte == b'$') {
11964        let dollar = pos + relative;
11965        out.extend_from_slice(&input[pos..=dollar]);
11966        pos = dollar + 1;
11967        if input.len().saturating_sub(pos) > 3 && input[pos..].starts_with(b"Id:") {
11968            let search = &input[pos + 3..];
11969            let Some(end_relative) = search.iter().position(|byte| *byte == b'$') else {
11970                break;
11971            };
11972            let end = pos + 3 + end_relative;
11973            if input[pos + 3..end].contains(&b'\n') {
11974                continue;
11975            }
11976            out.extend_from_slice(b"Id$");
11977            pos = end + 1;
11978        }
11979    }
11980    out.extend_from_slice(&input[pos..]);
11981    Cow::Owned(out)
11982}
11983
11984/// Expand `$Id$` and git-style `$Id: <hex> $` keywords using the blob id of the
11985/// unexpanded content, matching convert.c's ident_to_worktree.
11986fn ident_to_worktree_cow(format: ObjectFormat, content: Cow<'_, [u8]>) -> Result<Cow<'_, [u8]>> {
11987    let input = content.as_ref();
11988    if !has_git_ident(input) {
11989        return Ok(content);
11990    }
11991    let oid = EncodedObject::new(ObjectType::Blob, input.to_vec()).object_id(format)?;
11992    let replacement = format!("Id: {} $", oid.to_hex());
11993    let mut out = Vec::with_capacity(input.len() + replacement.len());
11994    let mut pos = 0;
11995    while let Some(relative) = input[pos..].iter().position(|byte| *byte == b'$') {
11996        let dollar = pos + relative;
11997        out.extend_from_slice(&input[pos..=dollar]);
11998        pos = dollar + 1;
11999        if input.len().saturating_sub(pos) < 3 || !input[pos..].starts_with(b"Id") {
12000            continue;
12001        }
12002        match input.get(pos + 2) {
12003            Some(b'$') => {
12004                pos += 3;
12005            }
12006            Some(b':') => {
12007                let search = &input[pos + 3..];
12008                let Some(end_relative) = search.iter().position(|byte| *byte == b'$') else {
12009                    break;
12010                };
12011                let end = pos + 3 + end_relative;
12012                if input[pos + 3..end].contains(&b'\n') || is_foreign_ident(&input[pos + 3..end]) {
12013                    continue;
12014                }
12015                pos = end + 1;
12016            }
12017            _ => continue,
12018        }
12019        out.extend_from_slice(replacement.as_bytes());
12020    }
12021    out.extend_from_slice(&input[pos..]);
12022    Ok(Cow::Owned(out))
12023}
12024
/// Report whether `content` contains at least one ident keyword: a bare
/// `$Id$`, or an expanded `$Id: ... $` whose expansion has no newline before
/// the closing `$` (the same candidates git's `count_ident` counts).
///
/// A `$` with fewer than three bytes after it ends the scan, as does a
/// `$Id:` with no later `$` at all.
fn has_git_ident(content: &[u8]) -> bool {
    let mut pos = 0;
    while let Some(relative) = content[pos..].iter().position(|byte| *byte == b'$') {
        let start = pos + relative + 1;
        if content.len().saturating_sub(start) < 3 {
            break;
        }
        if !content[start..].starts_with(b"Id") {
            pos = start;
            continue;
        }
        match content.get(start + 2) {
            Some(b'$') => return true,
            Some(b':') => {
                let search = &content[start + 3..];
                let Some(end_relative) = search.iter().position(|byte| *byte == b'$') else {
                    break;
                };
                let end = start + 3 + end_relative;
                if !content[start + 3..end].contains(&b'\n') {
                    return true;
                }
                // A line break disqualifies this candidate. Resume AT the `$`
                // we just found rather than after it: that `$` may itself open
                // the next keyword (e.g. "$Id: x\n$Id$").
                pos = end;
            }
            _ => pos = start,
        }
    }
    false
}
12054
/// Returns true when `expansion` has a space anywhere other than its first
/// or last byte. Inputs shorter than two bytes are never foreign.
fn is_foreign_ident(expansion: &[u8]) -> bool {
    match expansion {
        [_, interior @ .., _] => interior.contains(&b' '),
        _ => false,
    }
}
12061
12062/// Run a configured `clean`/`smudge` command as a subprocess, feeding `content`
12063/// on stdin and returning its stdout. Errors carry enough context for the
12064/// caller to decide whether the failure is fatal (required filter) or should be
12065/// silently ignored (optional filter passthrough).
12066fn run_filter_command(command: &str, path: &[u8], content: &[u8]) -> Result<Vec<u8>> {
12067    // Git expands `%f` in the filter command to the path of the file being
12068    // filtered (quoted). We perform the same substitution.
12069    let display_path = String::from_utf8_lossy(path);
12070    let expanded = command.replace("%f", &shell_quote(&display_path));
12071    // Run through the platform shell so pipelines / arguments in the configured
12072    // command behave the same way git's `run_command`-with-shell does.
12073    let (shell, flag) = if cfg!(windows) {
12074        ("cmd", "/C")
12075    } else {
12076        ("/bin/sh", "-c")
12077    };
12078    let mut child = Command::new(shell)
12079        .arg(flag)
12080        .arg(&expanded)
12081        .stdin(Stdio::piped())
12082        .stdout(Stdio::piped())
12083        .stderr(Stdio::piped())
12084        .spawn()
12085        .map_err(|err| GitError::Command(format!("failed to spawn filter `{command}`: {err}")))?;
12086    // Write the content to the child's stdin on a separate thread so we never
12087    // deadlock against a filter that streams output before consuming all input.
12088    let mut stdin = child
12089        .stdin
12090        .take()
12091        .ok_or_else(|| GitError::Command(format!("filter `{command}` stdin unavailable")))?;
12092    let payload = content.to_vec();
12093    let writer = std::thread::spawn(move || {
12094        let _ = stdin.write_all(&payload);
12095        // Dropping `stdin` here closes the pipe so the child sees EOF.
12096    });
12097    let output = child
12098        .wait_with_output()
12099        .map_err(|err| GitError::Command(format!("filter `{command}` failed: {err}")))?;
12100    // Join the writer; its own errors (e.g. broken pipe) are non-fatal because
12101    // the child's exit status is the authoritative signal.
12102    let _ = writer.join();
12103    if !output.status.success() {
12104        let stderr = String::from_utf8_lossy(&output.stderr);
12105        return Err(GitError::Command(format!(
12106            "filter `{command}` exited with {}: {}",
12107            output.status,
12108            stderr.trim()
12109        )));
12110    }
12111    Ok(output.stdout)
12112}
12113
// Capability bits for a process filter. Each constant is a distinct bit so
// they can be combined in one `u8` (presumably `ProcessFilter::capabilities`
// — confirm against the handshake code).
const PROCESS_CAP_CLEAN: u8 = 1;
const PROCESS_CAP_SMUDGE: u8 = 1 << 1;
const PROCESS_CAP_DELAY: u8 = 1 << 2;
// NOTE(review): looks like the largest pkt-line payload (65520 minus the
// 4-byte length header) — confirm against the packet writer.
const PKT_DATA_MAX: usize = 65_516;

/// Lazily initialised, process-wide table of started process filters, keyed
/// by the filter command string.
static PROCESS_FILTERS: OnceLock<Mutex<HashMap<String, ProcessFilter>>> = OnceLock::new();
/// Key/value pairs of process-filter metadata.
type ProcessFilterMetadata = Vec<(String, String)>;
/// Lazily initialised, process-wide slot holding the current metadata; it is
/// replaced by `set_process_filter_metadata` and restored when the returned
/// guard is dropped.
static PROCESS_FILTER_METADATA: OnceLock<Mutex<Option<ProcessFilterMetadata>>> = OnceLock::new();
12122
/// Scope guard returned by `set_process_filter_metadata`; dropping it puts
/// the previously active metadata back into `PROCESS_FILTER_METADATA`.
struct ProcessFilterMetadataGuard {
    /// The metadata that was active before this guard was created.
    previous: Option<ProcessFilterMetadata>,
}
12126
12127impl Drop for ProcessFilterMetadataGuard {
12128    fn drop(&mut self) {
12129        if let Ok(mut guard) = PROCESS_FILTER_METADATA
12130            .get_or_init(|| Mutex::new(None))
12131            .lock()
12132        {
12133            *guard = self.previous.take();
12134        }
12135    }
12136}
12137
12138fn set_process_filter_metadata(
12139    metadata: Option<ProcessFilterMetadata>,
12140) -> ProcessFilterMetadataGuard {
12141    let mutex = PROCESS_FILTER_METADATA.get_or_init(|| Mutex::new(None));
12142    let previous = mutex
12143        .lock()
12144        .map(|mut guard| std::mem::replace(&mut *guard, metadata))
12145        .unwrap_or(None);
12146    ProcessFilterMetadataGuard { previous }
12147}
12148
12149fn current_process_filter_metadata() -> Option<ProcessFilterMetadata> {
12150    PROCESS_FILTER_METADATA
12151        .get_or_init(|| Mutex::new(None))
12152        .lock()
12153        .ok()
12154        .and_then(|guard| guard.clone())
12155}
12156
/// A running filter process together with the pipes used to talk to it.
///
/// Instances are created by `ProcessFilter::start` and cached in
/// `PROCESS_FILTERS`; dropping one kills and waits for the child.
struct ProcessFilter {
    /// Handle to the spawned shell running the filter command.
    child: Child,
    /// Pipe carrying pkt-line requests to the filter.
    stdin: ChildStdin,
    /// Pipe carrying pkt-line responses from the filter.
    stdout: ChildStdout,
    /// Bitwise OR of the `PROCESS_CAP_*` flags the filter advertised.
    capabilities: u8,
}
12163
/// Result of one successfully exchanged request with a filter process.
enum ProcessFilterOutcome {
    /// The filter reported success; carries the converted content.
    Filtered(Vec<u8>),
    /// The filter did not advertise the capability for the requested
    /// direction, so no request was sent.
    Unsupported,
    /// The filter answered with a non-success status (`error`, `abort` or
    /// `delayed`); carries that status string.
    Status(String),
}
12169
/// Failure raised while starting or talking to a filter process.
struct ProcessFilterFailure {
    /// Human-readable description of what went wrong.
    message: String,
    /// True when the pipe/protocol state can no longer be trusted.
    /// `run_process_filter` evicts the cached process when this is set, and
    /// `run_driver` prints an `error:` line for it.
    protocol: bool,
}
12174
12175impl ProcessFilterFailure {
12176    fn protocol(message: impl Into<String>) -> Self {
12177        Self {
12178            message: message.into(),
12179            protocol: true,
12180        }
12181    }
12182}
12183
12184fn run_process_filter(
12185    command: &str,
12186    direction: &str,
12187    path: &[u8],
12188    content: &[u8],
12189    blob: Option<ObjectId>,
12190) -> std::result::Result<ProcessFilterOutcome, ProcessFilterFailure> {
12191    let filters = PROCESS_FILTERS.get_or_init(|| Mutex::new(HashMap::new()));
12192    let mut filters = filters
12193        .lock()
12194        .map_err(|_| ProcessFilterFailure::protocol("process filter cache poisoned"))?;
12195    if !filters.contains_key(command) {
12196        let filter = ProcessFilter::start(command)?;
12197        filters.insert(command.to_string(), filter);
12198    }
12199    let result = filters
12200        .get_mut(command)
12201        .expect("process filter was inserted")
12202        .apply(direction, path, content, blob);
12203    if result.as_ref().is_err_and(|err| err.protocol) {
12204        filters.remove(command);
12205    }
12206    result
12207}
12208
12209impl ProcessFilter {
12210    fn start(command: &str) -> std::result::Result<Self, ProcessFilterFailure> {
12211        let (shell, flag) = if cfg!(windows) {
12212            ("cmd", "/C")
12213        } else {
12214            ("/bin/sh", "-c")
12215        };
12216        let mut child = Command::new(shell)
12217            .arg(flag)
12218            .arg(command)
12219            .stdin(Stdio::piped())
12220            .stdout(Stdio::piped())
12221            .stderr(Stdio::inherit())
12222            .spawn()
12223            .map_err(|err| {
12224                ProcessFilterFailure::protocol(format!(
12225                    "cannot fork to run subprocess '{command}': {err}"
12226                ))
12227            })?;
12228        let mut stdin = child
12229            .stdin
12230            .take()
12231            .ok_or_else(|| ProcessFilterFailure::protocol("process filter stdin unavailable"))?;
12232        let mut stdout = child
12233            .stdout
12234            .take()
12235            .ok_or_else(|| ProcessFilterFailure::protocol("process filter stdout unavailable"))?;
12236
12237        write_pkt_text(&mut stdin, "git-filter-client\n")?;
12238        write_pkt_text(&mut stdin, "version=2\n")?;
12239        write_flush(&mut stdin)?;
12240
12241        let line = read_pkt_text(&mut stdout)?.ok_or_else(|| {
12242            ProcessFilterFailure::protocol(
12243                "Unexpected line '<flush packet>', expected git-filter-server",
12244            )
12245        })?;
12246        if line != "git-filter-server" {
12247            return Err(ProcessFilterFailure::protocol(format!(
12248                "Unexpected line '{line}', expected git-filter-server"
12249            )));
12250        }
12251        let line = read_pkt_text(&mut stdout)?.ok_or_else(|| {
12252            ProcessFilterFailure::protocol("Unexpected line '<flush packet>', expected version")
12253        })?;
12254        if line != "version=2" {
12255            return Err(ProcessFilterFailure::protocol(format!(
12256                "Unexpected line '{line}', expected version"
12257            )));
12258        }
12259        if let Some(line) = read_pkt_text(&mut stdout)? {
12260            return Err(ProcessFilterFailure::protocol(format!(
12261                "Unexpected line '{line}', expected flush"
12262            )));
12263        }
12264
12265        write_pkt_text(&mut stdin, "capability=clean\n")?;
12266        write_pkt_text(&mut stdin, "capability=smudge\n")?;
12267        write_pkt_text(&mut stdin, "capability=delay\n")?;
12268        write_flush(&mut stdin)?;
12269
12270        let mut capabilities = 0;
12271        while let Some(line) = read_pkt_text(&mut stdout)? {
12272            match line.as_str() {
12273                "capability=clean" => capabilities |= PROCESS_CAP_CLEAN,
12274                "capability=smudge" => capabilities |= PROCESS_CAP_SMUDGE,
12275                "capability=delay" => capabilities |= PROCESS_CAP_DELAY,
12276                _ => {}
12277            }
12278        }
12279
12280        Ok(Self {
12281            child,
12282            stdin,
12283            stdout,
12284            capabilities,
12285        })
12286    }
12287
12288    fn apply(
12289        &mut self,
12290        direction: &str,
12291        path: &[u8],
12292        content: &[u8],
12293        blob: Option<ObjectId>,
12294    ) -> std::result::Result<ProcessFilterOutcome, ProcessFilterFailure> {
12295        let wanted = match direction {
12296            "clean" => PROCESS_CAP_CLEAN,
12297            "smudge" => PROCESS_CAP_SMUDGE,
12298            _ => 0,
12299        };
12300        if self.capabilities & wanted == 0 {
12301            return Ok(ProcessFilterOutcome::Unsupported);
12302        }
12303
12304        write_pkt_text(&mut self.stdin, &format!("command={direction}\n"))?;
12305        write_pkt_text(
12306            &mut self.stdin,
12307            &format!("pathname={}\n", String::from_utf8_lossy(path)),
12308        )?;
12309        if direction == "smudge"
12310            && let Some(blob) = blob
12311        {
12312            if let Some(metadata) = current_process_filter_metadata() {
12313                for (key, value) in metadata {
12314                    write_pkt_text(&mut self.stdin, &format!("{key}={value}\n"))?;
12315                }
12316            }
12317            write_pkt_text(&mut self.stdin, &format!("blob={}\n", blob.to_hex()))?;
12318        }
12319        write_flush(&mut self.stdin)?;
12320        write_pkt_content(&mut self.stdin, content)?;
12321        write_flush(&mut self.stdin)?;
12322
12323        let mut status = read_process_status(&mut self.stdout)?.unwrap_or_default();
12324        match status.as_str() {
12325            "success" => {}
12326            "error" | "abort" | "delayed" => return Ok(ProcessFilterOutcome::Status(status)),
12327            other => {
12328                return Err(ProcessFilterFailure::protocol(format!(
12329                    "external filter returned unsupported status '{other}'"
12330                )));
12331            }
12332        }
12333
12334        let output = read_pkt_content(&mut self.stdout)?;
12335        if let Some(next) = read_process_status(&mut self.stdout)? {
12336            status = next;
12337        }
12338        match status.as_str() {
12339            "" | "success" => Ok(ProcessFilterOutcome::Filtered(output)),
12340            "error" | "abort" | "delayed" => Ok(ProcessFilterOutcome::Status(status)),
12341            other => Err(ProcessFilterFailure::protocol(format!(
12342                "external filter returned unsupported status '{other}'"
12343            ))),
12344        }
12345    }
12346}
12347
12348impl Drop for ProcessFilter {
12349    fn drop(&mut self) {
12350        let _ = self.stdin.flush();
12351        let _ = self.child.kill();
12352        let _ = self.child.wait();
12353    }
12354}
12355
12356fn write_pkt_text(
12357    writer: &mut ChildStdin,
12358    text: &str,
12359) -> std::result::Result<(), ProcessFilterFailure> {
12360    write_pkt_data(writer, text.as_bytes())
12361}
12362
12363fn write_pkt_content(
12364    writer: &mut ChildStdin,
12365    content: &[u8],
12366) -> std::result::Result<(), ProcessFilterFailure> {
12367    for chunk in content.chunks(PKT_DATA_MAX) {
12368        write_pkt_data(writer, chunk)?;
12369    }
12370    Ok(())
12371}
12372
12373fn write_pkt_data(
12374    writer: &mut ChildStdin,
12375    data: &[u8],
12376) -> std::result::Result<(), ProcessFilterFailure> {
12377    let len = data.len() + 4;
12378    write!(writer, "{len:04x}")
12379        .and_then(|_| writer.write_all(data))
12380        .map_err(|err| {
12381            ProcessFilterFailure::protocol(format!("process filter write failed: {err}"))
12382        })
12383}
12384
12385fn write_flush(writer: &mut ChildStdin) -> std::result::Result<(), ProcessFilterFailure> {
12386    writer
12387        .write_all(b"0000")
12388        .and_then(|_| writer.flush())
12389        .map_err(|err| {
12390            ProcessFilterFailure::protocol(format!("process filter write failed: {err}"))
12391        })
12392}
12393
12394fn read_pkt_text(
12395    reader: &mut ChildStdout,
12396) -> std::result::Result<Option<String>, ProcessFilterFailure> {
12397    let Some(mut data) = read_pkt_data(reader)? else {
12398        return Ok(None);
12399    };
12400    if data.last() == Some(&b'\n') {
12401        data.pop();
12402    }
12403    Ok(Some(String::from_utf8_lossy(&data).into_owned()))
12404}
12405
12406fn read_pkt_content(
12407    reader: &mut ChildStdout,
12408) -> std::result::Result<Vec<u8>, ProcessFilterFailure> {
12409    let mut out = Vec::new();
12410    while let Some(data) = read_pkt_data(reader)? {
12411        out.extend_from_slice(&data);
12412    }
12413    Ok(out)
12414}
12415
12416fn read_pkt_data(
12417    reader: &mut ChildStdout,
12418) -> std::result::Result<Option<Vec<u8>>, ProcessFilterFailure> {
12419    let mut header = [0u8; 4];
12420    reader.read_exact(&mut header).map_err(|err| {
12421        ProcessFilterFailure::protocol(format!("process filter read failed: {err}"))
12422    })?;
12423    let header = std::str::from_utf8(&header)
12424        .map_err(|err| ProcessFilterFailure::protocol(format!("invalid pkt-line header: {err}")))?;
12425    let len = usize::from_str_radix(header, 16)
12426        .map_err(|err| ProcessFilterFailure::protocol(format!("invalid pkt-line length: {err}")))?;
12427    if len == 0 {
12428        return Ok(None);
12429    }
12430    if len < 4 {
12431        return Err(ProcessFilterFailure::protocol(format!(
12432            "invalid pkt-line length {len}"
12433        )));
12434    }
12435    let mut data = vec![0; len - 4];
12436    reader.read_exact(&mut data).map_err(|err| {
12437        ProcessFilterFailure::protocol(format!("process filter read failed: {err}"))
12438    })?;
12439    Ok(Some(data))
12440}
12441
12442fn read_process_status(
12443    reader: &mut ChildStdout,
12444) -> std::result::Result<Option<String>, ProcessFilterFailure> {
12445    let mut status = None;
12446    while let Some(line) = read_pkt_text(reader)? {
12447        if let Some(value) = line.strip_prefix("status=") {
12448            status = Some(value.to_string());
12449        }
12450    }
12451    Ok(status)
12452}
12453
/// Wrap `value` in POSIX single quotes so it can be substituted for `%f` in a
/// driver filter's shell command.
///
/// Every embedded `'` becomes `'\''` (close the quote, emit an escaped quote,
/// reopen); all other characters are copied verbatim.
fn shell_quote(value: &str) -> String {
    let escaped = value.replace('\'', "'\\''");
    format!("'{escaped}'")
}
12469
12470/// Apply the *clean* conversion to `content` for `path` (worktree -> blob):
12471/// first the configured `filter.<name>.clean` driver (if any), then CRLF->LF
12472/// normalization when EOL conversion applies.
12473///
12474/// `config` is the repository config (`GitConfig`) and `path` is the
12475/// repository-relative path of the file (forward-slash separated, e.g.
12476/// `src/main.rs`). When no filter or EOL conversion applies the input is
12477/// returned unchanged.
12478///
12479/// A *required* driver (`filter.<name>.required=true`) whose `clean` command is
12480/// missing or fails produces a [`GitError::Command`]; a non-required driver
12481/// failure (or absence of a `clean` command) passes the content through
12482/// unfiltered, matching git.
12483pub fn apply_clean_filter(
12484    worktree_root: impl AsRef<Path>,
12485    git_dir: impl AsRef<Path>,
12486    config: &GitConfig,
12487    path: &[u8],
12488    content: &[u8],
12489) -> Result<Vec<u8>> {
12490    // On clean the worktree file exists, so the live `.gitattributes` chain is
12491    // authoritative. `git_dir` is accepted for symmetry with the smudge entry
12492    // point (which falls back to the index) and for future use.
12493    let _ = git_dir.as_ref();
12494    let checks = filter_attribute_checks(worktree_root.as_ref(), path)?;
12495    apply_clean_filter_with_attributes(config, &checks, path, content)
12496}
12497
/// A reusable handle that captures the worktree's `.gitattributes` chain once so
/// repeated clean-filter calls (e.g. `hash-object --stdin-paths` hashing many
/// paths in one process) don't re-walk the worktree and re-read every
/// `.gitattributes`/global config per path.
///
/// Build it once with [`WorktreeAttributes::from_worktree_root`], then call
/// [`WorktreeAttributes::apply_clean_filter`] per path. This mirrors
/// [`apply_clean_filter`] exactly except the expensive attribute-source scan is
/// amortized across calls.
pub struct WorktreeAttributes {
    /// Attribute patterns loaded once at construction and queried per path.
    matcher: AttributeMatcher,
}
12510
12511impl WorktreeAttributes {
12512    /// Read the worktree's attribute sources once (global/`core.attributesFile`,
12513    /// every in-tree `.gitattributes`, and `$GIT_DIR/info/attributes`).
12514    pub fn from_worktree_root(worktree_root: impl AsRef<Path>) -> Result<Self> {
12515        Ok(Self {
12516            matcher: AttributeMatcher::from_worktree_root(worktree_root.as_ref())?,
12517        })
12518    }
12519
12520    /// Apply the clean conversion to `content` for `path`, reusing the cached
12521    /// attribute chain. Behaviourally identical to [`apply_clean_filter`].
12522    pub fn apply_clean_filter(
12523        &self,
12524        config: &GitConfig,
12525        path: &[u8],
12526        content: &[u8],
12527    ) -> Result<Vec<u8>> {
12528        let checks = self
12529            .matcher
12530            .attributes_for_path(path, &filter_attribute_names(), false);
12531        apply_clean_filter_with_attributes(config, &checks, path, content)
12532    }
12533}
12534
/// A reusable handle that captures a *tree's* `.gitattributes` chain once so
/// repeated smudge-filter calls (e.g. `git archive` streaming every blob in a
/// tree) resolve attributes from the tree being processed rather than the live
/// worktree.
///
/// This is the attribute direction `git archive` uses: upstream unpacks the
/// archived tree into a scratch index and sets `GIT_ATTR_INDEX`, so the
/// `.gitattributes` that govern conversion come from the *archived tree* (plus
/// the global/`core.attributesFile` chain and `$GIT_DIR/info/attributes`), not
/// from whatever happens to be checked out. `--worktree-attributes` callers
/// should use [`WorktreeAttributes`] instead.
///
/// Build it once with [`TreeAttributes::from_tree`], then call
/// [`TreeAttributes::apply_smudge_filter`] per blob. Behaviourally this mirrors
/// [`apply_smudge_filter`] except the attribute source is the supplied tree and
/// the expensive source scan is amortized across calls.
pub struct TreeAttributes {
    /// Attribute patterns loaded once by `from_tree` and queried per path.
    matcher: AttributeMatcher,
}
12554
12555impl TreeAttributes {
12556    /// Read the attribute sources for `tree_oid` once: the global /
12557    /// `core.attributesFile` chain, every `.gitattributes` blob found while
12558    /// walking `tree_oid`, and `$GIT_DIR/info/attributes`.
12559    ///
12560    /// `attr_root` locates the global config (`read_configured_attributes`);
12561    /// pass the worktree root for a non-bare repo, or the git dir for a bare
12562    /// one. `git_dir` locates `info/attributes` directly (so this works for bare
12563    /// repos, where there is no nested `.git`). No worktree `.gitattributes`
12564    /// files are read — use [`WorktreeAttributes`] for the
12565    /// `--worktree-attributes` direction.
12566    pub fn from_tree(
12567        attr_root: impl AsRef<Path>,
12568        git_dir: impl AsRef<Path>,
12569        db: &FileObjectDatabase,
12570        format: ObjectFormat,
12571        tree_oid: &ObjectId,
12572    ) -> Result<Self> {
12573        let attr_root = attr_root.as_ref();
12574        let git_dir = git_dir.as_ref();
12575        let mut matcher = AttributeMatcher::default();
12576        matcher.configure_case_sensitivity(git_dir);
12577        if !matcher.read_configured_attributes(attr_root, git_dir) {
12578            matcher.read_default_global_attributes();
12579        }
12580        collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
12581        read_attribute_patterns(
12582            git_dir.join("info").join("attributes"),
12583            &mut matcher,
12584            &[],
12585            b"info/attributes",
12586            false,
12587        );
12588        Ok(Self { matcher })
12589    }
12590
12591    /// Apply the smudge conversion (blob -> worktree: EOL `LF`->`CRLF` plus any
12592    /// configured `filter.<name>.smudge` driver) to `content` for `path`,
12593    /// reusing the cached attribute chain. Behaviourally identical to
12594    /// [`apply_smudge_filter`] except attributes come from the tree this handle
12595    /// was built from.
12596    pub fn apply_smudge_filter(
12597        &self,
12598        config: &GitConfig,
12599        path: &[u8],
12600        content: &[u8],
12601    ) -> Result<Vec<u8>> {
12602        let checks = self
12603            .matcher
12604            .attributes_for_path(path, &filter_attribute_names(), false);
12605        apply_smudge_filter_with_attributes(config, &checks, path, content)
12606    }
12607
12608    pub fn attributes_for_path(&self, path: &[u8], requested: &[Vec<u8>]) -> Vec<AttributeCheck> {
12609        self.matcher.attributes_for_path(path, requested, false)
12610    }
12611
12612    /// True when `path` has the `export-subst` attribute set (git's
12613    /// `check_attr_export_subst`), meaning `git archive` should run
12614    /// `$Format:…$` keyword substitution on its content.
12615    pub fn export_subst_for_path(&self, path: &[u8]) -> bool {
12616        self.attribute_is_set(path, b"export-subst")
12617    }
12618
12619    /// True when `path` has the `export-ignore` attribute set (git's
12620    /// `check_attr_export_ignore`), meaning `git archive` should omit the path
12621    /// (and, for a directory, its whole subtree) from the archive.
12622    pub fn export_ignore_for_path(&self, path: &[u8]) -> bool {
12623        self.attribute_is_set(path, b"export-ignore")
12624    }
12625
12626    fn attribute_is_set(&self, path: &[u8], attribute: &[u8]) -> bool {
12627        let requested = [attribute.to_vec()];
12628        let checks = self.matcher.attributes_for_path(path, &requested, false);
12629        matches!(
12630            checks.first().and_then(|check| check.state.as_ref()),
12631            Some(AttributeState::Set)
12632        )
12633    }
12634
12635    /// The `diff` attribute state for `path` (`Set` for `diff`, `Unset` for
12636    /// `-diff`, `Value(name)` for `diff=<name>`, `None` when unspecified). Used
12637    /// by `git archive`'s zip backend to classify text vs. binary via the
12638    /// path's userdiff driver.
12639    pub fn diff_attribute_for_path(&self, path: &[u8]) -> Option<AttributeState> {
12640        let requested = [b"diff".to_vec()];
12641        let checks = self.matcher.attributes_for_path(path, &requested, false);
12642        checks.into_iter().next().and_then(|check| check.state)
12643    }
12644}
12645
12646/// Like [`apply_clean_filter`] but takes already-resolved attribute checks,
12647/// letting callers that have computed attributes once reuse them.
12648pub fn apply_clean_filter_with_attributes(
12649    config: &GitConfig,
12650    attributes: &[AttributeCheck],
12651    path: &[u8],
12652    content: &[u8],
12653) -> Result<Vec<u8>> {
12654    Ok(apply_clean_filter_with_attributes_cow(config, attributes, path, content)?.into_owned())
12655}
12656
12657/// Borrow-first variant of [`apply_clean_filter_with_attributes`].
12658///
12659/// When no filter or EOL conversion changes the content, the returned value
12660/// borrows `content`; callers that can consume a [`Cow`] avoid allocating for
12661/// the common pass-through case.
12662pub fn apply_clean_filter_with_attributes_cow<'a>(
12663    config: &GitConfig,
12664    attributes: &[AttributeCheck],
12665    path: &[u8],
12666    content: &'a [u8],
12667) -> Result<Cow<'a, [u8]>> {
12668    apply_clean_filter_with_attributes_cow_safecrlf(
12669        config,
12670        attributes,
12671        path,
12672        content,
12673        ConvFlags::Off,
12674        SafeCrlfIndexBlob::None,
12675    )
12676}
12677
/// How the safecrlf check should learn whether this path's *current index blob*
/// already contains a CRLF (git's `has_crlf_in_index`). Only consulted on the
/// `text=auto` / `core.autocrlf` path.
pub enum SafeCrlfIndexBlob<'a> {
    /// No index blob is available (the staging caller has none, or safecrlf is
    /// off) — treated as "no CRLF in index".
    None,
    /// The path's current index blob, read on demand from this object database
    /// only when the auto-crlf decision actually needs it.
    Lookup {
        /// Object database the blob is read from.
        odb: &'a FileObjectDatabase,
        /// Id of the path's current index blob.
        oid: ObjectId,
    },
}
12692
12693impl SafeCrlfIndexBlob<'_> {
12694    fn has_crlf(&self) -> bool {
12695        match self {
12696            SafeCrlfIndexBlob::None => false,
12697            SafeCrlfIndexBlob::Lookup { odb, oid } => has_crlf_in_index(odb, oid),
12698        }
12699    }
12700}
12701
12702/// [`apply_clean_filter_with_attributes_cow`] plus git's additive `core.safecrlf`
12703/// round-trip warning (convert.c `crlf_to_git`).
12704///
12705/// The conversion result is byte-for-byte identical to the plain variant;
12706/// `flags`/`index_blob` only drive the stderr warning git prints when a
12707/// CRLF<->LF round-trip would not be reversible. The warning is computed on the
12708/// *post-driver, pre-EOL-conversion* content, matching git's ordering in
12709/// `convert_to_git` (apply_filter -> crlf_to_git).
12710pub fn apply_clean_filter_with_attributes_cow_safecrlf<'a>(
12711    config: &GitConfig,
12712    attributes: &[AttributeCheck],
12713    path: &[u8],
12714    content: &'a [u8],
12715    flags: ConvFlags,
12716    index_blob: SafeCrlfIndexBlob<'_>,
12717) -> Result<Cow<'a, [u8]>> {
12718    let plan = ContentFilterPlan::resolve(config, attributes);
12719    let mut data = Cow::Borrowed(content);
12720    if let Some(driver) = &plan.driver {
12721        data = run_driver(driver, driver.clean.as_deref(), "clean", None, path, data)?;
12722    }
12723    // The safecrlf check scans the (post-driver) buffer once for line-ending
12724    // stats. Gate it tightly so the extra scan never runs on the dominant
12725    // pass-through paths: only when safecrlf is enabled, the path is a real
12726    // conversion candidate (not `CRLF_BINARY`), and the buffer is non-empty.
12727    if flags != ConvFlags::Off && !data.is_empty() && plan.safecrlf_applies() {
12728        let old_stats = gather_convert_stats(&data);
12729        plan.check_safe_crlf_stats(&old_stats, index_blob.has_crlf(), flags, path)?;
12730    }
12731    if plan.convert_eol(&data) {
12732        data = convert_crlf_to_lf_cow(data);
12733    }
12734    if plan.ident {
12735        data = ident_to_git_cow(data);
12736    }
12737    Ok(data)
12738}
12739
12740/// Apply the *smudge* conversion to `content` for `path` (blob -> worktree):
12741/// first LF->CRLF when EOL conversion applies, then the configured
12742/// `filter.<name>.smudge` driver (if any).
12743///
12744/// Semantics mirror [`apply_clean_filter`]: a required driver with a missing or
12745/// failing `smudge` command errors, while a non-required one passes the content
12746/// through.
12747pub fn apply_smudge_filter(
12748    worktree_root: impl AsRef<Path>,
12749    git_dir: impl AsRef<Path>,
12750    format: ObjectFormat,
12751    config: &GitConfig,
12752    path: &[u8],
12753    content: &[u8],
12754) -> Result<Vec<u8>> {
12755    // On smudge (checkout) the worktree file may not exist yet, so resolve the
12756    // attributes from the `.gitattributes` recorded in the index.
12757    let checks =
12758        smudge_attribute_checks_from_index(worktree_root.as_ref(), git_dir.as_ref(), format, path)?;
12759    Ok(
12760        apply_smudge_filter_with_attributes_cow_format(config, &checks, path, content, format)?
12761            .into_owned(),
12762    )
12763}
12764
12765/// Like [`apply_smudge_filter`] but takes already-resolved attribute checks.
12766pub fn apply_smudge_filter_with_attributes(
12767    config: &GitConfig,
12768    attributes: &[AttributeCheck],
12769    path: &[u8],
12770    content: &[u8],
12771) -> Result<Vec<u8>> {
12772    Ok(apply_smudge_filter_with_attributes_cow(config, attributes, path, content)?.into_owned())
12773}
12774
12775/// Borrow-first variant of [`apply_smudge_filter_with_attributes`].
12776///
12777/// When no filter or EOL conversion changes the content, the returned value
12778/// borrows `content`; callers that can consume a [`Cow`] avoid allocating for
12779/// the common pass-through case.
12780pub fn apply_smudge_filter_with_attributes_cow<'a>(
12781    config: &GitConfig,
12782    attributes: &[AttributeCheck],
12783    path: &[u8],
12784    content: &'a [u8],
12785) -> Result<Cow<'a, [u8]>> {
12786    apply_smudge_filter_with_attributes_cow_format(
12787        config,
12788        attributes,
12789        path,
12790        content,
12791        ObjectFormat::Sha1,
12792    )
12793}
12794
12795fn apply_smudge_filter_with_attributes_cow_format<'a>(
12796    config: &GitConfig,
12797    attributes: &[AttributeCheck],
12798    path: &[u8],
12799    content: &'a [u8],
12800    format: ObjectFormat,
12801) -> Result<Cow<'a, [u8]>> {
12802    let plan = ContentFilterPlan::resolve(config, attributes);
12803    let mut data = Cow::Borrowed(content);
12804    if plan.ident {
12805        data = ident_to_worktree_cow(format, data)?;
12806    }
12807    if plan.eol == EolConversion::Crlf
12808        && plan.convert_eol(&data)
12809        && plan.will_convert_lf_to_crlf(&data)
12810    {
12811        data = Cow::Owned(convert_lf_to_crlf(&data));
12812    }
12813    if let Some(driver) = &plan.driver {
12814        data = run_driver(
12815            driver,
12816            driver.smudge.as_deref(),
12817            "smudge",
12818            Some(format),
12819            path,
12820            data,
12821        )?;
12822    }
12823    Ok(data)
12824}
12825
12826/// Execute one direction of a driver filter, honouring the `required` flag.
12827fn run_driver<'a>(
12828    driver: &FilterDriver,
12829    command: Option<&str>,
12830    direction: &str,
12831    format: Option<ObjectFormat>,
12832    path: &[u8],
12833    content: Cow<'a, [u8]>,
12834) -> Result<Cow<'a, [u8]>> {
12835    if let Some(process) = &driver.process {
12836        let blob = if direction == "smudge" {
12837            match format {
12838                Some(format) => {
12839                    Some(EncodedObject::new(ObjectType::Blob, content.to_vec()).object_id(format)?)
12840                }
12841                None => None,
12842            }
12843        } else {
12844            None
12845        };
12846        match run_process_filter(process, direction, path, &content, blob) {
12847            Ok(ProcessFilterOutcome::Filtered(output)) => return Ok(Cow::Owned(output)),
12848            Ok(ProcessFilterOutcome::Unsupported) => {}
12849            Ok(ProcessFilterOutcome::Status(status)) => {
12850                if driver.required {
12851                    return Err(GitError::Command(format!(
12852                        "external filter '{}' returned status {status}",
12853                        process
12854                    )));
12855                }
12856                return Ok(content);
12857            }
12858            Err(err) => {
12859                if err.protocol {
12860                    eprintln!("error: external filter '{}' failed", process);
12861                }
12862                if driver.required {
12863                    return Err(GitError::Command(err.message));
12864                }
12865                return Ok(content);
12866            }
12867        }
12868    }
12869    let Some(command) = command else {
12870        // No command in this direction. Required filters must error; optional
12871        // ones pass content through unchanged.
12872        if driver.required {
12873            let path = String::from_utf8_lossy(path);
12874            let name = String::from_utf8_lossy(&driver.name);
12875            if direction == "clean" {
12876                eprintln!("fatal: {path}: clean filter '{name}' failed");
12877            } else {
12878                eprintln!("fatal: {path}: smudge filter {name} failed");
12879            }
12880            return Err(GitError::Exit(128));
12881        }
12882        return Ok(content);
12883    };
12884    match run_filter_command(command, path, &content) {
12885        Ok(output) => Ok(Cow::Owned(output)),
12886        Err(err) => {
12887            if driver.required {
12888                Err(err)
12889            } else {
12890                // Non-required filter failure: fall back to the unfiltered
12891                // content, matching git's behaviour.
12892                Ok(content)
12893            }
12894        }
12895    }
12896}
12897
12898/// Compute the attributes relevant to content filtering (`text`, `eol`,
12899/// `filter`) for `path` from the worktree `.gitattributes` chain.
12900fn filter_attribute_checks(worktree_root: &Path, path: &[u8]) -> Result<Vec<AttributeCheck>> {
12901    let requested = filter_attribute_names();
12902    let mut matcher = AttributeMatcher::default();
12903    let git_dir = worktree_root.join(".git");
12904    matcher.configure_case_sensitivity(&git_dir);
12905    if !matcher.read_configured_attributes(worktree_root, &git_dir) {
12906        matcher.read_default_global_attributes();
12907    }
12908    read_dir_attribute_patterns_for_base(worktree_root, &[], &mut matcher)?;
12909    let mut prefix = Vec::new();
12910    let mut parts = path.split(|byte| *byte == b'/').peekable();
12911    while let Some(part) = parts.next() {
12912        if parts.peek().is_none() {
12913            break;
12914        }
12915        if !prefix.is_empty() {
12916            prefix.push(b'/');
12917        }
12918        prefix.extend_from_slice(part);
12919        let dir = worktree_root.join(repo_path_to_os_path(&prefix)?);
12920        read_dir_attribute_patterns_for_base(&dir, &prefix, &mut matcher)?;
12921    }
12922    read_attribute_patterns(
12923        worktree_root.join(".git").join("info").join("attributes"),
12924        &mut matcher,
12925        &[],
12926        b".git/info/attributes",
12927        false,
12928    );
12929    Ok(matcher.attributes_for_path(path, &requested, false))
12930}
12931
12932/// Compute filtering attributes for a checkout (blob -> worktree).
12933///
12934/// `git checkout -- <pathspec>` / `git restore` materialize through git's
12935/// **default** attr direction, which is `GIT_ATTR_CHECKIN` (attr.c: the static
12936/// `direction` is zero-initialized and `builtin/checkout.c` never overrides it
12937/// for the pathspec path). Under that direction `read_attr` reads each
12938/// `.gitattributes` frame from the **worktree file first**, falling back to the
12939/// staged blob only when no worktree file exists at that directory level
12940/// (sparse-checkout). This is the precedence the smudge filter must use:
12941/// t0027 commits an *empty* root `.gitattributes`, then overwrites the worktree
12942/// copy with `*.txt text eol=crlf` *without re-staging* — and git's checkout
12943/// still honours the worktree copy. Reading the index alone (or index-first)
12944/// made checkout under-convert line endings, because the staged blob was empty.
12945fn smudge_attribute_checks_from_index(
12946    worktree_root: &Path,
12947    git_dir: &Path,
12948    format: ObjectFormat,
12949    path: &[u8],
12950) -> Result<Vec<AttributeCheck>> {
12951    let requested = filter_attribute_names();
12952    let mut matcher = AttributeMatcher::default();
12953    matcher.configure_case_sensitivity(git_dir);
12954    if !matcher.read_configured_attributes(worktree_root, git_dir) {
12955        matcher.read_default_global_attributes();
12956    }
12957
12958    // Build the set of `.gitattributes` blobs the index carries, keyed by the
12959    // directory they govern, so each ancestry frame can prefer the staged copy.
12960    let index_attributes = index_gitattributes_by_base(git_dir, format)?;
12961
12962    // Walk root -> ... -> the file's parent directory, folding each frame's
12963    // `.gitattributes` in shallow-to-deep order so deeper directories win.
12964    fold_checkout_attribute_frame(worktree_root, &[], &index_attributes, &mut matcher)?;
12965    let mut prefix = Vec::new();
12966    let mut parts = path.split(|byte| *byte == b'/').peekable();
12967    while let Some(part) = parts.next() {
12968        if parts.peek().is_none() {
12969            break;
12970        }
12971        if !prefix.is_empty() {
12972            prefix.push(b'/');
12973        }
12974        prefix.extend_from_slice(part);
12975        let dir = worktree_root.join(repo_path_to_os_path(&prefix)?);
12976        fold_checkout_attribute_frame(&dir, &prefix, &index_attributes, &mut matcher)?;
12977    }
12978
12979    read_attribute_patterns(
12980        worktree_root.join(".git").join("info").join("attributes"),
12981        &mut matcher,
12982        &[],
12983        b".git/info/attributes",
12984        false,
12985    );
12986    Ok(matcher.attributes_for_path(path, &requested, false))
12987}
12988
12989/// Fold the `.gitattributes` governing directory `base` (whose on-disk location
12990/// is `dir`) into `matcher`, preferring the worktree file and falling back to
12991/// the staged blob. Mirrors one attr-stack frame under `GIT_ATTR_CHECKIN`
12992/// (git's default direction, used by `checkout -- <pathspec>` / `restore`).
12993fn fold_checkout_attribute_frame(
12994    dir: &Path,
12995    base: &[u8],
12996    index_attributes: &BTreeMap<Vec<u8>, Vec<u8>>,
12997    matcher: &mut AttributeMatcher,
12998) -> Result<()> {
12999    let worktree_file = dir.join(".gitattributes");
13000    let source = attribute_source_for_base(base);
13001    if let Ok(contents) = fs::read(&worktree_file) {
13002        // A worktree `.gitattributes` exists at this level: it wins outright
13003        // (git only consults the index when the worktree file is absent).
13004        read_attribute_patterns_from_bytes(&contents, matcher, base, &source);
13005    } else if let Some(contents) = index_attributes.get(base) {
13006        read_attribute_patterns_from_bytes(contents, matcher, base, &source);
13007    }
13008    Ok(())
13009}
13010
13011/// Read every staged `.gitattributes` blob, keyed by the repo-relative directory
13012/// it governs (`""` for the worktree root). Stage-0 blob entries only.
13013fn index_gitattributes_by_base(
13014    git_dir: &Path,
13015    format: ObjectFormat,
13016) -> Result<BTreeMap<Vec<u8>, Vec<u8>>> {
13017    let mut map = BTreeMap::new();
13018    let index_path = repository_index_path(git_dir);
13019    if !index_path.exists() {
13020        return Ok(map);
13021    }
13022    let db = FileObjectDatabase::from_git_dir(git_dir, format);
13023    let entries = Index::parse(&fs::read(index_path)?, format)?.entries;
13024    for entry in entries {
13025        let is_attributes_file =
13026            entry.path == b".gitattributes" || entry.path.as_bytes().ends_with(b"/.gitattributes");
13027        if index_entry_stage(&entry) != 0
13028            || tree_entry_object_type(entry.mode) != ObjectType::Blob
13029            || !is_attributes_file
13030        {
13031            continue;
13032        }
13033        let base = match entry.path.as_bytes().strip_suffix(b".gitattributes") {
13034            Some(b"") => Vec::new(),
13035            Some(parent) => parent.strip_suffix(b"/").unwrap_or(parent).to_vec(),
13036            None => continue,
13037        };
13038        let object = db
13039            .read_object(&entry.oid)
13040            .map_err(|err| expect_missing_object_kind(err, entry.oid, MissingObjectKind::Blob))?;
13041        if object.object_type == ObjectType::Blob {
13042            map.insert(base, object.body.clone());
13043        }
13044    }
13045    Ok(map)
13046}
13047
/// Names of the attributes consulted when converting content, in lookup order.
///
/// `crlf` is included because it is git's legacy alias for `text` (convert.c
/// registers both) and is used as a fallback when `text` is unspecified.
fn filter_attribute_names() -> Vec<Vec<u8>> {
    const NAMES: [&[u8]; 5] = [b"text", b"crlf", b"ident", b"eol", b"filter"];
    NAMES.iter().map(|name| name.to_vec()).collect()
}
13059
13060// ---------------------------------------------------------------------------
13061// `ls-files --eol` line-ending information
13062//
13063// Git's `git ls-files --eol` prints, for each path, three fields:
13064//   i/<stat>  — line-ending statistics of the *index* blob content
13065//   w/<stat>  — line-ending statistics of the *worktree* file content
13066//   attr/<a>  — the resolved crlf/eol attribute action (attributes only, no
13067//               config) — `get_convert_attr_ascii` in convert.c
13068// The two stat fields mirror `gather_convert_stats_ascii`; the attr field
13069// mirrors `convert_attrs` up to `ca->attr_action` (i.e. *before* the config
13070// derived `text` -> input/crlf substitution and the `core.autocrlf` fallback).
13071// ---------------------------------------------------------------------------
13072
/// Byte-class tallies for a buffer, mirroring convert.c `gather_stats`.
#[derive(Clone)]
struct ConvertStats {
    /// Number of NUL bytes.
    nul: u32,
    /// Carriage returns not followed by a line feed.
    lonecr: u32,
    /// Line feeds not preceded by a carriage return.
    lonelf: u32,
    /// `\r\n` pairs.
    crlf: u32,
    /// Bytes counted as printable text.
    printable: u32,
    /// Bytes counted as non-printable.
    nonprintable: u32,
}

/// Classify every byte of `buf` into the [`ConvertStats`] counters.
///
/// A `\r\n` pair is counted once as `crlf`. BS, HT, FF and ESC count as
/// printable; DEL and the remaining control bytes count as non-printable (NUL
/// additionally bumps `nul`). A final `0x1a` (^Z) byte is not held against the
/// non-printable total.
fn gather_convert_stats(buf: &[u8]) -> ConvertStats {
    let mut tally = ConvertStats {
        nul: 0,
        lonecr: 0,
        lonelf: 0,
        crlf: 0,
        printable: 0,
        nonprintable: 0,
    };
    let mut bytes = buf.iter().copied().peekable();
    while let Some(byte) = bytes.next() {
        match byte {
            b'\r' => {
                // Swallow the following LF, if any, so the pair counts once.
                if bytes.next_if_eq(&b'\n').is_some() {
                    tally.crlf += 1;
                } else {
                    tally.lonecr += 1;
                }
            }
            b'\n' => tally.lonelf += 1,
            // DEL
            0x7f => tally.nonprintable += 1,
            0x00 => {
                tally.nul += 1;
                tally.nonprintable += 1;
            }
            // BS, HT, FF and ESC are printable.
            0x08 | 0x09 | 0x0c | 0x1b => tally.printable += 1,
            // Every other control byte.
            0x01..=0x1f => tally.nonprintable += 1,
            _ => tally.printable += 1,
        }
    }
    // A trailing EOF (^Z, 0x1a) is not counted as non-printable.
    if buf.ends_with(&[0x1a]) {
        tally.nonprintable = tally.nonprintable.saturating_sub(1);
    }
    tally
}
13135
13136/// Mirror of convert.c `has_crlf_in_index`: whether the blob currently recorded
13137/// in the index for this path is non-binary text containing a CRLF. Used only by
13138/// the auto-crlf safecrlf decision to keep an already-CRLF index blob from being
13139/// silently collapsed. A missing/unreadable blob (or a non-blob entry) counts as
13140/// "no CRLF", matching git's `read_blob_data_from_index` returning NULL.
13141fn has_crlf_in_index(odb: &FileObjectDatabase, oid: &ObjectId) -> bool {
13142    let Ok(object) = odb.read_object(oid) else {
13143        return false;
13144    };
13145    if object.object_type != ObjectType::Blob {
13146        return false;
13147    }
13148    let data = &object.body;
13149    // git short-circuits on the first '\r' via memchr before gathering stats.
13150    if !data.contains(&b'\r') {
13151        return false;
13152    }
13153    let stats = gather_convert_stats(data);
13154    !convert_is_binary(&stats) && stats.crlf > 0
13155}
13156
13157/// Mirror of convert.c `convert_is_binary`: a lone CR or NUL, or a high
13158/// non-printable ratio, marks the content as binary.
13159fn convert_is_binary(stats: &ConvertStats) -> bool {
13160    if stats.lonecr > 0 {
13161        return true;
13162    }
13163    if stats.nul > 0 {
13164        return true;
13165    }
13166    (stats.printable >> 7) < stats.nonprintable
13167}
13168
13169/// The `core.safecrlf` round-trip-warning mode, mirroring git's
13170/// `global_conv_flags_eol` (environment.c). git's *default* — when
13171/// `core.safecrlf` is unset — is [`ConvFlags::Warn`], so the warning fires even
13172/// without any explicit config.
13173#[derive(Debug, Clone, Copy, PartialEq, Eq)]
13174pub enum ConvFlags {
13175    /// `core.safecrlf=false`: never warn.
13176    Off,
13177    /// `core.safecrlf=warn` (and the unset default): emit a warning when a
13178    /// CRLF<->LF round-trip would not be reversible.
13179    Warn,
13180    /// `core.safecrlf=true`: die instead of warn.
13181    Die,
13182}
13183
13184impl ConvFlags {
13185    /// Resolve `core.safecrlf` from config, mirroring environment.c
13186    /// `git_default_core_config`: `warn` -> [`ConvFlags::Warn`], a boolean-true
13187    /// value -> [`ConvFlags::Die`], a boolean-false value -> [`ConvFlags::Off`].
13188    /// When the key is absent git leaves `global_conv_flags_eol` at its initial
13189    /// [`ConvFlags::Warn`], so unset also resolves to [`ConvFlags::Warn`].
13190    pub fn from_config(config: &GitConfig) -> Self {
13191        match config.get("core", None, "safecrlf") {
13192            Some(value) if value.eq_ignore_ascii_case("warn") => ConvFlags::Warn,
13193            Some(_) => {
13194                if config.get_bool("core", None, "safecrlf") == Some(true) {
13195                    ConvFlags::Die
13196                } else {
13197                    ConvFlags::Off
13198                }
13199            }
13200            None => ConvFlags::Warn,
13201        }
13202    }
13203}
13204
13205/// Mirror of convert.c `check_global_conv_flags_eol`: compare the pre-conversion
13206/// `old_stats` against the simulated round-trip `new_stats` and, when the
13207/// CRLF/LF content would not survive a clean+smudge cycle, warn (or die under
13208/// `core.safecrlf=true`).
13209///
13210/// Returns `Err(GitError::Exit(128))` when `flags` is [`ConvFlags::Die`] and the
13211/// round-trip is irreversible (git `die`s with exit 128 here); otherwise prints
13212/// the warning to stderr and returns `Ok(())`. This is a pure stderr-side
13213/// effect: it never changes the bytes written to the object store.
13214fn check_safe_crlf(
13215    old_stats: &ConvertStats,
13216    new_stats: &ConvertStats,
13217    flags: ConvFlags,
13218    path: &[u8],
13219) -> Result<()> {
13220    if flags == ConvFlags::Off {
13221        return Ok(());
13222    }
13223    let display = String::from_utf8_lossy(path);
13224    if old_stats.crlf > 0 && new_stats.crlf == 0 {
13225        // CRLFs would not be restored by checkout.
13226        match flags {
13227            ConvFlags::Die => {
13228                eprintln!("fatal: CRLF would be replaced by LF in {display}");
13229                return Err(GitError::Exit(128));
13230            }
13231            ConvFlags::Warn => {
13232                eprintln!(
13233                    "warning: in the working copy of '{display}', CRLF will be replaced by LF the next time Git touches it"
13234                );
13235            }
13236            ConvFlags::Off => unreachable!("handled above"),
13237        }
13238    } else if old_stats.lonelf > 0 && new_stats.lonelf == 0 {
13239        // CRLFs would be added by checkout.
13240        match flags {
13241            ConvFlags::Die => {
13242                eprintln!("fatal: LF would be replaced by CRLF in {display}");
13243                return Err(GitError::Exit(128));
13244            }
13245            ConvFlags::Warn => {
13246                eprintln!(
13247                    "warning: in the working copy of '{display}', LF will be replaced by CRLF the next time Git touches it"
13248                );
13249            }
13250            ConvFlags::Off => unreachable!("handled above"),
13251        }
13252    }
13253    Ok(())
13254}
13255
13256/// Compute the `i/` or `w/` stat string for `content`, mirroring
13257/// convert.c `gather_convert_stats_ascii`.
13258fn convert_stats_ascii(content: &[u8]) -> &'static str {
13259    if content.is_empty() {
13260        return "none";
13261    }
13262    let stats = gather_convert_stats(content);
13263    if convert_is_binary(&stats) {
13264        return "-text";
13265    }
13266    match (stats.lonelf > 0, stats.crlf > 0) {
13267        (true, false) => "lf",
13268        (false, true) => "crlf",
13269        (true, true) => "mixed",
13270        (false, false) => "none",
13271    }
13272}
13273
13274/// The resolved crlf/eol attribute action for a path, mirroring convert.c
13275/// `convert_attrs` up to `ca->attr_action` (attributes only, no config), and
13276/// `get_convert_attr_ascii` for the ascii spelling.
13277fn convert_attr_ascii(checks: &[AttributeCheck]) -> &'static str {
13278    fn state_of<'a>(checks: &'a [AttributeCheck], name: &[u8]) -> Option<&'a AttributeState> {
13279        checks
13280            .iter()
13281            .find(|check| check.attribute == name)
13282            .and_then(|check| check.state.as_ref())
13283    }
13284
13285    // git_path_check_crlf: ATTR_TRUE -> TEXT, ATTR_FALSE -> BINARY,
13286    // ATTR_UNSET -> (fall through), "input" -> TEXT_INPUT, "auto" -> AUTO,
13287    // anything else -> UNDEFINED.
13288    #[derive(Clone, Copy, PartialEq)]
13289    enum Action {
13290        Undefined,
13291        Binary,
13292        Text,
13293        TextInput,
13294        TextCrlf,
13295        Auto,
13296        AutoCrlf,
13297        AutoInput,
13298    }
13299    fn check_crlf(state: Option<&AttributeState>) -> Action {
13300        match state {
13301            Some(AttributeState::Set) => Action::Text,
13302            Some(AttributeState::Unset) => Action::Binary,
13303            Some(AttributeState::Value(value)) if value == b"input" => Action::TextInput,
13304            Some(AttributeState::Value(value)) if value == b"auto" => Action::Auto,
13305            // ATTR_UNSET / any other value -> CRLF_UNDEFINED.
13306            _ => Action::Undefined,
13307        }
13308    }
13309
13310    // Resolve from the `text` attribute, then fall back to the legacy `crlf`
13311    // alias only when `text` left the action undefined.
13312    let mut action = check_crlf(state_of(checks, b"text"));
13313    if action == Action::Undefined {
13314        action = check_crlf(state_of(checks, b"crlf"));
13315    }
13316
13317    if action != Action::Binary {
13318        // git_path_check_eol: only "lf"/"crlf" values matter.
13319        let eol = match state_of(checks, b"eol") {
13320            Some(AttributeState::Value(value)) if value == b"lf" => Some(false),
13321            Some(AttributeState::Value(value)) if value == b"crlf" => Some(true),
13322            _ => None,
13323        };
13324        action = match (action, eol) {
13325            (Action::Auto, Some(false)) => Action::AutoInput,
13326            (Action::Auto, Some(true)) => Action::AutoCrlf,
13327            (_, Some(false)) if action != Action::Auto => Action::TextInput,
13328            (_, Some(true)) if action != Action::Auto => Action::TextCrlf,
13329            _ => action,
13330        };
13331    }
13332
13333    match action {
13334        Action::Undefined => "",
13335        Action::Binary => "-text",
13336        Action::Text => "text",
13337        Action::TextInput => "text eol=lf",
13338        Action::TextCrlf => "text eol=crlf",
13339        Action::Auto => "text=auto",
13340        Action::AutoCrlf => "text=auto eol=crlf",
13341        Action::AutoInput => "text=auto eol=lf",
13342    }
13343}
13344
/// The three `ls-files --eol` fields for a single path.
///
/// Every field is a `'static` label, so the value is cheap to clone and can
/// be compared or debug-printed directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EolInfo {
    /// Stat of the index blob (`i/...`); empty when there is no index blob.
    pub index: &'static str,
    /// Stat of the worktree file (`w/...`); empty when the file is absent.
    pub worktree: &'static str,
    /// Resolved crlf/eol attribute action (`attr/...`).
    pub attr: &'static str,
}

impl EolInfo {
    /// Format as git's `ls-files --eol` prefix: `i/%-5s w/%-5s attr/%-17s\t`.
    ///
    /// Each field is left-aligned and space-padded to its column width; a
    /// value wider than its column is emitted in full rather than truncated.
    /// The result always ends with a tab.
    pub fn format_prefix(&self) -> String {
        format!(
            "i/{:<5} w/{:<5} attr/{:<17}\t",
            self.index, self.worktree, self.attr
        )
    }
}
13364
13365/// Compute the `ls-files --eol` info for `path`.
13366///
13367/// `index_content` is the raw index blob bytes (None when the path has no
13368/// index entry or is not a regular file). The worktree file is read from
13369/// `worktree_root/path`; if it is absent or not a regular file the `w/` field
13370/// is empty. Attributes are resolved from the worktree `.gitattributes` chain
13371/// via `attr_checks`.
13372pub fn eol_info_for_path(
13373    worktree_root: impl AsRef<Path>,
13374    path: &[u8],
13375    index_content: Option<&[u8]>,
13376    attr_checks: &[AttributeCheck],
13377) -> EolInfo {
13378    let index = index_content.map(convert_stats_ascii).unwrap_or("");
13379
13380    let worktree_root = worktree_root.as_ref();
13381    let worktree = match repo_path_to_os_path(path) {
13382        Ok(rel) => {
13383            let absolute = worktree_root.join(rel);
13384            match fs::symlink_metadata(&absolute) {
13385                // git: only regular files get a `w/` stat (lstat + S_ISREG).
13386                Ok(meta) if meta.file_type().is_file() => match fs::read(&absolute) {
13387                    Ok(content) => convert_stats_ascii_owned(&content),
13388                    Err(_) => "",
13389                },
13390                _ => "",
13391            }
13392        }
13393        Err(_) => "",
13394    };
13395
13396    let attr = convert_attr_ascii(attr_checks);
13397
13398    EolInfo {
13399        index,
13400        worktree,
13401        attr,
13402    }
13403}
13404
/// Forwarding wrapper around [`convert_stats_ascii`] for callers that hold a
/// temporary buffer.
///
/// The returned label is `'static`, so it does not borrow from `content` and
/// the caller may drop the buffer as soon as this returns.
fn convert_stats_ascii_owned(content: &[u8]) -> &'static str {
    convert_stats_ascii(content)
}
13410
13411/// Resolve the crlf/eol/text/filter attributes for `path` from the worktree
13412/// `.gitattributes` chain (the set `ls-files --eol` needs for its `attr/`
13413/// field).
13414pub fn eol_attribute_checks(
13415    worktree_root: impl AsRef<Path>,
13416    path: &[u8],
13417) -> Result<Vec<AttributeCheck>> {
13418    filter_attribute_checks(worktree_root.as_ref(), path)
13419}
13420
13421pub fn deleted_index_entries(
13422    worktree_root: impl AsRef<Path>,
13423    git_dir: impl AsRef<Path>,
13424    format: ObjectFormat,
13425) -> Result<Vec<IndexEntry>> {
13426    let worktree_root = worktree_root.as_ref();
13427    let git_dir = git_dir.as_ref();
13428    let index_path = repository_index_path(git_dir);
13429    if !index_path.exists() {
13430        return Ok(Vec::new());
13431    }
13432    let index = Index::parse(&fs::read(index_path)?, format)?;
13433    let mut deleted = Vec::new();
13434    for entry in index.entries {
13435        if !worktree_path(worktree_root, entry.path.as_bytes())?.exists()
13436            && !index_entry_skip_worktree(&entry)
13437        {
13438            deleted.push(entry);
13439        }
13440    }
13441    Ok(deleted)
13442}
13443
13444pub fn modified_index_entries(
13445    worktree_root: impl AsRef<Path>,
13446    git_dir: impl AsRef<Path>,
13447    format: ObjectFormat,
13448) -> Result<Vec<IndexEntry>> {
13449    let worktree_root = worktree_root.as_ref();
13450    let git_dir = git_dir.as_ref();
13451    let index_path = repository_index_path(git_dir);
13452    if !index_path.exists() {
13453        return Ok(Vec::new());
13454    }
13455    let mut index = Index::parse(&fs::read(&index_path)?, format)?;
13456    if index.entries.iter().any(IndexEntry::is_sparse_dir) {
13457        let db = FileObjectDatabase::from_git_dir(git_dir, format);
13458        expand_sparse_index(&mut index, &db, format)?;
13459    }
13460    // Reuse the same racy-git stat shortcut here: build the cache from the index
13461    // we just parsed (no second parse) so the worktree walk can skip re-hashing
13462    // unchanged files. A cached oid is only trusted on a non-racy stat match, so
13463    // genuinely modified files still fall through to a hash and are reported.
13464    let stat_cache = IndexStatCache::from_index(&index, &index_path);
13465    let mut modified = Vec::new();
13466    for entry in index.entries {
13467        let worktree_entry = worktree_entry_for_git_path(
13468            worktree_root,
13469            git_dir,
13470            format,
13471            entry.path.as_bytes(),
13472            &entry.oid,
13473            entry.mode,
13474            Some(&stat_cache),
13475        )?;
13476        let Some(worktree_entry) = worktree_entry else {
13477            if !index_entry_skip_worktree(&entry) {
13478                modified.push(entry);
13479            }
13480            continue;
13481        };
13482        if worktree_entry.mode != entry.mode || worktree_entry.oid != entry.oid {
13483            modified.push(entry);
13484        }
13485    }
13486    Ok(modified)
13487}
13488
13489pub fn checkout_branch(
13490    worktree_root: impl AsRef<Path>,
13491    git_dir: impl AsRef<Path>,
13492    format: ObjectFormat,
13493    branch: &str,
13494    committer: Vec<u8>,
13495) -> Result<CheckoutResult> {
13496    let worktree_root = worktree_root.as_ref();
13497    let git_dir = git_dir.as_ref();
13498    let branch_ref = branch_ref_name(branch)?;
13499    let refs = FileRefStore::new(git_dir, format);
13500    let target = match sley_refs::resolve_ref_peeled(&refs, &branch_ref)? {
13501        Some(oid) => oid,
13502        None => {
13503            checkout_switch_head_symbolic(&refs, branch_ref, committer, branch, None, None)?;
13504            return Ok(CheckoutResult {
13505                branch: branch.into(),
13506                oid: ObjectId::null(format),
13507                files: 0,
13508            });
13509        }
13510    };
13511    let current_head = resolve_head_commit_oid(git_dir, format)?;
13512    let files = if current_head == Some(target) {
13513        0
13514    } else {
13515        checkout_commit_to_index_and_worktree(worktree_root, git_dir, format, &target)?
13516    };
13517    checkout_switch_head_symbolic(
13518        &refs,
13519        branch_ref,
13520        committer,
13521        branch,
13522        Some(target),
13523        Some(target),
13524    )?;
13525    Ok(CheckoutResult {
13526        branch: branch.into(),
13527        oid: target,
13528        files,
13529    })
13530}
13531
13532pub fn checkout_detached(
13533    worktree_root: impl AsRef<Path>,
13534    git_dir: impl AsRef<Path>,
13535    format: ObjectFormat,
13536    target: &ObjectId,
13537    committer: Vec<u8>,
13538    message: Vec<u8>,
13539) -> Result<CheckoutResult> {
13540    let worktree_root = worktree_root.as_ref();
13541    let git_dir = git_dir.as_ref();
13542    let files = checkout_commit_to_index_and_worktree(worktree_root, git_dir, format, target)?;
13543    let refs = FileRefStore::new(git_dir, format);
13544    let zero = ObjectId::null(format);
13545    let mut tx = refs.transaction();
13546    tx.update(RefUpdate {
13547        name: "HEAD".into(),
13548        expected: None,
13549        new: RefTarget::Direct(*target),
13550        reflog: Some(ReflogEntry {
13551            old_oid: zero,
13552            new_oid: *target,
13553            committer,
13554            message,
13555        }),
13556    });
13557    tx.commit()?;
13558    Ok(CheckoutResult {
13559        branch: target.to_string(),
13560        oid: *target,
13561        files,
13562    })
13563}
13564
13565/// Like [`checkout_branch`], but runs the smudge-side content filters
13566/// (`core.autocrlf`/`text`/`eol` EOL conversion and `filter.<name>.smudge`
13567/// drivers) on each blob as it is written to the worktree. `config` is the
13568/// repository config used to resolve the filters.
13569pub fn checkout_branch_filtered(
13570    worktree_root: impl AsRef<Path>,
13571    git_dir: impl AsRef<Path>,
13572    format: ObjectFormat,
13573    branch: &str,
13574    committer: Vec<u8>,
13575    config: &GitConfig,
13576) -> Result<CheckoutResult> {
13577    let worktree_root = worktree_root.as_ref();
13578    let git_dir = git_dir.as_ref();
13579    let branch_ref = branch_ref_name(branch)?;
13580    let refs = FileRefStore::new(git_dir, format);
13581    let target = match sley_refs::resolve_ref_peeled(&refs, &branch_ref)? {
13582        Some(oid) => oid,
13583        None => {
13584            checkout_switch_head_symbolic(&refs, branch_ref, committer, branch, None, None)?;
13585            return Ok(CheckoutResult {
13586                branch: branch.into(),
13587                oid: ObjectId::null(format),
13588                files: 0,
13589            });
13590        }
13591    };
13592    let current_head = resolve_head_commit_oid(git_dir, format)?;
13593    let files = if current_head == Some(target) {
13594        0
13595    } else {
13596        checkout_commit_to_index_and_worktree_filtered(
13597            worktree_root,
13598            git_dir,
13599            format,
13600            &target,
13601            Some(config),
13602            Some(vec![
13603                ("ref".to_string(), branch_ref.clone()),
13604                ("treeish".to_string(), target.to_hex()),
13605            ]),
13606        )?
13607    };
13608    checkout_switch_head_symbolic(
13609        &refs,
13610        branch_ref,
13611        committer,
13612        branch,
13613        Some(target),
13614        Some(target),
13615    )?;
13616    Ok(CheckoutResult {
13617        branch: branch.into(),
13618        oid: target,
13619        files,
13620    })
13621}
13622
13623/// Like [`checkout_detached`], but runs the smudge-side content filters (see
13624/// [`checkout_branch_filtered`]).
13625pub fn checkout_detached_filtered(
13626    worktree_root: impl AsRef<Path>,
13627    git_dir: impl AsRef<Path>,
13628    format: ObjectFormat,
13629    target: &ObjectId,
13630    committer: Vec<u8>,
13631    message: Vec<u8>,
13632    config: &GitConfig,
13633) -> Result<CheckoutResult> {
13634    let worktree_root = worktree_root.as_ref();
13635    let git_dir = git_dir.as_ref();
13636    let files = checkout_commit_to_index_and_worktree_filtered(
13637        worktree_root,
13638        git_dir,
13639        format,
13640        target,
13641        Some(config),
13642        Some(vec![("treeish".to_string(), target.to_hex())]),
13643    )?;
13644    let refs = FileRefStore::new(git_dir, format);
13645    let zero = ObjectId::null(format);
13646    let mut tx = refs.transaction();
13647    tx.update(RefUpdate {
13648        name: "HEAD".into(),
13649        expected: None,
13650        new: RefTarget::Direct(*target),
13651        reflog: Some(ReflogEntry {
13652            old_oid: zero,
13653            new_oid: *target,
13654            committer,
13655            message,
13656        }),
13657    });
13658    tx.commit()?;
13659    Ok(CheckoutResult {
13660        branch: target.to_string(),
13661        oid: *target,
13662        files,
13663    })
13664}
13665
/// Unfiltered checkout of commit `target` into the index and worktree.
///
/// Forwards to `checkout_commit_to_index_and_worktree_filtered` with neither a
/// smudge config nor process-filter metadata, and returns its result
/// unchanged.
fn checkout_commit_to_index_and_worktree(
    worktree_root: &Path,
    git_dir: &Path,
    format: ObjectFormat,
    target: &ObjectId,
) -> Result<usize> {
    checkout_commit_to_index_and_worktree_filtered(
        worktree_root,
        git_dir,
        format,
        target,
        // No smudge config.
        None,
        // No process-filter metadata.
        None,
    )
}
13681
13682/// Like [`checkout_commit_to_index_and_worktree`] but optionally runs the
13683/// smudge-side content filters (see [`apply_smudge_filter`]) on each blob before
13684/// it is written to the worktree. Attribute lookups use the `.gitattributes`
13685/// recorded in the *target tree* so the rules of the checked-out commit apply.
13686fn checkout_commit_to_index_and_worktree_filtered(
13687    worktree_root: &Path,
13688    git_dir: &Path,
13689    format: ObjectFormat,
13690    target: &ObjectId,
13691    smudge_config: Option<&GitConfig>,
13692    process_metadata: Option<Vec<(String, String)>>,
13693) -> Result<usize> {
13694    if let Some((sparse, mode)) = active_sparse_checkout(git_dir)? {
13695        return checkout_commit_to_index_and_worktree_sparse(
13696            worktree_root,
13697            git_dir,
13698            format,
13699            target,
13700            Some((&sparse, mode)),
13701            smudge_config,
13702            process_metadata,
13703        );
13704    }
13705    let _process_filter_metadata = set_process_filter_metadata(process_metadata);
13706    let mut dirty = false;
13707    if smudge_config.is_some() {
13708        dirty = !modified_index_entries(worktree_root, git_dir, format)?.is_empty();
13709    } else {
13710        stream_short_status(worktree_root, git_dir, format, |entry| {
13711            if !status_row_is_untracked_or_ignored(entry) {
13712                dirty = true;
13713                return Ok(StreamControl::Stop);
13714            }
13715            Ok(StreamControl::Continue)
13716        })?;
13717    }
13718    if dirty {
13719        return Err(GitError::Transaction(
13720            "checkout requires a clean working tree".into(),
13721        ));
13722    }
13723    let db = FileObjectDatabase::from_git_dir(git_dir, format);
13724    let commit = read_commit(&db, format, target)?;
13725    let mut target_entries = BTreeMap::new();
13726    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
13727    refuse_if_current_working_directory_becomes_file(worktree_root, &target_entries)?;
13728
13729    let attributes = smudge_config
13730        .map(|_| build_tree_attribute_matcher(worktree_root, &db, format, &commit.tree))
13731        .transpose()?;
13732
13733    for path in read_index_entries(git_dir, format)?.keys() {
13734        if !target_entries.contains_key(path) {
13735            remove_worktree_file(worktree_root, path)?;
13736        }
13737    }
13738
13739    let mut index_entries = Vec::new();
13740    for (path, entry) in &target_entries {
13741        // Gitlinks go through the shared materialization step (mkdir + zeroed
13742        // stat); smudge filters never apply to a submodule directory.
13743        if sley_index::is_gitlink(entry.mode) {
13744            index_entries.push(materialize_tree_entry(&db, worktree_root, path, entry)?);
13745            continue;
13746        }
13747        let object = read_expected_object(&db, &entry.oid, ObjectType::Blob)?;
13748        let body: Cow<'_, [u8]> = match (smudge_config, &attributes) {
13749            (Some(config), Some(matcher)) => {
13750                let checks = matcher.attributes_for_path(path, &filter_attribute_names(), false);
13751                apply_smudge_filter_with_attributes_cow_format(
13752                    config,
13753                    &checks,
13754                    path,
13755                    &object.body,
13756                    format,
13757                )?
13758            }
13759            _ => Cow::Borrowed(&object.body),
13760        };
13761        let file_path = worktree_path(worktree_root, path)?;
13762        prepare_blob_parent_dirs(worktree_root, &file_path)?;
13763        remove_existing_worktree_path(&file_path)?;
13764        fs::write(&file_path, &body)?;
13765        set_worktree_file_mode(&file_path, entry.mode)?;
13766        let metadata = fs::metadata(&file_path)?;
13767        let mut index_entry = index_entry_from_metadata(path.clone(), entry.oid, &metadata);
13768        index_entry.mode = entry.mode;
13769        index_entries.push(index_entry);
13770    }
13771    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
13772    let extensions = preserved_index_extensions(git_dir, format)?;
13773    fs::write(
13774        repository_index_path(git_dir),
13775        Index {
13776            version: 2,
13777            entries: index_entries,
13778            extensions,
13779            checksum: None,
13780        }
13781        .write(format)?,
13782    )?;
13783    Ok(target_entries.len())
13784}
13785
13786/// Build an [`AttributeMatcher`] from the `.gitattributes` files contained in a
13787/// tree, plus the repo-level (`core.attributesFile`, `.git/info/attributes`)
13788/// sources, mirroring [`standard_attributes_for_path_from_tree`].
13789fn build_tree_attribute_matcher(
13790    worktree_root: &Path,
13791    db: &FileObjectDatabase,
13792    format: ObjectFormat,
13793    tree_oid: &ObjectId,
13794) -> Result<AttributeMatcher> {
13795    let mut matcher = AttributeMatcher::default();
13796    let git_dir = worktree_root.join(".git");
13797    matcher.configure_case_sensitivity(&git_dir);
13798    if !matcher.read_configured_attributes(worktree_root, &git_dir) {
13799        matcher.read_default_global_attributes();
13800    }
13801    collect_attribute_patterns_from_tree(db, format, tree_oid, Vec::new(), &mut matcher)?;
13802    read_attribute_patterns(
13803        worktree_root.join(".git").join("info").join("attributes"),
13804        &mut matcher,
13805        &[],
13806        b".git/info/attributes",
13807        false,
13808    );
13809    Ok(matcher)
13810}
13811
13812fn materialize_tree_entry_with_optional_smudge(
13813    db: &FileObjectDatabase,
13814    format: ObjectFormat,
13815    worktree_root: &Path,
13816    path: &[u8],
13817    entry: &TrackedEntry,
13818    smudge_config: Option<&GitConfig>,
13819    attributes: Option<&AttributeMatcher>,
13820) -> Result<IndexEntry> {
13821    if smudge_config.is_none() || sley_index::is_gitlink(entry.mode) {
13822        return materialize_tree_entry(db, worktree_root, path, entry);
13823    }
13824    let config = smudge_config.expect("checked above");
13825    let matcher = attributes.expect("attributes are built when smudge_config is set");
13826    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
13827    let checks = matcher.attributes_for_path(path, &filter_attribute_names(), false);
13828    let body = apply_smudge_filter_with_attributes_cow_format(
13829        config,
13830        &checks,
13831        path,
13832        &object.body,
13833        format,
13834    )?;
13835    let file_path = worktree_path(worktree_root, path)?;
13836    prepare_blob_parent_dirs(worktree_root, &file_path)?;
13837    remove_existing_worktree_path(&file_path)?;
13838    fs::write(&file_path, &body)?;
13839    set_worktree_file_mode(&file_path, entry.mode)?;
13840    let metadata = fs::metadata(&file_path)?;
13841    let mut index_entry = index_entry_from_metadata(path.to_vec(), entry.oid, &metadata);
13842    index_entry.mode = entry.mode;
13843    Ok(index_entry)
13844}
13845
13846/// Sparse- and skip-worktree-aware variant of
13847/// [`checkout_commit_to_index_and_worktree`].
13848///
13849/// When `sparse` is `None` this behaves like the plain checkout except that it
13850/// preserves any pre-existing skip-worktree bits (so an already-sparse worktree
13851/// is not silently re-expanded). When `sparse` is `Some`, every target path is
13852/// additionally classified against the patterns: in-cone paths are written and
13853/// have their skip-worktree bit cleared, while out-of-cone paths are left out
13854/// of the worktree, get their skip-worktree bit set, and have any stale file
13855/// removed.
13856fn checkout_commit_to_index_and_worktree_sparse(
13857    worktree_root: &Path,
13858    git_dir: &Path,
13859    format: ObjectFormat,
13860    target: &ObjectId,
13861    sparse: Option<(&SparseCheckout, SparseCheckoutMode)>,
13862    smudge_config: Option<&GitConfig>,
13863    process_metadata: Option<Vec<(String, String)>>,
13864) -> Result<usize> {
13865    let _process_filter_metadata = set_process_filter_metadata(process_metadata);
13866    let previously_skipped = skip_worktree_paths(git_dir, format)?;
13867    let db = FileObjectDatabase::from_git_dir(git_dir, format);
13868    let commit = read_commit(&db, format, target)?;
13869    let mut target_entries = BTreeMap::new();
13870    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
13871
13872    // Honor skip-worktree: a path whose worktree file is intentionally absent
13873    // must not be treated as a dirty (deleted) change blocking the checkout.
13874    let mut dirty = false;
13875    stream_short_status(worktree_root, git_dir, format, |entry| {
13876        if previously_skipped.contains(entry.path) {
13877            return Ok(StreamControl::Continue);
13878        }
13879        // Submodule state never blocks a checkout: upstream unpack-trees
13880        // treats gitlinks as always up-to-date (ie_match_stat refuses to pay
13881        // for a submodule dirtiness probe), so new commits / dirty content in
13882        // a submodule must not fail the branch switch.
13883        if entry.index_mode.is_some_and(sley_index::is_gitlink)
13884            || entry.worktree_mode.is_some_and(sley_index::is_gitlink)
13885        {
13886            return Ok(StreamControl::Continue);
13887        }
13888        // An untracked embedded repository where the target tree records a
13889        // gitlink is reused as-is (upstream entry.c write_entry: mkdir with
13890        // EEXIST is success), so it does not block the checkout either.
13891        if entry.index == b'?' && entry.worktree == b'?' {
13892            let path = entry.path.strip_suffix(b"/").unwrap_or(entry.path);
13893            if target_entries
13894                .get(path)
13895                .is_some_and(|target| sley_index::is_gitlink(target.mode))
13896            {
13897                return Ok(StreamControl::Continue);
13898            }
13899        }
13900        dirty = true;
13901        Ok(StreamControl::Stop)
13902    })?;
13903    if dirty {
13904        return Err(GitError::Transaction(
13905            "checkout requires a clean working tree".into(),
13906        ));
13907    }
13908
13909    let matcher = sparse.map(|(spec, mode)| SparseMatcher::new(spec, mode));
13910    let attributes = smudge_config
13911        .map(|_| build_tree_attribute_matcher(worktree_root, &db, format, &commit.tree))
13912        .transpose()?;
13913
13914    for path in read_index_entries(git_dir, format)?.keys() {
13915        if target_entries.contains_key(path) {
13916            continue;
13917        }
13918        // Do not disturb the worktree state of an intentionally skipped path.
13919        if previously_skipped.contains(path) {
13920            continue;
13921        }
13922        remove_worktree_file(worktree_root, path)?;
13923    }
13924
13925    let mut index_entries = Vec::new();
13926    for (path, entry) in &target_entries {
13927        let in_cone = matcher.as_ref().map_or_else(
13928            || !previously_skipped.contains(path),
13929            |matcher| matcher.includes_file(path),
13930        );
13931        let index_entry = if in_cone {
13932            materialize_tree_entry_with_optional_smudge(
13933                &db,
13934                format,
13935                worktree_root,
13936                path,
13937                entry,
13938                smudge_config,
13939                attributes.as_ref(),
13940            )?
13941        } else {
13942            // Out of cone: ensure no stale worktree file remains and synthesize
13943            // an index entry straight from the tree (no worktree metadata),
13944            // then mark it skip-worktree.
13945            remove_worktree_file(worktree_root, path)?;
13946            let mut index_entry = restored_head_index_entry(worktree_root, &db, path, entry)?;
13947            set_skip_worktree(&mut index_entry);
13948            index_entry
13949        };
13950        index_entries.push(index_entry);
13951    }
13952    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
13953    let mut index = Index {
13954        version: 2,
13955        entries: index_entries,
13956        extensions: preserved_index_extensions(git_dir, format)?,
13957        checksum: None,
13958    };
13959    normalize_index_version_for_extended_flags(&mut index);
13960    write_repository_index_ref(git_dir, format, &index)?;
13961    Ok(target_entries.len())
13962}
13963
13964fn skip_worktree_paths(git_dir: &Path, format: ObjectFormat) -> Result<BTreeSet<Vec<u8>>> {
13965    let index_path = repository_index_path(git_dir);
13966    if !index_path.exists() {
13967        return Ok(BTreeSet::new());
13968    }
13969    let index = Index::parse(&fs::read(index_path)?, format)?;
13970    Ok(index
13971        .entries
13972        .into_iter()
13973        .filter(index_entry_skip_worktree)
13974        .map(|entry| entry.path.into_bytes())
13975        .collect())
13976}
13977
13978pub fn restore_worktree_paths(
13979    worktree_root: impl AsRef<Path>,
13980    git_dir: impl AsRef<Path>,
13981    format: ObjectFormat,
13982    paths: &[PathBuf],
13983) -> Result<RestoreResult> {
13984    restore_worktree_paths_inner(
13985        worktree_root.as_ref(),
13986        git_dir.as_ref(),
13987        format,
13988        paths,
13989        None,
13990    )
13991}
13992
13993/// Like [`restore_worktree_paths`], applying the smudge-side content filters
13994/// (CRLF / ident / filter drivers) the way a checkout writes blobs.
13995pub fn restore_worktree_paths_filtered(
13996    worktree_root: impl AsRef<Path>,
13997    git_dir: impl AsRef<Path>,
13998    format: ObjectFormat,
13999    paths: &[PathBuf],
14000    config: &GitConfig,
14001) -> Result<RestoreResult> {
14002    restore_worktree_paths_inner(
14003        worktree_root.as_ref(),
14004        git_dir.as_ref(),
14005        format,
14006        paths,
14007        Some(config),
14008    )
14009}
14010
14011fn restore_worktree_paths_inner(
14012    worktree_root: &Path,
14013    git_dir: &Path,
14014    format: ObjectFormat,
14015    paths: &[PathBuf],
14016    smudge_config: Option<&GitConfig>,
14017) -> Result<RestoreResult> {
14018    let index_path = repository_index_path(git_dir);
14019    if !index_path.exists() {
14020        return Err(GitError::Exit(1));
14021    }
14022    let mut index = Index::parse(&fs::read(&index_path)?, format)?;
14023    let stat_cache = IndexStatCache::from_index(&index, &index_path);
14024    let db = FileObjectDatabase::from_git_dir(git_dir, format);
14025    let mut restored = BTreeSet::new();
14026    for path in paths {
14027        let absolute = if path.is_absolute() {
14028            path.clone()
14029        } else {
14030            worktree_root.join(path)
14031        };
14032        let absolute = normalize_absolute_path_lexically(&absolute);
14033        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
14034            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
14035        })?;
14036        let git_path = git_path_bytes(relative)?;
14037        let recursive = path == Path::new(".")
14038            || path.to_string_lossy().ends_with('/')
14039            || absolute.is_dir()
14040            || index_has_entry_under(&index.entries, &git_path);
14041        let mut matched = false;
14042        let matched_positions = index
14043            .entries
14044            .iter()
14045            .enumerate()
14046            .filter_map(|(position, entry)| {
14047                (entry.path.as_bytes() == git_path.as_slice()
14048                    || (recursive && index_entry_is_under_path(entry.path.as_bytes(), &git_path)))
14049                .then_some(position)
14050            })
14051            .collect::<Vec<_>>();
14052        for position in matched_positions {
14053            let refreshed = restore_index_entry(
14054                worktree_root,
14055                git_dir,
14056                format,
14057                &db,
14058                &index.entries[position],
14059                smudge_config,
14060                Some(&stat_cache),
14061            )?;
14062            restored.insert(index.entries[position].path.clone());
14063            matched = true;
14064            if let Some(refreshed) = refreshed {
14065                index.entries[position] = refreshed;
14066            }
14067        }
14068        if !matched {
14069            eprintln!(
14070                "error: pathspec '{}' did not match any file(s) known to git",
14071                path.display()
14072            );
14073            return Err(GitError::Exit(1));
14074        }
14075    }
14076    write_repository_index_ref(git_dir, format, &index)?;
14077    Ok(RestoreResult {
14078        restored: restored.len(),
14079    })
14080}
14081
14082pub fn checkout_index_paths(
14083    worktree_root: impl AsRef<Path>,
14084    git_dir: impl AsRef<Path>,
14085    format: ObjectFormat,
14086    paths: &[PathBuf],
14087    options: CheckoutIndexPathOptions<'_>,
14088) -> Result<RestoreResult> {
14089    let worktree_root = worktree_root.as_ref();
14090    let git_dir = git_dir.as_ref();
14091    let index_path = repository_index_path(git_dir);
14092    if !index_path.exists() {
14093        return Err(GitError::Exit(1));
14094    }
14095    let mut index = Index::parse(&fs::read(&index_path)?, format)?;
14096    if options.merge {
14097        checkout_unmerge_resolve_undo_paths(worktree_root, &mut index, format, paths)?;
14098    }
14099    let stat_cache = IndexStatCache::from_index(&index, &index_path);
14100    let db = FileObjectDatabase::from_git_dir(git_dir, format);
14101    let selected = checkout_selected_index_paths(worktree_root, &index, paths)?;
14102
14103    if options.stage.is_none() && !options.merge && !options.force {
14104        for path in &selected {
14105            if checkout_path_is_unmerged(&index, path) {
14106                eprintln!(
14107                    "error: path '{}' is unmerged",
14108                    String::from_utf8_lossy(path)
14109                );
14110                return Err(GitError::Exit(1));
14111            }
14112        }
14113    }
14114
14115    let mut refreshed = BTreeMap::new();
14116    let mut restored = BTreeSet::new();
14117    for path in selected {
14118        let positions = index
14119            .entries
14120            .iter()
14121            .enumerate()
14122            .filter_map(|(position, entry)| (entry.path.as_bytes() == path).then_some(position))
14123            .collect::<Vec<_>>();
14124        let stage0 = positions
14125            .iter()
14126            .copied()
14127            .find(|position| index.entries[*position].stage() == Stage::Normal);
14128        let is_unmerged = positions
14129            .iter()
14130            .any(|position| index.entries[*position].stage() != Stage::Normal);
14131
14132        if is_unmerged {
14133            if let Some(stage) = options.stage {
14134                let wanted = match stage {
14135                    CheckoutStage::Ours => Stage::Ours,
14136                    CheckoutStage::Theirs => Stage::Theirs,
14137                };
14138                let Some(position) = positions
14139                    .iter()
14140                    .copied()
14141                    .find(|position| index.entries[*position].stage() == wanted)
14142                else {
14143                    eprintln!(
14144                        "error: path '{}' does not have {} version",
14145                        String::from_utf8_lossy(&path),
14146                        match stage {
14147                            CheckoutStage::Ours => "our",
14148                            CheckoutStage::Theirs => "their",
14149                        }
14150                    );
14151                    return Err(GitError::Exit(1));
14152                };
14153                checkout_write_index_entry_to_worktree(
14154                    worktree_root,
14155                    git_dir,
14156                    format,
14157                    &db,
14158                    &index.entries[position],
14159                    options.smudge_config,
14160                    Some(&stat_cache),
14161                )?;
14162                restored.insert(path);
14163                continue;
14164            }
14165            if options.merge {
14166                checkout_merge_unmerged_path(
14167                    worktree_root,
14168                    &db,
14169                    &index,
14170                    &positions,
14171                    options.conflict_style,
14172                )?;
14173                restored.insert(path);
14174                continue;
14175            }
14176            if options.force {
14177                continue;
14178            }
14179        }
14180
14181        if let Some(position) = stage0 {
14182            if let Some(updated) = checkout_write_index_entry_to_worktree(
14183                worktree_root,
14184                git_dir,
14185                format,
14186                &db,
14187                &index.entries[position],
14188                options.smudge_config,
14189                Some(&stat_cache),
14190            )? {
14191                refreshed.insert(position, updated);
14192            }
14193            restored.insert(path);
14194        }
14195    }
14196
14197    for (position, entry) in refreshed {
14198        index.entries[position] = entry;
14199    }
14200    if !index.entries.is_empty() {
14201        write_repository_index_ref(git_dir, format, &index)?;
14202    }
14203    Ok(RestoreResult {
14204        restored: restored.len(),
14205    })
14206}
14207
14208pub fn unresolve_index_paths(
14209    worktree_root: impl AsRef<Path>,
14210    git_dir: impl AsRef<Path>,
14211    format: ObjectFormat,
14212    paths: &[PathBuf],
14213) -> Result<()> {
14214    let worktree_root = worktree_root.as_ref();
14215    let git_dir = git_dir.as_ref();
14216    let index_path = repository_index_path(git_dir);
14217    if !index_path.exists() {
14218        return Ok(());
14219    }
14220    let mut index = Index::parse(&fs::read(&index_path)?, format)?;
14221    checkout_unmerge_resolve_undo_paths(worktree_root, &mut index, format, paths)?;
14222    write_repository_index_ref(git_dir, format, &index)
14223}
14224
14225fn checkout_selected_index_paths(
14226    worktree_root: &Path,
14227    index: &Index,
14228    paths: &[PathBuf],
14229) -> Result<BTreeSet<Vec<u8>>> {
14230    let index_paths = index
14231        .entries
14232        .iter()
14233        .map(|entry| entry.path.as_bytes().to_vec())
14234        .collect::<BTreeSet<_>>();
14235    let mut selected = BTreeSet::new();
14236    for path in paths {
14237        let absolute = if path.is_absolute() {
14238            path.clone()
14239        } else {
14240            worktree_root.join(path)
14241        };
14242        let absolute = normalize_absolute_path_lexically(&absolute);
14243        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
14244            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
14245        })?;
14246        let git_path = git_path_bytes(relative)?;
14247        let recursive = path == Path::new(".")
14248            || path.to_string_lossy().ends_with('/')
14249            || absolute.is_dir()
14250            || index_paths
14251                .iter()
14252                .any(|entry| index_entry_is_under_path(entry, &git_path));
14253        let matched = index_paths
14254            .iter()
14255            .filter(|entry| {
14256                entry.as_slice() == git_path.as_slice()
14257                    || (recursive && index_entry_is_under_path(entry, &git_path))
14258            })
14259            .cloned()
14260            .collect::<Vec<_>>();
14261        if matched.is_empty() {
14262            eprintln!(
14263                "error: pathspec '{}' did not match any file(s) known to git",
14264                path.display()
14265            );
14266            return Err(GitError::Exit(1));
14267        }
14268        selected.extend(matched);
14269    }
14270    Ok(selected)
14271}
14272
14273fn checkout_unmerge_resolve_undo_paths(
14274    worktree_root: &Path,
14275    index: &mut Index,
14276    format: ObjectFormat,
14277    paths: &[PathBuf],
14278) -> Result<()> {
14279    let records = parse_resolve_undo_records(index.extension(b"REUC")?, format)?;
14280    if records.is_empty() {
14281        return Ok(());
14282    }
14283    let mut remaining = Vec::new();
14284    let mut unmerged_any = false;
14285    for record in records {
14286        if checkout_pathspecs_match_git_path(worktree_root, paths, &record.path)? {
14287            remove_index_entries_with_path(&mut index.entries, &record.path);
14288            for (idx, stage) in record.stages.into_iter().enumerate() {
14289                let Some((mode, oid)) = stage else {
14290                    continue;
14291                };
14292                index.entries.push(resolve_undo_index_entry(
14293                    record.path.clone(),
14294                    mode,
14295                    oid,
14296                    (idx + 1) as u16,
14297                ));
14298            }
14299            unmerged_any = true;
14300        } else {
14301            remaining.push(record);
14302        }
14303    }
14304    if unmerged_any {
14305        index.entries.sort_by(compare_index_key);
14306        normalize_index_version_for_extended_flags(index);
14307        set_resolve_undo_extension(index, &remaining)?;
14308    }
14309    Ok(())
14310}
14311
14312fn checkout_pathspecs_match_git_path(
14313    worktree_root: &Path,
14314    paths: &[PathBuf],
14315    candidate: &[u8],
14316) -> Result<bool> {
14317    for path in paths {
14318        let absolute = if path.is_absolute() {
14319            path.clone()
14320        } else {
14321            worktree_root.join(path)
14322        };
14323        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
14324            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
14325        })?;
14326        let git_path = git_path_bytes(relative)?;
14327        let recursive = path == Path::new(".")
14328            || path.to_string_lossy().ends_with('/')
14329            || absolute.is_dir()
14330            || index_entry_is_under_path(candidate, &git_path);
14331        if candidate == git_path.as_slice()
14332            || (recursive && index_entry_is_under_path(candidate, &git_path))
14333        {
14334            return Ok(true);
14335        }
14336    }
14337    Ok(false)
14338}
14339
14340fn resolve_undo_index_entry(path: Vec<u8>, mode: u32, oid: ObjectId, stage: u16) -> IndexEntry {
14341    let name_len = (path
14342        .len()
14343        .min(sley_index::INDEX_FLAG_NAME_LENGTH_MASK as usize)) as u16;
14344    IndexEntry {
14345        ctime_seconds: 0,
14346        ctime_nanoseconds: 0,
14347        mtime_seconds: 0,
14348        mtime_nanoseconds: 0,
14349        dev: 0,
14350        ino: 0,
14351        mode,
14352        uid: 0,
14353        gid: 0,
14354        size: 0,
14355        oid,
14356        flags: name_len | (stage << 12),
14357        flags_extended: 0,
14358        path: path.into(),
14359    }
14360}
14361
14362fn checkout_path_is_unmerged(index: &Index, path: &[u8]) -> bool {
14363    index
14364        .entries
14365        .iter()
14366        .any(|entry| entry.path.as_bytes() == path && entry.stage() != Stage::Normal)
14367}
14368
14369fn checkout_write_index_entry_to_worktree(
14370    worktree_root: &Path,
14371    git_dir: &Path,
14372    format: ObjectFormat,
14373    db: &FileObjectDatabase,
14374    entry: &IndexEntry,
14375    smudge_config: Option<&GitConfig>,
14376    stat_cache: Option<&IndexStatCache>,
14377) -> Result<Option<IndexEntry>> {
14378    restore_index_entry(
14379        worktree_root,
14380        git_dir,
14381        format,
14382        db,
14383        entry,
14384        smudge_config,
14385        stat_cache,
14386    )
14387}
14388
14389fn checkout_merge_unmerged_path(
14390    worktree_root: &Path,
14391    db: &FileObjectDatabase,
14392    index: &Index,
14393    positions: &[usize],
14394    style: CheckoutConflictStyle,
14395) -> Result<()> {
14396    let mut base = None;
14397    let mut ours = None;
14398    let mut theirs = None;
14399    for position in positions {
14400        let entry = &index.entries[*position];
14401        match entry.stage() {
14402            Stage::Base => base = Some(entry),
14403            Stage::Ours => ours = Some(entry),
14404            Stage::Theirs => theirs = Some(entry),
14405            Stage::Normal => {}
14406        }
14407    }
14408    let Some(ours) = ours else {
14409        return Ok(());
14410    };
14411    let Some(theirs) = theirs else {
14412        return Ok(());
14413    };
14414    let base_body = match base {
14415        Some(entry) => read_expected_object(db, &entry.oid, ObjectType::Blob)?
14416            .body
14417            .clone(),
14418        None => Vec::new(),
14419    };
14420    let ours_body = read_expected_object(db, &ours.oid, ObjectType::Blob)?
14421        .body
14422        .clone();
14423    let theirs_body = read_expected_object(db, &theirs.oid, ObjectType::Blob)?
14424        .body
14425        .clone();
14426    let result = sley_diff_merge::merge_blobs(
14427        &base_body,
14428        &ours_body,
14429        &theirs_body,
14430        &sley_diff_merge::MergeBlobOptions {
14431            ours_label: "ours",
14432            theirs_label: "theirs",
14433            base_label: "base",
14434            style: match style {
14435                CheckoutConflictStyle::Merge => sley_diff_merge::ConflictStyle::Merge,
14436                CheckoutConflictStyle::Diff3 => sley_diff_merge::ConflictStyle::Diff3,
14437            },
14438        },
14439    );
14440    let file_path = worktree_path(worktree_root, ours.path.as_bytes())?;
14441    prepare_blob_parent_dirs(worktree_root, &file_path)?;
14442    remove_existing_worktree_path(&file_path)?;
14443    fs::write(&file_path, result.content)?;
14444    set_worktree_file_mode(&file_path, ours.mode)?;
14445    Ok(())
14446}
14447
14448pub fn restore_index_paths_from_head(
14449    worktree_root: impl AsRef<Path>,
14450    git_dir: impl AsRef<Path>,
14451    format: ObjectFormat,
14452    paths: &[PathBuf],
14453) -> Result<RestoreResult> {
14454    let worktree_root = worktree_root.as_ref();
14455    let git_dir = git_dir.as_ref();
14456    let index_path = repository_index_path(git_dir);
14457    let index = if index_path.exists() {
14458        Index::parse(&fs::read(&index_path)?, format)?
14459    } else {
14460        Index {
14461            version: 2,
14462            entries: Vec::new(),
14463            extensions: Vec::new(),
14464            checksum: None,
14465        }
14466    };
14467    let db = FileObjectDatabase::from_git_dir(git_dir, format);
14468    let head_entries = head_tree_entries(git_dir, format, &db)?;
14469    restore_index_paths_from_entries(
14470        worktree_root,
14471        git_dir,
14472        format,
14473        &db,
14474        index,
14475        &head_entries,
14476        paths,
14477        false,
14478    )
14479}
14480
14481pub fn restore_index_paths_from_tree(
14482    worktree_root: impl AsRef<Path>,
14483    git_dir: impl AsRef<Path>,
14484    format: ObjectFormat,
14485    tree_oid: &ObjectId,
14486    paths: &[PathBuf],
14487) -> Result<RestoreResult> {
14488    let worktree_root = worktree_root.as_ref();
14489    let git_dir = git_dir.as_ref();
14490    let index_path = repository_index_path(git_dir);
14491    let index = if index_path.exists() {
14492        Index::parse(&fs::read(&index_path)?, format)?
14493    } else {
14494        Index {
14495            version: 2,
14496            entries: Vec::new(),
14497            extensions: Vec::new(),
14498            checksum: None,
14499        }
14500    };
14501    let db = FileObjectDatabase::from_git_dir(git_dir, format);
14502    let source_entries = tree_entries(&db, format, tree_oid)?;
14503    restore_index_paths_from_entries(
14504        worktree_root,
14505        git_dir,
14506        format,
14507        &db,
14508        index,
14509        &source_entries,
14510        paths,
14511        false,
14512    )
14513}
14514
14515pub fn restore_index_paths_from_tree_allow_unmatched(
14516    worktree_root: impl AsRef<Path>,
14517    git_dir: impl AsRef<Path>,
14518    format: ObjectFormat,
14519    tree_oid: &ObjectId,
14520    paths: &[PathBuf],
14521) -> Result<RestoreResult> {
14522    let worktree_root = worktree_root.as_ref();
14523    let git_dir = git_dir.as_ref();
14524    let index_path = repository_index_path(git_dir);
14525    let index = if index_path.exists() {
14526        Index::parse(&fs::read(&index_path)?, format)?
14527    } else {
14528        Index {
14529            version: 2,
14530            entries: Vec::new(),
14531            extensions: Vec::new(),
14532            checksum: None,
14533        }
14534    };
14535    let db = FileObjectDatabase::from_git_dir(git_dir, format);
14536    let source_entries = tree_entries(&db, format, tree_oid)?;
14537    restore_index_paths_from_entries(
14538        worktree_root,
14539        git_dir,
14540        format,
14541        &db,
14542        index,
14543        &source_entries,
14544        paths,
14545        true,
14546    )
14547}
14548
14549fn restore_index_paths_from_entries(
14550    worktree_root: &Path,
14551    git_dir: &Path,
14552    format: ObjectFormat,
14553    db: &FileObjectDatabase,
14554    mut index: Index,
14555    source_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
14556    paths: &[PathBuf],
14557    allow_unmatched: bool,
14558) -> Result<RestoreResult> {
14559    let sparse = active_sparse_checkout(git_dir)?;
14560    if index.is_sparse() {
14561        expand_sparse_index(&mut index, db, format)?;
14562    }
14563    let index_version = index.version;
14564    let extensions = index_extensions_without_cache_tree(&index.extensions);
14565    let mut index_entries = index
14566        .entries
14567        .into_iter()
14568        .map(|entry| (entry.path.as_bytes().to_vec(), entry))
14569        .collect::<BTreeMap<_, _>>();
14570    let prior_skip_worktree = index_entries
14571        .iter()
14572        .filter(|(_, entry)| entry.is_skip_worktree())
14573        .map(|(path, _)| path.clone())
14574        .collect::<BTreeSet<_>>();
14575    let mut restored = BTreeSet::new();
14576    for path in paths {
14577        let absolute = if path.is_absolute() {
14578            path.clone()
14579        } else {
14580            worktree_root.join(path)
14581        };
14582        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
14583            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
14584        })?;
14585        let git_path = git_path_bytes(relative)?;
14586        let recursive = path == Path::new(".")
14587            || path.to_string_lossy().ends_with('/')
14588            || absolute.is_dir()
14589            || index_entries
14590                .keys()
14591                .any(|entry| index_entry_is_under_path(entry, &git_path))
14592            || source_entries
14593                .keys()
14594                .any(|entry| index_entry_is_under_path(entry, &git_path));
14595        let mut matched_paths = BTreeSet::new();
14596        for path in index_entries.keys().chain(source_entries.keys()) {
14597            if path.as_slice() == git_path.as_slice()
14598                || (recursive && index_entry_is_under_path(path, &git_path))
14599            {
14600                matched_paths.insert(path.clone());
14601            }
14602        }
14603        if matched_paths.is_empty() {
14604            if allow_unmatched {
14605                continue;
14606            }
14607            eprintln!(
14608                "error: pathspec '{}' did not match any file(s) known to git",
14609                path.display()
14610            );
14611            return Err(GitError::Exit(1));
14612        }
14613        for path in matched_paths {
14614            if let Some(entry) = source_entries.get(&path) {
14615                // git's pathspec reset (`reset_index` → diff against the source
14616                // tree) only rewrites entries that actually CHANGE: an entry whose
14617                // oid and mode already equal the source is left untouched, so its
14618                // cached stat is preserved and `git diff-files` stays clean (t7102
14619                // "resetting an unmodified path is a no-op"). Only when the entry
14620                // genuinely changes does git write a fresh, stat-zeroed entry.
14621                let unchanged = index_entries.get(&path).is_some_and(|existing| {
14622                    existing.oid == entry.oid
14623                        && existing.mode == entry.mode
14624                        && !existing.is_intent_to_add()
14625                });
14626                if !unchanged {
14627                    let mut restored = restored_head_index_entry(worktree_root, db, &path, entry)?;
14628                    if prior_skip_worktree.contains(&path) {
14629                        restored.set_skip_worktree(true);
14630                    }
14631                    index_entries.insert(path.clone(), restored);
14632                }
14633            } else {
14634                index_entries.remove(&path);
14635            }
14636            restored.insert(path);
14637        }
14638    }
14639    let mut entries = index_entries.into_values().collect::<Vec<_>>();
14640    entries.sort_by(|left, right| left.path.cmp(&right.path));
14641    let restored_paths = restored.iter().cloned().collect::<Vec<_>>();
14642    let mut index = Index {
14643        version: index_version,
14644        entries,
14645        extensions,
14646        checksum: None,
14647    };
14648    invalidate_untracked_cache_for_git_paths(&mut index, format, &restored_paths)?;
14649    if let Some((sparse, mode)) = sparse
14650        && sparse.sparse_index
14651    {
14652        let matcher = SparseMatcher::new(&sparse, mode);
14653        collapse_to_sparse_index(&mut index, &matcher, db, format)?;
14654    }
14655    write_repository_index_ref(git_dir, format, &index)?;
14656    Ok(RestoreResult {
14657        restored: restored.len(),
14658    })
14659}
14660
14661pub fn restore_index_and_worktree_paths_from_head(
14662    worktree_root: impl AsRef<Path>,
14663    git_dir: impl AsRef<Path>,
14664    format: ObjectFormat,
14665    paths: &[PathBuf],
14666) -> Result<RestoreResult> {
14667    let worktree_root = worktree_root.as_ref();
14668    let git_dir = git_dir.as_ref();
14669    let index_path = repository_index_path(git_dir);
14670    let index = if index_path.exists() {
14671        Index::parse(&fs::read(&index_path)?, format)?
14672    } else {
14673        Index {
14674            version: 2,
14675            entries: Vec::new(),
14676            extensions: Vec::new(),
14677            checksum: None,
14678        }
14679    };
14680    let db = FileObjectDatabase::from_git_dir(git_dir, format);
14681    let head_entries = head_tree_entries(git_dir, format, &db)?;
14682    restore_index_and_worktree_paths_from_entries(
14683        worktree_root,
14684        git_dir,
14685        format,
14686        &db,
14687        index,
14688        &head_entries,
14689        paths,
14690    )
14691}
14692
14693pub fn restore_index_and_worktree_paths_from_tree(
14694    worktree_root: impl AsRef<Path>,
14695    git_dir: impl AsRef<Path>,
14696    format: ObjectFormat,
14697    tree_oid: &ObjectId,
14698    paths: &[PathBuf],
14699) -> Result<RestoreResult> {
14700    let worktree_root = worktree_root.as_ref();
14701    let git_dir = git_dir.as_ref();
14702    let index_path = repository_index_path(git_dir);
14703    let index = if index_path.exists() {
14704        Index::parse(&fs::read(&index_path)?, format)?
14705    } else {
14706        Index {
14707            version: 2,
14708            entries: Vec::new(),
14709            extensions: Vec::new(),
14710            checksum: None,
14711        }
14712    };
14713    let db = FileObjectDatabase::from_git_dir(git_dir, format);
14714    let source_entries = tree_entries(&db, format, tree_oid)?;
14715    restore_index_and_worktree_paths_from_entries(
14716        worktree_root,
14717        git_dir,
14718        format,
14719        &db,
14720        index,
14721        &source_entries,
14722        paths,
14723    )
14724}
14725
14726fn restore_index_and_worktree_paths_from_entries(
14727    worktree_root: &Path,
14728    git_dir: &Path,
14729    format: ObjectFormat,
14730    db: &FileObjectDatabase,
14731    index: Index,
14732    source_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
14733    paths: &[PathBuf],
14734) -> Result<RestoreResult> {
14735    let index_version = index.version;
14736    let extensions = index_extensions_without_cache_tree(&index.extensions);
14737    let mut index_entries = index
14738        .entries
14739        .into_iter()
14740        .map(|entry| (entry.path.as_bytes().to_vec(), entry))
14741        .collect::<BTreeMap<_, _>>();
14742    let mut restored = BTreeSet::new();
14743    for path in paths {
14744        let absolute = if path.is_absolute() {
14745            path.clone()
14746        } else {
14747            worktree_root.join(path)
14748        };
14749        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
14750            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
14751        })?;
14752        let git_path = git_path_bytes(relative)?;
14753        let recursive = path == Path::new(".")
14754            || path.to_string_lossy().ends_with('/')
14755            || absolute.is_dir()
14756            || index_entries
14757                .keys()
14758                .any(|entry| index_entry_is_under_path(entry, &git_path))
14759            || source_entries
14760                .keys()
14761                .any(|entry| index_entry_is_under_path(entry, &git_path));
14762        let mut matched_paths = BTreeSet::new();
14763        for path in index_entries.keys().chain(source_entries.keys()) {
14764            if path.as_slice() == git_path.as_slice()
14765                || (recursive && index_entry_is_under_path(path, &git_path))
14766            {
14767                matched_paths.insert(path.clone());
14768            }
14769        }
14770        if matched_paths.is_empty() {
14771            eprintln!(
14772                "error: pathspec '{}' did not match any file(s) known to git",
14773                path.display()
14774            );
14775            return Err(GitError::Exit(1));
14776        }
14777        for path in matched_paths {
14778            if let Some(entry) = source_entries.get(&path) {
14779                index_entries.insert(
14780                    path.clone(),
14781                    restore_head_entry_to_worktree_and_index(worktree_root, db, &path, entry)?,
14782                );
14783            } else {
14784                index_entries.remove(&path);
14785                remove_worktree_file(worktree_root, &path)?;
14786            }
14787            restored.insert(path);
14788        }
14789    }
14790    let mut entries = index_entries.into_values().collect::<Vec<_>>();
14791    entries.sort_by(|left, right| left.path.cmp(&right.path));
14792    let restored_paths = restored.iter().cloned().collect::<Vec<_>>();
14793    let mut index = Index {
14794        version: index_version,
14795        entries,
14796        extensions,
14797        checksum: None,
14798    };
14799    invalidate_untracked_cache_for_git_paths(&mut index, format, &restored_paths)?;
14800    write_repository_index_ref(git_dir, format, &index)?;
14801    Ok(RestoreResult {
14802        restored: restored.len(),
14803    })
14804}
14805
14806pub fn reset_index_and_worktree_to_commit(
14807    worktree_root: impl AsRef<Path>,
14808    git_dir: impl AsRef<Path>,
14809    format: ObjectFormat,
14810    commit_oid: &ObjectId,
14811) -> Result<RestoreResult> {
14812    let worktree_root = worktree_root.as_ref();
14813    let git_dir = git_dir.as_ref();
14814    let db = FileObjectDatabase::from_git_dir(git_dir, format);
14815    let commit = read_commit(&db, format, commit_oid)?;
14816    let mut target_entries = BTreeMap::new();
14817    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
14818    refuse_if_current_working_directory_becomes_file(worktree_root, &target_entries)?;
14819    let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
14820    let attributes = build_tree_attribute_matcher(worktree_root, &db, format, &commit.tree)?;
14821
14822    // git's `reset --hard` runs a one-way merge through unpack-trees: EVERY path
14823    // present in the current index (at ANY stage) that the target tree does not
14824    // track is removed from the worktree. A conflicted D/F merge can leave a
14825    // path like `dir~HEAD` at stage 2 only — those entries are dropped by the
14826    // stage-0-only `read_index_entries`, so iterate the RAW index paths here
14827    // (deduped across stages) to match git and delete the moved-aside file.
14828    for path in current_index_paths(git_dir, format, &db)? {
14829        if !target_entries.contains_key(&path) {
14830            remove_worktree_file(worktree_root, &path)?;
14831        }
14832    }
14833
14834    let mut index_entries = Vec::new();
14835    for (path, entry) in &target_entries {
14836        index_entries.push(materialize_tree_entry_filtered(
14837            &db,
14838            format,
14839            worktree_root,
14840            path,
14841            entry,
14842            &config,
14843            &attributes,
14844        )?);
14845    }
14846    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
14847    let extensions = preserved_index_extensions(git_dir, format)?;
14848    fs::write(
14849        repository_index_path(git_dir),
14850        Index {
14851            version: 2,
14852            entries: index_entries,
14853            extensions,
14854            checksum: None,
14855        }
14856        .write(format)?,
14857    )?;
14858    Ok(RestoreResult {
14859        restored: target_entries.len(),
14860    })
14861}
14862
14863/// All paths the current index references, deduped across stages (a conflicted
14864/// path appears at stages 1–3; we want it listed once). Unlike
14865/// `read_index_entries`, which filters to stage 0, this keeps conflicted paths
14866/// so a `reset --hard` worktree sweep removes moved-aside files (`dir~HEAD`) the
14867/// target tree doesn't track — matching git's one-way unpack-trees behavior.
14868fn current_index_paths(
14869    git_dir: &Path,
14870    format: ObjectFormat,
14871    db: &FileObjectDatabase,
14872) -> Result<BTreeSet<Vec<u8>>> {
14873    let (index, _stat_cache, _head_matches) = read_index_with_stat_cache(git_dir, format, db)?;
14874    Ok(index
14875        .entries
14876        .into_iter()
14877        .map(|entry| entry.path.into_bytes())
14878        .collect())
14879}
14880
14881/// Write one target tree entry into the worktree and return its index entry —
14882/// the shared materialization step for every checkout/reset worktree rebuild.
14883///
14884/// Gitlinks (mode 160000) never touch the object database: their oid names a
14885/// commit in the *submodule's* repository, not an object here. Upstream
14886/// (entry.c `write_entry` S_IFGITLINK) just mkdirs the path — an
14887/// already-populated submodule is left untouched (EEXIST is success) — and
14888/// records the oid in the index with a zeroed stat so status re-evaluates the
14889/// gitlink against the embedded repository's HEAD.
14890fn materialize_tree_entry(
14891    db: &FileObjectDatabase,
14892    worktree_root: &Path,
14893    path: &[u8],
14894    entry: &TrackedEntry,
14895) -> Result<IndexEntry> {
14896    if sley_index::is_gitlink(entry.mode) {
14897        let dir_path = worktree_path(worktree_root, path)?;
14898        materialize_gitlink_dir(worktree_root, &dir_path)?;
14899        return Ok(IndexEntry {
14900            ctime_seconds: 0,
14901            ctime_nanoseconds: 0,
14902            mtime_seconds: 0,
14903            mtime_nanoseconds: 0,
14904            dev: 0,
14905            ino: 0,
14906            mode: entry.mode,
14907            uid: 0,
14908            gid: 0,
14909            size: 0,
14910            oid: entry.oid,
14911            flags: path.len().min(0x0fff) as u16,
14912            flags_extended: 0,
14913            path: BString::from(path),
14914        });
14915    }
14916    let file_path = write_worktree_blob_entry(db, worktree_root, path, entry)?;
14917    let metadata = fs::symlink_metadata(&file_path)?;
14918    let mut index_entry = index_entry_from_metadata(path.to_vec(), entry.oid, &metadata);
14919    index_entry.mode = entry.mode;
14920    Ok(index_entry)
14921}
14922
14923fn materialize_gitlink_dir(worktree_root: &Path, dir_path: &Path) -> Result<()> {
14924    prepare_blob_parent_dirs(worktree_root, dir_path)?;
14925    if fs::symlink_metadata(dir_path).is_ok_and(|metadata| !metadata.is_dir()) {
14926        remove_existing_worktree_path(dir_path)?;
14927    }
14928    fs::create_dir_all(dir_path)?;
14929    Ok(())
14930}
14931
14932fn materialize_tree_entry_filtered(
14933    db: &FileObjectDatabase,
14934    format: ObjectFormat,
14935    worktree_root: &Path,
14936    path: &[u8],
14937    entry: &TrackedEntry,
14938    config: &GitConfig,
14939    attributes: &AttributeMatcher,
14940) -> Result<IndexEntry> {
14941    if sley_index::is_gitlink(entry.mode) || (entry.mode & 0o170000) == 0o120000 {
14942        return materialize_tree_entry(db, worktree_root, path, entry);
14943    }
14944    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
14945    let checks = attributes.attributes_for_path(path, &filter_attribute_names(), false);
14946    let body = apply_smudge_filter_with_attributes_cow_format(
14947        config,
14948        &checks,
14949        path,
14950        &object.body,
14951        format,
14952    )?;
14953    let file_path = worktree_path(worktree_root, path)?;
14954    prepare_blob_parent_dirs(worktree_root, &file_path)?;
14955    remove_existing_worktree_path(&file_path)?;
14956    fs::write(&file_path, &body)?;
14957    set_worktree_file_mode(&file_path, entry.mode)?;
14958    let metadata = fs::symlink_metadata(&file_path)?;
14959    let mut index_entry = index_entry_from_metadata(path.to_vec(), entry.oid, &metadata);
14960    index_entry.mode = entry.mode;
14961    Ok(index_entry)
14962}
14963
14964/// Materialize a blob (or symlink) tree entry into the worktree at `path`,
14965/// returning the absolute path written. Shared by every checkout/reset worktree
14966/// rebuild so the type-change handling is identical everywhere.
14967///
14968/// Mirrors git's entry.c `write_entry`: it unlinks whatever currently occupies
14969/// the path before creating the new object, so a type transition (regular file ⇄
14970/// symlink, or a stale symlink/directory in the way) is overwritten rather than
14971/// left in place or failing with EEXIST. A plain `fs::write` follows an existing
14972/// symlink and would write *through* it (leaving the link), so the unlink is
14973/// load-bearing for the symlink-stash / reset-hard type-change cases.
14974fn write_worktree_blob_entry(
14975    db: &FileObjectDatabase,
14976    worktree_root: &Path,
14977    path: &[u8],
14978    entry: &TrackedEntry,
14979) -> Result<PathBuf> {
14980    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
14981    let file_path = worktree_path(worktree_root, path)?;
14982    // Clear any non-directory blocking an ancestor component (prior tree had
14983    // `dir` as a FILE, target wants `dir/<child>`), creating the parent dirs.
14984    prepare_blob_parent_dirs(worktree_root, &file_path)?;
14985    // Clear whatever sits at the leaf — including a directory where the target
14986    // wants a plain file (reverse D/F) — before writing.
14987    remove_existing_worktree_path(&file_path)?;
14988    if (entry.mode & 0o170000) == 0o120000 {
14989        // Symlink entry (mode 120000): the blob body is the link target.
14990        #[cfg(unix)]
14991        {
14992            use std::os::unix::ffi::OsStringExt;
14993            let target =
14994                std::path::PathBuf::from(std::ffi::OsString::from_vec(object.body.clone()));
14995            std::os::unix::fs::symlink(&target, &file_path)?;
14996        }
14997        #[cfg(not(unix))]
14998        fs::write(&file_path, &object.body)?;
14999    } else {
15000        fs::write(&file_path, &object.body)?;
15001        set_worktree_file_mode(&file_path, entry.mode)?;
15002    }
15003    Ok(file_path)
15004}
15005
15006/// Create the ancestor directories of a worktree blob path, removing any
15007/// regular file or symlink that occupies an ancestor *component* first.
15008///
15009/// Mirrors git's `entry.c` `create_directories`: it walks each path component
15010/// between `worktree_root` and the leaf and, for each, if a non-directory (a
15011/// regular file or symlink left by a prior tree where `dir` was a FILE) blocks
15012/// it, unlinks the blocker before `mkdir`. A plain `fs::create_dir_all` fails
15013/// with `ENOTDIR`/`EEXIST` on such a D/F transition; this is the directory-side
15014/// of git's force-checkout D/F clearing.
15015///
15016/// `worktree_root` itself is never touched. Only components strictly between the
15017/// root and the leaf are cleared, matching `create_directories`' `base_dir_len`
15018/// boundary.
15019fn prepare_blob_parent_dirs(worktree_root: &Path, file_path: &Path) -> Result<()> {
15020    let parent = match file_path.parent() {
15021        Some(parent) => parent,
15022        None => return Ok(()),
15023    };
15024    // Fast path: parent already a directory (the overwhelmingly common case).
15025    if parent.is_dir() {
15026        return Ok(());
15027    }
15028    // Collect the ancestor chain from worktree_root (exclusive) down to `parent`
15029    // (inclusive). We can't `create_dir_all` blindly because a non-directory may
15030    // sit on one of these components; walk them and clear blockers as git does.
15031    let mut components: Vec<&Path> = Vec::new();
15032    let mut cursor = Some(parent);
15033    while let Some(dir) = cursor {
15034        if dir == worktree_root {
15035            break;
15036        }
15037        components.push(dir);
15038        cursor = dir.parent();
15039        if cursor.is_none() {
15040            break;
15041        }
15042    }
15043    // Walk root → leaf so each parent exists before its child.
15044    for dir in components.iter().rev() {
15045        match fs::symlink_metadata(dir) {
15046            Ok(metadata) if metadata.is_dir() => {}
15047            Ok(_) => {
15048                // A regular file or symlink occupies this component (the prior
15049                // tree had `dir` as a FILE). Unlink it, then create the dir.
15050                fs::remove_file(dir)?;
15051                fs::create_dir(dir)?;
15052            }
15053            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
15054                fs::create_dir(dir)?;
15055            }
15056            Err(err) => return Err(err.into()),
15057        }
15058    }
15059    Ok(())
15060}
15061
15062/// Remove whatever currently occupies a worktree path before writing a new
15063/// object there — a symlink (even a dangling one, which `Path::exists` misses),
15064/// a regular file, or a directory subtree. Uses `symlink_metadata` (lstat) so a
15065/// symlink is removed as the link, never followed.
15066fn remove_existing_worktree_path(file_path: &Path) -> Result<()> {
15067    let metadata = match fs::symlink_metadata(file_path) {
15068        Ok(metadata) => metadata,
15069        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
15070        Err(err) => return Err(err.into()),
15071    };
15072    if metadata.is_dir() {
15073        if path_is_original_cwd(file_path) {
15074            return refuse_remove_current_working_directory(file_path);
15075        }
15076        // A directory in the way of a file (D/F transition) or a populated
15077        // gitlink: remove the subtree so the file can be created.
15078        match fs::remove_dir_all(file_path) {
15079            Ok(()) => {}
15080            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
15081            Err(err) => return Err(err.into()),
15082        }
15083    } else {
15084        fs::remove_file(file_path)?;
15085    }
15086    Ok(())
15087}
15088
15089/// chmod a freshly-materialized worktree blob to match its tree/index entry mode.
15090///
15091/// `fs::write` truncates an existing file *in place*, preserving its prior
15092/// permission bits. For a mode-only diff (identical oid, 100644 vs 100755) that
15093/// leaves the wrong exec bit on disk — which is exactly the `reset --hard` /
15094/// checkout bug this guards against. git's checkout path unlinks+recreates the
15095/// file precisely to "get the new one with the right permissions" (entry.c
15096/// `write_entry`); we instead chmod the just-written file.
15097///
15098/// Mirrors the observable result of git's `create_file` (entry.c):
15099/// `(mode & 0100) ? 0777 : 0666` masked by the standard umask (0022), i.e. 0755
15100/// for an executable entry and 0644 otherwise. Only regular-file entries (100644
15101/// / 100755) are chmod'd; gitlinks and symlinks have no meaningful exec bit.
15102///
15103/// We set the perms directly (rather than relying on a fresh `open(2)` to apply
15104/// the umask) because `fs::write` truncates an existing file in place, leaving its
15105/// old permission bits — the very thing that breaks a mode-only checkout/reset.
15106/// Matching git's default-umask output keeps the worktree byte-for-byte aligned
15107/// with the oracle, which is what the parity suite asserts.
15108#[cfg(unix)]
15109fn set_worktree_file_mode(file_path: &Path, entry_mode: u32) -> Result<()> {
15110    use std::os::unix::fs::PermissionsExt;
15111    let perms = match entry_mode {
15112        0o100755 => 0o755,
15113        0o100644 => 0o644,
15114        _ => return Ok(()),
15115    };
15116    fs::set_permissions(file_path, fs::Permissions::from_mode(perms))?;
15117    Ok(())
15118}
15119
15120#[cfg(not(unix))]
15121fn set_worktree_file_mode(_file_path: &Path, _entry_mode: u32) -> Result<()> {
15122    Ok(())
15123}
15124
15125/// Materialize a tree object into the index and worktree.
15126pub fn checkout_tree_to_index_and_worktree(
15127    worktree_root: impl AsRef<Path>,
15128    git_dir: impl AsRef<Path>,
15129    format: ObjectFormat,
15130    tree_oid: &ObjectId,
15131) -> Result<RestoreResult> {
15132    let worktree_root = worktree_root.as_ref();
15133    let git_dir = git_dir.as_ref();
15134    let db = FileObjectDatabase::from_git_dir(git_dir, format);
15135    let mut target_entries = BTreeMap::new();
15136    collect_tree_entries(&db, format, tree_oid, &mut target_entries)?;
15137
15138    for path in read_index_entries(git_dir, format)?.keys() {
15139        if !target_entries.contains_key(path) {
15140            remove_worktree_file(worktree_root, path)?;
15141        }
15142    }
15143
15144    let mut index_entries = Vec::new();
15145    for (path, entry) in &target_entries {
15146        index_entries.push(materialize_tree_entry(&db, worktree_root, path, entry)?);
15147    }
15148    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
15149    let extensions = preserved_index_extensions(git_dir, format)?;
15150    fs::write(
15151        repository_index_path(git_dir),
15152        Index {
15153            version: 2,
15154            entries: index_entries,
15155            extensions,
15156            checksum: None,
15157        }
15158        .write(format)?,
15159    )?;
15160    Ok(RestoreResult {
15161        restored: target_entries.len(),
15162    })
15163}
15164
15165pub fn reset_index_to_commit(
15166    worktree_root: impl AsRef<Path>,
15167    git_dir: impl AsRef<Path>,
15168    format: ObjectFormat,
15169    commit_oid: &ObjectId,
15170) -> Result<RestoreResult> {
15171    let worktree_root = worktree_root.as_ref();
15172    let git_dir = git_dir.as_ref();
15173    let db = FileObjectDatabase::from_git_dir(git_dir, format);
15174    let commit = read_commit(&db, format, commit_oid)?;
15175    let mut target_entries = BTreeMap::new();
15176    collect_tree_entries(&db, format, &commit.tree, &mut target_entries)?;
15177    // git's `reset --mixed` preserves the skip-worktree bit on entries that survive
15178    // the reset (t7102 "--mixed preserves skip-worktree"): carry it forward from the
15179    // pre-reset index keyed by path, so reconstructed entries keep CE_SKIP_WORKTREE.
15180    let index_path = repository_index_path(git_dir);
15181    let prior_skip_worktree: BTreeSet<Vec<u8>> = match fs::read(&index_path) {
15182        Ok(bytes) => Index::parse(&bytes, format)?
15183            .entries
15184            .iter()
15185            .filter(|entry| entry.is_skip_worktree())
15186            .map(|entry| entry.path.as_bytes().to_vec())
15187            .collect(),
15188        Err(err) if err.kind() == std::io::ErrorKind::NotFound => BTreeSet::new(),
15189        Err(err) => return Err(err.into()),
15190    };
15191    let mut index_entries = Vec::new();
15192    for (path, entry) in &target_entries {
15193        let mut restored = restored_head_index_entry(worktree_root, &db, path, entry)?;
15194        if prior_skip_worktree.contains(path) {
15195            restored.set_skip_worktree(true);
15196        }
15197        index_entries.push(restored);
15198    }
15199    index_entries.sort_by(|left, right| left.path.cmp(&right.path));
15200    let mut index = Index {
15201        version: 2,
15202        entries: index_entries,
15203        extensions: preserved_index_extensions(git_dir, format)?,
15204        checksum: None,
15205    };
15206    index.upgrade_version_for_flags();
15207    write_repository_index_ref(git_dir, format, &index)?;
15208    Ok(RestoreResult {
15209        restored: target_entries.len(),
15210    })
15211}
15212
15213/// Build a fresh in-memory index that mirrors the tree `tree_oid`, the way
15214/// `git read-tree <tree>` does: every blob, symlink, and gitlink leaf (found by
15215/// recursing subtrees) becomes a stage-0 entry carrying the tree mode and oid,
15216/// with a fully zeroed stat (so nothing is treated as stat-clean) and size 0.
15217/// Entries are sorted by path; the index is version 2 with no extensions.
15218///
15219/// This does not touch the worktree or write anything to disk — serialize the
15220/// result with [`Index::write`] (and persist it) when you want to replace
15221/// `.git/index`.
15222pub fn index_from_tree(
15223    db: &FileObjectDatabase,
15224    format: ObjectFormat,
15225    tree_oid: &ObjectId,
15226) -> Result<Index> {
15227    let mut entries: Vec<IndexEntry> = Vec::new();
15228    if *tree_oid != ObjectId::empty_tree(format) {
15229        let mut tree_entries = BTreeMap::new();
15230        collect_tree_entries(db, format, tree_oid, &mut tree_entries)?;
15231        entries.reserve(tree_entries.len());
15232        for (path, entry) in tree_entries {
15233            let name_len = (path.len().min(0x0fff)) as u16;
15234            entries.push(IndexEntry {
15235                ctime_seconds: 0,
15236                ctime_nanoseconds: 0,
15237                mtime_seconds: 0,
15238                mtime_nanoseconds: 0,
15239                dev: 0,
15240                ino: 0,
15241                mode: entry.mode,
15242                uid: 0,
15243                gid: 0,
15244                size: 0,
15245                oid: entry.oid,
15246                flags: name_len,
15247                flags_extended: 0,
15248                path: path.into(),
15249            });
15250        }
15251    }
15252    // git orders index entries by path bytes; BTreeMap already yields that, but
15253    // sort explicitly so the contract holds regardless of how entries arrive.
15254    entries.sort_by(|left, right| left.path.cmp(&right.path));
15255    Ok(Index {
15256        version: 2,
15257        entries,
15258        extensions: Vec::new(),
15259        checksum: None,
15260    })
15261}
15262
15263/// Enforces a [`SparseCheckout`] against the current index and worktree.
15264///
15265/// Every stage-0 index entry is classified with the sparse patterns (see
15266/// [`SparseCheckoutMode`] for the matching semantics):
15267///
15268/// * **In cone**: the skip-worktree bit is cleared and, if the worktree file is
15269///   missing, it is re-materialized from the entry's blob in the object
15270///   database. Existing worktree files are left untouched so local content is
15271///   preserved.
15272/// * **Out of cone**: the skip-worktree bit is set and any existing worktree
15273///   file is removed (empty parent directories are pruned).
15274///
15275/// Returns `true` when `path` is inside the sparse-checkout described by
15276/// `sparse` under the given matching `mode`. This is the engine behind
15277/// `git sparse-checkout check-rules`: a path is "in" the sparse-checkout when
15278/// the compiled matcher would keep its worktree file. Cone and full (gitignore)
15279/// grammars are both handled, exactly as the apply engine interprets them, so
15280/// `check-rules` and `set`/`reapply` agree by construction.
15281pub fn path_in_sparse_checkout(
15282    path: &[u8],
15283    sparse: &SparseCheckout,
15284    mode: SparseCheckoutMode,
15285) -> bool {
15286    SparseMatcher::new(sparse, mode).includes_file(path)
15287}
15288
15289fn active_sparse_checkout(git_dir: &Path) -> Result<Option<(SparseCheckout, SparseCheckoutMode)>> {
15290    let worktree_config = GitConfig::read(git_dir.join("config.worktree")).unwrap_or_default();
15291    let repo_config = GitConfig::read(git_dir.join("config")).unwrap_or_default();
15292    let sparse_enabled = worktree_config
15293        .get_bool("core", None, "sparseCheckout")
15294        .or_else(|| repo_config.get_bool("core", None, "sparseCheckout"))
15295        .unwrap_or(false);
15296    if !sparse_enabled {
15297        return Ok(None);
15298    }
15299    let sparse_file = git_dir.join("info").join("sparse-checkout");
15300    if !sparse_file.exists() {
15301        return Ok(None);
15302    }
15303    let cone = worktree_config
15304        .get_bool("core", None, "sparseCheckoutCone")
15305        .or_else(|| repo_config.get_bool("core", None, "sparseCheckoutCone"))
15306        .unwrap_or(false);
15307    let sparse_index = cone
15308        && worktree_config
15309            .get_bool("index", None, "sparse")
15310            .or_else(|| repo_config.get_bool("index", None, "sparse"))
15311            .unwrap_or(false);
15312    let bytes = fs::read(sparse_file)?;
15313    let mut patterns = bytes
15314        .split(|byte| *byte == b'\n')
15315        .map(<[u8]>::to_vec)
15316        .collect::<Vec<_>>();
15317    if patterns.last().map(Vec::is_empty) == Some(true) {
15318        patterns.pop();
15319    }
15320    let mode = if cone {
15321        SparseCheckoutMode::Cone
15322    } else {
15323        SparseCheckoutMode::Full
15324    };
15325    Ok(Some((
15326        SparseCheckout {
15327            patterns,
15328            sparse_index,
15329        },
15330        mode,
15331    )))
15332}
15333
15334/// Conflicted entries (stage != 0) are never given the skip-worktree bit and
15335/// are left alone, matching upstream Git. The index is rewritten in place.
15336pub fn apply_sparse_checkout(
15337    worktree_root: impl AsRef<Path>,
15338    git_dir: impl AsRef<Path>,
15339    format: ObjectFormat,
15340    sparse: &SparseCheckout,
15341) -> Result<ApplySparseResult> {
15342    apply_sparse_checkout_with_mode(
15343        worktree_root,
15344        git_dir,
15345        format,
15346        sparse,
15347        SparseCheckoutMode::Auto,
15348    )
15349}
15350
15351/// Like [`apply_sparse_checkout`] but lets the caller force the pattern
15352/// interpretation instead of auto-detecting it.
15353pub fn apply_sparse_checkout_with_mode(
15354    worktree_root: impl AsRef<Path>,
15355    git_dir: impl AsRef<Path>,
15356    format: ObjectFormat,
15357    sparse: &SparseCheckout,
15358    mode: SparseCheckoutMode,
15359) -> Result<ApplySparseResult> {
15360    let worktree_root = worktree_root.as_ref();
15361    let git_dir = git_dir.as_ref();
15362    let index_path = repository_index_path(git_dir);
15363    let mut index = if index_path.exists() {
15364        Index::parse(&fs::read(&index_path)?, format)?
15365    } else {
15366        return Ok(ApplySparseResult {
15367            materialized: Vec::new(),
15368            skipped: Vec::new(),
15369            not_up_to_date: Vec::new(),
15370        });
15371    };
15372    let matcher = SparseMatcher::new(sparse, mode);
15373    let db = FileObjectDatabase::from_git_dir(git_dir, format);
15374    // Expand any collapsed sparse-directory entries to a full index before we
15375    // reconcile per-path: the apply loop reasons about individual blob paths, so
15376    // it must never see a sparse-dir entry. (Re-collapse happens at the end when
15377    // a sparse index is requested.)
15378    if index.entries.iter().any(IndexEntry::is_sparse_dir) {
15379        expand_sparse_index(&mut index, &db, format)?;
15380    }
15381    let mut materialized = Vec::new();
15382    let mut skipped = Vec::new();
15383    let mut not_up_to_date = Vec::new();
15384    for entry in &mut index.entries {
15385        // Never touch conflicted entries.
15386        if index_entry_stage(entry) != 0 {
15387            continue;
15388        }
15389        if matcher.includes_file(entry.path.as_bytes()) {
15390            clear_skip_worktree(entry);
15391            let file_path = worktree_path(worktree_root, entry.path.as_bytes())?;
15392            if !file_path.exists() {
15393                materialize_index_entry_file(&db, worktree_root, &file_path, entry)?;
15394                let metadata = fs::symlink_metadata(&file_path)?;
15395                *entry = index_entry_with_refreshed_stat(entry, &metadata);
15396            }
15397            materialized.push(entry.path.as_bytes().to_vec());
15398        } else {
15399            // The path is out of cone, so its worktree file should be removed and
15400            // the entry marked skip-worktree. But git refuses to delete a file
15401            // that is *not up to date* with the index (e.g. one that reappeared in
15402            // the worktree after the path was already sparse): it leaves the file,
15403            // leaves the skip-worktree bit clear, and reports the path in its "not
15404            // up to date" warning. Mirror that to avoid silent data loss.
15405            let file_path = worktree_path(worktree_root, entry.path.as_bytes())?;
15406            match fs::symlink_metadata(&file_path) {
15407                Ok(metadata) if !worktree_entry_is_uptodate(entry, &metadata) => {
15408                    clear_skip_worktree(entry);
15409                    not_up_to_date.push(entry.path.as_bytes().to_vec());
15410                }
15411                _ => {
15412                    set_skip_worktree(entry);
15413                    remove_worktree_file(worktree_root, entry.path.as_bytes())?;
15414                    skipped.push(entry.path.as_bytes().to_vec());
15415                }
15416            }
15417        }
15418    }
15419    not_up_to_date.sort();
15420    normalize_index_version_for_extended_flags(&mut index);
15421    // When a sparse index was requested (cone mode + index.sparse), collapse the
15422    // fully-out-of-cone directories into single sparse-directory entries and
15423    // mark the index with the `sdir` extension. Otherwise ensure the index is
15424    // written full (and any prior `sdir` marker is cleared).
15425    if sparse.sparse_index {
15426        collapse_to_sparse_index(&mut index, &matcher, &db, format)?;
15427    } else {
15428        index.clear_sparse_extension()?;
15429    }
15430    write_repository_index_ref(git_dir, format, &index)?;
15431    Ok(ApplySparseResult {
15432        materialized,
15433        skipped,
15434        not_up_to_date,
15435    })
15436}
15437
15438/// Expands every sparse-directory entry in `index` back into the full set of
15439/// blob (and nested-directory) entries it collapses, reading each directory's
15440/// tree from `db`. After this the index contains no sparse-directory entries and
15441/// carries no `sdir` marker — it is a full index that any per-path command can
15442/// operate on without sparse-index awareness.
15443///
15444/// This is the **close-the-class** primitive: a command never needs to special-
15445/// case a sparse index, because the moment it loads the index it expands to the
15446/// full form. The collapsed shape is purely an on-disk storage optimization.
15447pub fn expand_sparse_index(
15448    index: &mut Index,
15449    db: &FileObjectDatabase,
15450    format: ObjectFormat,
15451) -> Result<bool> {
15452    if !index.entries.iter().any(IndexEntry::is_sparse_dir) {
15453        // Still strip a stray `sdir` marker so the written index is recorded full.
15454        let had_marker = index.is_sparse();
15455        index.clear_sparse_extension()?;
15456        if had_marker {
15457            sley_core::trace2::region("index", "ensure_full_index");
15458        }
15459        return Ok(had_marker);
15460    }
15461    let mut expanded: Vec<IndexEntry> = Vec::with_capacity(index.entries.len());
15462    for entry in std::mem::take(&mut index.entries) {
15463        if !entry.is_sparse_dir() {
15464            expanded.push(entry);
15465            continue;
15466        }
15467        // The sparse-dir path ends in `/`; its OID is the directory's tree.
15468        let dir = entry.path.as_bytes();
15469        let dir_prefix = dir; // includes the trailing slash
15470        for (rel, (mode, oid)) in sley_diff_merge::flatten_tree(db, format, &entry.oid)? {
15471            let mut full_path = dir_prefix.to_vec();
15472            full_path.extend_from_slice(&rel);
15473            let mut blob = blank_sparse_blob_entry(format, &full_path, mode, oid);
15474            // Re-collapsed entries are skip-worktree (they live outside the cone).
15475            blob.set_skip_worktree(true);
15476            expanded.push(blob);
15477        }
15478    }
15479    expanded.sort_by(|a, b| a.path.as_bytes().cmp(b.path.as_bytes()));
15480    index.entries = expanded;
15481    index.clear_sparse_extension()?;
15482    normalize_index_version_for_extended_flags(index);
15483    sley_core::trace2::region("index", "ensure_full_index");
15484    Ok(true)
15485}
15486
15487fn index_sparse_dir_contains_path(index: &Index, git_path: &[u8]) -> bool {
15488    index.entries.iter().any(|entry| {
15489        entry.is_sparse_dir()
15490            && git_path.starts_with(entry.path.as_bytes())
15491            && git_path.len() > entry.path.len()
15492    })
15493}
15494
15495/// Builds a minimal index entry for an expanded sparse blob: zeroed stat fields
15496/// (the file is not in the worktree), the given mode/oid, and a fresh name
15497/// length. Stat fields are zero because a skip-worktree file has no on-disk
15498/// presence to record.
15499fn blank_sparse_blob_entry(
15500    format: ObjectFormat,
15501    path: &[u8],
15502    mode: u32,
15503    oid: ObjectId,
15504) -> IndexEntry {
15505    let _ = format;
15506    let mut entry = IndexEntry {
15507        ctime_seconds: 0,
15508        ctime_nanoseconds: 0,
15509        mtime_seconds: 0,
15510        mtime_nanoseconds: 0,
15511        dev: 0,
15512        ino: 0,
15513        mode,
15514        uid: 0,
15515        gid: 0,
15516        size: 0,
15517        oid,
15518        flags: 0,
15519        flags_extended: 0,
15520        path: path.into(),
15521    };
15522    entry.refresh_name_length();
15523    entry
15524}
15525
15526/// Collapses fully-out-of-cone directories in `index` into single sparse-
15527/// directory entries (mode `040000`, skip-worktree, the directory tree's OID),
15528/// then marks the index with the `sdir` extension. A directory is collapsible
15529/// when *every* entry under it is skip-worktree and stage 0 — i.e. nothing in it
15530/// is in the cone or conflicted. The shallowest such directory subsumes deeper
15531/// ones, matching git's `convert_to_sparse` cache-tree walk.
15532fn collapse_to_sparse_index(
15533    index: &mut Index,
15534    matcher: &SparseMatcher,
15535    db: &FileObjectDatabase,
15536    format: ObjectFormat,
15537) -> Result<()> {
15538    // First expand any pre-existing sparse-dir entries so the collapse decision
15539    // sees a uniform full index (idempotent re-collapse).
15540    if index.entries.iter().any(IndexEntry::is_sparse_dir) {
15541        expand_sparse_index(index, db, format)?;
15542    }
15543
15544    // Any unmerged (stage != 0) entry forbids a sparse index entirely (the cache
15545    // tree cannot be built), so stay full — matching git's bail.
15546    if index.entries.iter().any(|e| index_entry_stage(e) != 0) {
15547        index.clear_sparse_extension()?;
15548        return Ok(());
15549    }
15550
15551    index
15552        .entries
15553        .sort_by(|a, b| a.path.as_bytes().cmp(b.path.as_bytes()));
15554
15555    // Determine, for every directory prefix, whether it contains any in-cone
15556    // path. A directory with no in-cone descendant is collapsible.
15557    use std::collections::BTreeMap;
15558    let mut dir_has_in_cone: BTreeMap<Vec<u8>, bool> = BTreeMap::new();
15559    for entry in &index.entries {
15560        let path = entry.path.as_bytes();
15561        let in_cone = matcher.includes_file(path);
15562        let mut start = 0usize;
15563        while let Some(rel) = path
15564            .get(start..)
15565            .and_then(|s| s.iter().position(|b| *b == b'/'))
15566        {
15567            let end = start + rel;
15568            let dir = path[..end].to_vec();
15569            let flag = dir_has_in_cone.entry(dir).or_insert(false);
15570            *flag = *flag || in_cone;
15571            start = end + 1;
15572        }
15573    }
15574
15575    // The collapsible directories are those with no in-cone descendant; keep only
15576    // the shallowest (a directory whose ancestor is also collapsible is subsumed).
15577    let collapsible: Vec<Vec<u8>> = {
15578        let all: Vec<Vec<u8>> = dir_has_in_cone
15579            .iter()
15580            .filter(|(_, has)| !**has)
15581            .map(|(dir, _)| dir.clone())
15582            .collect();
15583        all.iter()
15584            .filter(|dir| {
15585                !all.iter().any(|other| {
15586                    other != *dir
15587                        && dir
15588                            .strip_prefix(other.as_slice())
15589                            .is_some_and(|rest| rest.first() == Some(&b'/'))
15590                })
15591            })
15592            .cloned()
15593            .collect()
15594    };
15595    if collapsible.is_empty() {
15596        index.clear_sparse_extension()?;
15597        return Ok(());
15598    }
15599
15600    let mut checker = db.presence_checker();
15601    let mut new_entries: Vec<IndexEntry> = Vec::with_capacity(index.entries.len());
15602    let mut consumed: std::collections::HashSet<Vec<u8>> = std::collections::HashSet::new();
15603    for dir in &collapsible {
15604        // Gather the entries that live strictly under this directory.
15605        let mut subtree: Vec<&IndexEntry> = index
15606            .entries
15607            .iter()
15608            .filter(|e| {
15609                e.path
15610                    .as_bytes()
15611                    .strip_prefix(dir.as_slice())
15612                    .is_some_and(|rest| rest.first() == Some(&b'/'))
15613            })
15614            .collect();
15615        if subtree.is_empty() {
15616            continue;
15617        }
15618        subtree.sort_by(|a, b| a.path.as_bytes().cmp(b.path.as_bytes()));
15619        // Build the subtree object and capture its OID.
15620        let mut prefix = dir.clone();
15621        prefix.push(b'/');
15622        let tree_entries: Vec<WriteTreeEntry<'_>> = subtree
15623            .iter()
15624            .map(|e| WriteTreeEntry {
15625                path: e.path.as_bytes(),
15626                mode: e.mode,
15627                oid: e.oid.clone(),
15628            })
15629            .collect();
15630        let tree_oid =
15631            write_tree_entries_stream(&tree_entries, &prefix, None, db, &mut checker, false)?;
15632        // Mark every consumed path so the second pass drops them.
15633        for e in &subtree {
15634            consumed.insert(e.path.as_bytes().to_vec());
15635        }
15636        // The sparse-dir entry's name is the directory path WITH a trailing slash.
15637        let mut sparse_path = dir.clone();
15638        sparse_path.push(b'/');
15639        let mut sparse_entry =
15640            blank_sparse_blob_entry(format, &sparse_path, SPARSE_DIR_MODE, tree_oid);
15641        sparse_entry.set_skip_worktree(true);
15642        new_entries.push(sparse_entry);
15643    }
15644    // Carry forward every entry that was not collapsed.
15645    for entry in &index.entries {
15646        if consumed.contains(entry.path.as_bytes()) {
15647            continue;
15648        }
15649        new_entries.push(entry.clone());
15650    }
15651    new_entries.sort_by(|a, b| a.path.as_bytes().cmp(b.path.as_bytes()));
15652    index.entries = new_entries;
15653    index.set_sparse_extension();
15654    normalize_index_version_for_extended_flags(index);
15655    sley_core::trace2::region("index", "convert_to_sparse");
15656    Ok(())
15657}
15658
15659/// Whether the worktree file described by `metadata` is up to date with `entry`'s
15660/// cached index stat, using the size + mtime heuristic at the core of git's
15661/// `ie_match_stat`. A freshly-checked-out (clean) file matches; a file that was
15662/// deleted and later recreated — as happens when an out-of-cone path reappears in
15663/// the worktree — gets a fresh mtime and so reads as modified, which is exactly
15664/// the state git declines to overwrite during a sparse update.
15665fn worktree_entry_is_uptodate(entry: &IndexEntry, metadata: &fs::Metadata) -> bool {
15666    if u64::from(entry.size) != metadata.len() {
15667        return false;
15668    }
15669    let Some((mtime_seconds, mtime_nanoseconds)) = file_mtime_parts(metadata) else {
15670        // Without a usable mtime we cannot prove the file is clean; treat it as
15671        // not up to date so a present file is never silently discarded.
15672        return false;
15673    };
15674    u64::from(entry.mtime_seconds) == mtime_seconds
15675        && u64::from(entry.mtime_nanoseconds) == mtime_nanoseconds
15676}
15677
15678fn worktree_entry_ref_is_uptodate(entry: &IndexEntryRef<'_>, metadata: &fs::Metadata) -> bool {
15679    if u64::from(entry.size) != metadata.len() {
15680        return false;
15681    }
15682    let Some((mtime_seconds, mtime_nanoseconds)) = file_mtime_parts(metadata) else {
15683        return false;
15684    };
15685    u64::from(entry.mtime_seconds) == mtime_seconds
15686        && u64::from(entry.mtime_nanoseconds) == mtime_nanoseconds
15687}
15688
/// Splits the modification time recorded in `metadata` into whole seconds since
/// the Unix epoch and the leftover nanoseconds, matching how git stores
/// `mtime` in the index.
///
/// Yields `None` when the platform cannot report a modification time or when
/// that time lies before the epoch.
fn file_mtime_parts(metadata: &fs::Metadata) -> Option<(u64, u64)> {
    metadata
        .modified()
        .ok()
        .and_then(|modified| modified.duration_since(UNIX_EPOCH).ok())
        .map(|since_epoch| {
            (
                since_epoch.as_secs(),
                u64::from(since_epoch.subsec_nanos()),
            )
        })
}
15696
15697/// Write a git metadata file through a sibling `.lock` file and atomic rename.
15698///
15699/// This helper is intended for small repository/worktree metadata files such as
15700/// `HEAD`, `config.worktree`, or state files under `.git/`. It deliberately does
15701/// not try to replace object or pack writers, which have their own durability
15702/// and naming rules.
15703pub fn write_metadata_file_atomic(
15704    path: impl AsRef<Path>,
15705    bytes: &[u8],
15706    options: AtomicMetadataWriteOptions,
15707) -> Result<AtomicMetadataWriteResult> {
15708    let path = path.as_ref();
15709    let parent = path.parent().ok_or_else(|| {
15710        GitError::InvalidPath(format!("metadata path has no parent: {}", path.display()))
15711    })?;
15712    if !parent.as_os_str().is_empty() {
15713        fs::create_dir_all(parent)?;
15714    }
15715    let lock_path = metadata_lock_path(path)?;
15716    let mut lock = match fs::OpenOptions::new()
15717        .write(true)
15718        .create_new(true)
15719        .open(&lock_path)
15720    {
15721        Ok(lock) => lock,
15722        Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {
15723            return Err(GitError::Transaction(format!(
15724                "metadata lock already exists: {}",
15725                lock_path.display()
15726            )));
15727        }
15728        Err(err) => return Err(err.into()),
15729    };
15730    if let Err(err) = lock.write_all(bytes) {
15731        let _ = fs::remove_file(&lock_path);
15732        return Err(err.into());
15733    }
15734    if options.fsync_file
15735        && let Err(err) = lock.sync_all()
15736    {
15737        let _ = fs::remove_file(&lock_path);
15738        return Err(err.into());
15739    }
15740    drop(lock);
15741    if let Err(err) = fs::rename(&lock_path, path) {
15742        let _ = fs::remove_file(&lock_path);
15743        return Err(err.into());
15744    }
15745    if options.fsync_dir
15746        && let Ok(dir) = fs::File::open(parent)
15747    {
15748        dir.sync_all()?;
15749    }
15750    let metadata = fs::metadata(path)?;
15751    Ok(AtomicMetadataWriteResult {
15752        path: path.to_path_buf(),
15753        len: metadata.len(),
15754        mtime: file_mtime_parts(&metadata),
15755    })
15756}
15757
15758fn metadata_lock_path(path: &Path) -> Result<PathBuf> {
15759    let file_name = path.file_name().ok_or_else(|| {
15760        GitError::InvalidPath(format!("metadata path has no filename: {}", path.display()))
15761    })?;
15762    let mut lock_name = file_name.to_os_string();
15763    lock_name.push(".lock");
15764    Ok(path.with_file_name(lock_name))
15765}
15766
15767/// Checks out `target` like [`checkout_detached`], but materializes the
15768/// worktree through the supplied [`SparseCheckout`]: out-of-cone paths are not
15769/// written, get their skip-worktree bit set, and have any stale worktree file
15770/// removed. Existing public checkout entry points are unchanged; this is an
15771/// additive sparse-aware variant.
15772///
15773/// The pattern interpretation is auto-detected ([`SparseCheckoutMode::Auto`]);
15774/// to reconcile an existing checkout under an explicit mode use
15775/// [`apply_sparse_checkout_with_mode`].
15776pub fn checkout_detached_sparse(
15777    worktree_root: impl AsRef<Path>,
15778    git_dir: impl AsRef<Path>,
15779    format: ObjectFormat,
15780    target: &ObjectId,
15781    committer: Vec<u8>,
15782    message: Vec<u8>,
15783    sparse: &SparseCheckout,
15784) -> Result<CheckoutResult> {
15785    let worktree_root = worktree_root.as_ref();
15786    let git_dir = git_dir.as_ref();
15787    let files = checkout_commit_to_index_and_worktree_sparse(
15788        worktree_root,
15789        git_dir,
15790        format,
15791        target,
15792        Some((sparse, SparseCheckoutMode::Auto)),
15793        None,
15794        None,
15795    )?;
15796    let refs = FileRefStore::new(git_dir, format);
15797    let zero = ObjectId::null(format);
15798    let mut tx = refs.transaction();
15799    tx.update(RefUpdate {
15800        name: "HEAD".into(),
15801        expected: None,
15802        new: RefTarget::Direct(*target),
15803        reflog: Some(ReflogEntry {
15804            old_oid: zero,
15805            new_oid: *target,
15806            committer,
15807            message,
15808        }),
15809    });
15810    tx.commit()?;
15811    Ok(CheckoutResult {
15812        branch: target.to_string(),
15813        oid: *target,
15814        files,
15815    })
15816}
15817
15818fn materialize_index_entry_file(
15819    db: &FileObjectDatabase,
15820    worktree_root: &Path,
15821    file_path: &Path,
15822    entry: &IndexEntry,
15823) -> Result<()> {
15824    // A gitlink (mode 160000) has no blob in this object store and materializes
15825    // as a directory (git's `write_entry` S_IFGITLINK arm: mkdir, never read an
15826    // object). Single gitlink rule via `sley_index::is_gitlink`; without it a
15827    // sparse re-materialization of a submodule path would fail with "not found:
15828    // blob object <commit-oid>".
15829    if sley_index::is_gitlink(entry.mode) {
15830        materialize_gitlink_dir(worktree_root, file_path)?;
15831        return Ok(());
15832    }
15833    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
15834    prepare_blob_parent_dirs(worktree_root, file_path)?;
15835    remove_existing_worktree_path(file_path)?;
15836    fs::write(file_path, &object.body)?;
15837    set_worktree_file_mode(file_path, entry.mode)?;
15838    Ok(())
15839}
15840
15841fn set_skip_worktree(entry: &mut IndexEntry) {
15842    entry.flags |= INDEX_FLAG_EXTENDED;
15843    entry.flags_extended |= INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
15844}
15845
15846fn clear_skip_worktree(entry: &mut IndexEntry) {
15847    entry.flags_extended &= !INDEX_EXTENDED_FLAG_SKIP_WORKTREE;
15848    if entry.flags_extended == 0 {
15849        entry.flags &= !INDEX_FLAG_EXTENDED;
15850    }
15851}
15852
15853pub fn restore_worktree_paths_from_head(
15854    worktree_root: impl AsRef<Path>,
15855    git_dir: impl AsRef<Path>,
15856    format: ObjectFormat,
15857    paths: &[PathBuf],
15858) -> Result<RestoreResult> {
15859    let worktree_root = worktree_root.as_ref();
15860    let git_dir = git_dir.as_ref();
15861    let index_path = repository_index_path(git_dir);
15862    let index = if index_path.exists() {
15863        Index::parse(&fs::read(&index_path)?, format)?
15864    } else {
15865        Index {
15866            version: 2,
15867            entries: Vec::new(),
15868            extensions: Vec::new(),
15869            checksum: None,
15870        }
15871    };
15872    let db = FileObjectDatabase::from_git_dir(git_dir, format);
15873    let head_entries = head_tree_entries(git_dir, format, &db)?;
15874    restore_worktree_paths_from_entries(worktree_root, &db, index, &head_entries, paths)
15875}
15876
15877pub fn restore_worktree_paths_from_tree(
15878    worktree_root: impl AsRef<Path>,
15879    git_dir: impl AsRef<Path>,
15880    format: ObjectFormat,
15881    tree_oid: &ObjectId,
15882    paths: &[PathBuf],
15883) -> Result<RestoreResult> {
15884    let worktree_root = worktree_root.as_ref();
15885    let git_dir = git_dir.as_ref();
15886    let index_path = repository_index_path(git_dir);
15887    let index = if index_path.exists() {
15888        Index::parse(&fs::read(&index_path)?, format)?
15889    } else {
15890        Index {
15891            version: 2,
15892            entries: Vec::new(),
15893            extensions: Vec::new(),
15894            checksum: None,
15895        }
15896    };
15897    let db = FileObjectDatabase::from_git_dir(git_dir, format);
15898    let source_entries = tree_entries(&db, format, tree_oid)?;
15899    restore_worktree_paths_from_entries(worktree_root, &db, index, &source_entries, paths)
15900}
15901
15902fn restore_worktree_paths_from_entries(
15903    worktree_root: &Path,
15904    db: &FileObjectDatabase,
15905    index: Index,
15906    source_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
15907    paths: &[PathBuf],
15908) -> Result<RestoreResult> {
15909    let index_entries = index
15910        .entries
15911        .into_iter()
15912        .map(|entry| entry.path.into_bytes())
15913        .collect::<BTreeSet<_>>();
15914    let mut restored = BTreeSet::new();
15915    for path in paths {
15916        let absolute = if path.is_absolute() {
15917            path.clone()
15918        } else {
15919            worktree_root.join(path)
15920        };
15921        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
15922            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
15923        })?;
15924        let git_path = git_path_bytes(relative)?;
15925        let recursive = path == Path::new(".")
15926            || path.to_string_lossy().ends_with('/')
15927            || absolute.is_dir()
15928            || index_entries
15929                .iter()
15930                .any(|entry| index_entry_is_under_path(entry, &git_path))
15931            || source_entries
15932                .keys()
15933                .any(|entry| index_entry_is_under_path(entry, &git_path));
15934        let mut matched_paths = BTreeSet::new();
15935        for path in index_entries.iter().chain(source_entries.keys()) {
15936            if path.as_slice() == git_path.as_slice()
15937                || (recursive && index_entry_is_under_path(path, &git_path))
15938            {
15939                matched_paths.insert(path.clone());
15940            }
15941        }
15942        if matched_paths.is_empty() {
15943            eprintln!(
15944                "error: pathspec '{}' did not match any file(s) known to git",
15945                path.display()
15946            );
15947            return Err(GitError::Exit(1));
15948        }
15949        for path in matched_paths {
15950            if let Some(entry) = source_entries.get(&path) {
15951                restore_head_entry_to_worktree(worktree_root, db, &path, entry)?;
15952            } else {
15953                remove_worktree_file(worktree_root, &path)?;
15954            }
15955            restored.insert(path);
15956        }
15957    }
15958    Ok(RestoreResult {
15959        restored: restored.len(),
15960    })
15961}
15962
15963pub fn remove_index_and_worktree_paths(
15964    worktree_root: impl AsRef<Path>,
15965    git_dir: impl AsRef<Path>,
15966    format: ObjectFormat,
15967    paths: &[PathBuf],
15968    options: RemoveOptions,
15969    config_parameters_env: Option<&str>,
15970) -> Result<RemoveResult> {
15971    let cwd = env::current_dir()?;
15972    let worktree_root = absolute_path_lexically(worktree_root.as_ref(), &cwd);
15973    let git_dir = absolute_path_lexically(git_dir.as_ref(), &cwd);
15974    let worktree_root = worktree_root.as_path();
15975    let git_dir = git_dir.as_path();
15976    let index_path = repository_index_path(git_dir);
15977    let index = if index_path.exists() {
15978        Index::parse(&fs::read(&index_path)?, format)?
15979    } else {
15980        Index {
15981            version: 2,
15982            entries: Vec::new(),
15983            extensions: Vec::new(),
15984            checksum: None,
15985        }
15986    };
15987    let db = FileObjectDatabase::from_git_dir(git_dir, format);
15988    let head_entries = head_tree_entries(git_dir, format, &db)?;
15989    // Stat cache for the local-modification check (git's `ie_match_stat`):
15990    // proves a path unchanged from the cached stat without reading its blob, so
15991    // a `git rm --cached` of an untouched path whose blob was removed still
15992    // succeeds (cf. t1450-fsck cell 90). (`sley_index::IndexStatCache` is a
15993    // distinct type from this crate's same-named probe helper above.)
15994    let rm_stat_cache = sley_index::IndexStatCache::from_index(&index, &index_path);
15995    let Index {
15996        version: index_version,
15997        entries: mut index_entry_list,
15998        extensions: index_extensions,
15999        ..
16000    } = index;
16001    // The set of distinct index paths (any stage) — used for membership tests.
16002    let index_paths: BTreeSet<Vec<u8>> = index_entry_list
16003        .iter()
16004        .map(|entry| entry.path.as_bytes().to_vec())
16005        .collect();
16006    let sparse_dir_paths: BTreeSet<Vec<u8>> = index_entry_list
16007        .iter()
16008        .filter(|entry| entry.is_sparse_dir())
16009        .map(|entry| entry.path.as_bytes().to_vec())
16010        .collect();
16011    // Paths tracked as a gitlink (mode 160000) at stage 0. Removing one of these
16012    // from the worktree is a *submodule* removal: git's builtin/rm.c flags the
16013    // entry `is_submodule = S_ISGITLINK(ce->ce_mode)` and removes the populated
16014    // submodule *directory* via `remove_dir_recursively` rather than `unlink`,
16015    // which would fail with EISDIR ("Is a directory") on the submodule checkout.
16016    // That EISDIR is exactly the gate that blocked the t1013/t7112/t6438/t2013
16017    // submodule setups. Use the single `sley_index::is_gitlink` rule — no new
16018    // predicate. (Unmerged gitlinks have no stage-0 entry and are not submodule
16019    // removals here, matching git, which keys `is_submodule` off the matched ce.)
16020    let stage0_gitlink_paths: BTreeSet<Vec<u8>> = index_entry_list
16021        .iter()
16022        .filter(|entry| entry.stage() == Stage::Normal && sley_index::is_gitlink(entry.mode))
16023        .map(|entry| entry.path.as_bytes().to_vec())
16024        .collect();
16025    let gitlink_paths: BTreeSet<Vec<u8>> = index_entry_list
16026        .iter()
16027        .filter(|entry| sley_index::is_gitlink(entry.mode))
16028        .map(|entry| entry.path.as_bytes().to_vec())
16029        .collect();
16030    let gitlink_oids_by_path: BTreeMap<Vec<u8>, BTreeSet<ObjectId>> = {
16031        let mut by_path: BTreeMap<Vec<u8>, BTreeSet<ObjectId>> = BTreeMap::new();
16032        for entry in index_entry_list
16033            .iter()
16034            .filter(|entry| sley_index::is_gitlink(entry.mode))
16035        {
16036            by_path
16037                .entry(entry.path.as_bytes().to_vec())
16038                .or_default()
16039                .insert(entry.oid);
16040        }
16041        by_path
16042    };
16043    // Paths selected for removal. A single selected path removes ALL of its
16044    // stage entries (so resolving an unmerged path by removal drops stages
16045    // 1/2/3 together), matching git's name-keyed removal.
16046    let mut selected = BTreeSet::new();
16047    for path in paths {
16048        let absolute = if path.is_absolute() {
16049            path.clone()
16050        } else {
16051            worktree_root.join(path)
16052        };
16053        // Capture a directory-only pathspec before lexical normalization drops
16054        // the trailing separator.
16055        let has_trailing_slash = path_has_trailing_separator(&absolute);
16056        let absolute = normalize_absolute_path_lexically(&absolute);
16057        let relative = absolute.strip_prefix(worktree_root).map_err(|_| {
16058            GitError::InvalidPath(format!("path {} is outside worktree", path.display()))
16059        })?;
16060        // A pathspec with a trailing slash (e.g. `git rm dir/`) only matches a
16061        // directory: it must never match a same-named tracked file.
16062        let git_path = git_path_bytes(relative)?;
16063        if !has_trailing_slash && index_paths.contains(&git_path) {
16064            selected.insert(git_path);
16065            continue;
16066        }
16067        if has_trailing_slash && gitlink_paths.contains(&git_path) && absolute.is_dir() {
16068            selected.insert(git_path);
16069            continue;
16070        }
16071        // A wildcard pathspec (e.g. `git rm "*"` or `git rm "dir/*.c"`) matches
16072        // index entries by git's pathspec matcher rather than by literal path or
16073        // directory prefix. Try the glob match first when the spec contains
16074        // wildcard metacharacters; a glob match removes the entries directly
16075        // (no `-r` needed — the pathspec already names the files).
16076        if pathspec_is_glob(&git_path) {
16077            let glob_matched = index_paths
16078                .iter()
16079                .filter(|entry| {
16080                    pathspec_item_matches(&git_path, entry, PathspecMatchMagic::default())
16081                })
16082                .cloned()
16083                .collect::<Vec<_>>();
16084            if !glob_matched.is_empty() {
16085                selected.extend(glob_matched);
16086                continue;
16087            }
16088            if options.ignore_unmatch {
16089                continue;
16090            }
16091            eprintln!(
16092                "fatal: pathspec '{}' did not match any files",
16093                String::from_utf8_lossy(&git_path)
16094            );
16095            return Err(GitError::Exit(128));
16096        }
16097        let matched = index_paths
16098            .iter()
16099            .filter(|entry| {
16100                !sparse_dir_paths.contains(*entry) && index_entry_is_under_path(entry, &git_path)
16101            })
16102            .cloned()
16103            .collect::<Vec<_>>();
16104        if matched.is_empty() {
16105            if options.ignore_unmatch {
16106                continue;
16107            }
16108            eprintln!(
16109                "fatal: pathspec '{}' did not match any files",
16110                String::from_utf8_lossy(&git_path)
16111            );
16112            return Err(GitError::Exit(128));
16113        }
16114        if !options.recursive {
16115            eprintln!(
16116                "fatal: not removing '{}' recursively without -r",
16117                String::from_utf8_lossy(&git_path)
16118            );
16119            return Err(GitError::Exit(128));
16120        }
16121        selected.extend(matched);
16122    }
16123
16124    // `git rm` runs the local-modification safety check unless `-f` is given —
16125    // even for `--cached`. The check (a faithful port of builtin/rm.c's
16126    // `check_local_mod`) buckets each selected path into one of three error
16127    // classes and prints all of them at once (collected, not fail-fast), so a
16128    // single `git rm a b c` reports every offending path. See the message
16129    // assertions in t3600-rm.sh.
16130    if !options.force {
16131        let config =
16132            sley_config::read_repo_config(git_dir, config_parameters_env).unwrap_or_default();
16133        // advice.rmhints (default true) gates the parenthetical "(use ...)" hints.
16134        let show_hints = config.get_bool("advice", None, "rmhints").unwrap_or(true);
16135        // Map each selected path to its stage-0 index entry for the check; an
16136        // unmerged path (no stage 0) is skipped, exactly like git's loop
16137        // (index_name_pos fails, and a non-gitlink ours entry `continue`s).
16138        let stage0: BTreeMap<&[u8], &IndexEntry> = index_entry_list
16139            .iter()
16140            .filter(|entry| entry.stage() == Stage::Normal)
16141            .map(|entry| (entry.path.as_bytes(), entry))
16142            .collect();
16143        let mut files_staged: Vec<&[u8]> = Vec::new();
16144        let mut files_cached: Vec<&[u8]> = Vec::new();
16145        let mut files_local: Vec<&[u8]> = Vec::new();
16146        for path in &selected {
16147            let Some(index_entry) = stage0.get(path.as_slice()) else {
16148                // Unmerged ordinary paths are safe to resolve by removal. An
16149                // unmerged gitlink still needs submodule dirt checks because
16150                // removing its worktree can discard nested changes.
16151                if !gitlink_paths.contains(path) {
16152                    continue;
16153                }
16154                if rm_submodule_has_local_changes(
16155                    worktree_root,
16156                    format,
16157                    path,
16158                    gitlink_oids_by_path.get(path),
16159                ) {
16160                    files_local.push(path);
16161                }
16162                continue;
16163            };
16164            let worktree_file = worktree_path(worktree_root, path)?;
16165            // Is the worktree path different from the index?
16166            //
16167            // Mirror builtin/rm.c's `check_local_mod`: when `lstat` fails with a
16168            // "missing file" error (ENOENT *or* ENOTDIR — the path vanished, or a
16169            // leading component became a file) the file has already gone from the
16170            // working tree, so git `continue`s and never buckets the path. Same
16171            // for a tracked plain path that is now a directory on disk: git
16172            // treats that as ENOENT and skips it (the later worktree-removal step
16173            // is what fails on a non-empty directory).
16174            let local_changes = if sley_index::is_gitlink(index_entry.mode) {
16175                rm_submodule_has_local_changes(
16176                    worktree_root,
16177                    format,
16178                    path,
16179                    gitlink_oids_by_path.get(path),
16180                )
16181            } else {
16182                match fs::symlink_metadata(&worktree_file) {
16183                    Err(err)
16184                        if matches!(
16185                            err.kind(),
16186                            std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
16187                        ) || err.raw_os_error() == Some(20) =>
16188                    {
16189                        // ENOENT/ENOTDIR: already gone — not warning-worthy.
16190                        continue;
16191                    }
16192                    Err(err) => return Err(err.into()),
16193                    Ok(meta) if meta.is_dir() => continue,
16194                    Ok(meta) => {
16195                        // git refreshes the index before `check_local_mod`, so a path
16196                        // whose stat changed but whose content is unchanged is up to
16197                        // date. We mirror that: a clean cached stat short-circuits to
16198                        // "unchanged"; otherwise re-hash the (clean-filtered) worktree
16199                        // content and compare to the index entry's *cached oid* (git's
16200                        // refresh `hash_object`), NOT the stored blob. Comparing to the
16201                        // oid — not the blob bytes — means a removed object does not
16202                        // abort the check (the worktree may still hash to the cached
16203                        // oid), so `git rm --cached` of a path whose blob was deleted
16204                        // still succeeds.
16205                        match rm_stat_cache.index_entry_worktree_stat_verdict(index_entry, &meta) {
16206                            sley_index::StatVerdict::Clean => false,
16207                            sley_index::StatVerdict::Dirty
16208                            | sley_index::StatVerdict::RacyNeedsContentCheck => {
16209                                let worktree_bytes = apply_clean_filter(
16210                                    worktree_root,
16211                                    git_dir,
16212                                    &config,
16213                                    path,
16214                                    &fs::read(&worktree_file)?,
16215                                )?;
16216                                let worktree_oid =
16217                                    EncodedObject::new(ObjectType::Blob, worktree_bytes)
16218                                        .object_id(format)?;
16219                                worktree_oid != index_entry.oid
16220                            }
16221                        }
16222                    }
16223                }
16224            };
16225            // Is the index different from the HEAD commit? (Before the first
16226            // commit, anything staged is treated as changed from HEAD.)
16227            let staged_changes = match head_entries.get(path) {
16228                Some(head_entry) => {
16229                    head_entry.oid != index_entry.oid || head_entry.mode != index_entry.mode
16230                }
16231                None => true,
16232            };
16233            if local_changes && staged_changes {
16234                // `git rm --cached` of an intent-to-add entry is safe.
16235                if !options.cached || !index_entry.is_intent_to_add() {
16236                    files_staged.push(path);
16237                }
16238            } else if !options.cached {
16239                if staged_changes {
16240                    files_cached.push(path);
16241                }
16242                if local_changes {
16243                    files_local.push(path);
16244                }
16245            }
16246        }
16247        let mut errs = false;
16248        print_rm_error_files(
16249            &files_staged,
16250            "the following file has staged content different from both the\nfile and the HEAD:",
16251            "the following files have staged content different from both the\nfile and the HEAD:",
16252            "\n(use -f to force removal)",
16253            show_hints,
16254            &mut errs,
16255        );
16256        print_rm_error_files(
16257            &files_cached,
16258            "the following file has changes staged in the index:",
16259            "the following files have changes staged in the index:",
16260            "\n(use --cached to keep the file, or -f to force removal)",
16261            show_hints,
16262            &mut errs,
16263        );
16264        print_rm_error_files(
16265            &files_local,
16266            "the following file has local modifications:",
16267            "the following files have local modifications:",
16268            "\n(use --cached to keep the file, or -f to force removal)",
16269            show_hints,
16270            &mut errs,
16271        );
16272        if errs {
16273            return Err(GitError::Exit(1));
16274        }
16275    }
16276
16277    if options.dry_run {
16278        return Ok(RemoveResult {
16279            removed: selected.into_iter().collect(),
16280        });
16281    }
16282    let selected_gitlinks = selected
16283        .iter()
16284        .filter(|path| gitlink_paths.contains(*path))
16285        .cloned()
16286        .collect::<Vec<_>>();
16287    if !options.cached
16288        && !selected_gitlinks.is_empty()
16289        && !selected.contains(b".gitmodules".as_slice())
16290    {
16291        ensure_gitmodules_clean_for_submodule_rm(
16292            worktree_root,
16293            git_dir,
16294            format,
16295            &index_entry_list,
16296            &selected_gitlinks,
16297            &config_parameters_env,
16298        )?;
16299    }
16300    // Mirror builtin/rm.c's ordering: remove the worktree files BEFORE writing
16301    // the new index. If the very first removal fails (and nothing has been
16302    // removed yet), abort without committing the index, so a `git rm d` where
16303    // `d` is now a non-empty directory fails AND leaves the index untouched.
16304    // Once any file has been removed we commit to finishing (git does the same).
16305    if !options.cached {
16306        let mut removed_any = false;
16307        for path in &selected {
16308            let is_gitlink = gitlink_paths.contains(path);
16309            let is_stage0_gitlink = stage0_gitlink_paths.contains(path);
16310            match remove_tracked_worktree_path(
16311                worktree_root,
16312                path,
16313                is_gitlink,
16314                is_stage0_gitlink,
16315                options.force,
16316            )?
16317            {
16318                true => removed_any = true,
16319                false if !removed_any => {
16320                    eprintln!(
16321                        "fatal: git rm: '{}': Is a directory",
16322                        String::from_utf8_lossy(path)
16323                    );
16324                    return Err(GitError::Exit(128));
16325                }
16326                false => {}
16327            }
16328        }
16329    }
16330    if !options.cached
16331        && !selected_gitlinks.is_empty()
16332        && !selected.contains(b".gitmodules".as_slice())
16333    {
16334        remove_submodule_sections_from_gitmodules(
16335            worktree_root,
16336            git_dir,
16337            format,
16338            &mut index_entry_list,
16339            &selected_gitlinks,
16340            &config_parameters_env,
16341        )?;
16342    }
16343    let mut resolve_undo_index = Index {
16344        version: index_version,
16345        entries: index_entry_list.clone(),
16346        extensions: index_extensions,
16347        checksum: None,
16348    };
16349    for path in &selected {
16350        let range = index_entries_path_range(&resolve_undo_index.entries, path);
16351        record_resolve_undo_for_range(&mut resolve_undo_index, format, path, range)?;
16352    }
16353
16354    // Keep every entry whose path was not selected, preserving original order
16355    // and all stages of unmerged paths that were not removed.
16356    let entries = index_entry_list
16357        .into_iter()
16358        .filter(|entry| !selected.contains(entry.path.as_bytes()))
16359        .collect::<Vec<_>>();
16360    // Removing entries invalidates the cache-tree (`TREE` extension): a stale
16361    // cached subtree id makes `git diff --cached`/`git status` short-circuit the
16362    // comparison of an affected directory against HEAD and miss the deletion
16363    // (observed: `git rm dir/nested.txt` left a valid `dir/` cache-tree, so the
16364    // deletion never showed in the cached diff). Git invalidates the cache-tree
16365    // on any index mutation; drop it so it is rebuilt on the next write, exactly
16366    // like the `add` path does above.
16367    let extensions = index_extensions_without_cache_tree(&resolve_undo_index.extensions);
16368    let selected_paths = selected.iter().cloned().collect::<Vec<_>>();
16369    let mut index = Index {
16370        version: index_version,
16371        entries,
16372        extensions,
16373        checksum: None,
16374    };
16375    invalidate_untracked_cache_for_git_paths(&mut index, format, &selected_paths)?;
16376    fs::write(index_path, index.write(format)?)?;
16377    Ok(RemoveResult {
16378        removed: selected.into_iter().collect(),
16379    })
16380}
16381
16382/// Remove a tracked path from the working tree, mirroring builtin/rm.c's
16383/// removal loop. For a plain path this is `remove_path`: unlink the file and
16384/// prune now-empty parent directories. For a gitlink (`is_gitlink`, mode
16385/// 160000) it is the submodule branch — git removes the populated submodule
16386/// *directory* with `remove_dir_recursively` (NOT `unlink`, which fails EISDIR),
16387/// descending into and deleting the nested `.git` because the `git rm` call site
16388/// passes `flag` *without* `REMOVE_DIR_KEEP_NESTED_GIT`; it `die`s only if that
16389/// recursive removal genuinely fails.
16390///
16391/// Returns `Ok(true)` when the path was removed, `Ok(false)` when a *plain* path
16392/// could not be unlinked because it is a directory (the caller decides whether
16393/// that aborts the run). A path that has already vanished is a no-op success.
16394fn remove_tracked_worktree_path(
16395    root: &Path,
16396    path: &[u8],
16397    is_gitlink: bool,
16398    is_stage0_gitlink: bool,
16399    force: bool,
16400) -> Result<bool> {
16401    let file = worktree_path(root, path)?;
16402    match fs::symlink_metadata(&file) {
16403        Err(err)
16404            if matches!(
16405                err.kind(),
16406                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
16407            ) =>
16408        {
16409            return Ok(true);
16410        }
16411        Err(err) if err.raw_os_error() == Some(20) => return Ok(true), // ENOTDIR
16412        Err(err) => return Err(err.into()),
16413        Ok(meta) if meta.is_dir() => {
16414            if is_gitlink {
16415                if file.join(".git").is_dir() && !is_stage0_gitlink {
16416                    return Ok(false);
16417                }
16418                if !force && original_cwd_is_inside(&file) {
16419                    let nested_git = file.join(".git");
16420                    if nested_git.is_dir() {
16421                        let _ = fs::remove_dir_all(nested_git);
16422                    }
16423                    return Ok(false);
16424                }
16425                if contains_nested_git_dir(&file) {
16426                    eprintln!(
16427                        "Migrating git directory of '{}' from",
16428                        String::from_utf8_lossy(path)
16429                    );
16430                }
16431                // Submodule removal. Mirror builtin/rm.c's `is_submodule` branch:
16432                // `remove_dir_recursively(&buf, force ? REMOVE_DIR_PURGE_ORIGINAL_CWD : 0)`.
16433                // No `REMOVE_DIR_KEEP_NESTED_GIT` flag, so the whole subtree —
16434                // including the nested `.git` of the populated submodule — is
16435                // removed. git `die`s ("could not remove '<path>'") if the
16436                // recursive removal fails; propagate the IO error to match.
16437                fs::remove_dir_all(&file)?;
16438                if fs::symlink_metadata(&file).is_ok() {
16439                    fs::remove_dir(&file)?;
16440                }
16441                prune_empty_parents(root, file.parent())?;
16442                return Ok(true);
16443            }
16444            // A directory in the worktree where a plain file is tracked cannot
16445            // be unlinked (git's remove_path fails on EISDIR). Report it so the
16446            // caller can abort the removal without committing the index.
16447            return Ok(false);
16448        }
16449        Ok(_) => {}
16450    }
16451    fs::remove_file(&file)?;
16452    prune_empty_parents(root, file.parent())?;
16453    Ok(true)
16454}
16455
16456fn rm_submodule_has_local_changes(
16457    worktree_root: &Path,
16458    format: ObjectFormat,
16459    path: &[u8],
16460    expected_oids: Option<&BTreeSet<ObjectId>>,
16461) -> bool {
16462    let Ok(submodule_root) = worktree_path(worktree_root, path) else {
16463        return false;
16464    };
16465    if !submodule_root.is_dir() {
16466        return false;
16467    }
16468    let head_changed = sley_diff_merge::gitlink_head_oid(&submodule_root, format)
16469        .zip(expected_oids)
16470        .is_some_and(|(head, expected)| !expected.contains(&head));
16471    head_changed || submodule_dirt(&submodule_root) != 0
16472}
16473
16474fn remove_submodule_sections_from_gitmodules(
16475    worktree_root: &Path,
16476    git_dir: &Path,
16477    format: ObjectFormat,
16478    index_entries: &mut Vec<IndexEntry>,
16479    selected_gitlinks: &[Vec<u8>],
16480    config_parameters_env: &Option<&str>,
16481) -> Result<()> {
16482    let gitmodules_path = worktree_root.join(".gitmodules");
16483    let Ok(original) = fs::read(&gitmodules_path) else {
16484        return Ok(());
16485    };
16486    let gitmodules_index = index_entries.iter().position(|entry| {
16487        entry.stage() == Stage::Normal && entry.path.as_bytes() == b".gitmodules"
16488    });
16489    if gitmodules_index.is_none() {
16490        return Ok(());
16491    }
16492    let config = GitConfig::parse(&original)?;
16493    let selected = selected_gitlinks
16494        .iter()
16495        .map(|path| String::from_utf8_lossy(path).into_owned())
16496        .collect::<BTreeSet<_>>();
16497    let mut sections = Vec::new();
16498    for section in &config.sections {
16499        if !section.name.eq_ignore_ascii_case("submodule") {
16500            continue;
16501        }
16502        let Some(name) = section.subsection.as_deref() else {
16503            continue;
16504        };
16505        let path = section
16506            .entries
16507            .iter()
16508            .rev()
16509            .find(|entry| entry.key.eq_ignore_ascii_case("path"))
16510            .and_then(|entry| entry.value.as_deref());
16511        if path.is_some_and(|path| selected.contains(path)) {
16512            sections.push(name.to_string());
16513        }
16514    }
16515    let selected_with_sections = sections
16516        .iter()
16517        .filter_map(|name| {
16518            config
16519                .get("submodule", Some(name), "path")
16520                .map(ToOwned::to_owned)
16521        })
16522        .collect::<BTreeSet<_>>();
16523    for path in &selected {
16524        if !selected_with_sections.contains(path) {
16525            eprintln!("warning: Could not find section in .gitmodules where path={path}");
16526        }
16527    }
16528    if sections.is_empty() {
16529        return Ok(());
16530    }
16531    if gitmodules_worktree_differs_from_index(
16532        worktree_root,
16533        git_dir,
16534        format,
16535        index_entries,
16536        &original,
16537        config_parameters_env,
16538    )? {
16539        eprintln!("error: the following file has local modifications:");
16540        eprintln!("    .gitmodules");
16541        eprintln!("(use --cached to keep the file, or -f to force removal)");
16542        return Err(GitError::Exit(1));
16543    }
16544    let mut edited = original;
16545    for name in sections {
16546        let section_name = format!("submodule.{name}");
16547        match sley_config::raw_edit::rename_or_remove_section(&edited, &section_name, None) {
16548            sley_config::raw_edit::SectionEditOutcome::Changed(out) => edited = out,
16549            sley_config::raw_edit::SectionEditOutcome::NotFound => {
16550                eprintln!("warning: Could not find section in .gitmodules where path={name}");
16551            }
16552            sley_config::raw_edit::SectionEditOutcome::LineTooLong(line) => {
16553                return Err(GitError::InvalidFormat(format!(
16554                    "bad config line {line} in .gitmodules"
16555                )));
16556            }
16557        }
16558    }
16559    fs::write(&gitmodules_path, &edited)?;
16560    stage_gitmodules_after_rm(
16561        worktree_root,
16562        git_dir,
16563        format,
16564        index_entries,
16565        config_parameters_env,
16566    )
16567}
16568
16569fn ensure_gitmodules_clean_for_submodule_rm(
16570    worktree_root: &Path,
16571    git_dir: &Path,
16572    format: ObjectFormat,
16573    index_entries: &[IndexEntry],
16574    selected_gitlinks: &[Vec<u8>],
16575    config_parameters_env: &Option<&str>,
16576) -> Result<()> {
16577    let gitmodules_path = worktree_root.join(".gitmodules");
16578    let Ok(original) = fs::read(&gitmodules_path) else {
16579        return Ok(());
16580    };
16581    if !index_entries
16582        .iter()
16583        .any(|entry| entry.stage() == Stage::Normal && entry.path.as_bytes() == b".gitmodules")
16584    {
16585        return Ok(());
16586    }
16587    let config = GitConfig::parse(&original)?;
16588    let selected = selected_gitlinks
16589        .iter()
16590        .map(|path| String::from_utf8_lossy(path).into_owned())
16591        .collect::<BTreeSet<_>>();
16592    let has_matching_section = config.sections.iter().any(|section| {
16593        section.name.eq_ignore_ascii_case("submodule")
16594            && section
16595                .entries
16596                .iter()
16597                .rev()
16598                .find(|entry| entry.key.eq_ignore_ascii_case("path"))
16599                .and_then(|entry| entry.value.as_deref())
16600                .is_some_and(|path| selected.contains(path))
16601    });
16602    if !has_matching_section {
16603        return Ok(());
16604    }
16605    if gitmodules_worktree_differs_from_index(
16606        worktree_root,
16607        git_dir,
16608        format,
16609        index_entries,
16610        &original,
16611        config_parameters_env,
16612    )? {
16613        eprintln!("error: the following file has local modifications:");
16614        eprintln!("    .gitmodules");
16615        eprintln!("(use --cached to keep the file, or -f to force removal)");
16616        return Err(GitError::Exit(1));
16617    }
16618    Ok(())
16619}
16620
16621fn gitmodules_worktree_differs_from_index(
16622    worktree_root: &Path,
16623    git_dir: &Path,
16624    format: ObjectFormat,
16625    index_entries: &[IndexEntry],
16626    worktree_bytes: &[u8],
16627    config_parameters_env: &Option<&str>,
16628) -> Result<bool> {
16629    let Some(entry) = index_entries
16630        .iter()
16631        .find(|entry| entry.stage() == Stage::Normal && entry.path.as_bytes() == b".gitmodules")
16632    else {
16633        return Ok(false);
16634    };
16635    let config = sley_config::read_repo_config(git_dir, *config_parameters_env).unwrap_or_default();
16636    let clean = apply_clean_filter(
16637        worktree_root,
16638        git_dir,
16639        &config,
16640        b".gitmodules",
16641        worktree_bytes,
16642    )?;
16643    let oid = EncodedObject::new(ObjectType::Blob, clean).object_id(format)?;
16644    Ok(oid != entry.oid)
16645}
16646
16647fn stage_gitmodules_after_rm(
16648    worktree_root: &Path,
16649    git_dir: &Path,
16650    format: ObjectFormat,
16651    index_entries: &mut [IndexEntry],
16652    config_parameters_env: &Option<&str>,
16653) -> Result<()> {
16654    let path = worktree_root.join(".gitmodules");
16655    let bytes = fs::read(&path)?;
16656    let config = sley_config::read_repo_config(git_dir, *config_parameters_env).unwrap_or_default();
16657    let clean = apply_clean_filter(worktree_root, git_dir, &config, b".gitmodules", &bytes)?;
16658    let object = EncodedObject::new(ObjectType::Blob, clean);
16659    let oid = object.object_id(format)?;
16660    let odb = FileObjectDatabase::from_git_dir(git_dir, format);
16661    odb.write_object(object)?;
16662    let metadata = fs::symlink_metadata(&path)?;
16663    let mut entry =
16664        index_entry_from_metadata(BString::from(b".gitmodules".as_slice()), oid, &metadata);
16665    entry.mode = 0o100644;
16666    if let Some(slot) = index_entries
16667        .iter_mut()
16668        .find(|entry| entry.stage() == Stage::Normal && entry.path.as_bytes() == b".gitmodules")
16669    {
16670        *slot = entry;
16671    }
16672    Ok(())
16673}
16674
16675fn prepare_gitmodules_for_moved_gitlinks(
16676    worktree_root: &Path,
16677    git_dir: &Path,
16678    format: ObjectFormat,
16679    index_entries: &[IndexEntry],
16680    moves: &[GitmodulesMove],
16681) -> Result<Option<Vec<u8>>> {
16682    if moves.is_empty() {
16683        return Ok(None);
16684    }
16685    let gitmodules_path = worktree_root.join(".gitmodules");
16686    let Ok(original) = fs::read(&gitmodules_path) else {
16687        return Ok(None);
16688    };
16689    if !index_entries
16690        .iter()
16691        .any(|entry| entry.stage() == Stage::Normal && entry.path.as_bytes() == b".gitmodules")
16692    {
16693        return Ok(None);
16694    }
16695    let config = GitConfig::parse(&original)?;
16696    let mut edits = Vec::new();
16697    for gitlink_move in moves {
16698        let source = String::from_utf8_lossy(&gitlink_move.source).into_owned();
16699        let destination = String::from_utf8_lossy(&gitlink_move.destination).into_owned();
16700        let mut matched = false;
16701        for section in &config.sections {
16702            if !section.name.eq_ignore_ascii_case("submodule") {
16703                continue;
16704            }
16705            let Some(name) = section.subsection.as_deref() else {
16706                continue;
16707            };
16708            let path = section
16709                .entries
16710                .iter()
16711                .rev()
16712                .find(|entry| entry.key.eq_ignore_ascii_case("path"))
16713                .and_then(|entry| entry.value.as_deref());
16714            if path == Some(source.as_str()) {
16715                matched = true;
16716                edits.push((name.to_string(), destination.clone()));
16717            }
16718        }
16719        if !matched {
16720            eprintln!("warning: Could not find section in .gitmodules where path={source}");
16721        }
16722    }
16723    if edits.is_empty() {
16724        return Ok(None);
16725    }
16726    if gitmodules_worktree_differs_from_index(
16727        worktree_root,
16728        git_dir,
16729        format,
16730        index_entries,
16731        &original,
16732        &None,
16733    )? {
16734        eprintln!("fatal: Please stage your changes to .gitmodules or stash them to proceed");
16735        return Err(GitError::Exit(128));
16736    }
16737    let mut edited = original;
16738    for (name, destination) in edits {
16739        let mut editor =
16740            sley_config::raw_edit::RawConfigEditor::new(edited, "submodule", Some(&name), "path");
16741        match editor.set_multivar(Some(&destination), None, None, false) {
16742            sley_config::raw_edit::RawEditOutcome::Changed => {}
16743            sley_config::raw_edit::RawEditOutcome::NothingSet => {
16744                eprintln!("warning: Could not find section in .gitmodules where path={name}");
16745            }
16746        }
16747        edited = editor.into_bytes();
16748    }
16749    Ok(Some(edited))
16750}
16751
16752fn apply_prepared_gitmodules_move(
16753    worktree_root: &Path,
16754    git_dir: &Path,
16755    format: ObjectFormat,
16756    index_entries: &mut [IndexEntry],
16757    edited: Vec<u8>,
16758) -> Result<()> {
16759    fs::write(worktree_root.join(".gitmodules"), edited)?;
16760    stage_gitmodules_after_rm(worktree_root, git_dir, format, index_entries, &None)
16761}
16762
16763fn prepare_moved_gitlink_gitdirs(
16764    worktree_root: &Path,
16765    moves: &[GitmodulesMove],
16766) -> Result<Vec<GitlinkGitdirMove>> {
16767    let mut gitdir_moves = Vec::new();
16768    for gitlink_move in moves {
16769        let source_root = worktree_path(worktree_root, &gitlink_move.source)?;
16770        if !source_root.join(".git").is_file() {
16771            continue;
16772        }
16773        let Some(git_dir) = sley_diff_merge::gitlink_git_dir(&source_root) else {
16774            continue;
16775        };
16776        gitdir_moves.push(GitlinkGitdirMove {
16777            git_dir: normalize_absolute_path_lexically(&git_dir),
16778            destination_root: worktree_path(worktree_root, &gitlink_move.destination)?,
16779        });
16780    }
16781    Ok(gitdir_moves)
16782}
16783
16784fn apply_moved_gitlink_gitdirs(moves: &[GitlinkGitdirMove]) -> Result<()> {
16785    for gitdir_move in moves {
16786        let gitdir_relative =
16787            relative_path_between(&gitdir_move.destination_root, &gitdir_move.git_dir);
16788        let gitdir_value = gitfile_path_value(&gitdir_relative);
16789        fs::write(
16790            gitdir_move.destination_root.join(".git"),
16791            format!("gitdir: {gitdir_value}\n"),
16792        )?;
16793
16794        let config_path = gitdir_move.git_dir.join("config");
16795        let config_bytes = match fs::read(&config_path) {
16796            Ok(bytes) => bytes,
16797            Err(err) if err.kind() == std::io::ErrorKind::NotFound => Vec::new(),
16798            Err(err) => return Err(err.into()),
16799        };
16800        let worktree_relative =
16801            relative_path_between(&gitdir_move.git_dir, &gitdir_move.destination_root);
16802        let worktree_value = gitfile_path_value(&worktree_relative);
16803        let mut editor =
16804            sley_config::raw_edit::RawConfigEditor::new(config_bytes, "core", None, "worktree");
16805        match editor.set_multivar(Some(&worktree_value), None, None, false) {
16806            sley_config::raw_edit::RawEditOutcome::Changed => {
16807                sley_config::raw_edit::write_config_file_locked(
16808                    &config_path,
16809                    &editor.into_bytes(),
16810                    sley_config::raw_edit::ConfigFileWriteOptions::default(),
16811                )
16812                .map_err(|err| GitError::Io(err.to_string()))?;
16813            }
16814            sley_config::raw_edit::RawEditOutcome::NothingSet => {}
16815        }
16816    }
16817    Ok(())
16818}
16819
16820fn relative_path_between(from_dir: &Path, to_path: &Path) -> PathBuf {
16821    let from = normalize_absolute_path_lexically(from_dir);
16822    let to = normalize_absolute_path_lexically(to_path);
16823    let from_components = from.components().collect::<Vec<_>>();
16824    let to_components = to.components().collect::<Vec<_>>();
16825    let mut common = 0usize;
16826    while common < from_components.len()
16827        && common < to_components.len()
16828        && from_components[common] == to_components[common]
16829    {
16830        common += 1;
16831    }
16832    if common == 0 {
16833        return to;
16834    }
16835    let mut relative = PathBuf::new();
16836    for component in &from_components[common..] {
16837        if matches!(component, std::path::Component::Normal(_)) {
16838            relative.push("..");
16839        }
16840    }
16841    for component in &to_components[common..] {
16842        match component {
16843            std::path::Component::Normal(value) => relative.push(value),
16844            std::path::Component::ParentDir => relative.push(".."),
16845            std::path::Component::CurDir
16846            | std::path::Component::RootDir
16847            | std::path::Component::Prefix(_) => {}
16848        }
16849    }
16850    if relative.as_os_str().is_empty() {
16851        relative.push(".");
16852    }
16853    relative
16854}
16855
/// Renders `path` as the forward-slash string stored in gitfiles and in
/// `core.worktree`.
///
/// Components are emitted in order and joined with `/`, whatever the
/// platform separator is. A root component contributes the leading `/`; a
/// platform prefix (for example a Windows drive such as `C:`) is written
/// verbatim and stays in front of the root, so `C:\repo` becomes `C:/repo`
/// rather than `/C:/repo`. Non-UTF-8 components are converted lossily.
fn gitfile_path_value(path: &Path) -> String {
    use std::path::Component;

    let mut value = String::new();
    // True when the last thing written was a name, so the next name needs a
    // `/` in front of it. Prefix and root components reset it because they
    // already end where the next name must start.
    let mut needs_separator = false;
    for component in path.components() {
        match component {
            Component::Prefix(prefix) => {
                value.push_str(&prefix.as_os_str().to_string_lossy());
                needs_separator = false;
            }
            Component::RootDir => {
                value.push('/');
                needs_separator = false;
            }
            Component::CurDir | Component::ParentDir | Component::Normal(_) => {
                if needs_separator {
                    value.push('/');
                }
                value.push_str(&component.as_os_str().to_string_lossy());
                needs_separator = true;
            }
        }
    }
    value
}
16875
/// Reports whether `root` or any directory below it contains a `.git`
/// directory.
///
/// A `.git` entry counts when it resolves to a directory; a `.git` regular
/// file (gitfile) does not. Unreadable directories and unreadable entries are
/// treated as containing nothing.
///
/// Only real directories are descended into. Symlinked directories are not
/// followed, so the walk cannot leave `root` or revisit a directory through a
/// link cycle.
fn contains_nested_git_dir(root: &Path) -> bool {
    let Ok(entries) = fs::read_dir(root) else {
        return false;
    };
    for entry in entries.flatten() {
        let path = entry.path();
        if entry.file_name() == ".git" && path.is_dir() {
            return true;
        }
        // `DirEntry::file_type` does not traverse symlinks, unlike
        // `Path::is_dir`, so a link to a directory is never recursed into.
        let is_real_dir = entry
            .file_type()
            .map(|kind| kind.is_dir())
            .unwrap_or(false);
        if is_real_dir && contains_nested_git_dir(&path) {
            return true;
        }
    }
    false
}
16891
/// Emits a single batched `git rm` safety error on stderr, in the shape of
/// builtin/rm.c's `print_error_files`.
///
/// The message is `singular` for exactly one path and `plural` otherwise,
/// followed by each path on its own four-space-indented line and, when
/// `show_hints` is set, by `hint` appended verbatim. Does nothing for an empty
/// `files` list; otherwise sets `*errs` to `true` so the caller can keep
/// collecting other error classes before failing.
fn print_rm_error_files(
    files: &[&[u8]],
    singular: &str,
    plural: &str,
    hint: &str,
    show_hints: bool,
    errs: &mut bool,
) {
    let headline = match files {
        [] => return,
        [_] => singular,
        _ => plural,
    };
    let listing: String = files
        .iter()
        .map(|path| format!("\n    {}", String::from_utf8_lossy(path)))
        .collect();
    let trailer = if show_hints { hint } else { "" };
    eprintln!("error: {headline}{listing}{trailer}");
    *errs = true;
}
16918
16919pub fn move_index_and_worktree_path(
16920    worktree_root: impl AsRef<Path>,
16921    git_dir: impl AsRef<Path>,
16922    format: ObjectFormat,
16923    source: &Path,
16924    destination: &Path,
16925    options: MoveOptions,
16926) -> Result<MoveResult> {
16927    let worktree_root = worktree_root.as_ref();
16928    let git_dir = git_dir.as_ref();
16929    let index_path = repository_index_path(git_dir);
16930    let mut index = if index_path.exists() {
16931        Index::parse(&fs::read(&index_path)?, format)?
16932    } else {
16933        Index {
16934            version: 2,
16935            entries: Vec::new(),
16936            extensions: Vec::new(),
16937            checksum: None,
16938        }
16939    };
16940    let source_absolute = if source.is_absolute() {
16941        source.to_path_buf()
16942    } else {
16943        worktree_root.join(source)
16944    };
16945    let source_absolute = normalize_absolute_path_lexically(&source_absolute);
16946    let destination_absolute = if destination.is_absolute() {
16947        destination.to_path_buf()
16948    } else {
16949        worktree_root.join(destination)
16950    };
16951    let destination_has_trailing_separator = path_has_trailing_separator(&destination_absolute);
16952    let destination_absolute = normalize_absolute_path_lexically(&destination_absolute);
16953    let mut destination_absolute = if destination_absolute.is_dir() {
16954        let Some(file_name) = source_absolute.file_name() else {
16955            return Err(GitError::InvalidPath(format!(
16956                "invalid source path {}",
16957                source.display()
16958            )));
16959        };
16960        destination_absolute.join(file_name)
16961    } else {
16962        destination_absolute
16963    };
16964    if path_has_trailing_separator(&destination_absolute)
16965        && !destination_absolute.exists()
16966        && source_absolute.is_dir()
16967        && let (Some(parent), Some(file_name)) = (
16968            destination_absolute.parent(),
16969            destination_absolute.file_name(),
16970        )
16971    {
16972        destination_absolute = parent.join(file_name);
16973    }
16974    let source_relative = source_absolute.strip_prefix(worktree_root).map_err(|_| {
16975        GitError::InvalidPath(format!("path {} is outside worktree", source.display()))
16976    })?;
16977    let destination_relative = destination_absolute
16978        .strip_prefix(worktree_root)
16979        .map_err(|_| {
16980            GitError::InvalidPath(format!(
16981                "path {} is outside worktree",
16982                destination.display()
16983            ))
16984        })?;
16985    let source_path = git_path_bytes(source_relative)?;
16986    let destination_path = git_path_bytes(destination_relative)?;
16987    if destination_has_trailing_separator
16988        && !destination_absolute.is_dir()
16989        && !source_absolute.is_dir()
16990    {
16991        if options.skip_errors {
16992            return Ok(MoveResult {
16993                source: source_path,
16994                destination: destination_path,
16995                skipped: true,
16996                fatal: None,
16997                details: Vec::new(),
16998            });
16999        }
17000        let mut destination = String::from_utf8_lossy(&destination_path).into_owned();
17001        destination.push('/');
17002        if options.dry_run {
17003            let fatal = format!(
17004                "fatal: destination directory does not exist, source={}, destination={destination}",
17005                String::from_utf8_lossy(&source_path),
17006            );
17007            return Ok(MoveResult {
17008                source: source_path,
17009                destination: destination.clone().into_bytes(),
17010                skipped: false,
17011                fatal: Some(fatal),
17012                details: Vec::new(),
17013            });
17014        }
17015        eprintln!(
17016            "fatal: destination directory does not exist, source={}, destination={destination}",
17017            String::from_utf8_lossy(&source_path),
17018        );
17019        return Err(GitError::Exit(128));
17020    }
17021    let directory_prefix = {
17022        let mut prefix = source_path.clone();
17023        prefix.push(b'/');
17024        prefix
17025    };
17026    let directory_entries: Vec<_> = index
17027        .entries
17028        .iter()
17029        .filter(|entry| entry.path.as_bytes().starts_with(&directory_prefix))
17030        .cloned()
17031        .collect();
17032    let source_is_conflicted = index.entries.iter().any(|entry| {
17033        (entry.path.as_bytes() == source_path.as_slice()
17034            || entry.path.as_bytes().starts_with(&directory_prefix))
17035            && entry.stage() != Stage::Normal
17036    });
17037    if source_is_conflicted {
17038        if options.skip_errors {
17039            return Ok(MoveResult {
17040                source: source_path,
17041                destination: destination_path,
17042                skipped: true,
17043                fatal: None,
17044                details: Vec::new(),
17045            });
17046        }
17047        if options.dry_run {
17048            let fatal = format!(
17049                "fatal: conflicted, source={}, destination={}",
17050                String::from_utf8_lossy(&source_path),
17051                String::from_utf8_lossy(&destination_path)
17052            );
17053            return Ok(MoveResult {
17054                source: source_path,
17055                destination: destination_path,
17056                skipped: false,
17057                fatal: Some(fatal),
17058                details: Vec::new(),
17059            });
17060        }
17061        eprintln!(
17062            "fatal: conflicted, source={}, destination={}",
17063            String::from_utf8_lossy(&source_path),
17064            String::from_utf8_lossy(&destination_path)
17065        );
17066        return Err(GitError::Exit(128));
17067    }
17068    let source_position = index
17069        .entries
17070        .iter()
17071        .position(|entry| entry.path == source_path && entry.stage() == Stage::Normal);
17072    let source_is_tracked = !directory_entries.is_empty() || source_position.is_some();
17073    if !source_is_tracked {
17074        if options.skip_errors {
17075            return Ok(MoveResult {
17076                source: source_path,
17077                destination: destination_path,
17078                skipped: true,
17079                fatal: None,
17080                details: Vec::new(),
17081            });
17082        }
17083        let source_kind = if source_absolute.exists() {
17084            "not under version control"
17085        } else {
17086            "bad source"
17087        };
17088        if options.dry_run {
17089            let fatal = format!(
17090                "fatal: {source_kind}, source={}, destination={}",
17091                String::from_utf8_lossy(&source_path),
17092                String::from_utf8_lossy(&destination_path)
17093            );
17094            return Ok(MoveResult {
17095                source: source_path,
17096                destination: destination_path,
17097                skipped: false,
17098                fatal: Some(fatal),
17099                details: Vec::new(),
17100            });
17101        }
17102        eprintln!(
17103            "fatal: {source_kind}, source={}, destination={}",
17104            String::from_utf8_lossy(&source_path),
17105            String::from_utf8_lossy(&destination_path)
17106        );
17107        return Err(GitError::Exit(128));
17108    }
17109    if destination_absolute.exists() {
17110        if !options.force {
17111            if options.skip_errors {
17112                return Ok(MoveResult {
17113                    source: source_path,
17114                    destination: destination_path,
17115                    skipped: true,
17116                    fatal: None,
17117                    details: Vec::new(),
17118                });
17119            }
17120            if options.dry_run {
17121                let fatal = format!(
17122                    "fatal: destination exists, source={}, destination={}",
17123                    String::from_utf8_lossy(&source_path),
17124                    String::from_utf8_lossy(&destination_path)
17125                );
17126                return Ok(MoveResult {
17127                    source: source_path,
17128                    destination: destination_path,
17129                    skipped: false,
17130                    fatal: Some(fatal),
17131                    details: Vec::new(),
17132                });
17133            }
17134            eprintln!(
17135                "fatal: destination exists, source={}, destination={}",
17136                String::from_utf8_lossy(&source_path),
17137                String::from_utf8_lossy(&destination_path)
17138            );
17139            return Err(GitError::Exit(128));
17140        }
17141        if !options.dry_run && destination_absolute.is_dir() {
17142            fs::remove_dir_all(&destination_absolute)?;
17143        } else if !options.dry_run {
17144            fs::remove_file(&destination_absolute)?;
17145        }
17146    }
17147    let gitlink_moves = if options.dry_run {
17148        Vec::new()
17149    } else if !directory_entries.is_empty() {
17150        directory_entries
17151            .iter()
17152            .filter(|entry| sley_index::is_gitlink(entry.mode))
17153            .map(|entry| {
17154                let suffix = &entry.path.as_bytes()[source_path.len()..];
17155                let mut destination = destination_path.clone();
17156                destination.extend_from_slice(suffix);
17157                GitmodulesMove {
17158                    source: entry.path.as_bytes().to_vec(),
17159                    destination,
17160                }
17161            })
17162            .collect::<Vec<_>>()
17163    } else if let Some(position) = source_position {
17164        let entry = &index.entries[position];
17165        if sley_index::is_gitlink(entry.mode) {
17166            vec![GitmodulesMove {
17167                source: source_path.clone(),
17168                destination: destination_path.clone(),
17169            }]
17170        } else {
17171            Vec::new()
17172        }
17173    } else {
17174        Vec::new()
17175    };
17176    let gitmodules_move = prepare_gitmodules_for_moved_gitlinks(
17177        worktree_root,
17178        git_dir,
17179        format,
17180        &index.entries,
17181        &gitlink_moves,
17182    )?;
17183    let gitlink_gitdir_moves = prepare_moved_gitlink_gitdirs(worktree_root, &gitlink_moves)?;
17184    if !directory_entries.is_empty() {
17185        let details: Vec<_> = directory_entries
17186            .iter()
17187            .map(|entry| {
17188                let suffix = &entry.path.as_bytes()[source_path.len()..];
17189                let mut destination = destination_path.clone();
17190                destination.extend_from_slice(suffix);
17191                MoveDetail {
17192                    source: entry.path.as_bytes().to_vec(),
17193                    destination,
17194                    skipped: false,
17195                }
17196            })
17197            .collect();
17198        if options.dry_run {
17199            return Ok(MoveResult {
17200                source: source_path,
17201                destination: destination_path,
17202                skipped: false,
17203                fatal: None,
17204                details,
17205            });
17206        }
17207        fs::rename(&source_absolute, &destination_absolute)?;
17208        apply_moved_gitlink_gitdirs(&gitlink_gitdir_moves)?;
17209        let moved_paths: Vec<_> = details
17210            .iter()
17211            .map(|detail| detail.destination.clone())
17212            .collect();
17213        index.entries.retain(|entry| {
17214            !entry.path.as_bytes().starts_with(&directory_prefix)
17215                && !moved_paths
17216                    .iter()
17217                    .any(|m| m.as_slice() == entry.path.as_bytes())
17218        });
17219        for (source_entry, detail) in directory_entries.into_iter().zip(details.iter()) {
17220            let relative_path = git_path_to_relative_path(&detail.destination)?;
17221            let metadata = fs::metadata(worktree_root.join(relative_path))?;
17222            let mut destination_entry =
17223                index_entry_from_metadata(detail.destination.clone(), source_entry.oid, &metadata);
17224            destination_entry.mode = source_entry.mode;
17225            index.entries.push(destination_entry);
17226        }
17227        if let Some(edited) = gitmodules_move {
17228            apply_prepared_gitmodules_move(
17229                worktree_root,
17230                git_dir,
17231                format,
17232                &mut index.entries,
17233                edited,
17234            )?;
17235        }
17236        index
17237            .entries
17238            .sort_by(|left, right| left.path.cmp(&right.path));
17239        index.extensions.clear();
17240        write_repository_index_ref(git_dir, format, &index)?;
17241        return Ok(MoveResult {
17242            source: source_path,
17243            destination: destination_path,
17244            skipped: false,
17245            fatal: None,
17246            details,
17247        });
17248    }
17249
17250    let position = source_position.expect("tracked non-directory source must have an index entry");
17251    if options.dry_run {
17252        return Ok(MoveResult {
17253            source: source_path,
17254            destination: destination_path,
17255            skipped: false,
17256            fatal: None,
17257            details: Vec::new(),
17258        });
17259    }
17260    if let Some(parent) = destination_absolute.parent()
17261        && !parent.exists()
17262    {
17263        if options.skip_errors {
17264            return Ok(MoveResult {
17265                source: source_path,
17266                destination: destination_path,
17267                skipped: true,
17268                fatal: None,
17269                details: Vec::new(),
17270            });
17271        }
17272        eprintln!(
17273            "fatal: renaming '{}' failed: No such file or directory",
17274            String::from_utf8_lossy(&source_path)
17275        );
17276        return Err(GitError::Exit(128));
17277    }
17278    fs::rename(&source_absolute, &destination_absolute)?;
17279    apply_moved_gitlink_gitdirs(&gitlink_gitdir_moves)?;
17280    let source_entry = index.entries.remove(position);
17281    let mut destination_entry = source_entry;
17282    destination_entry.path = destination_path.clone().into();
17283    destination_entry.refresh_name_length();
17284    index.entries.retain(|entry| entry.path != destination_path);
17285    index.entries.push(destination_entry);
17286    if let Some(edited) = gitmodules_move {
17287        apply_prepared_gitmodules_move(worktree_root, git_dir, format, &mut index.entries, edited)?;
17288    }
17289    index
17290        .entries
17291        .sort_by(|left, right| left.path.cmp(&right.path));
17292    index.extensions.clear();
17293    write_repository_index_ref(git_dir, format, &index)?;
17294    Ok(MoveResult {
17295        source: source_path,
17296        destination: destination_path,
17297        skipped: false,
17298        fatal: None,
17299        details: Vec::new(),
17300    })
17301}
17302
17303fn restore_index_entry(
17304    worktree_root: &Path,
17305    git_dir: &Path,
17306    format: ObjectFormat,
17307    db: &FileObjectDatabase,
17308    entry: &IndexEntry,
17309    smudge_config: Option<&GitConfig>,
17310    stat_cache: Option<&IndexStatCache>,
17311) -> Result<Option<IndexEntry>> {
17312    // A gitlink (mode 160000) names a commit in the submodule's repository, not
17313    // a blob here — reading it as a blob fails ("not found: blob object"). git's
17314    // `checkout_entry` S_IFGITLINK arm just ensures the submodule directory
17315    // exists and never touches an object; the submodule's content is `submodule
17316    // update` territory. Single gitlink rule via `sley_index::is_gitlink`.
17317    if sley_index::is_gitlink(entry.mode) {
17318        let dir_path = worktree_path(worktree_root, entry.path.as_bytes())?;
17319        materialize_gitlink_dir(worktree_root, &dir_path)?;
17320        return Ok(None);
17321    }
17322    let file_path = worktree_path(worktree_root, entry.path.as_bytes())?;
17323    if let Some(stat_cache) = stat_cache {
17324        if let Ok(metadata) = fs::symlink_metadata(&file_path) {
17325            if stat_cache
17326                .reuse_index_entry_for_checkout(entry, &metadata)
17327                .is_some()
17328            {
17329                return Ok(None);
17330            }
17331        }
17332    }
17333    let object = read_expected_object(db, &entry.oid, ObjectType::Blob)?;
17334    let body: Cow<'_, [u8]> = match smudge_config {
17335        Some(config) => {
17336            let checks = smudge_attribute_checks_from_index(
17337                worktree_root,
17338                git_dir,
17339                format,
17340                entry.path.as_bytes(),
17341            )?;
17342            apply_smudge_filter_with_attributes_cow_format(
17343                config,
17344                &checks,
17345                entry.path.as_bytes(),
17346                &object.body,
17347                format,
17348            )?
17349        }
17350        None => Cow::Borrowed(&object.body),
17351    };
17352    prepare_blob_parent_dirs(worktree_root, &file_path)?;
17353    remove_existing_worktree_path(&file_path)?;
17354    fs::write(&file_path, &body)?;
17355    set_worktree_file_mode(&file_path, entry.mode)?;
17356    let metadata = fs::symlink_metadata(&file_path)?;
17357    Ok(Some(index_entry_with_refreshed_stat(entry, &metadata)))
17358}
17359
17360fn index_entry_with_refreshed_stat(entry: &IndexEntry, metadata: &fs::Metadata) -> IndexEntry {
17361    let mut refreshed = index_entry_from_metadata(entry.path.clone(), entry.oid, metadata);
17362    refreshed.mode = entry.mode;
17363    refreshed.flags = entry.flags;
17364    refreshed.flags_extended = entry.flags_extended;
17365    refreshed
17366}
17367
17368fn restored_head_index_entry(
17369    _worktree_root: &Path,
17370    _db: &FileObjectDatabase,
17371    path: &[u8],
17372    entry: &TrackedEntry,
17373) -> Result<IndexEntry> {
17374    // This restores the index from a tree (reset --mixed / stash / sparse) WITHOUT
17375    // rewriting the worktree file, so the file on disk may hold different content
17376    // than `entry.oid`. Crucially we must NOT copy the worktree file's stat onto
17377    // this entry: that would make the cached stat match a file whose real content
17378    // hashes to a DIFFERENT oid, breaking git's "stat-match implies oid-match"
17379    // invariant that the status stat-cache relies on. Leave the whole stat tuple
17380    // zeroed, including size, so `reset --mixed --no-refresh` remains stat-dirty
17381    // until an explicit/default refresh validates it (t7102 cell 28).
17382    Ok(IndexEntry {
17383        ctime_seconds: 0,
17384        ctime_nanoseconds: 0,
17385        mtime_seconds: 0,
17386        mtime_nanoseconds: 0,
17387        dev: 0,
17388        ino: 0,
17389        mode: entry.mode,
17390        uid: 0,
17391        gid: 0,
17392        size: 0,
17393        oid: entry.oid,
17394        flags: path.len().min(0x0fff) as u16,
17395        flags_extended: 0,
17396        path: BString::from(path),
17397    })
17398}
17399
17400fn restore_head_entry_to_worktree(
17401    worktree_root: &Path,
17402    db: &FileObjectDatabase,
17403    path: &[u8],
17404    entry: &TrackedEntry,
17405) -> Result<()> {
17406    // Route through the single gitlink-aware materializer: a gitlink has no blob
17407    // here, so `write_worktree_blob_entry` would fail reading the commit-oid as
17408    // a blob. `materialize_tree_entry` owns the gitlink-vs-blob decision (mkdir
17409    // the submodule dir) in ONE place. The returned index entry is unused on
17410    // this worktree-only restore path.
17411    materialize_tree_entry(db, worktree_root, path, entry)?;
17412    Ok(())
17413}
17414
17415fn restore_head_entry_to_worktree_and_index(
17416    worktree_root: &Path,
17417    db: &FileObjectDatabase,
17418    path: &[u8],
17419    entry: &TrackedEntry,
17420) -> Result<IndexEntry> {
17421    // Route through the single gitlink-aware materializer rather than calling
17422    // `write_worktree_blob_entry` directly: a gitlink (mode 160000) has no blob
17423    // in this object store, so the blob read would fail with "not found: blob
17424    // object <commit-oid>". `materialize_tree_entry` owns the
17425    // gitlink-vs-blob/symlink decision (mkdir the submodule dir, never read an
17426    // object) in ONE place, so `checkout <tree> -- <gitlink-path>` /
17427    // `restore --source` inherit the same gitlink correctness as `reset --hard`.
17428    materialize_tree_entry(db, worktree_root, path, entry)
17429}
17430
17431fn index_has_entry_under(entries: &[IndexEntry], directory: &[u8]) -> bool {
17432    entries
17433        .iter()
17434        .any(|entry| index_entry_is_under_path(entry.path.as_bytes(), directory))
17435}
17436
/// Reports whether `entry_path` lies strictly beneath `directory`.
///
/// An empty `directory` stands for the root and contains every path.
/// Otherwise `entry_path` must start with `directory` immediately followed by
/// a `/`, so a path equal to `directory`, or one that merely shares it as a
/// name prefix, does not count.
fn index_entry_is_under_path(entry_path: &[u8], directory: &[u8]) -> bool {
    directory.is_empty() || matches!(entry_path.strip_prefix(directory), Some([b'/', ..]))
}
17446
17447fn index_entry_from_metadata(
17448    path: impl Into<BString>,
17449    oid: ObjectId,
17450    metadata: &fs::Metadata,
17451) -> IndexEntry {
17452    let modified = metadata.modified().ok();
17453    let duration = modified
17454        .and_then(|time| time.duration_since(UNIX_EPOCH).ok())
17455        .unwrap_or_default();
17456    let mode = file_mode(metadata);
17457    let path = path.into();
17458    let flags = path.len().min(0x0fff) as u16;
17459    let mut entry = IndexEntry {
17460        ctime_seconds: duration.as_secs().min(u32::MAX as u64) as u32,
17461        ctime_nanoseconds: duration.subsec_nanos(),
17462        mtime_seconds: duration.as_secs().min(u32::MAX as u64) as u32,
17463        mtime_nanoseconds: duration.subsec_nanos(),
17464        dev: 0,
17465        ino: 0,
17466        mode,
17467        uid: 0,
17468        gid: 0,
17469        size: index_size_from_metadata(metadata),
17470        oid,
17471        flags,
17472        flags_extended: 0,
17473        path,
17474    };
17475    apply_unix_metadata_to_index_entry(&mut entry, metadata);
17476    entry
17477}
17478
17479fn index_entry_from_metadata_with_filemode(
17480    path: impl Into<BString>,
17481    oid: ObjectId,
17482    metadata: &fs::Metadata,
17483    trust_filemode: bool,
17484) -> IndexEntry {
17485    let mut entry = index_entry_from_metadata(path, oid, metadata);
17486    entry.mode = file_mode_with_trust(metadata, trust_filemode);
17487    entry
17488}
17489
17490fn trust_executable_bit_from_git_dir(git_dir: &Path, config_parameters_env: Option<&str>) -> bool {
17491    sley_config::read_repo_config(git_dir, config_parameters_env)
17492        .ok()
17493        .as_ref()
17494        .map(trust_executable_bit)
17495        .unwrap_or(true)
17496}
17497
17498fn trust_executable_bit(config: &GitConfig) -> bool {
17499    config.get_bool("core", None, "filemode").unwrap_or(true)
17500}
17501
17502fn trust_symlinks_from_git_dir(git_dir: &Path, config_parameters_env: Option<&str>) -> bool {
17503    sley_config::read_repo_config(git_dir, config_parameters_env)
17504        .ok()
17505        .as_ref()
17506        .map(trust_symlinks)
17507        .unwrap_or(true)
17508}
17509
17510fn trust_symlinks(config: &GitConfig) -> bool {
17511    config.get_bool("core", None, "symlinks").unwrap_or(true)
17512}
17513
17514fn preferred_unmerged_mode_for_untrusted_worktree(
17515    entries: &[IndexEntry],
17516    trust_filemode: bool,
17517    trust_symlinks: bool,
17518) -> Option<u32> {
17519    if trust_filemode && trust_symlinks {
17520        return None;
17521    }
17522    let preferred = entries
17523        .iter()
17524        .find(|entry| entry.stage() == Stage::Ours)
17525        .or_else(|| entries.iter().find(|entry| entry.stage() == Stage::Base))?;
17526    if (!trust_symlinks && preferred.mode == 0o120000)
17527        || (!trust_filemode && matches!(preferred.mode, 0o100644 | 0o100755))
17528    {
17529        Some(preferred.mode)
17530    } else {
17531        None
17532    }
17533}
17534
17535fn file_mode_with_trust(metadata: &fs::Metadata, trust_filemode: bool) -> u32 {
17536    if trust_filemode {
17537        file_mode(metadata)
17538    } else {
17539        0o100644
17540    }
17541}
17542
17543#[cfg(unix)]
17544fn apply_unix_metadata_to_index_entry(entry: &mut IndexEntry, metadata: &fs::Metadata) {
17545    use std::os::unix::fs::MetadataExt;
17546
17547    entry.ctime_seconds = metadata.ctime().min(u32::MAX as i64).max(0) as u32;
17548    entry.ctime_nanoseconds = metadata.ctime_nsec().min(u32::MAX as i64).max(0) as u32;
17549    entry.dev = metadata.dev() as u32;
17550    entry.ino = metadata.ino() as u32;
17551    entry.uid = metadata.uid();
17552    entry.gid = metadata.gid();
17553}
17554
17555#[cfg(not(unix))]
17556fn apply_unix_metadata_to_index_entry(_entry: &mut IndexEntry, _metadata: &fs::Metadata) {}
17557
/// File length as stored in an index entry: the 64-bit length reported by
/// `metadata`, saturated at `u32::MAX`.
fn index_size_from_metadata(metadata: &fs::Metadata) -> u32 {
    let length = metadata.len();
    if length > u64::from(u32::MAX) {
        u32::MAX
    } else {
        length as u32
    }
}
17561
17562fn read_expected_object(
17563    db: &FileObjectDatabase,
17564    oid: &ObjectId,
17565    expected: ObjectType,
17566) -> Result<std::sync::Arc<EncodedObject>> {
17567    let object = db
17568        .read_object(oid)
17569        .map_err(|err| expect_missing_object_kind(err, *oid, missing_kind_for_type(expected)))?;
17570    if object.object_type != expected {
17571        return Err(GitError::InvalidObject(format!(
17572            "expected {} {}, found {}",
17573            expected.as_str(),
17574            oid,
17575            object.object_type.as_str()
17576        )));
17577    }
17578    Ok(object)
17579}
17580
17581fn expect_missing_object_kind(
17582    err: GitError,
17583    oid: ObjectId,
17584    expected: MissingObjectKind,
17585) -> GitError {
17586    match err.not_found_kind() {
17587        Some(sley_core::NotFoundKind::Object { .. }) => GitError::object_kind_not_found_in(
17588            oid,
17589            expected,
17590            MissingObjectContext::WorktreeMaterialize,
17591        ),
17592        _ => err,
17593    }
17594}
17595
17596fn missing_kind_for_type(object_type: ObjectType) -> MissingObjectKind {
17597    match object_type {
17598        ObjectType::Blob => MissingObjectKind::Blob,
17599        ObjectType::Tree => MissingObjectKind::Tree,
17600        ObjectType::Commit => MissingObjectKind::Commit,
17601        ObjectType::Tag => MissingObjectKind::Tag,
17602    }
17603}
17604
17605fn read_commit(db: &FileObjectDatabase, format: ObjectFormat, oid: &ObjectId) -> Result<Commit> {
17606    let object = read_expected_object(db, oid, ObjectType::Commit)?;
17607    Commit::parse(format, &object.body)
17608}
17609
/// The `(mode, oid)` pair recorded for a single path. The same shape is used
/// for entries read from the index, flattened from a tree, and observed in the
/// worktree, so the three can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
struct TrackedEntry {
    /// Git file mode of the entry (for example `0o100644`, `0o120000`, or the
    /// gitlink mode).
    mode: u32,
    /// Object id recorded for the path. Worktree lookups use the null id for
    /// paths whose content is not hashed (directories, special files).
    oid: ObjectId,
}
17615
/// git's racy-git stat cache: the stage-0 index entries keyed by path (so the
/// worktree walk can reuse a cached oid when a file's stat shows it is unchanged
/// since it was staged) plus the index *file's* own mtime, which git uses as the
/// racy-clean reference timestamp.
///
/// SAFETY INVARIANT: trusting a cached oid by stat alone is only sound because
/// every code path that stamps a worktree stat onto an index entry also hashed
/// that exact file content (see `index_entry_from_metadata`), while tree-sourced
/// restores (reset --mixed / stash / sparse) leave the stat zeroed
/// (`restored_head_index_entry`). So a non-zero, non-racy stat match implies the
/// cached oid is the file's true content. When that does not hold we fall through
/// to a full read+filter+hash, so a modified file is never reported clean.
#[derive(Debug, Clone, Default)]
struct IndexStatCache {
    /// Stage-0 index entries keyed by repository path bytes. Left empty when
    /// the cache is built only to carry the racy-clean reference timestamp.
    entries: HashMap<Vec<u8>, IndexEntry>,
    /// The index file's modification time as `(seconds, nanoseconds)`, or `None`
    /// when it could not be determined. Used as git's racy-clean reference.
    index_mtime: Option<(u64, u64)>,
}
17635
17636impl IndexStatCache {
17637    /// Builds the cache from an already-parsed index plus the path of the index
17638    /// file on disk (whose mtime becomes the racy-clean reference). Only stage-0
17639    /// entries are retained; higher merge stages never describe a worktree file.
17640    fn from_index(index: &Index, index_path: &Path) -> Self {
17641        let index_mtime = fs::metadata(index_path)
17642            .ok()
17643            .and_then(|metadata| file_mtime_parts(&metadata));
17644        Self::from_index_mtime(index, index_mtime)
17645    }
17646
17647    fn from_index_mtime(index: &Index, index_mtime: Option<(u64, u64)>) -> Self {
17648        IndexStatCache {
17649            entries: stage0_index_entries(index),
17650            index_mtime,
17651        }
17652    }
17653
17654    fn from_index_mtime_only(index_mtime: Option<(u64, u64)>) -> Self {
17655        IndexStatCache {
17656            entries: HashMap::new(),
17657            index_mtime,
17658        }
17659    }
17660
17661    /// Whether `entry` is "racily clean" in git's sense: its cached mtime is not
17662    /// strictly older than the index file's mtime, so a same-timestamp write
17663    /// could have changed the content without moving the stat. Such entries must
17664    /// always be re-hashed.
17665    ///
17666    /// Conservative by construction: if the index mtime is unknown, or either
17667    /// side's mtime is zero (e.g. a tree-sourced entry whose stat was left
17668    /// zeroed), this returns `true` so the caller re-hashes rather than trusting
17669    /// a stat we cannot prove safe.
17670    fn is_racily_clean(&self, entry: &IndexEntry) -> bool {
17671        let Some(index_mtime) = self.index_mtime else {
17672            return true;
17673        };
17674        if index_mtime == (0, 0) {
17675            return true;
17676        }
17677        let entry_mtime = (
17678            u64::from(entry.mtime_seconds),
17679            u64::from(entry.mtime_nanoseconds),
17680        );
17681        if entry_mtime == (0, 0) {
17682            return true;
17683        }
17684        // Racy unless the index was written strictly after the entry's mtime.
17685        index_mtime <= entry_mtime
17686    }
17687
17688    fn is_racily_clean_ref(&self, entry: &IndexEntryRef<'_>) -> bool {
17689        let Some(index_mtime) = self.index_mtime else {
17690            return true;
17691        };
17692        if index_mtime == (0, 0) {
17693            return true;
17694        }
17695        let entry_mtime = (
17696            u64::from(entry.mtime_seconds),
17697            u64::from(entry.mtime_nanoseconds),
17698        );
17699        if entry_mtime == (0, 0) {
17700            return true;
17701        }
17702        index_mtime <= entry_mtime
17703    }
17704
17705    /// Whether the index has a stage-0 entry for `git_path` (i.e. the path is
17706    /// tracked). Used to skip hashing untracked worktree files.
17707    fn contains(&self, git_path: &[u8]) -> bool {
17708        self.entries.contains_key(git_path)
17709    }
17710
17711    fn tracked_entry(&self, git_path: &[u8]) -> Option<TrackedEntry> {
17712        self.entries.get(git_path).map(|entry| TrackedEntry {
17713            mode: entry.mode,
17714            oid: entry.oid,
17715        })
17716    }
17717
17718    fn index_entry(&self, git_path: &[u8]) -> Option<&IndexEntry> {
17719        self.entries.get(git_path)
17720    }
17721
17722    /// Returns the cached [`TrackedEntry`] for `git_path` (reusing its stored
17723    /// oid, so the caller can SKIP reading, filtering, and hashing the file) only
17724    /// when the worktree file is provably unchanged since it was staged: a
17725    /// stage-0 entry exists, its recorded mode matches the file's current mode
17726    /// (catching pure `chmod`s that do not move mtime), the size+mtime stat
17727    /// check passes, and the entry is not racily clean. Otherwise returns `None`
17728    /// and the caller hashes the file as usual.
17729    fn reuse_tracked_entry(
17730        &self,
17731        git_path: &[u8],
17732        worktree_metadata: &fs::Metadata,
17733    ) -> Option<TrackedEntry> {
17734        let entry = self.entries.get(git_path)?;
17735        self.reuse_index_entry(entry, worktree_metadata)
17736    }
17737
17738    fn reuse_index_entry(
17739        &self,
17740        entry: &IndexEntry,
17741        worktree_metadata: &fs::Metadata,
17742    ) -> Option<TrackedEntry> {
17743        // Gitlink: reusable as-is whenever the worktree path is a directory (a
17744        // submodule is never re-hashed; its cached stat is ignored). Routes
17745        // through the single `sley_index::gitlink_stat_verdict` rule so the
17746        // gitlink-vs-040000 mode mismatch never spuriously rejects it.
17747        if sley_index::is_gitlink(entry.mode) {
17748            return match sley_index::gitlink_stat_verdict(worktree_metadata) {
17749                sley_index::GitlinkStatVerdict::Populated => Some(TrackedEntry {
17750                    mode: entry.mode,
17751                    oid: entry.oid,
17752                }),
17753                sley_index::GitlinkStatVerdict::TypeChanged => None,
17754            };
17755        }
17756        if entry.mode != worktree_entry_mode(worktree_metadata) {
17757            return None;
17758        }
17759        if !worktree_entry_is_uptodate(entry, worktree_metadata) {
17760            return None;
17761        }
17762        if self.is_racily_clean(entry) {
17763            return None;
17764        }
17765        Some(TrackedEntry {
17766            mode: entry.mode,
17767            oid: entry.oid,
17768        })
17769    }
17770
17771    fn reuse_index_entry_for_checkout(
17772        &self,
17773        entry: &IndexEntry,
17774        worktree_metadata: &fs::Metadata,
17775    ) -> Option<TrackedEntry> {
17776        if let Some(tracked) = self.reuse_index_entry(entry, worktree_metadata) {
17777            return Some(tracked);
17778        }
17779        if u64::from(entry.size) != 0 || worktree_metadata.len() == 0 {
17780            return None;
17781        }
17782        if entry.mode != worktree_entry_mode(worktree_metadata) {
17783            return None;
17784        }
17785        let (mtime_seconds, mtime_nanoseconds) = file_mtime_parts(worktree_metadata)?;
17786        if u64::from(entry.mtime_seconds) != mtime_seconds
17787            || u64::from(entry.mtime_nanoseconds) != mtime_nanoseconds
17788        {
17789            return None;
17790        }
17791        if self.is_racily_clean(entry) {
17792            return None;
17793        }
17794        Some(TrackedEntry {
17795            mode: entry.mode,
17796            oid: entry.oid,
17797        })
17798    }
17799
17800    fn reuse_index_entry_ref(
17801        &self,
17802        entry: &IndexEntryRef<'_>,
17803        worktree_metadata: &fs::Metadata,
17804    ) -> Option<TrackedEntry> {
17805        if sley_index::is_gitlink(entry.mode) {
17806            return match sley_index::gitlink_stat_verdict(worktree_metadata) {
17807                sley_index::GitlinkStatVerdict::Populated => Some(TrackedEntry {
17808                    mode: entry.mode,
17809                    oid: entry.oid,
17810                }),
17811                sley_index::GitlinkStatVerdict::TypeChanged => None,
17812            };
17813        }
17814        if entry.mode != worktree_entry_mode(worktree_metadata) {
17815            return None;
17816        }
17817        if !worktree_entry_ref_is_uptodate(entry, worktree_metadata) {
17818            return None;
17819        }
17820        if self.is_racily_clean_ref(entry) {
17821            return None;
17822        }
17823        Some(TrackedEntry {
17824            mode: entry.mode,
17825            oid: entry.oid,
17826        })
17827    }
17828
17829    /// The stage-0 gitlink (mode 160000) index entry at `git_path`, if any.
17830    fn gitlink_entry(&self, git_path: &[u8]) -> Option<&IndexEntry> {
17831        self.entries
17832            .get(git_path)
17833            .filter(|entry| sley_index::is_gitlink(entry.mode))
17834    }
17835}
17836
17837fn read_index_entries(
17838    git_dir: &Path,
17839    format: ObjectFormat,
17840) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
17841    let db = FileObjectDatabase::from_git_dir(git_dir, format);
17842    Ok(read_index_entries_with_stat_cache(git_dir, format, &db)?.0)
17843}
17844
17845fn read_all_index_paths(git_dir: &Path, format: ObjectFormat) -> Result<BTreeSet<Vec<u8>>> {
17846    let index_path = repository_index_path(git_dir);
17847    let bytes = match fs::read(index_path) {
17848        Ok(bytes) => bytes,
17849        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(BTreeSet::new()),
17850        Err(err) => return Err(err.into()),
17851    };
17852    let index = Index::parse(&bytes, format)?;
17853    Ok(index
17854        .entries
17855        .into_iter()
17856        .map(|entry| entry.path.into_bytes())
17857        .collect())
17858}
17859
17860fn resolve_head_tree_oid(
17861    git_dir: &Path,
17862    format: ObjectFormat,
17863    db: &FileObjectDatabase,
17864) -> Result<Option<ObjectId>> {
17865    let Some(commit_oid) = resolve_head_commit_oid(git_dir, format)? else {
17866        return Ok(None);
17867    };
17868    if let Some(tree_oid) = sley_rev::commit_graph_tree_oid(git_dir, format, &commit_oid)? {
17869        return Ok(Some(tree_oid));
17870    }
17871    let object = read_expected_object(db, &commit_oid, ObjectType::Commit)?;
17872    let commit = Commit::parse_ref(format, &object.body)?;
17873    Ok(Some(commit.tree))
17874}
17875
17876fn resolve_head_commit_oid(git_dir: &Path, format: ObjectFormat) -> Result<Option<ObjectId>> {
17877    let refs = FileRefStore::new(git_dir, format);
17878    sley_refs::resolve_ref_peeled(&refs, "HEAD")
17879}
17880
17881fn status_row_is_untracked_or_ignored(entry: ShortStatusRow<'_>) -> bool {
17882    matches!((entry.index, entry.worktree), (b'?', b'?') | (b'!', b'!'))
17883}
17884
17885fn checkout_switch_head_symbolic(
17886    refs: &FileRefStore,
17887    branch_ref: String,
17888    committer: Vec<u8>,
17889    branch: &str,
17890    old_oid: Option<ObjectId>,
17891    new_oid: Option<ObjectId>,
17892) -> Result<()> {
17893    // Reflog "from" side: the previous branch's short name, or the commit id
17894    // when HEAD was detached (git's `checkout: moving from X to Y` shape,
17895    // which `@{-N}` resolution parses).
17896    let from = match refs.read_ref("HEAD") {
17897        Ok(Some(RefTarget::Symbolic(name))) => name
17898            .strip_prefix("refs/heads/")
17899            .unwrap_or(&name)
17900            .to_string(),
17901        Ok(Some(RefTarget::Direct(oid))) => oid.to_hex(),
17902        _ => "HEAD".to_string(),
17903    };
17904    let mut tx = refs.transaction();
17905    let reflog = match (old_oid, new_oid) {
17906        (Some(old_oid), Some(new_oid)) => Some(ReflogEntry {
17907            old_oid,
17908            new_oid,
17909            committer,
17910            message: format!("checkout: moving from {from} to {branch}").into_bytes(),
17911        }),
17912        _ => None,
17913    };
17914    tx.update(RefUpdate {
17915        name: "HEAD".into(),
17916        expected: None,
17917        new: RefTarget::Symbolic(branch_ref),
17918        reflog,
17919    });
17920    tx.commit()
17921}
17922
17923fn cache_tree_is_valid(tree: &CacheTree) -> bool {
17924    if tree.entry_count < 0 || tree.oid.is_none() {
17925        return false;
17926    }
17927    tree.subtrees
17928        .iter()
17929        .all(|child| cache_tree_is_valid(&child.tree))
17930}
17931
17932fn head_matches_index_from_cache_tree(
17933    index: &Index,
17934    format: ObjectFormat,
17935    head_tree_oid: &ObjectId,
17936    stage0_entry_count: usize,
17937) -> Result<bool> {
17938    let cache_tree = match index.cache_tree(format) {
17939        Ok(Some(cache_tree)) => cache_tree,
17940        Ok(None) | Err(_) => return Ok(false),
17941    };
17942    if !cache_tree_is_valid(&cache_tree) {
17943        return Ok(false);
17944    }
17945    let Some(root_oid) = cache_tree.oid.as_ref() else {
17946        return Ok(false);
17947    };
17948    if root_oid != head_tree_oid {
17949        return Ok(false);
17950    }
17951    Ok(cache_tree.entry_count as usize == stage0_entry_count)
17952}
17953
17954fn head_matches_borrowed_index_from_cache_tree(
17955    index: &BorrowedIndex<'_>,
17956    format: ObjectFormat,
17957    head_tree_oid: &ObjectId,
17958    stage0_entry_count: usize,
17959) -> Result<bool> {
17960    let cache_tree = match index.cache_tree(format) {
17961        Ok(Some(cache_tree)) => cache_tree,
17962        Ok(None) | Err(_) => return Ok(false),
17963    };
17964    if !cache_tree_is_valid(&cache_tree) {
17965        return Ok(false);
17966    }
17967    let Some(root_oid) = cache_tree.oid.as_ref() else {
17968        return Ok(false);
17969    };
17970    if root_oid != head_tree_oid {
17971        return Ok(false);
17972    }
17973    Ok(cache_tree.entry_count as usize == stage0_entry_count)
17974}
17975
17976/// Parses the index a single time and returns both the path -> [`TrackedEntry`]
17977/// map used for status comparisons AND the [`IndexStatCache`] used to short-cut
17978/// the worktree walk, avoiding a second parse of the same file.
17979fn read_index_entries_with_stat_cache(
17980    git_dir: &Path,
17981    format: ObjectFormat,
17982    db: &FileObjectDatabase,
17983) -> Result<(BTreeMap<Vec<u8>, TrackedEntry>, IndexStatCache, bool)> {
17984    let (index, stat_cache, head_matches_index) = read_index_with_stat_cache(git_dir, format, db)?;
17985    let tracked = index_entries_from_index(index);
17986    Ok((tracked, stat_cache, head_matches_index))
17987}
17988
17989fn index_entries_from_index(index: Index) -> BTreeMap<Vec<u8>, TrackedEntry> {
17990    index
17991        .entries
17992        .into_iter()
17993        .filter(|entry| entry.stage() == Stage::Normal)
17994        .map(|entry| {
17995            (
17996                entry.path.into_bytes(),
17997                TrackedEntry {
17998                    mode: entry.mode,
17999                    oid: entry.oid,
18000                },
18001            )
18002        })
18003        .collect()
18004}
18005
18006fn read_index_with_stat_cache(
18007    git_dir: &Path,
18008    format: ObjectFormat,
18009    db: &FileObjectDatabase,
18010) -> Result<(Index, IndexStatCache, bool)> {
18011    read_index_with_stat_cache_entries(git_dir, format, db, true)
18012}
18013
18014fn read_index_with_stat_cache_entries(
18015    git_dir: &Path,
18016    format: ObjectFormat,
18017    db: &FileObjectDatabase,
18018    include_entries: bool,
18019) -> Result<(Index, IndexStatCache, bool)> {
18020    let index_path = repository_index_path(git_dir);
18021    let index_metadata = match fs::metadata(&index_path) {
18022        Ok(metadata) => metadata,
18023        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
18024            return Ok((
18025                Index {
18026                    version: 2,
18027                    entries: Vec::new(),
18028                    extensions: Vec::new(),
18029                    checksum: None,
18030                },
18031                IndexStatCache::default(),
18032                false,
18033            ));
18034        }
18035        Err(err) => return Err(err.into()),
18036    };
18037    let index = sley_index::read_repository_index(git_dir, format)?;
18038    let index_mtime = file_mtime_parts(&index_metadata);
18039    let stage0_entry_count = index
18040        .entries
18041        .iter()
18042        .filter(|entry| index_entry_stage(entry) == 0)
18043        .count();
18044    let stat_cache = if include_entries {
18045        IndexStatCache::from_index_mtime(&index, index_mtime)
18046    } else {
18047        IndexStatCache::from_index_mtime_only(index_mtime)
18048    };
18049    let head_matches_index = match resolve_head_tree_oid(git_dir, format, db)? {
18050        Some(head_tree_oid) => {
18051            head_matches_index_from_cache_tree(&index, format, &head_tree_oid, stage0_entry_count)?
18052        }
18053        None => false,
18054    };
18055    Ok((index, stat_cache, head_matches_index))
18056}
18057
18058fn head_tree_entries(
18059    git_dir: &Path,
18060    format: ObjectFormat,
18061    db: &FileObjectDatabase,
18062) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
18063    let refs = FileRefStore::new(git_dir, format);
18064    let Some(head) = refs.read_ref("HEAD")? else {
18065        return Ok(BTreeMap::new());
18066    };
18067    let commit_oid = match head {
18068        RefTarget::Direct(oid) => Some(oid),
18069        RefTarget::Symbolic(name) => match refs.read_ref(&name)? {
18070            Some(RefTarget::Direct(oid)) => Some(oid),
18071            _ => None,
18072        },
18073    };
18074    let Some(commit_oid) = commit_oid else {
18075        return Ok(BTreeMap::new());
18076    };
18077    let object = read_expected_object(db, &commit_oid, ObjectType::Commit)?;
18078    let commit = Commit::parse_ref(format, &object.body)?;
18079    let mut entries = BTreeMap::new();
18080    collect_tree_entries(db, format, &commit.tree, &mut entries)?;
18081    Ok(entries)
18082}
18083
18084fn tree_entries(
18085    db: &FileObjectDatabase,
18086    format: ObjectFormat,
18087    tree_oid: &ObjectId,
18088) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
18089    let mut entries = BTreeMap::new();
18090    collect_tree_entries(db, format, tree_oid, &mut entries)?;
18091    Ok(entries)
18092}
18093
18094/// Flatten a tree's blob leaves into `entries`, keyed by full path.
18095///
18096/// Delegates to the canonical [`sley_diff_merge::flatten_tree`] (the local
18097/// recursive flattener was a byte-identical copy) and adapts its
18098/// `(mode, oid)` tuples into this module's [`TrackedEntry`]. Entries already
18099/// present in `entries` are overwritten, matching the previous insert-based
18100/// behaviour.
18101fn collect_tree_entries(
18102    db: &FileObjectDatabase,
18103    format: ObjectFormat,
18104    tree_oid: &ObjectId,
18105    entries: &mut BTreeMap<Vec<u8>, TrackedEntry>,
18106) -> Result<()> {
18107    for (path, (mode, oid)) in sley_diff_merge::flatten_tree(db, format, tree_oid)? {
18108        entries.insert(path, TrackedEntry { mode, oid });
18109    }
18110    Ok(())
18111}
18112
18113/// Like a full worktree walk, but accepts the index's [`IndexStatCache`] so the
18114/// walk can reuse a cached oid for files that are provably unchanged since they
18115/// were staged, skipping the read+filter+hash for those paths. Passing `None`
18116/// hashes every file when no stat cache is supplied.
18117fn worktree_entries_with_stat_cache(
18118    worktree_root: &Path,
18119    git_dir: &Path,
18120    format: ObjectFormat,
18121    stat_cache: Option<&IndexStatCache>,
18122    tracked_paths: Option<&BTreeSet<Vec<u8>>>,
18123    ignores: Option<&mut IgnoreMatcher>,
18124) -> Result<BTreeMap<Vec<u8>, TrackedEntry>> {
18125    Ok(worktree_entries_with_submodule_dirt(
18126        worktree_root,
18127        git_dir,
18128        format,
18129        stat_cache,
18130        tracked_paths,
18131        ignores,
18132    )?
18133    .0)
18134}
18135
/// Tracked worktree entries keyed by repo path, plus the dirt mask
/// ([`DIRTY_SUBMODULE_MODIFIED`] / [`DIRTY_SUBMODULE_UNTRACKED`]) for every
/// tracked gitlink path whose submodule working tree is dirty.
///
/// Tuple order: `(entries, submodule dirt masks)`.
type WorktreeEntriesWithDirt = (BTreeMap<Vec<u8>, TrackedEntry>, BTreeMap<Vec<u8>, u8>);
18140
/// Status worktree snapshot: tracked/untracked entries, gitlink dirt masks, and
/// tracked paths observed in the worktree.
///
/// Tuple order: `(entries, submodule dirt masks, tracked paths seen)`.
type StatusWorktreeSnapshot = (
    BTreeMap<Vec<u8>, TrackedEntry>,
    BTreeMap<Vec<u8>, u8>,
    HashSet<Vec<u8>>,
);
18148
18149/// Like [`worktree_entries_with_stat_cache`], but also reports, for every
18150/// tracked gitlink path whose submodule working tree is dirty, the dirt mask
18151/// ([`DIRTY_SUBMODULE_MODIFIED`] / [`DIRTY_SUBMODULE_UNTRACKED`]).
18152fn worktree_entries_with_submodule_dirt(
18153    worktree_root: &Path,
18154    git_dir: &Path,
18155    format: ObjectFormat,
18156    stat_cache: Option<&IndexStatCache>,
18157    tracked_paths: Option<&BTreeSet<Vec<u8>>>,
18158    ignores: Option<&mut IgnoreMatcher>,
18159) -> Result<WorktreeEntriesWithDirt> {
18160    let mut entries = BTreeMap::new();
18161    let mut submodule_dirt_map = BTreeMap::new();
18162    let mut tracked_presence = HashSet::new();
18163    // Worktree blobs are compared to the index by OID, so they must be passed
18164    // through the clean filter (core.autocrlf / .gitattributes) first -- exactly
18165    // as `git add` would store them. With no filter configured this is an exact
18166    // passthrough, so unfiltered repositories see identical OIDs.
18167    let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
18168    // Seed the matcher with the repo-wide sources only; each directory's
18169    // `.gitattributes` is folded in by `collect_worktree_entries` as it descends,
18170    // so the worktree is read exactly once (a separate full-tree attribute pass was
18171    // a second traversal of every directory).
18172    let mut attr_matcher = AttributeMatcher::from_worktree_base(worktree_root);
18173    let attr_requested = filter_attribute_names();
18174    let mut context = WorktreeEntriesWalk {
18175        git_dir,
18176        format,
18177        config: &config,
18178        matcher: &mut attr_matcher,
18179        requested: &attr_requested,
18180        stat_cache,
18181        known_tracked_paths: tracked_paths,
18182        tracked_paths,
18183        ignores,
18184        entries: &mut entries,
18185        submodule_dirt: &mut submodule_dirt_map,
18186        tracked_presence: &mut tracked_presence,
18187        record_clean_tracked: true,
18188    };
18189    collect_worktree_entries(&mut context, worktree_root, &[])?;
18190    Ok((entries, submodule_dirt_map))
18191}
18192
18193fn status_worktree_entries_with_submodule_dirt(
18194    worktree_root: &Path,
18195    git_dir: &Path,
18196    format: ObjectFormat,
18197    stat_cache: &IndexStatCache,
18198    known_tracked_paths: Option<&BTreeSet<Vec<u8>>>,
18199    tracked_paths: Option<&BTreeSet<Vec<u8>>>,
18200    ignores: Option<&mut IgnoreMatcher>,
18201) -> Result<StatusWorktreeSnapshot> {
18202    let mut entries = BTreeMap::new();
18203    let mut submodule_dirt_map = BTreeMap::new();
18204    let mut tracked_presence = HashSet::new();
18205    let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
18206    let mut attr_matcher = AttributeMatcher::from_worktree_base(worktree_root);
18207    let attr_requested = filter_attribute_names();
18208    let mut context = WorktreeEntriesWalk {
18209        git_dir,
18210        format,
18211        config: &config,
18212        matcher: &mut attr_matcher,
18213        requested: &attr_requested,
18214        stat_cache: Some(stat_cache),
18215        known_tracked_paths,
18216        tracked_paths,
18217        ignores,
18218        entries: &mut entries,
18219        submodule_dirt: &mut submodule_dirt_map,
18220        tracked_presence: &mut tracked_presence,
18221        record_clean_tracked: false,
18222    };
18223    collect_worktree_entries(&mut context, worktree_root, &[])?;
18224    Ok((entries, submodule_dirt_map, tracked_presence))
18225}
18226
18227fn worktree_entry_for_git_path(
18228    worktree_root: &Path,
18229    git_dir: &Path,
18230    format: ObjectFormat,
18231    git_path: &[u8],
18232    expected_oid: &ObjectId,
18233    expected_mode: u32,
18234    stat_cache: Option<&IndexStatCache>,
18235) -> Result<Option<TrackedEntry>> {
18236    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
18237    let metadata = match fs::symlink_metadata(&absolute) {
18238        Ok(metadata) => metadata,
18239        Err(err)
18240            if matches!(
18241                err.kind(),
18242                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
18243            ) =>
18244        {
18245            return Ok(None);
18246        }
18247        Err(err) => return Err(err.into()),
18248    };
18249
18250    if sley_index::is_gitlink(expected_mode) {
18251        if !metadata.is_dir() {
18252            return Ok(Some(TrackedEntry {
18253                mode: worktree_entry_mode(&metadata),
18254                oid: ObjectId::null(format),
18255            }));
18256        }
18257        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(*expected_oid);
18258        return Ok(Some(TrackedEntry {
18259            mode: sley_index::GITLINK_MODE,
18260            oid,
18261        }));
18262    }
18263
18264    if metadata.is_dir() {
18265        return Ok(Some(TrackedEntry {
18266            mode: worktree_entry_mode(&metadata),
18267            oid: ObjectId::null(format),
18268        }));
18269    }
18270
18271    if !(metadata.is_file() || metadata.file_type().is_symlink()) {
18272        return Ok(Some(TrackedEntry {
18273            mode: worktree_entry_mode(&metadata),
18274            oid: ObjectId::null(format),
18275        }));
18276    }
18277
18278    if let Some(tracked) =
18279        stat_cache.and_then(|cache| cache.reuse_tracked_entry(git_path, &metadata))
18280    {
18281        return Ok(Some(tracked));
18282    }
18283
18284    let mode = worktree_entry_mode(&metadata);
18285    let body = if metadata.file_type().is_symlink() {
18286        symlink_target_bytes(&absolute)?
18287    } else {
18288        let config = sley_config::read_repo_config(git_dir, None).unwrap_or_default();
18289        let body = fs::read(&absolute)?;
18290        let clean = apply_clean_filter(worktree_root, git_dir, &config, git_path, &body)?;
18291        let oid = match stat_cache.and_then(|cache| cache.index_entry(git_path)) {
18292            Some(index_entry) => clean_filtered_oid_for_status(
18293                format,
18294                &body,
18295                clean,
18296                index_entry.oid,
18297                index_entry.size,
18298                &metadata,
18299            )?,
18300            None => EncodedObject::new(ObjectType::Blob, clean).object_id(format)?,
18301        };
18302        return Ok(Some(TrackedEntry { mode, oid }));
18303    };
18304    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
18305    Ok(Some(TrackedEntry { mode, oid }))
18306}
18307
18308fn worktree_entry_for_index_entry_with_attributes(
18309    worktree_root: &Path,
18310    git_dir: &Path,
18311    format: ObjectFormat,
18312    index_entry: &IndexEntry,
18313    stat_cache: &IndexStatCache,
18314    clean_filter: &mut Option<TrackedOnlyCleanFilter>,
18315) -> Result<Option<TrackedEntry>> {
18316    let git_path = index_entry.path.as_bytes();
18317    let expected_mode = index_entry.mode;
18318    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
18319    let metadata = match fs::symlink_metadata(&absolute) {
18320        Ok(metadata) => metadata,
18321        Err(err)
18322            if matches!(
18323                err.kind(),
18324                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
18325            ) =>
18326        {
18327            return Ok(None);
18328        }
18329        Err(err) => return Err(err.into()),
18330    };
18331    let file_type = metadata.file_type();
18332
18333    if sley_index::is_gitlink(expected_mode) {
18334        if !file_type.is_dir() {
18335            return Ok(Some(TrackedEntry {
18336                mode: worktree_entry_mode(&metadata),
18337                oid: ObjectId::null(format),
18338            }));
18339        }
18340        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(index_entry.oid);
18341        return Ok(Some(TrackedEntry {
18342            mode: sley_index::GITLINK_MODE,
18343            oid,
18344        }));
18345    }
18346
18347    if file_type.is_dir() {
18348        if expected_mode != 0o040000 {
18349            return Ok(None);
18350        }
18351        return Ok(Some(TrackedEntry {
18352            mode: worktree_entry_mode(&metadata),
18353            oid: ObjectId::null(format),
18354        }));
18355    }
18356
18357    if !(file_type.is_file() || file_type.is_symlink()) {
18358        return Ok(Some(TrackedEntry {
18359            mode: worktree_entry_mode(&metadata),
18360            oid: ObjectId::null(format),
18361        }));
18362    }
18363
18364    if let Some(tracked) = stat_cache.reuse_index_entry(index_entry, &metadata) {
18365        return Ok(Some(tracked));
18366    }
18367
18368    let mode = worktree_entry_mode(&metadata);
18369    let body = if file_type.is_symlink() {
18370        symlink_target_bytes(&absolute)?
18371    } else {
18372        let body = fs::read(&absolute)?;
18373        let clean_filter = tracked_only_clean_filter(clean_filter, worktree_root, git_dir);
18374        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
18375        let checks =
18376            clean_filter
18377                .matcher
18378                .attributes_for_path(git_path, &clean_filter.requested, false);
18379        let clean =
18380            apply_clean_filter_with_attributes(&clean_filter.config, &checks, git_path, &body)?;
18381        let oid = clean_filtered_oid_for_status(
18382            format,
18383            &body,
18384            clean,
18385            index_entry.oid,
18386            index_entry.size,
18387            &metadata,
18388        )?;
18389        return Ok(Some(TrackedEntry { mode, oid }));
18390    };
18391    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
18392    Ok(Some(TrackedEntry { mode, oid }))
18393}
18394
18395fn worktree_entry_for_index_entry_ref_with_attributes(
18396    worktree_root: &Path,
18397    git_dir: &Path,
18398    format: ObjectFormat,
18399    index_entry: &IndexEntryRef<'_>,
18400    stat_cache: &IndexStatCache,
18401    clean_filter: &mut Option<TrackedOnlyCleanFilter>,
18402) -> Result<Option<TrackedEntry>> {
18403    let git_path = index_entry.path;
18404    let expected_mode = index_entry.mode;
18405    let absolute = worktree_root.join(repo_path_to_os_path(git_path)?);
18406    let metadata = match fs::symlink_metadata(&absolute) {
18407        Ok(metadata) => metadata,
18408        Err(err)
18409            if matches!(
18410                err.kind(),
18411                std::io::ErrorKind::NotFound | std::io::ErrorKind::NotADirectory
18412            ) =>
18413        {
18414            return Ok(None);
18415        }
18416        Err(err) => return Err(err.into()),
18417    };
18418    let file_type = metadata.file_type();
18419
18420    if sley_index::is_gitlink(expected_mode) {
18421        if !file_type.is_dir() {
18422            return Ok(Some(TrackedEntry {
18423                mode: worktree_entry_mode(&metadata),
18424                oid: ObjectId::null(format),
18425            }));
18426        }
18427        let oid = sley_diff_merge::gitlink_head_oid(&absolute, format).unwrap_or(index_entry.oid);
18428        return Ok(Some(TrackedEntry {
18429            mode: sley_index::GITLINK_MODE,
18430            oid,
18431        }));
18432    }
18433
18434    if file_type.is_dir() {
18435        if expected_mode != 0o040000 {
18436            return Ok(None);
18437        }
18438        return Ok(Some(TrackedEntry {
18439            mode: worktree_entry_mode(&metadata),
18440            oid: ObjectId::null(format),
18441        }));
18442    }
18443
18444    if !(file_type.is_file() || file_type.is_symlink()) {
18445        return Ok(Some(TrackedEntry {
18446            mode: worktree_entry_mode(&metadata),
18447            oid: ObjectId::null(format),
18448        }));
18449    }
18450
18451    if let Some(tracked) = stat_cache.reuse_index_entry_ref(index_entry, &metadata) {
18452        return Ok(Some(tracked));
18453    }
18454
18455    let mode = worktree_entry_mode(&metadata);
18456    let body = if file_type.is_symlink() {
18457        symlink_target_bytes(&absolute)?
18458    } else {
18459        let body = fs::read(&absolute)?;
18460        let clean_filter = tracked_only_clean_filter(clean_filter, worktree_root, git_dir);
18461        clean_filter.read_attributes_for_path(worktree_root, git_path)?;
18462        let checks =
18463            clean_filter
18464                .matcher
18465                .attributes_for_path(git_path, &clean_filter.requested, false);
18466        let clean =
18467            apply_clean_filter_with_attributes(&clean_filter.config, &checks, git_path, &body)?;
18468        let oid = clean_filtered_oid_for_status(
18469            format,
18470            &body,
18471            clean,
18472            index_entry.oid,
18473            index_entry.size,
18474            &metadata,
18475        )?;
18476        return Ok(Some(TrackedEntry { mode, oid }));
18477    };
18478    let oid = EncodedObject::new(ObjectType::Blob, body).object_id(format)?;
18479    Ok(Some(TrackedEntry { mode, oid }))
18480}
18481
18482fn clean_filtered_oid_for_status(
18483    format: ObjectFormat,
18484    raw_body: &[u8],
18485    clean_body: Vec<u8>,
18486    index_oid: ObjectId,
18487    index_size: u32,
18488    metadata: &fs::Metadata,
18489) -> Result<ObjectId> {
18490    let clean_oid = EncodedObject::new(ObjectType::Blob, clean_body).object_id(format)?;
18491    let metadata_size = index_size_from_metadata(metadata);
18492    if clean_oid == index_oid && index_size != 0 && index_size != metadata_size {
18493        return EncodedObject::new(ObjectType::Blob, raw_body.to_vec()).object_id(format);
18494    }
18495    Ok(clean_oid)
18496}
18497
18498struct TrackedOnlyCleanFilter {
18499    config: GitConfig,
18500    matcher: AttributeMatcher,
18501    requested: Vec<Vec<u8>>,
18502    attribute_dirs: BTreeSet<Vec<u8>>,
18503}
18504
18505impl TrackedOnlyCleanFilter {
18506    fn read_attributes_for_path(&mut self, worktree_root: &Path, git_path: &[u8]) -> Result<()> {
18507        self.read_attribute_dir(worktree_root, &[])?;
18508        let mut prefix = Vec::new();
18509        let mut parts = git_path.split(|byte| *byte == b'/').peekable();
18510        while let Some(part) = parts.next() {
18511            if parts.peek().is_none() {
18512                break;
18513            }
18514            if !prefix.is_empty() {
18515                prefix.push(b'/');
18516            }
18517            prefix.extend_from_slice(part);
18518            self.read_attribute_dir(worktree_root, &prefix)?;
18519        }
18520        Ok(())
18521    }
18522
18523    fn read_attribute_dir(&mut self, worktree_root: &Path, git_path: &[u8]) -> Result<()> {
18524        if !self.attribute_dirs.insert(git_path.to_vec()) {
18525            return Ok(());
18526        }
18527        let dir = if git_path.is_empty() {
18528            worktree_root.to_path_buf()
18529        } else {
18530            worktree_root.join(repo_path_to_os_path(git_path)?)
18531        };
18532        read_dir_attribute_patterns(worktree_root, &dir, &mut self.matcher)
18533    }
18534}
18535
18536fn tracked_only_clean_filter<'a>(
18537    clean_filter: &'a mut Option<TrackedOnlyCleanFilter>,
18538    worktree_root: &Path,
18539    git_dir: &Path,
18540) -> &'a mut TrackedOnlyCleanFilter {
18541    if clean_filter.is_none() {
18542        *clean_filter = Some(TrackedOnlyCleanFilter {
18543            config: sley_config::read_repo_config(git_dir, None).unwrap_or_default(),
18544            matcher: AttributeMatcher::from_worktree_base(worktree_root),
18545            requested: filter_attribute_names(),
18546            attribute_dirs: BTreeSet::new(),
18547        });
18548    }
18549    clean_filter
18550        .as_mut()
18551        .expect("tracked-only clean filter initialized")
18552}
18553
18554fn tracked_only_clean_filter_with_config<'a>(
18555    clean_filter: &'a mut Option<TrackedOnlyCleanFilter>,
18556    worktree_root: &Path,
18557    config: &GitConfig,
18558) -> &'a mut TrackedOnlyCleanFilter {
18559    if clean_filter.is_none() {
18560        *clean_filter = Some(TrackedOnlyCleanFilter {
18561            config: config.clone(),
18562            matcher: AttributeMatcher::from_worktree_base(worktree_root),
18563            requested: filter_attribute_names(),
18564            attribute_dirs: BTreeSet::new(),
18565        });
18566    }
18567    clean_filter
18568        .as_mut()
18569        .expect("tracked-only clean filter initialized")
18570}
18571
18572struct WorktreeEntriesWalk<'a> {
18573    git_dir: &'a Path,
18574    format: ObjectFormat,
18575    config: &'a GitConfig,
18576    matcher: &'a mut AttributeMatcher,
18577    requested: &'a [Vec<u8>],
18578    stat_cache: Option<&'a IndexStatCache>,
18579    known_tracked_paths: Option<&'a BTreeSet<Vec<u8>>>,
18580    tracked_paths: Option<&'a BTreeSet<Vec<u8>>>,
18581    ignores: Option<&'a mut IgnoreMatcher>,
18582    entries: &'a mut BTreeMap<Vec<u8>, TrackedEntry>,
18583    /// Dirt masks for tracked gitlink paths whose submodule worktree is dirty.
18584    submodule_dirt: &'a mut BTreeMap<Vec<u8>, u8>,
18585    tracked_presence: &'a mut HashSet<Vec<u8>>,
18586    record_clean_tracked: bool,
18587}
18588
18589impl WorktreeEntriesWalk<'_> {
18590    fn mark_tracked_present(&mut self, git_path: &[u8]) {
18591        self.tracked_presence.insert(git_path.to_vec());
18592    }
18593
18594    fn tracked_entry_for(&self, git_path: &[u8]) -> Option<TrackedEntry> {
18595        self.stat_cache
18596            .and_then(|cache| cache.tracked_entry(git_path))
18597    }
18598
18599    fn should_record_tracked_entry(&self, git_path: &[u8], entry: &TrackedEntry) -> bool {
18600        self.record_clean_tracked
18601            || self
18602                .tracked_entry_for(git_path)
18603                .is_none_or(|tracked| tracked != *entry)
18604    }
18605}
18606
18607fn git_path_append_component(parent: &[u8], component: &std::ffi::OsStr) -> Vec<u8> {
18608    let component = os_str_component_bytes(component);
18609    let separator = usize::from(!parent.is_empty());
18610    let mut path = Vec::with_capacity(parent.len() + separator + component.len());
18611    if !parent.is_empty() {
18612        path.extend_from_slice(parent);
18613        path.push(b'/');
18614    }
18615    path.extend_from_slice(component.as_ref());
18616    path
18617}
18618
18619fn git_path_push_component(path: &mut Vec<u8>, component: &std::ffi::OsStr) -> usize {
18620    let original_len = path.len();
18621    let component = os_str_component_bytes(component);
18622    if !path.is_empty() {
18623        path.push(b'/');
18624    }
18625    path.extend_from_slice(component.as_ref());
18626    original_len
18627}
18628
/// Bytes of a single path component. On Unix the `OsStr` bytes are borrowed
/// unchanged.
#[cfg(unix)]
fn os_str_component_bytes(component: &std::ffi::OsStr) -> Cow<'_, [u8]> {
    let raw: &[u8] = std::os::unix::ffi::OsStrExt::as_bytes(component);
    raw.into()
}

/// Bytes of a single path component. Off Unix the name goes through a lossy
/// UTF-8 conversion and is returned as an owned buffer.
#[cfg(not(unix))]
fn os_str_component_bytes(component: &std::ffi::OsStr) -> Cow<'_, [u8]> {
    let text = component.to_string_lossy();
    Cow::Owned(text.as_bytes().to_vec())
}
18640
18641fn collect_worktree_entries(
18642    context: &mut WorktreeEntriesWalk<'_>,
18643    dir: &Path,
18644    dir_git_path: &[u8],
18645) -> Result<()> {
18646    if is_same_path(dir, context.git_dir) {
18647        return Ok(());
18648    }
18649    // Fold this directory's `.gitattributes` into the matcher before processing its
18650    // files, so lookups for files here (and below) see it. This is what lets the
18651    // walk read the tree once instead of doing a separate full-tree attribute pass.
18652    read_dir_attribute_patterns_for_base(dir, dir_git_path, context.matcher)?;
18653    if let Some(ignores) = context.ignores.as_deref_mut() {
18654        read_dir_ignore_patterns_for_base(dir, dir_git_path, ignores)?;
18655    }
18656    let mut dir_entries = fs::read_dir(dir)?.collect::<std::result::Result<Vec<_>, _>>()?;
18657    dir_entries.sort_by_key(|entry| entry.file_name());
18658    for entry in dir_entries {
18659        let file_name = entry.file_name();
18660        let path = entry.path();
18661        if is_dot_git_entry(&path) {
18662            continue;
18663        }
18664        if is_same_path(&path, context.git_dir) {
18665            continue;
18666        }
18667        let metadata = entry.metadata()?;
18668        let git_path = git_path_append_component(dir_git_path, &file_name);
18669        if context
18670            .ignores
18671            .as_ref()
18672            .is_some_and(|ignores| ignores.is_ignored(&git_path, metadata.is_dir()))
18673        {
18674            let tracked = context.known_tracked_paths.is_some_and(|tracked_paths| {
18675                if metadata.is_dir() {
18676                    tracked_paths_may_contain(tracked_paths, &git_path)
18677                } else {
18678                    tracked_paths.contains(&git_path)
18679                }
18680            });
18681            if !tracked {
18682                continue;
18683            }
18684            if metadata.is_dir() {
18685                collect_worktree_entries(context, &path, &git_path)?;
18686                continue;
18687            }
18688        }
18689        if metadata.is_dir() {
18690            // A directory staged as a gitlink (mode 160000) is opaque: the walk
18691            // never descends into it. Its worktree "content" is the commit the
18692            // embedded repository has checked out (upstream ce_compare_gitlink):
18693            // a populated submodule reports its HEAD (plus a dirt mask when its
18694            // own tree has modified/untracked content); an unpopulated
18695            // directory — no repository, or no commit checked out — always
18696            // matches the staged oid.
18697            if let Some(index_entry) = context
18698                .stat_cache
18699                .and_then(|cache| cache.gitlink_entry(&git_path))
18700            {
18701                context.mark_tracked_present(&git_path);
18702                let oid = sley_diff_merge::gitlink_head_oid(&path, context.format)
18703                    .unwrap_or(index_entry.oid);
18704                let dirt = submodule_dirt(&path);
18705                if dirt != 0 {
18706                    context.submodule_dirt.insert(git_path.clone(), dirt);
18707                }
18708                let tracked = TrackedEntry {
18709                    mode: sley_index::GITLINK_MODE,
18710                    oid,
18711                };
18712                if dirt != 0 || context.should_record_tracked_entry(&git_path, &tracked) {
18713                    context.entries.insert(git_path, tracked);
18714                }
18715                continue;
18716            }
18717            if is_nested_repository_boundary(&path, context.git_dir) {
18718                if let Some(tracked_paths) = context.tracked_paths
18719                    && !tracked_paths_may_contain(tracked_paths, &git_path)
18720                {
18721                    continue;
18722                }
18723                context.entries.insert(
18724                    git_path,
18725                    TrackedEntry {
18726                        mode: 0o040000,
18727                        oid: ObjectId::null(context.format),
18728                    },
18729                );
18730                continue;
18731            }
18732            if let Some(tracked_paths) = context.tracked_paths
18733                && !tracked_paths_may_contain(tracked_paths, &git_path)
18734            {
18735                continue;
18736            }
18737            collect_worktree_entries(context, &path, &git_path)?;
18738        } else if metadata.is_file() || metadata.file_type().is_symlink() {
18739            if let Some(tracked_paths) = context.tracked_paths
18740                && !tracked_paths.contains(&git_path)
18741            {
18742                continue;
18743            }
18744            let entry_mode = worktree_entry_mode(&metadata);
18745            // git's racy-git stat shortcut: when the index's cached stat proves
18746            // this file is unchanged since it was staged, reuse the staged oid
18747            // and skip the read+filter+hash entirely. `reuse_tracked_entry`
18748            // returns `Some` ONLY for a non-racy size+mtime+mode match, so a
18749            // modified file always falls through to the full hash below and is
18750            // never silently reported clean.
18751            if let Some(tracked) = context
18752                .stat_cache
18753                .and_then(|cache| cache.reuse_tracked_entry(&git_path, &metadata))
18754            {
18755                context.mark_tracked_present(&git_path);
18756                if context.record_clean_tracked {
18757                    context.entries.insert(git_path, tracked);
18758                }
18759                continue;
18760            }
18761            // A file absent from the index is untracked: status and the
18762            // index-vs-worktree diff report it by *presence* (`??` / nothing), never
18763            // by content, so computing its oid is wasted work — git never hashes
18764            // untracked files. Record presence with a null oid and skip the
18765            // read+filter+hash. Without a stat cache we cannot tell tracked from
18766            // untracked, so fall through and hash as before.
18767            if context
18768                .stat_cache
18769                .is_some_and(|cache| !cache.contains(&git_path))
18770            {
18771                context.entries.insert(
18772                    git_path,
18773                    TrackedEntry {
18774                        mode: entry_mode,
18775                        oid: ObjectId::null(context.format),
18776                    },
18777                );
18778                continue;
18779            }
18780            let body = if metadata.file_type().is_symlink() {
18781                // The blob for a symlink is the raw link target; clean filters
18782                // never apply because git treats symlink content as opaque.
18783                symlink_target_bytes(&path)?
18784            } else {
18785                let body = fs::read(&path)?;
18786                // Resolve this path's attributes against the prebuilt matcher (a cheap
18787                // pattern match) and apply the clean filter -- no per-file matcher
18788                // rebuild. With no attributes/autocrlf configured this is an exact
18789                // passthrough, so the stored OID is unchanged.
18790                let checks =
18791                    context
18792                        .matcher
18793                        .attributes_for_path(&git_path, context.requested, false);
18794                let clean =
18795                    apply_clean_filter_with_attributes(context.config, &checks, &git_path, &body)?;
18796                let oid = match context
18797                    .stat_cache
18798                    .and_then(|cache| cache.index_entry(&git_path))
18799                {
18800                    Some(index_entry) => clean_filtered_oid_for_status(
18801                        context.format,
18802                        &body,
18803                        clean,
18804                        index_entry.oid,
18805                        index_entry.size,
18806                        &metadata,
18807                    )?,
18808                    None => {
18809                        EncodedObject::new(ObjectType::Blob, clean).object_id(context.format)?
18810                    }
18811                };
18812                let tracked = TrackedEntry {
18813                    mode: entry_mode,
18814                    oid,
18815                };
18816                if context
18817                    .stat_cache
18818                    .is_some_and(|cache| cache.contains(&git_path))
18819                {
18820                    context.mark_tracked_present(&git_path);
18821                    if context.should_record_tracked_entry(&git_path, &tracked) {
18822                        context.entries.insert(git_path, tracked);
18823                    }
18824                } else {
18825                    context.entries.insert(git_path, tracked);
18826                }
18827                continue;
18828            };
18829            let oid = EncodedObject::new(ObjectType::Blob, body).object_id(context.format)?;
18830            let tracked = TrackedEntry {
18831                mode: entry_mode,
18832                oid,
18833            };
18834            if context
18835                .stat_cache
18836                .is_some_and(|cache| cache.contains(&git_path))
18837            {
18838                context.mark_tracked_present(&git_path);
18839                if context.should_record_tracked_entry(&git_path, &tracked) {
18840                    context.entries.insert(git_path, tracked);
18841                }
18842            } else {
18843                context.entries.insert(git_path, tracked);
18844            }
18845        }
18846    }
18847    Ok(())
18848}
18849
/// Whether `directory` is itself a tracked path, or some tracked path lies
/// beneath it (starts with `directory` followed by `/`).
fn tracked_paths_may_contain(tracked_paths: &BTreeSet<Vec<u8>>, directory: &[u8]) -> bool {
    use std::ops::Bound;

    if tracked_paths.contains(directory) {
        return true;
    }
    // Paths under the directory sort contiguously starting at `directory/`,
    // so the first path at or after that key decides the answer.
    let prefix = [directory, &b"/"[..]].concat();
    let first_candidate = tracked_paths
        .range::<[u8], _>((Bound::Included(prefix.as_slice()), Bound::Unbounded))
        .next();
    match first_candidate {
        Some(candidate) => candidate.starts_with(&prefix),
        None => false,
    }
}
18865
/// Whether two paths compare equal as `Path` values. Nothing is resolved on
/// disk: no canonicalization and no symlink following.
fn is_same_path(left: &Path, right: &Path) -> bool {
    PartialEq::eq(left, right)
}
18869
/// Whether the last component of `path` is exactly `.git`. Git never reports
/// a `.git` entry as untracked content at any depth, whether it is the
/// repository's own `.git`, a submodule's gitlink file, or an embedded
/// repository's `.git` directory.
fn is_dot_git_entry(path: &Path) -> bool {
    path.file_name().is_some_and(|name| name == ".git")
}
18876
18877/// Whether `path` is a directory containing an embedded repository's `.git`
18878/// *directory*, or a `.git` file whose `gitdir:` pointer resolves to an
18879/// existing directory (a submodule worktree). Git treats both as a repository
18880/// boundary (listing the directory as `dir/`); an *invalid* `.git` file (no
18881/// resolvable `gitdir:` target) is not a boundary — Git descends into the
18882/// directory and lists its other untracked contents normally.
18883fn is_nested_repository_boundary(path: &Path, git_dir: &Path) -> bool {
18884    let dot_git = path.join(".git");
18885    if dot_git.is_dir() {
18886        if is_same_path(&dot_git, git_dir) {
18887            return false;
18888        }
18889        return true;
18890    }
18891    sley_diff_merge::gitlink_git_dir(path).is_some_and(|embedded| !is_same_path(&embedded, git_dir))
18892}
18893
18894fn active_repository_worktree_dir(path: &Path, git_dir: &Path) -> bool {
18895    sley_diff_merge::gitlink_git_dir(path).is_some_and(|embedded| is_same_path(&embedded, git_dir))
18896}
18897
/// Whether `path` is an embedded repository's `.git` directory or lies inside
/// one. Only `.git` directories strictly below `root` count; `root/.git`
/// itself does not. Paths outside `root` are never internals.
fn is_embedded_git_internals(root: &Path, path: &Path) -> bool {
    let relative = match path.strip_prefix(root) {
        Ok(relative) => relative,
        Err(_) => return false,
    };
    // Directory that holds the component currently being examined.
    let mut parent = root.to_path_buf();
    relative.components().any(|component| {
        let names_dot_git =
            matches!(component, std::path::Component::Normal(name) if name == ".git");
        let embedded = names_dot_git && parent != root && parent.join(".git").is_dir();
        if !embedded {
            parent.push(component);
        }
        embedded
    })
}
18915
18916fn worktree_entry_mode(metadata: &fs::Metadata) -> u32 {
18917    if metadata.file_type().is_symlink() {
18918        0o120000
18919    } else if metadata.is_dir() {
18920        0o040000
18921    } else {
18922        file_mode(metadata)
18923    }
18924}
18925
18926fn worktree_path(root: &Path, path: &[u8]) -> Result<PathBuf> {
18927    let text = std::str::from_utf8(path).map_err(|err| GitError::InvalidPath(err.to_string()))?;
18928    let relative = PathBuf::from(text);
18929    if relative.is_absolute()
18930        || relative.components().any(|component| {
18931            matches!(
18932                component,
18933                std::path::Component::ParentDir | std::path::Component::Prefix(_)
18934            )
18935        })
18936    {
18937        return Err(GitError::InvalidPath(format!(
18938            "invalid worktree path {text}"
18939        )));
18940    }
18941    Ok(root.join(relative))
18942}
18943
18944fn remove_worktree_file(root: &Path, path: &[u8]) -> Result<()> {
18945    let file = worktree_path(root, path)?;
18946    if !file.exists() {
18947        return Ok(());
18948    }
18949    if file.is_dir() {
18950        // A tracked path that is a directory on disk is a gitlink: upstream
18951        // checkout/reset never recurses into a submodule's working tree. It
18952        // rmdirs the path when empty (remove_scheduled_dirs) and leaves a
18953        // populated submodule in place.
18954        match fs::remove_dir(&file) {
18955            Ok(()) => prune_empty_parents(root, file.parent())?,
18956            Err(err) if err.kind() == std::io::ErrorKind::DirectoryNotEmpty => {}
18957            Err(err) => return Err(err.into()),
18958        }
18959        return Ok(());
18960    }
18961    fs::remove_file(&file)?;
18962    prune_empty_parents(root, file.parent())?;
18963    Ok(())
18964}
18965
18966fn prune_empty_parents(root: &Path, mut dir: Option<&Path>) -> Result<()> {
18967    while let Some(path) = dir {
18968        if path == root || path_is_original_cwd(path) {
18969            break;
18970        }
18971        match fs::remove_dir(path) {
18972            Ok(()) => dir = path.parent(),
18973            Err(err) if err.kind() == std::io::ErrorKind::NotFound => dir = path.parent(),
18974            Err(err) if err.kind() == std::io::ErrorKind::DirectoryNotEmpty => break,
18975            Err(err) => return Err(err.into()),
18976        }
18977    }
18978    Ok(())
18979}
18980
18981fn original_cwd_absolute() -> Option<PathBuf> {
18982    let cwd = sley_core::original_cwd().or_else(|| env::current_dir().ok())?;
18983    Some(fs::canonicalize(&cwd).unwrap_or(cwd))
18984}
18985
18986fn path_is_original_cwd(path: &Path) -> bool {
18987    let Some(cwd) = original_cwd_absolute() else {
18988        return false;
18989    };
18990    let path = fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
18991    path == cwd
18992}
18993
18994fn original_cwd_is_inside(path: &Path) -> bool {
18995    let Some(cwd) = original_cwd_absolute() else {
18996        return false;
18997    };
18998    let path = fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
18999    cwd == path || cwd.starts_with(&path)
19000}
19001
19002fn refuse_if_current_working_directory_becomes_file(
19003    worktree_root: &Path,
19004    target_entries: &BTreeMap<Vec<u8>, TrackedEntry>,
19005) -> Result<()> {
19006    for (path, entry) in target_entries {
19007        if sley_index::is_gitlink(entry.mode) || (entry.mode & 0o170000) == 0o040000 {
19008            continue;
19009        }
19010        let path = worktree_path(worktree_root, path)?;
19011        if path_is_original_cwd(&path)
19012            && fs::symlink_metadata(&path).is_ok_and(|metadata| metadata.is_dir())
19013        {
19014            return refuse_remove_current_working_directory(&path);
19015        }
19016    }
19017    Ok(())
19018}
19019
19020fn refuse_remove_current_working_directory(path: &Path) -> Result<()> {
19021    eprintln!(
19022        "error: Refusing to remove the current working directory:\n{}",
19023        path.display()
19024    );
19025    Err(GitError::Exit(128))
19026}
19027
/// Orders two tree entry names the way git sorts tree entries: a name that
/// runs out first is treated as if followed by `/` when its mode is a tree
/// (`040000`) and by NUL otherwise. Two identical names compare `Equal`
/// whatever their modes.
fn git_tree_entry_cmp(
    left_name: &[u8],
    left_mode: u32,
    right_name: &[u8],
    right_mode: u32,
) -> Ordering {
    let shared = left_name.len().min(right_name.len());
    let (left_head, left_rest) = left_name.split_at(shared);
    let (right_head, right_rest) = right_name.split_at(shared);
    left_head
        .cmp(right_head)
        .then_with(|| match (left_rest.first(), right_rest.first()) {
            (None, None) => Ordering::Equal,
            (None, Some(next)) => tree_name_terminator(left_mode).cmp(next),
            (Some(next), None) => next.cmp(&tree_name_terminator(right_mode)),
            // Unreachable: at least one side ends at `shared`.
            (Some(_), Some(_)) => Ordering::Equal,
        })
}

/// Byte that virtually follows an entry name for sorting: `/` for a tree,
/// NUL for anything else.
fn tree_name_terminator(mode: u32) -> u8 {
    match mode {
        0o040000 => b'/',
        _ => 0,
    }
}
19052
/// Git mode for a regular file: `100755` when the owner may execute it,
/// `100644` otherwise.
///
/// Only the owner execute bit (`0o100`) is consulted, matching upstream
/// git's `ce_permissions`. Group or other execute bits alone do not make a
/// file executable as far as git is concerned.
#[cfg(unix)]
fn file_mode(metadata: &fs::Metadata) -> u32 {
    use std::os::unix::fs::PermissionsExt;

    const OWNER_EXECUTE: u32 = 0o100;
    if metadata.permissions().mode() & OWNER_EXECUTE != 0 {
        0o100755
    } else {
        0o100644
    }
}

/// Git mode for a regular file. Off Unix there is no execute bit to read, so
/// every file is reported as `100644`.
#[cfg(not(unix))]
fn file_mode(_metadata: &fs::Metadata) -> u32 {
    0o100644
}
19067
19068/// The blob content git stores for a symlink: the raw bytes of the link target
19069/// exactly as `readlink(2)` returns them. On Unix the target is an opaque byte
19070/// string, so we take the `OsStr` bytes verbatim (no UTF-8 round-trip, no path
19071/// re-componentization that could rewrite separators).
19072#[cfg(unix)]
19073fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
19074    use std::os::unix::ffi::OsStrExt;
19075    let target = fs::read_link(path)?;
19076    Ok(target.as_os_str().as_bytes().to_vec())
19077}
19078
19079#[cfg(not(unix))]
19080fn symlink_target_bytes(path: &Path) -> Result<Vec<u8>> {
19081    let target = fs::read_link(path)?;
19082    // git normalizes symlink targets to forward slashes on platforms whose
19083    // native separator is `\`.
19084    Ok(target.to_string_lossy().replace('\\', "/").into_bytes())
19085}
19086
19087fn git_path_bytes(path: &Path) -> Result<Vec<u8>> {
19088    if path.components().any(|component| {
19089        matches!(
19090            component,
19091            std::path::Component::ParentDir | std::path::Component::Prefix(_)
19092        )
19093    }) {
19094        return Err(GitError::InvalidPath(format!(
19095            "invalid index path {}",
19096            path.display()
19097        )));
19098    }
19099    Ok(path
19100        .components()
19101        .filter_map(|component| match component {
19102            std::path::Component::Normal(value) => Some(value.to_string_lossy().into_owned()),
19103            _ => None,
19104        })
19105        .collect::<Vec<_>>()
19106        .join("/")
19107        .into_bytes())
19108}
19109
/// Resolves `.` and `..` components of `path` purely textually, without
/// touching the filesystem.
///
/// `.` components are skipped, each `..` pops the last component accumulated
/// so far (a no-op when nothing can be popped), and every other component is
/// appended as-is.
fn normalize_absolute_path_lexically(path: &Path) -> PathBuf {
    use std::path::Component;

    path.components()
        .fold(PathBuf::new(), |mut resolved, part| {
            match part {
                Component::CurDir => {}
                Component::ParentDir => {
                    resolved.pop();
                }
                Component::Normal(_) | Component::RootDir | Component::Prefix(_) => {
                    resolved.push(part.as_os_str());
                }
            }
            resolved
        })
}
19125
19126fn absolute_path_lexically(path: &Path, cwd: &Path) -> PathBuf {
19127    if path.is_absolute() {
19128        normalize_absolute_path_lexically(path)
19129    } else {
19130        normalize_absolute_path_lexically(&cwd.join(path))
19131    }
19132}
19133
19134fn repo_path_to_os_path(path: &[u8]) -> Result<PathBuf> {
19135    #[cfg(unix)]
19136    {
19137        use std::os::unix::ffi::OsStrExt;
19138
19139        Ok(PathBuf::from(std::ffi::OsStr::from_bytes(path)))
19140    }
19141
19142    #[cfg(not(unix))]
19143    {
19144        let path = std::str::from_utf8(path)
19145            .map_err(|_| GitError::InvalidPath("index path is not utf8".into()))?;
19146        Ok(path.split('/').collect())
19147    }
19148}
19149
19150fn git_path_to_relative_path(path: &[u8]) -> Result<PathBuf> {
19151    let path = std::str::from_utf8(path)
19152        .map_err(|err| GitError::InvalidPath(format!("invalid utf-8 index path: {err}")))?;
19153    Ok(path.split('/').collect())
19154}
19155
/// Reports whether the textual form of `path` ends with the platform's main
/// path separator (`std::path::MAIN_SEPARATOR`).
///
/// The check is made on the lossily-decoded path text, so it looks at the
/// path exactly as written rather than at its parsed components.
fn path_has_trailing_separator(path: &Path) -> bool {
    let text = path.as_os_str().to_string_lossy();
    text.chars().next_back() == Some(std::path::MAIN_SEPARATOR)
}
19161
19162#[cfg(test)]
19163mod tests {
19164    use super::*;
19165    use sley_odb::ObjectReader;
19166    use std::sync::atomic::{AtomicU64, Ordering};
19167
19168    static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
19169
19170    fn short_status(
19171        worktree_root: impl AsRef<Path>,
19172        git_dir: impl AsRef<Path>,
19173        format: ObjectFormat,
19174    ) -> Result<Vec<ShortStatusEntry>> {
19175        let mut entries = Vec::new();
19176        stream_short_status(worktree_root, git_dir, format, |entry| {
19177            entries.push(entry.to_owned_entry());
19178            Ok(StreamControl::Continue)
19179        })?;
19180        Ok(entries)
19181    }
19182
19183    #[test]
19184    fn atomic_metadata_writer_writes_and_reports_stat() {
19185        let root = temp_root();
19186        let path = root.join(".git").join("HEAD");
19187
19188        let result = write_metadata_file_atomic(
19189            &path,
19190            b"ref: refs/heads/main\n",
19191            AtomicMetadataWriteOptions::default(),
19192        )
19193        .expect("write metadata");
19194
19195        assert_eq!(
19196            fs::read(&path).expect("read metadata"),
19197            b"ref: refs/heads/main\n"
19198        );
19199        assert_eq!(result.path, path);
19200        assert_eq!(result.len, b"ref: refs/heads/main\n".len() as u64);
19201        assert!(result.mtime.is_some());
19202        assert!(!path.with_file_name("HEAD.lock").exists());
19203        fs::remove_dir_all(root).expect("test operation should succeed");
19204    }
19205
19206    #[test]
19207    fn atomic_metadata_writer_existing_lock_preserves_original() {
19208        let root = temp_root();
19209        let git_dir = root.join(".git");
19210        fs::create_dir_all(&git_dir).expect("create git dir");
19211        let path = git_dir.join("HEAD");
19212        let lock = git_dir.join("HEAD.lock");
19213        fs::write(&path, b"ref: refs/heads/main\n").expect("write original");
19214        fs::write(&lock, b"held\n").expect("write lock");
19215
19216        let err = write_metadata_file_atomic(
19217            &path,
19218            b"ref: refs/heads/other\n",
19219            AtomicMetadataWriteOptions::default(),
19220        )
19221        .expect_err("held lock must fail");
19222
19223        assert!(matches!(err, GitError::Transaction(_)));
19224        assert_eq!(
19225            fs::read(&path).expect("read original"),
19226            b"ref: refs/heads/main\n"
19227        );
19228        assert_eq!(fs::read(&lock).expect("read lock"), b"held\n");
19229        fs::remove_dir_all(root).expect("test operation should succeed");
19230    }
19231
19232    // --- `ls-files --eol` stat/attr helpers (mirror convert.c) ---------------
19233
19234    #[test]
19235    fn convert_stats_ascii_classifies_eol_content() {
19236        assert_eq!(convert_stats_ascii(b""), "none");
19237        assert_eq!(convert_stats_ascii(b"abc"), "none");
19238        assert_eq!(convert_stats_ascii(b"a\nb\n"), "lf");
19239        assert_eq!(convert_stats_ascii(b"a\r\nb\r\n"), "crlf");
19240        assert_eq!(convert_stats_ascii(b"a\r\nb\n"), "mixed");
19241        // A lone CR makes the content binary (-text), matching git.
19242        assert_eq!(convert_stats_ascii(b"a\rb"), "-text");
19243        // A NUL byte is binary.
19244        assert_eq!(convert_stats_ascii(b"a\0b\n"), "-text");
19245        // A trailing ^Z (EOF) is not counted as non-printable.
19246        assert_eq!(convert_stats_ascii(b"abc\n\x1a"), "lf");
19247    }
19248
19249    fn attr_check(name: &[u8], state: Option<AttributeState>) -> AttributeCheck {
19250        AttributeCheck {
19251            attribute: name.to_vec(),
19252            state,
19253        }
19254    }
19255
19256    #[test]
19257    fn convert_attr_ascii_matches_git_attr_action() {
19258        // No attributes at all: empty attr field.
19259        assert_eq!(convert_attr_ascii(&[]), "");
19260        // text (set) -> "text"; -text (unset) -> "-text".
19261        assert_eq!(
19262            convert_attr_ascii(&[attr_check(b"text", Some(AttributeState::Set))]),
19263            "text"
19264        );
19265        assert_eq!(
19266            convert_attr_ascii(&[attr_check(b"text", Some(AttributeState::Unset))]),
19267            "-text"
19268        );
19269        // text=auto -> "text=auto"; with eol=crlf/lf the AUTO variants.
19270        assert_eq!(
19271            convert_attr_ascii(&[attr_check(
19272                b"text",
19273                Some(AttributeState::Value(b"auto".to_vec()))
19274            )]),
19275            "text=auto"
19276        );
19277        assert_eq!(
19278            convert_attr_ascii(&[
19279                attr_check(b"text", Some(AttributeState::Value(b"auto".to_vec()))),
19280                attr_check(b"eol", Some(AttributeState::Value(b"crlf".to_vec()))),
19281            ]),
19282            "text=auto eol=crlf"
19283        );
19284        assert_eq!(
19285            convert_attr_ascii(&[
19286                attr_check(b"text", Some(AttributeState::Value(b"auto".to_vec()))),
19287                attr_check(b"eol", Some(AttributeState::Value(b"lf".to_vec()))),
19288            ]),
19289            "text=auto eol=lf"
19290        );
19291        // eol=crlf/lf alone (no text) forces text + the eol direction.
19292        assert_eq!(
19293            convert_attr_ascii(&[attr_check(
19294                b"eol",
19295                Some(AttributeState::Value(b"crlf".to_vec()))
19296            )]),
19297            "text eol=crlf"
19298        );
19299        assert_eq!(
19300            convert_attr_ascii(&[attr_check(
19301                b"eol",
19302                Some(AttributeState::Value(b"lf".to_vec()))
19303            )]),
19304            "text eol=lf"
19305        );
19306        // -text overrides any eol attribute (binary wins).
19307        assert_eq!(
19308            convert_attr_ascii(&[
19309                attr_check(b"text", Some(AttributeState::Unset)),
19310                attr_check(b"eol", Some(AttributeState::Value(b"crlf".to_vec()))),
19311            ]),
19312            "-text"
19313        );
19314    }
19315
19316    #[test]
19317    fn smudge_safety_guard_skips_irreversible_autocrlf() {
19318        // text=auto eol=crlf (AUTO_CRLF): convert pure-LF, but leave content
19319        // alone when it already has a CR or CRLF, or is binary.
19320        let auto = ContentFilterPlan {
19321            text: TextDecision::Auto,
19322            eol: EolConversion::Crlf,
19323            ident: false,
19324            driver: None,
19325        };
19326        assert!(auto.will_convert_lf_to_crlf(b"a\nb\n"));
19327        assert!(!auto.will_convert_lf_to_crlf(b"a\r\nb\n")); // has CRLF
19328        assert!(!auto.will_convert_lf_to_crlf(b"a\nb\rc")); // lone CR (binary)
19329        assert!(!auto.will_convert_lf_to_crlf(b"abc")); // no naked LF
19330
19331        // text eol=crlf (TEXT_CRLF): no safety guard — always convert naked LF
19332        // even when a CR/CRLF is already present.
19333        let text = ContentFilterPlan {
19334            text: TextDecision::Text,
19335            eol: EolConversion::Crlf,
19336            ident: false,
19337            driver: None,
19338        };
19339        assert!(text.will_convert_lf_to_crlf(b"a\r\nb\nc\n"));
19340        assert!(!text.will_convert_lf_to_crlf(b"a\r\nb\r\n")); // no naked LF
19341    }
19342
19343    /// Build an in-memory ignore matcher from raw `.gitignore` lines (no disk).
19344    fn ignore_matcher(patterns: &[&[u8]]) -> IgnoreMatcher {
19345        let mut matcher = IgnoreMatcher::default();
19346        let owned: Vec<Vec<u8>> = patterns.iter().map(|p| p.to_vec()).collect();
19347        matcher.extend_patterns(&owned);
19348        matcher
19349    }
19350
19351    #[test]
19352    fn ignore_match_kind_fast_paths_match_the_wildcard_engine() {
19353        // Literal: exact basename anywhere; not a superstring.
19354        let matcher = ignore_matcher(&[b"Pods"]);
19355        assert!(matcher.is_ignored(b"a/b/Pods", true));
19356        assert!(matcher.is_ignored(b"Pods", false));
19357        assert!(!matcher.is_ignored(b"Pods_not", false));
19358        assert!(matches!(
19359            classify_ignore_pattern(b"Pods"),
19360            MatchKind::Literal
19361        ));
19362
19363        // Suffix `*.log`: basename ending in `.log` at any depth.
19364        let matcher = ignore_matcher(&[b"*.log"]);
19365        assert!(matcher.is_ignored(b"x.log", false));
19366        assert!(matcher.is_ignored(b"a/b/x.log", false));
19367        assert!(matcher.is_ignored(b".log", false));
19368        assert!(!matcher.is_ignored(b"x.logx", false));
19369        assert!(matches!(
19370            classify_ignore_pattern(b"*.log"),
19371            MatchKind::Suffix
19372        ));
19373
19374        // Prefix `build*`: basename starting with `build`.
19375        let matcher = ignore_matcher(&[b"build*"]);
19376        assert!(matcher.is_ignored(b"buildfoo", false));
19377        assert!(matcher.is_ignored(b"a/build", false));
19378        assert!(!matcher.is_ignored(b"xbuild", false));
19379        assert!(matches!(
19380            classify_ignore_pattern(b"build*"),
19381            MatchKind::Prefix
19382        ));
19383    }
19384
19385    #[test]
19386    fn ignore_anchored_suffix_does_not_cross_slash() {
19387        // `/*.log` is anchored: matches `.log` files only at the matcher base,
19388        // never in a subdirectory — the slash guard in `match_segment`.
19389        let matcher = ignore_matcher(&[b"/*.log"]);
19390        assert!(matcher.is_ignored(b"x.log", false));
19391        assert!(!matcher.is_ignored(b"sub/x.log", false));
19392
19393        // Anchored literal likewise only matches at root.
19394        let matcher = ignore_matcher(&[b"/foo"]);
19395        assert!(matcher.is_ignored(b"foo", false));
19396        assert!(!matcher.is_ignored(b"a/foo", false));
19397    }
19398
19399    #[test]
19400    fn ignore_anchored_directory_glob_matches_root_directory() {
19401        let matcher = ignore_matcher(&[b"/tmp-*/"]);
19402        assert!(matcher.is_ignored(b"tmp-info-only", true));
19403        assert!(matcher.is_ignored(b"tmp-info-only/file.txt", false));
19404        assert!(!matcher.is_ignored(b"nested/tmp-info-only", true));
19405        assert!(!matcher.is_ignored(b"tmp-info-only", false));
19406    }
19407
19408    #[test]
19409    fn ignore_negated_directory_glob_does_not_reinclude_files() {
19410        // t0008-ignores "directories and ** matches": a negated directory-only
19411        // pattern re-includes *directories* but never the *files* inside them
19412        // (git: re-including a dir with `!dir/` still needs an explicit
19413        // `!dir/*` to reach its files). Verified against git 2.54 check-ignore:
19414        //   data/file              -> data/**           (ignored)
19415        //   data/data1/file1       -> data/**           (ignored, NOT !data/**/)
19416        //   data/data1/file1.txt   -> !data/**/*.txt    (re-included)
19417        //   data/data1   (dir)     -> !data/**/         (re-included)
19418        let matcher = ignore_matcher(&[b"data/**", b"!data/**/", b"!data/**/*.txt"]);
19419        // Files stay ignored: `!data/**/` must not win the file leaf scan.
19420        assert!(matcher.is_ignored(b"data/file", false));
19421        assert!(matcher.is_ignored(b"data/data1/file1", false));
19422        assert!(matcher.is_ignored(b"data/data2/file2", false));
19423        // `.txt` files are re-included by the explicit non-dir negation.
19424        assert!(!matcher.is_ignored(b"data/data1/file1.txt", false));
19425        assert!(!matcher.is_ignored(b"data/data2/file2.txt", false));
19426        // Directories ARE re-included by `!data/**/` (the directory-glob gain
19427        // from `fix: match git status ignored directory globs`).
19428        assert!(!matcher.is_ignored(b"data/data1", true));
19429        assert!(!matcher.is_ignored(b"data/data2", true));
19430    }
19431
19432    #[test]
19433    fn ignore_double_star_prefix_collapses_to_basename() {
19434        // `**/X` ≡ `X` for slash-free X (verified against `git check-ignore`).
19435        let matcher = ignore_matcher(&[b"**/Pods"]);
19436        assert!(matcher.is_ignored(b"a/b/Pods", true));
19437        assert!(matcher.is_ignored(b"Pods", true));
19438        assert!(!matcher.is_ignored(b"Pods_not", false));
19439
19440        let matcher = ignore_matcher(&[b"**/*.jks"]);
19441        assert!(matcher.is_ignored(b"x.jks", false));
19442        assert!(matcher.is_ignored(b"a/deep/y.jks", false));
19443        assert!(!matcher.is_ignored(b"x.jksx", false));
19444
19445        // `**/A/B` keeps a slash in the tail, so it stays a real glob and must
19446        // match the trailing path at any depth.
19447        let matcher = ignore_matcher(&[b"**/Flutter/ephemeral"]);
19448        assert!(matcher.is_ignored(b"Flutter/ephemeral", true));
19449        assert!(matcher.is_ignored(b"a/Flutter/ephemeral", true));
19450        assert!(!matcher.is_ignored(b"Flutter/other", true));
19451        assert!(matches!(
19452            classify_ignore_pattern(b"**/Flutter/ephemeral"),
19453            MatchKind::PathSuffix
19454        ));
19455    }
19456
19457    #[test]
19458    fn ignore_slash_glob_literal_basename_bucket_preserves_matches() {
19459        let matcher = ignore_matcher(&[b"**/android/**/GeneratedPluginRegistrant.java"]);
19460        assert!(
19461            matcher
19462                .buckets
19463                .glob_path_literal_basename
19464                .contains_key(b"GeneratedPluginRegistrant.java".as_slice())
19465        );
19466        assert!(matcher.is_ignored(
19467            b"packages/app/android/src/GeneratedPluginRegistrant.java",
19468            false
19469        ));
19470        assert!(matcher.is_ignored(
19471            b"android/app/src/main/java/io/flutter/GeneratedPluginRegistrant.java",
19472            false
19473        ));
19474        assert!(!matcher.is_ignored(b"android/app/src/main/java/io/flutter/Other.java", false));
19475
19476        let matcher = ignore_matcher(&[b"**/ios/**/Pods/"]);
19477        assert!(
19478            matcher
19479                .buckets
19480                .glob_directory_literal_basename
19481                .contains_key(b"Pods".as_slice())
19482        );
19483        assert!(matcher.is_ignored(b"ios/Runner/Pods", true));
19484        assert!(matcher.is_ignored(b"dev/app/ios/Runner/Pods/Manifest.lock", false));
19485        assert!(!matcher.is_ignored(b"dev/app/ios/Runner/Podfile", false));
19486
19487        let matcher = ignore_matcher(&[b"**/ios/**/*.mode1v3"]);
19488        assert!(
19489            !matcher.buckets.glob_path_suffix_basename.is_empty(),
19490            "suffix-final slash glob should be prefiltered by basename suffix"
19491        );
19492        assert!(matcher.is_ignored(b"apps/ios/Runner/default.mode1v3", false));
19493        assert!(!matcher.is_ignored(b"apps/ios/Runner/default.mode2v3", false));
19494
19495        let matcher = ignore_matcher(&[b"**/ios/Runner/GeneratedPluginRegistrant.*"]);
19496        assert!(
19497            !matcher.buckets.glob_path_prefix_basename.is_empty(),
19498            "prefix-final slash glob should be prefiltered by basename prefix"
19499        );
19500        assert!(matcher.is_ignored(b"apps/ios/Runner/GeneratedPluginRegistrant.swift", false));
19501        assert!(!matcher.is_ignored(
19502            b"apps/ios/Runner/OtherGeneratedPluginRegistrant.swift",
19503            false
19504        ));
19505
19506        let matcher = ignore_matcher(&[b"ios/Scenarios/*.framework/"]);
19507        assert!(
19508            !matcher.buckets.glob_directory_suffix_basename.is_empty(),
19509            "directory suffix-final slash glob should be prefiltered by directory component"
19510        );
19511        assert!(matcher.is_ignored(b"ios/Scenarios/App.framework", true));
19512        assert!(matcher.is_ignored(b"ios/Scenarios/App.framework/Info.plist", false));
19513        assert!(!matcher.is_ignored(b"ios/Scenarios/App.xcframework/Info.plist", false));
19514    }
19515
19516    #[test]
19517    fn ignore_complex_globs_still_use_the_engine() {
19518        let matcher = ignore_matcher(&[b"*.[Cc]ache"]);
19519        assert!(matcher.is_ignored(b"x.cache", false));
19520        assert!(matcher.is_ignored(b"x.Cache", false));
19521        assert!(!matcher.is_ignored(b"x.xache", false));
19522        assert!(matches!(
19523            classify_ignore_pattern(b"*.[Cc]ache"),
19524            MatchKind::Glob
19525        ));
19526
19527        let matcher = ignore_matcher(&[b"Icon?"]);
19528        assert!(matcher.is_ignored(b"IconA", false));
19529        assert!(!matcher.is_ignored(b"Icon", false));
19530        assert!(!matcher.is_ignored(b"IconAB", false));
19531
19532        // Multi-star is not a simple prefix/suffix.
19533        assert!(matches!(
19534            classify_ignore_pattern(b"app.*.symbols"),
19535            MatchKind::Glob
19536        ));
19537        assert!(matches!(classify_ignore_pattern(b"a*b*c"), MatchKind::Glob));
19538
19539        let matcher = ignore_matcher(&[b".vscode/*", b"dev/devicelab/ABresults*.json"]);
19540        assert!(matcher.is_ignored(b".vscode/settings.json", false));
19541        assert!(!matcher.is_ignored(b"pkg/.vscode/settings.json", false));
19542        assert!(matcher.is_ignored(b"dev/devicelab/ABresults-1.json", false));
19543        assert!(!matcher.is_ignored(b"dev/devicelab/results-1.json", false));
19544    }
19545
19546    #[test]
19547    fn ignore_negation_still_applies_after_fast_paths() {
19548        // Last match wins: a negated literal un-ignores a suffix-matched file.
19549        let matcher = ignore_matcher(&[b"*.log", b"!keep.log"]);
19550        assert!(matcher.is_ignored(b"a/x.log", false));
19551        assert!(!matcher.is_ignored(b"a/keep.log", false));
19552    }
19553
19554    #[test]
19555    fn read_expected_object_missing_blob_exposes_oid_and_kind() {
19556        let root = temp_root();
19557        let git_dir = root.join(".git");
19558        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
19559        let db = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
19560        let missing = ObjectId::empty_blob(ObjectFormat::Sha1);
19561
19562        let err = read_expected_object(&db, &missing, ObjectType::Blob)
19563            .expect_err("missing blob should error");
19564        let kind = err.not_found_kind().expect("typed not found");
19565        assert_eq!(kind.object_id(), Some(missing));
19566        assert_eq!(kind.missing_object_kind(), Some(MissingObjectKind::Blob));
19567        assert_eq!(
19568            kind.missing_object_context(),
19569            Some(MissingObjectContext::WorktreeMaterialize)
19570        );
19571        fs::remove_dir_all(root).expect("test operation should succeed");
19572    }
19573
19574    #[test]
19575    fn update_index_adds_file_entry_and_blob() {
19576        let root = temp_root();
19577        let git_dir = root.join(".git");
19578        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
19579        fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
19580        let result = add_paths_to_index(
19581            &root,
19582            &git_dir,
19583            ObjectFormat::Sha1,
19584            &[PathBuf::from("hello.txt")],
19585        )
19586        .expect("test operation should succeed");
19587        assert_eq!(result.entries, 1);
19588        let index = Index::parse_v2_sha1(
19589            &fs::read(repository_index_path(git_dir)).expect("test operation should succeed"),
19590        )
19591        .expect("test operation should succeed");
19592        assert_eq!(index.entries[0].path, b"hello.txt");
19593        fs::remove_dir_all(root).expect("test operation should succeed");
19594    }
19595
19596    #[test]
19597    fn update_index_and_write_tree_support_sha256() {
19598        let root = temp_root();
19599        let git_dir = root.join(".git");
19600        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
19601        fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
19602        let result = add_paths_to_index(
19603            &root,
19604            &git_dir,
19605            ObjectFormat::Sha256,
19606            &[PathBuf::from("hello.txt")],
19607        )
19608        .expect("test operation should succeed");
19609        assert_eq!(result.entries, 1);
19610
19611        let index = Index::parse(
19612            &fs::read(repository_index_path(&git_dir)).expect("test operation should succeed"),
19613            ObjectFormat::Sha256,
19614        )
19615        .expect("test operation should succeed");
19616        assert_eq!(index.entries[0].path, b"hello.txt");
19617        assert_eq!(index.entries[0].oid.format(), ObjectFormat::Sha256);
19618
19619        let tree_oid = write_tree_from_index(&git_dir, ObjectFormat::Sha256)
19620            .expect("test operation should succeed");
19621        assert_eq!(tree_oid.format(), ObjectFormat::Sha256);
19622        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha256);
19623        let tree = odb
19624            .read_object(&tree_oid)
19625            .expect("test operation should succeed");
19626        assert_eq!(tree.object_type, ObjectType::Tree);
19627        fs::remove_dir_all(root).expect("test operation should succeed");
19628    }
19629
19630    #[test]
19631    fn write_tree_from_index_writes_nested_tree_objects() {
19632        let root = temp_root();
19633        let git_dir = root.join(".git");
19634        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
19635        fs::create_dir_all(root.join("src")).expect("test operation should succeed");
19636        fs::write(root.join("README.md"), b"readme\n").expect("test operation should succeed");
19637        fs::write(root.join("src").join("lib.rs"), b"pub fn demo() {}\n")
19638            .expect("test operation should succeed");
19639        let result = add_paths_to_index(
19640            &root,
19641            &git_dir,
19642            ObjectFormat::Sha1,
19643            &[PathBuf::from("README.md"), PathBuf::from("src/lib.rs")],
19644        )
19645        .expect("test operation should succeed");
19646        assert_eq!(result.entries, 2);
19647        let tree_oid = write_tree_from_index(&git_dir, ObjectFormat::Sha1)
19648            .expect("test operation should succeed");
19649        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
19650        let tree = odb
19651            .read_object(&tree_oid)
19652            .expect("test operation should succeed");
19653        assert_eq!(tree.object_type, ObjectType::Tree);
19654        fs::remove_dir_all(root).expect("test operation should succeed");
19655    }
19656
19657    #[test]
19658    fn write_tree_from_index_expands_empty_primary_split_index() {
19659        let root = temp_root();
19660        let git_dir = root.join(".git");
19661        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
19662        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
19663        add_paths_to_index(&root, &git_dir, ObjectFormat::Sha1, &[PathBuf::from("f.txt")])
19664            .expect("test operation should succeed");
19665        let expected = write_tree_from_index(&git_dir, ObjectFormat::Sha1)
19666            .expect("test operation should succeed");
19667
19668        enable_split_index(&git_dir, ObjectFormat::Sha1).expect("test operation should succeed");
19669        let primary = read_index(&git_dir);
19670        assert!(
19671            primary.entries.is_empty(),
19672            "fixture should put all entries in the shared index"
19673        );
19674        assert!(
19675            primary
19676                .split_index_link(ObjectFormat::Sha1)
19677                .expect("test operation should succeed")
19678                .is_some(),
19679            "fixture should write a split-index link extension"
19680        );
19681
19682        let actual = write_tree_from_index(&git_dir, ObjectFormat::Sha1)
19683            .expect("test operation should succeed");
19684        assert_eq!(actual, expected);
19685
19686        fs::remove_dir_all(root).expect("test operation should succeed");
19687    }
19688
19689    #[test]
19690    fn short_status_reports_added_and_untracked_paths() {
19691        let root = temp_root();
19692        let git_dir = root.join(".git");
19693        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
19694        fs::write(root.join("hello.txt"), b"hello\n").expect("test operation should succeed");
19695        fs::write(root.join("extra.txt"), b"extra\n").expect("test operation should succeed");
19696        add_paths_to_index(
19697            &root,
19698            &git_dir,
19699            ObjectFormat::Sha1,
19700            &[PathBuf::from("hello.txt")],
19701        )
19702        .expect("test operation should succeed");
19703        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
19704            .expect("test operation should succeed");
19705        assert_eq!(
19706            status
19707                .iter()
19708                .map(ShortStatusEntry::line)
19709                .collect::<Vec<_>>(),
19710            vec!["A  hello.txt", "?? extra.txt"]
19711        );
19712        fs::remove_dir_all(root).expect("test operation should succeed");
19713    }
19714
19715    #[test]
19716    fn worktree_root_is_none_for_bare_repository() {
19717        // A bare git_dir (basename `.git`) with `core.bare = true` must resolve to
19718        // `Ok(None)` rather than falling through to the "parent of .git" case.
19719        let root = temp_root();
19720        let git_dir = root.join(".git");
19721        fs::create_dir_all(&git_dir).expect("create bare git dir");
19722        // Hermetic minimal config — do not depend on host gitconfig.
19723        fs::write(git_dir.join("config"), b"[core]\n\tbare = true\n").expect("write bare config");
19724
19725        assert_eq!(
19726            worktree_root_for_git_dir(&git_dir).expect("resolve bare worktree root"),
19727            None,
19728            "a bare repository has no working tree"
19729        );
19730
19731        fs::remove_dir_all(root).expect("test operation should succeed");
19732    }
19733
19734    #[test]
19735    fn worktree_root_is_parent_for_non_bare_dot_git() {
19736        // A non-bare `.git` directory (no core.bare / core.bare = false) still
19737        // resolves to its parent — the ordinary non-bare layout.
19738        let root = temp_root();
19739        let work = root.join("work");
19740        let git_dir = work.join(".git");
19741        fs::create_dir_all(&git_dir).expect("create non-bare git dir");
19742        fs::write(git_dir.join("config"), b"[core]\n\tbare = false\n")
19743            .expect("write non-bare config");
19744
19745        assert_eq!(
19746            worktree_root_for_git_dir(&git_dir).expect("resolve non-bare worktree root"),
19747            Some(work.clone()),
19748            "a non-bare .git dir resolves to its parent"
19749        );
19750
19751        fs::remove_dir_all(root).expect("test operation should succeed");
19752    }
19753
19754    fn temp_root() -> PathBuf {
19755        let path = std::env::temp_dir().join(format!(
19756            "sley-worktree-{}-{}",
19757            std::process::id(),
19758            TEMP_COUNTER.fetch_add(1, Ordering::Relaxed)
19759        ));
19760        fs::create_dir_all(&path).expect("test operation should succeed");
19761        path
19762    }
19763
19764    fn index_entry_for<'a>(index: &'a Index, path: &[u8]) -> &'a IndexEntry {
19765        index
19766            .entries
19767            .iter()
19768            .find(|entry| entry.path == path)
19769            .unwrap_or_else(|| panic!("missing index entry for {}", String::from_utf8_lossy(path)))
19770    }
19771
19772    fn read_index(git_dir: &Path) -> Index {
19773        Index::parse(
19774            &fs::read(repository_index_path(git_dir)).expect("test operation should succeed"),
19775            ObjectFormat::Sha1,
19776        )
19777        .expect("test operation should succeed")
19778    }
19779
19780    /// Stages `paths` from the worktree, writes their tree, wraps it in a commit
19781    /// object, and points `refs/heads/main` + `HEAD` at it. Returns the commit
19782    /// id. After this call the index reflects the committed tree.
19783    fn build_commit(root: &Path, git_dir: &Path, paths: &[&str]) -> ObjectId {
19784        let path_bufs = paths.iter().map(PathBuf::from).collect::<Vec<_>>();
19785        add_paths_to_index(root, git_dir, ObjectFormat::Sha1, &path_bufs)
19786            .expect("test operation should succeed");
19787        let tree = write_tree_from_index(git_dir, ObjectFormat::Sha1)
19788            .expect("test operation should succeed");
19789        let mut body = Vec::new();
19790        body.extend_from_slice(format!("tree {tree}\n").as_bytes());
19791        body.extend_from_slice(b"author Test <test@example.com> 0 +0000\n");
19792        body.extend_from_slice(b"committer Test <test@example.com> 0 +0000\n");
19793        body.extend_from_slice(b"\n");
19794        body.extend_from_slice(b"sparse fixture\n");
19795        let odb = FileObjectDatabase::from_git_dir(git_dir, ObjectFormat::Sha1);
19796        let commit = odb
19797            .write_object(EncodedObject::new(ObjectType::Commit, body))
19798            .expect("test operation should succeed");
19799        let refs = FileRefStore::new(git_dir, ObjectFormat::Sha1);
19800        let mut tx = refs.transaction();
19801        tx.update(RefUpdate {
19802            name: "refs/heads/main".into(),
19803            expected: None,
19804            new: RefTarget::Direct(commit),
19805            reflog: None,
19806        });
19807        tx.update(RefUpdate {
19808            name: "HEAD".into(),
19809            expected: None,
19810            new: RefTarget::Symbolic("refs/heads/main".into()),
19811            reflog: None,
19812        });
19813        tx.commit().expect("test operation should succeed");
19814        commit
19815    }
19816
19817    fn full_sparse(patterns: &[&[u8]]) -> SparseCheckout {
19818        SparseCheckout {
19819            patterns: patterns.iter().map(|pattern| pattern.to_vec()).collect(),
19820            sparse_index: false,
19821        }
19822    }
19823
19824    #[test]
19825    fn apply_sparse_checkout_full_mode_skips_out_of_cone_paths() {
19826        let root = temp_root();
19827        let git_dir = root.join(".git");
19828        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
19829        fs::create_dir_all(root.join("in")).expect("test operation should succeed");
19830        fs::create_dir_all(root.join("out")).expect("test operation should succeed");
19831        fs::write(root.join("in").join("keep.txt"), b"keep\n")
19832            .expect("test operation should succeed");
19833        fs::write(root.join("out").join("drop.txt"), b"drop\n")
19834            .expect("test operation should succeed");
19835        fs::write(root.join("top.txt"), b"top\n").expect("test operation should succeed");
19836        build_commit(&root, &git_dir, &["in/keep.txt", "out/drop.txt", "top.txt"]);
19837
19838        // Full (non-cone) pattern: keep only the `in/` subtree.
19839        let sparse = full_sparse(&[b"/in/"]);
19840        let result = apply_sparse_checkout_with_mode(
19841            &root,
19842            &git_dir,
19843            ObjectFormat::Sha1,
19844            &sparse,
19845            SparseCheckoutMode::Full,
19846        )
19847        .expect("test operation should succeed");
19848
19849        assert!(root.join("in").join("keep.txt").exists());
19850        assert!(!root.join("out").join("drop.txt").exists());
19851        assert!(!root.join("top.txt").exists());
19852        assert!(result.materialized.contains(&b"in/keep.txt".to_vec()));
19853        assert!(result.skipped.contains(&b"out/drop.txt".to_vec()));
19854        assert!(result.skipped.contains(&b"top.txt".to_vec()));
19855
19856        let index = read_index(&git_dir);
19857        assert!(!index_entry_skip_worktree(index_entry_for(
19858            &index,
19859            b"in/keep.txt"
19860        )));
19861        assert!(index_entry_skip_worktree(index_entry_for(
19862            &index,
19863            b"out/drop.txt"
19864        )));
19865        assert!(index_entry_skip_worktree(index_entry_for(
19866            &index, b"top.txt"
19867        )));
19868        // Out-of-cone entries are preserved in the index, just not on disk.
19869        assert_eq!(index.entries.len(), 3);
19870        fs::remove_dir_all(root).expect("test operation should succeed");
19871    }
19872
19873    #[test]
19874    fn apply_sparse_checkout_toggle_rematerializes() {
19875        let root = temp_root();
19876        let git_dir = root.join(".git");
19877        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
19878        fs::create_dir_all(root.join("a")).expect("test operation should succeed");
19879        fs::create_dir_all(root.join("b")).expect("test operation should succeed");
19880        fs::write(root.join("a").join("file.txt"), b"a\n").expect("test operation should succeed");
19881        fs::write(root.join("b").join("file.txt"), b"b\n").expect("test operation should succeed");
19882        build_commit(&root, &git_dir, &["a/file.txt", "b/file.txt"]);
19883
19884        // First narrow to `a/`.
19885        apply_sparse_checkout_with_mode(
19886            &root,
19887            &git_dir,
19888            ObjectFormat::Sha1,
19889            &full_sparse(&[b"/a/"]),
19890            SparseCheckoutMode::Full,
19891        )
19892        .expect("test operation should succeed");
19893        assert!(root.join("a").join("file.txt").exists());
19894        assert!(!root.join("b").join("file.txt").exists());
19895        let index = read_index(&git_dir);
19896        assert!(index_entry_skip_worktree(index_entry_for(
19897            &index,
19898            b"b/file.txt"
19899        )));
19900
19901        // Now switch the cone to `b/`: `a/` must leave, `b/` must come back with
19902        // the correct content, and the skip-worktree bits must flip.
19903        apply_sparse_checkout_with_mode(
19904            &root,
19905            &git_dir,
19906            ObjectFormat::Sha1,
19907            &full_sparse(&[b"/b/"]),
19908            SparseCheckoutMode::Full,
19909        )
19910        .expect("test operation should succeed");
19911        assert!(!root.join("a").join("file.txt").exists());
19912        assert!(root.join("b").join("file.txt").exists());
19913        assert_eq!(
19914            fs::read(root.join("b").join("file.txt")).expect("test operation should succeed"),
19915            b"b\n"
19916        );
19917        let index = read_index(&git_dir);
19918        assert!(index_entry_skip_worktree(index_entry_for(
19919            &index,
19920            b"a/file.txt"
19921        )));
19922        assert!(!index_entry_skip_worktree(index_entry_for(
19923            &index,
19924            b"b/file.txt"
19925        )));
19926        fs::remove_dir_all(root).expect("test operation should succeed");
19927    }
19928
19929    #[test]
19930    fn apply_sparse_checkout_cone_mode_matches_directory_prefixes() {
19931        let root = temp_root();
19932        let git_dir = root.join(".git");
19933        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
19934        fs::create_dir_all(root.join("kept").join("nested"))
19935            .expect("test operation should succeed");
19936        fs::create_dir_all(root.join("other")).expect("test operation should succeed");
19937        fs::write(root.join("kept").join("a.txt"), b"a\n").expect("test operation should succeed");
19938        fs::write(root.join("kept").join("nested").join("b.txt"), b"b\n")
19939            .expect("test operation should succeed");
19940        fs::write(root.join("other").join("c.txt"), b"c\n").expect("test operation should succeed");
19941        fs::write(root.join("root.txt"), b"r\n").expect("test operation should succeed");
19942        build_commit(
19943            &root,
19944            &git_dir,
19945            &["kept/a.txt", "kept/nested/b.txt", "other/c.txt", "root.txt"],
19946        );
19947
19948        // Standard cone patterns: top-level files plus the whole `kept/` tree.
19949        let sparse = SparseCheckout {
19950            patterns: vec![b"/*".to_vec(), b"!/*/".to_vec(), b"/kept/".to_vec()],
19951            sparse_index: false,
19952        };
19953        // Auto mode should detect cone shape on its own.
19954        assert!(patterns_are_cone(&sparse.patterns));
19955        apply_sparse_checkout(&root, &git_dir, ObjectFormat::Sha1, &sparse)
19956            .expect("test operation should succeed");
19957
19958        assert!(root.join("root.txt").exists());
19959        assert!(root.join("kept").join("a.txt").exists());
19960        assert!(root.join("kept").join("nested").join("b.txt").exists());
19961        assert!(!root.join("other").join("c.txt").exists());
19962
19963        let index = read_index(&git_dir);
19964        assert!(!index_entry_skip_worktree(index_entry_for(
19965            &index,
19966            b"root.txt"
19967        )));
19968        assert!(!index_entry_skip_worktree(index_entry_for(
19969            &index,
19970            b"kept/a.txt"
19971        )));
19972        assert!(!index_entry_skip_worktree(index_entry_for(
19973            &index,
19974            b"kept/nested/b.txt"
19975        )));
19976        assert!(index_entry_skip_worktree(index_entry_for(
19977            &index,
19978            b"other/c.txt"
19979        )));
19980        fs::remove_dir_all(root).expect("test operation should succeed");
19981    }
19982
19983    #[test]
19984    fn apply_sparse_checkout_cone_parent_guards_keep_only_direct_files() {
19985        let sparse = SparseCheckout {
19986            patterns: vec![
19987                b"/*".to_vec(),
19988                b"!/*/".to_vec(),
19989                b"/deep/".to_vec(),
19990                b"!/deep/*/".to_vec(),
19991                b"/deep/kept/".to_vec(),
19992            ],
19993            sparse_index: false,
19994        };
19995
19996        assert!(path_in_sparse_checkout(
19997            b"deep/file.txt",
19998            &sparse,
19999            SparseCheckoutMode::Cone
20000        ));
20001        assert!(path_in_sparse_checkout(
20002            b"deep/kept/file.txt",
20003            &sparse,
20004            SparseCheckoutMode::Cone
20005        ));
20006        assert!(!path_in_sparse_checkout(
20007            b"deep/dropped/file.txt",
20008            &sparse,
20009            SparseCheckoutMode::Cone
20010        ));
20011    }
20012
20013    #[test]
20014    fn apply_sparse_checkout_honors_preexisting_skip_worktree_via_idempotence() {
20015        let root = temp_root();
20016        let git_dir = root.join(".git");
20017        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
20018        fs::create_dir_all(root.join("in")).expect("test operation should succeed");
20019        fs::create_dir_all(root.join("out")).expect("test operation should succeed");
20020        fs::write(root.join("in").join("keep.txt"), b"keep\n")
20021            .expect("test operation should succeed");
20022        fs::write(root.join("out").join("drop.txt"), b"drop\n")
20023            .expect("test operation should succeed");
20024        build_commit(&root, &git_dir, &["in/keep.txt", "out/drop.txt"]);
20025
20026        let sparse = full_sparse(&[b"/in/"]);
20027        apply_sparse_checkout_with_mode(
20028            &root,
20029            &git_dir,
20030            ObjectFormat::Sha1,
20031            &sparse,
20032            SparseCheckoutMode::Full,
20033        )
20034        .expect("test operation should succeed");
20035        assert!(!root.join("out").join("drop.txt").exists());
20036
20037        // Re-applying the same spec is a no-op: the already-skipped file stays
20038        // absent and the bit stays set (we do not resurrect it).
20039        let result = apply_sparse_checkout_with_mode(
20040            &root,
20041            &git_dir,
20042            ObjectFormat::Sha1,
20043            &sparse,
20044            SparseCheckoutMode::Full,
20045        )
20046        .expect("test operation should succeed");
20047        assert!(!root.join("out").join("drop.txt").exists());
20048        assert!(root.join("in").join("keep.txt").exists());
20049        assert!(result.skipped.contains(&b"out/drop.txt".to_vec()));
20050        let index = read_index(&git_dir);
20051        assert!(index_entry_skip_worktree(index_entry_for(
20052            &index,
20053            b"out/drop.txt"
20054        )));
20055        fs::remove_dir_all(root).expect("test operation should succeed");
20056    }
20057
20058    #[test]
20059    fn checkout_detached_sparse_only_writes_in_cone_paths() {
20060        let root = temp_root();
20061        let git_dir = root.join(".git");
20062        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
20063        fs::create_dir_all(root.join("keep")).expect("test operation should succeed");
20064        fs::create_dir_all(root.join("skip")).expect("test operation should succeed");
20065        fs::write(root.join("keep").join("a.txt"), b"a\n").expect("test operation should succeed");
20066        fs::write(root.join("skip").join("b.txt"), b"b\n").expect("test operation should succeed");
20067        let commit = build_commit(&root, &git_dir, &["keep/a.txt", "skip/b.txt"]);
20068
20069        // The worktree is clean and matches the commit. A sparse checkout must
20070        // keep the in-cone file and evict the out-of-cone one.
20071        let sparse = full_sparse(&[b"/keep/"]);
20072        let result = checkout_detached_sparse(
20073            &root,
20074            &git_dir,
20075            ObjectFormat::Sha1,
20076            &commit,
20077            b"Test <test@example.com> 0 +0000".to_vec(),
20078            b"checkout".to_vec(),
20079            &sparse,
20080        )
20081        .expect("test operation should succeed");
20082        assert_eq!(result.files, 2);
20083
20084        assert!(root.join("keep").join("a.txt").exists());
20085        assert_eq!(
20086            fs::read(root.join("keep").join("a.txt")).expect("test operation should succeed"),
20087            b"a\n"
20088        );
20089        assert!(!root.join("skip").join("b.txt").exists());
20090
20091        let index = read_index(&git_dir);
20092        assert_eq!(index.entries.len(), 2);
20093        assert!(!index_entry_skip_worktree(index_entry_for(
20094            &index,
20095            b"keep/a.txt"
20096        )));
20097        let skipped = index_entry_for(&index, b"skip/b.txt");
20098        assert!(index_entry_skip_worktree(skipped));
20099        // The skipped entry still carries the committed blob id and mode.
20100        assert_eq!(skipped.mode, 0o100644);
20101        fs::remove_dir_all(root).expect("test operation should succeed");
20102    }
20103
20104    // ----- content filtering: EOL / autocrlf + clean/smudge drivers -----
20105
20106    /// Build a [`GitConfig`] from raw config text.
20107    fn config_from(text: &str) -> GitConfig {
20108        GitConfig::parse(text.as_bytes()).expect("test operation should succeed")
20109    }
20110
20111    /// Conformance grid for git's `output_eol(crlf_action)` decision table
20112    /// (convert.c) on the smudge side, exercised across the same
20113    /// attr × autocrlf × eol × content matrix as upstream t0027/t0026.
20114    ///
20115    /// Each row asserts the smudge output for a representative content shape.
20116    /// The cases that historically under-converted are the non-`auto` `text`
20117    /// paths (the auto-only safety guard must NOT fire) and the
20118    /// `autocrlf=true overrides core.eol` precedence rows.
20119    #[test]
20120    fn smudge_output_eol_decision_table() {
20121        // Naked-LF-only blob (the canonical "should gain CRLF" case).
20122        const LF: &[u8] = b"a\nb\nc\n";
20123        // Mixed CRLF + naked LF: a non-auto crlf action converts the naked LFs
20124        // to CRLF (whole file becomes CRLF); an auto action leaves it untouched.
20125        const CRLF_MIX_LF: &[u8] = b"a\r\nb\nc\r\n";
20126        // Naked LF plus a lone CR: non-auto converts LFs, keeping the lone CR.
20127        const LF_MIX_CR: &[u8] = b"a\nb\rc\n";
20128
20129        let smudge = |cfg: &str, attrline: Option<&[u8]>, input: &[u8]| -> Vec<u8> {
20130            let config = config_from(cfg);
20131            let checks = match attrline {
20132                Some(line) => {
20133                    let mut matcher = AttributeMatcher::default();
20134                    read_attribute_patterns_from_bytes(line, &mut matcher, &[], b".gitattributes");
20135                    matcher.attributes_for_path(b"f.txt", &filter_attribute_names(), false)
20136                }
20137                None => Vec::new(),
20138            };
20139            apply_smudge_filter_with_attributes(&config, &checks, b"f.txt", input)
20140                .expect("smudge must succeed")
20141        };
20142
20143        // --- attr=text (CRLF_TEXT_*): non-auto, the safety guard must not fire.
20144        // text + eol=crlf => CRLF_TEXT_CRLF: every naked LF gains CR.
20145        let attr_text_crlf: &[u8] = b"*.txt text eol=crlf";
20146        for cfg in [
20147            "[core]\n\tautocrlf = false\n\teol = lf\n",
20148            "[core]\n\tautocrlf = false\n\teol = crlf\n",
20149            "[core]\n\tautocrlf = true\n\teol = lf\n",
20150            "[core]\n\tautocrlf = input\n",
20151        ] {
20152            assert_eq!(
20153                smudge(cfg, Some(attr_text_crlf), LF),
20154                b"a\r\nb\r\nc\r\n",
20155                "text eol=crlf must add CR to naked LF (cfg={cfg:?})"
20156            );
20157            assert_eq!(
20158                smudge(cfg, Some(attr_text_crlf), CRLF_MIX_LF),
20159                b"a\r\nb\r\nc\r\n",
20160                "text eol=crlf must convert mixed content fully (cfg={cfg:?})"
20161            );
20162            assert_eq!(
20163                smudge(cfg, Some(attr_text_crlf), LF_MIX_CR),
20164                b"a\r\nb\rc\r\n",
20165                "text eol=crlf keeps the lone CR but adds CR to naked LF (cfg={cfg:?})"
20166            );
20167        }
20168
20169        // --- attr=text, no eol attr: CRLF_TEXT, resolved by text_eol_is_crlf().
20170        // autocrlf=true wins over core.eol=lf (the precedence fix).
20171        assert_eq!(
20172            smudge(
20173                "[core]\n\tautocrlf = true\n\teol = lf\n",
20174                Some(b"*.txt text"),
20175                LF
20176            ),
20177            b"a\r\nb\r\nc\r\n",
20178            "autocrlf=true must override core.eol=lf for plain text attr"
20179        );
20180        // autocrlf unset, core.eol=crlf => CRLF.
20181        assert_eq!(
20182            smudge("[core]\n\teol = crlf\n", Some(b"*.txt text"), LF),
20183            b"a\r\nb\r\nc\r\n",
20184            "core.eol=crlf adds CR to naked LF for plain text attr"
20185        );
20186        // autocrlf unset, core.eol=lf (and native LF on this host) => no CR.
20187        assert_eq!(
20188            smudge("[core]\n\teol = lf\n", Some(b"*.txt text"), LF),
20189            LF,
20190            "core.eol=lf leaves naked LF untouched on smudge"
20191        );
20192        // text + autocrlf=input => CRLF_TEXT_INPUT: no CR on smudge.
20193        assert_eq!(
20194            smudge("[core]\n\tautocrlf = input\n", Some(b"*.txt text"), LF),
20195            LF,
20196            "autocrlf=input overrides core.eol; no CR on smudge"
20197        );
20198
20199        // --- attr=text=auto (CRLF_AUTO_*): the safety guard DOES fire.
20200        // auto + autocrlf=true + naked-LF-only => convert.
20201        assert_eq!(
20202            smudge("[core]\n\tautocrlf = true\n", Some(b"*.txt text=auto"), LF),
20203            b"a\r\nb\r\nc\r\n",
20204            "text=auto converts a clean naked-LF file"
20205        );
20206        // auto + already has a CR/CRLF => leave untouched (irreversible guard).
20207        assert_eq!(
20208            smudge(
20209                "[core]\n\tautocrlf = true\n",
20210                Some(b"*.txt text=auto"),
20211                CRLF_MIX_LF
20212            ),
20213            CRLF_MIX_LF,
20214            "text=auto must not touch content that already has CRLF"
20215        );
20216        assert_eq!(
20217            smudge(
20218                "[core]\n\tautocrlf = true\n",
20219                Some(b"*.txt text=auto"),
20220                LF_MIX_CR
20221            ),
20222            LF_MIX_CR,
20223            "text=auto must not touch content that already has a lone CR"
20224        );
20225
20226        // --- no attr, autocrlf=true => CRLF_AUTO_CRLF (auto guard applies).
20227        assert_eq!(
20228            smudge("[core]\n\tautocrlf = true\n\teol = lf\n", None, LF),
20229            b"a\r\nb\r\nc\r\n",
20230            "autocrlf=true (no attr) converts clean naked-LF and overrides core.eol=lf"
20231        );
20232        // --- no attr, autocrlf=false => CRLF_BINARY: never convert.
20233        assert_eq!(
20234            smudge("[core]\n\teol = crlf\n", None, LF),
20235            LF,
20236            "no attr + autocrlf=false leaves content untouched even with core.eol=crlf"
20237        );
20238        // --- -text (CRLF_BINARY): never convert regardless of config.
20239        assert_eq!(
20240            smudge("[core]\n\tautocrlf = true\n", Some(b"*.txt -text"), LF),
20241            LF,
20242            "-text is binary: never convert"
20243        );
20244    }
20245
20246    /// Resolve attribute checks against an on-disk `.gitattributes` in `root`.
20247    fn attrs(root: &Path, path: &[u8]) -> Vec<AttributeCheck> {
20248        filter_attribute_checks(root, path).expect("test operation should succeed")
20249    }
20250
20251    #[test]
20252    fn standard_attribute_matcher_matches_per_path_lookup() {
20253        let root = temp_root();
20254        fs::create_dir_all(root.join(".git").join("info")).expect("test operation should succeed");
20255        fs::create_dir_all(root.join("src").join("nested")).expect("test operation should succeed");
20256        fs::write(root.join(".gitattributes"), b"*.rs diff=rust\n")
20257            .expect("test operation should succeed");
20258        fs::write(
20259            root.join("src").join(".gitattributes"),
20260            b"*.rs diff=python\n",
20261        )
20262        .expect("test operation should succeed");
20263        fs::write(
20264            root.join(".git").join("info").join("attributes"),
20265            b"src/nested/*.rs diff=java\n",
20266        )
20267        .expect("test operation should succeed");
20268
20269        let requested = vec![b"diff".to_vec()];
20270        let path = b"src/nested/file.rs";
20271        let per_path = standard_attributes_for_path(&root, path, &requested, false)
20272            .expect("test operation should succeed");
20273        let matcher = StandardAttributeMatcher::from_worktree_root(&root)
20274            .expect("test operation should succeed");
20275        assert_eq!(
20276            matcher.attributes_for_path(path, &requested, false),
20277            per_path
20278        );
20279
20280        fs::remove_dir_all(root).expect("test operation should succeed");
20281    }
20282
20283    #[test]
20284    fn filter_attribute_lookup_reads_only_path_chain() {
20285        let root = temp_root();
20286        fs::create_dir_all(root.join(".git").join("info")).expect("test operation should succeed");
20287        fs::create_dir_all(root.join("src").join("nested")).expect("test operation should succeed");
20288        fs::create_dir_all(root.join("sibling")).expect("test operation should succeed");
20289        fs::write(root.join(".gitattributes"), b"*.txt text\n")
20290            .expect("test operation should succeed");
20291        fs::write(root.join("src").join(".gitattributes"), b"*.txt -text\n")
20292            .expect("test operation should succeed");
20293        fs::write(
20294            root.join("sibling").join(".gitattributes"),
20295            b"*.txt eol=crlf\n",
20296        )
20297        .expect("test operation should succeed");
20298        fs::write(
20299            root.join(".git").join("info").join("attributes"),
20300            b"src/nested/*.txt eol=lf\n",
20301        )
20302        .expect("test operation should succeed");
20303
20304        let path = b"src/nested/file.txt";
20305        let full = standard_attributes_for_path(&root, path, &filter_attribute_names(), false)
20306            .expect("test operation should succeed");
20307        assert_eq!(filter_attribute_checks(&root, path).unwrap(), full);
20308
20309        fs::remove_dir_all(root).expect("test operation should succeed");
20310    }
20311
20312    #[test]
20313    fn crlf_to_lf_collapses_only_pairs() {
20314        assert_eq!(
20315            convert_crlf_to_lf_cow(Cow::Borrowed(b"a\r\nb\r\n")).as_ref(),
20316            b"a\nb\n"
20317        );
20318        // A lone CR (no following LF) is preserved.
20319        assert_eq!(
20320            convert_crlf_to_lf_cow(Cow::Borrowed(b"a\rb")).as_ref(),
20321            b"a\rb"
20322        );
20323        // An already-LF stream is unchanged.
20324        assert!(matches!(
20325            convert_crlf_to_lf_cow(Cow::Borrowed(b"a\nb\n")),
20326            Cow::Borrowed(_)
20327        ));
20328    }
20329
20330    #[test]
20331    fn lf_to_crlf_does_not_double_convert() {
20332        assert_eq!(convert_lf_to_crlf(b"a\nb\n"), b"a\r\nb\r\n");
20333        // Existing CRLF is left intact (no extra CR added).
20334        assert_eq!(convert_lf_to_crlf(b"a\r\nb\r\n"), b"a\r\nb\r\n");
20335    }
20336
20337    #[test]
20338    fn autocrlf_round_trip_clean_then_smudge() {
20339        // autocrlf=true: worktree CRLF -> blob LF on clean, blob LF -> worktree
20340        // CRLF on smudge.
20341        let config = config_from("[core]\n\tautocrlf = true\n");
20342        let checks: Vec<AttributeCheck> = Vec::new();
20343        let worktree = b"line1\r\nline2\r\n";
20344        let blob = apply_clean_filter_with_attributes(&config, &checks, b"file.txt", worktree)
20345            .expect("test operation should succeed");
20346        assert_eq!(blob, b"line1\nline2\n", "clean must normalize CRLF to LF");
20347        let restored = apply_smudge_filter_with_attributes(&config, &checks, b"file.txt", &blob)
20348            .expect("test operation should succeed");
20349        assert_eq!(
20350            restored, worktree,
20351            "smudge must restore CRLF from the LF blob"
20352        );
20353    }
20354
20355    #[test]
20356    fn conv_flags_from_config_matches_git_defaults() {
20357        // Unset core.safecrlf defaults to WARN (git's global_conv_flags_eol).
20358        assert_eq!(ConvFlags::from_config(&config_from("")), ConvFlags::Warn);
20359        assert_eq!(
20360            ConvFlags::from_config(&config_from("[core]\n\tsafecrlf = warn\n")),
20361            ConvFlags::Warn
20362        );
20363        assert_eq!(
20364            ConvFlags::from_config(&config_from("[core]\n\tsafecrlf = WARN\n")),
20365            ConvFlags::Warn
20366        );
20367        assert_eq!(
20368            ConvFlags::from_config(&config_from("[core]\n\tsafecrlf = true\n")),
20369            ConvFlags::Die
20370        );
20371        assert_eq!(
20372            ConvFlags::from_config(&config_from("[core]\n\tsafecrlf = false\n")),
20373            ConvFlags::Off
20374        );
20375    }
20376
20377    #[test]
20378    fn safecrlf_warn_does_not_change_clean_bytes() {
20379        // The warning is purely additive: byte output is identical whether
20380        // safecrlf is off or warn.
20381        let config = config_from("[core]\n\tautocrlf = true\n");
20382        let checks: Vec<AttributeCheck> = Vec::new();
20383        let worktree = b"a\nb\nc\n";
20384        let plain = apply_clean_filter_with_attributes(&config, &checks, b"f.txt", worktree)
20385            .expect("clean");
20386        let warned = apply_clean_filter_with_attributes_cow_safecrlf(
20387            &config,
20388            &checks,
20389            b"f.txt",
20390            worktree,
20391            ConvFlags::Warn,
20392            SafeCrlfIndexBlob::None,
20393        )
20394        .expect("clean with safecrlf")
20395        .into_owned();
20396        assert_eq!(plain, warned, "safecrlf must not alter the cleaned bytes");
20397    }
20398
20399    #[test]
20400    fn safecrlf_die_errors_on_lf_to_crlf_round_trip() {
20401        // autocrlf=true on a pure-LF file: checkout would add CRLF, so the
20402        // round-trip is irreversible and safecrlf=true dies (exit 128).
20403        let config = config_from("[core]\n\tautocrlf = true\n");
20404        let checks: Vec<AttributeCheck> = Vec::new();
20405        let err = apply_clean_filter_with_attributes_cow_safecrlf(
20406            &config,
20407            &checks,
20408            b"f.txt",
20409            b"a\nb\n",
20410            ConvFlags::Die,
20411            SafeCrlfIndexBlob::None,
20412        )
20413        .expect_err("die must error");
20414        assert!(matches!(err, GitError::Exit(128)));
20415    }
20416
20417    #[test]
20418    fn safecrlf_die_errors_on_crlf_to_lf_round_trip() {
20419        // autocrlf=input on a CRLF file: clean strips CRLF and checkout never
20420        // restores it, so safecrlf=true dies.
20421        let config = config_from("[core]\n\tautocrlf = input\n");
20422        let checks: Vec<AttributeCheck> = Vec::new();
20423        let err = apply_clean_filter_with_attributes_cow_safecrlf(
20424            &config,
20425            &checks,
20426            b"f.txt",
20427            b"a\r\nb\r\n",
20428            ConvFlags::Die,
20429            SafeCrlfIndexBlob::None,
20430        )
20431        .expect_err("die must error");
20432        assert!(matches!(err, GitError::Exit(128)));
20433    }
20434
20435    #[test]
20436    fn safecrlf_reversible_round_trip_does_not_warn_or_die() {
20437        // A CRLF file under autocrlf=true survives the round trip (clean to LF,
20438        // smudge back to CRLF), so even safecrlf=true is silent.
20439        let config = config_from("[core]\n\tautocrlf = true\n");
20440        let checks: Vec<AttributeCheck> = Vec::new();
20441        let out = apply_clean_filter_with_attributes_cow_safecrlf(
20442            &config,
20443            &checks,
20444            b"f.txt",
20445            b"a\r\nb\r\n",
20446            ConvFlags::Die,
20447            SafeCrlfIndexBlob::None,
20448        )
20449        .expect("reversible round trip must not die");
20450        assert_eq!(out.as_ref(), b"a\nb\n");
20451    }
20452
20453    #[test]
20454    fn safecrlf_binary_content_is_silent() {
20455        // autocrlf=true with NUL-containing (binary) content: no conversion and
20456        // no warning/die, mirroring git's early-return in crlf_to_git.
20457        let config = config_from("[core]\n\tautocrlf = true\n");
20458        let checks: Vec<AttributeCheck> = Vec::new();
20459        let body: &[u8] = b"a\nb\0c\n";
20460        let out = apply_clean_filter_with_attributes_cow_safecrlf(
20461            &config,
20462            &checks,
20463            b"f.bin",
20464            body,
20465            ConvFlags::Die,
20466            SafeCrlfIndexBlob::None,
20467        )
20468        .expect("binary content must not die");
20469        assert_eq!(out.as_ref(), body, "binary content is never converted");
20470    }
20471
20472    #[test]
20473    fn safecrlf_off_is_silent_even_on_irreversible_round_trip() {
20474        let config = config_from("[core]\n\tautocrlf = true\n");
20475        let checks: Vec<AttributeCheck> = Vec::new();
20476        let out = apply_clean_filter_with_attributes_cow_safecrlf(
20477            &config,
20478            &checks,
20479            b"f.txt",
20480            b"a\nb\n",
20481            ConvFlags::Off,
20482            SafeCrlfIndexBlob::None,
20483        )
20484        .expect("safecrlf=off never errors");
20485        // autocrlf=true does not convert on clean (only smudge), so bytes pass through.
20486        assert_eq!(out.as_ref(), b"a\nb\n");
20487    }
20488
20489    #[test]
20490    fn autocrlf_input_normalizes_on_clean_but_not_smudge() {
20491        // autocrlf=input: clean normalizes to LF, smudge leaves LF as-is.
20492        let config = config_from("[core]\n\tautocrlf = input\n");
20493        let checks: Vec<AttributeCheck> = Vec::new();
20494        let blob = apply_clean_filter_with_attributes(&config, &checks, b"file.txt", b"a\r\nb\r\n")
20495            .expect("test operation should succeed");
20496        assert_eq!(blob, b"a\nb\n");
20497        let smudged = apply_smudge_filter_with_attributes(&config, &checks, b"file.txt", &blob)
20498            .expect("test operation should succeed");
20499        assert_eq!(
20500            smudged, b"a\nb\n",
20501            "input mode must not add carriage returns"
20502        );
20503    }
20504
20505    #[test]
20506    fn eol_crlf_attribute_drives_conversion_without_config() {
20507        // No core.autocrlf; the `eol=crlf` attribute alone forces conversion.
20508        let config = config_from("");
20509        let checks = vec![AttributeCheck {
20510            attribute: b"eol".to_vec(),
20511            state: Some(AttributeState::Value(b"crlf".to_vec())),
20512        }];
20513        let blob = apply_clean_filter_with_attributes(&config, &checks, b"a.txt", b"x\r\ny\r\n")
20514            .expect("test operation should succeed");
20515        assert_eq!(blob, b"x\ny\n");
20516        let smudged = apply_smudge_filter_with_attributes(&config, &checks, b"a.txt", &blob)
20517            .expect("test operation should succeed");
20518        assert_eq!(smudged, b"x\r\ny\r\n");
20519    }
20520
20521    #[test]
20522    fn binary_attribute_disables_eol_conversion() {
20523        // `-text` (binary) must leave CRLF/NUL content untouched in both
20524        // directions even when autocrlf=true.
20525        let config = config_from("[core]\n\tautocrlf = true\n");
20526        let checks = vec![AttributeCheck {
20527            attribute: b"text".to_vec(),
20528            state: Some(AttributeState::Unset),
20529        }];
20530        let content = b"\x00\x01\r\n\x02\r\n".to_vec();
20531        let blob = apply_clean_filter_with_attributes(&config, &checks, b"data.bin", &content)
20532            .expect("test operation should succeed");
20533        assert_eq!(blob, content, "binary file must not be CRLF-normalized");
20534        let smudged = apply_smudge_filter_with_attributes(&config, &checks, b"data.bin", &blob)
20535            .expect("test operation should succeed");
20536        assert_eq!(
20537            smudged, content,
20538            "binary file must not gain carriage returns"
20539        );
20540    }
20541
20542    #[test]
20543    fn autocrlf_auto_skips_binary_looking_content() {
20544        // text=auto (via autocrlf) must not convert content that contains NUL.
20545        let config = config_from("[core]\n\tautocrlf = true\n");
20546        let checks: Vec<AttributeCheck> = Vec::new();
20547        let content = b"a\r\n\x00b\r\n".to_vec();
20548        let blob = apply_clean_filter_with_attributes(&config, &checks, b"f", &content)
20549            .expect("test operation should succeed");
20550        assert_eq!(blob, content, "binary-looking content stays untouched");
20551    }
20552
20553    #[test]
20554    fn autocrlf_via_add_and_checkout_round_trips() {
20555        // End-to-end: a CRLF worktree file is stored as an LF blob by the
20556        // filtered add path, and restored as CRLF by the filtered checkout.
20557        let root = temp_root();
20558        let git_dir = root.join(".git");
20559        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
20560        let config = config_from("[core]\n\tautocrlf = true\n");
20561
20562        fs::write(root.join("crlf.txt"), b"alpha\r\nbeta\r\n")
20563            .expect("test operation should succeed");
20564        add_paths_to_index_filtered(
20565            &root,
20566            &git_dir,
20567            ObjectFormat::Sha1,
20568            &[PathBuf::from("crlf.txt")],
20569            &config,
20570        )
20571        .expect("test operation should succeed");
20572
20573        // The stored blob must be LF-normalized.
20574        let index = read_index(&git_dir);
20575        let entry = index_entry_for(&index, b"crlf.txt");
20576        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
20577        let blob = odb
20578            .read_object(&entry.oid)
20579            .expect("test operation should succeed");
20580        assert_eq!(blob.body, b"alpha\nbeta\n");
20581
20582        // Commit and point HEAD at it, then re-checkout with smudge filtering.
20583        let tree = write_tree_from_index(&git_dir, ObjectFormat::Sha1)
20584            .expect("test operation should succeed");
20585        let mut body = Vec::new();
20586        body.extend_from_slice(format!("tree {tree}\n").as_bytes());
20587        body.extend_from_slice(b"author T <t@e> 0 +0000\ncommitter T <t@e> 0 +0000\n\nm\n");
20588        let odb = FileObjectDatabase::from_git_dir(&git_dir, ObjectFormat::Sha1);
20589        let commit = odb
20590            .write_object(EncodedObject::new(ObjectType::Commit, body))
20591            .expect("test operation should succeed");
20592        let refs = FileRefStore::new(&git_dir, ObjectFormat::Sha1);
20593        let mut tx = refs.transaction();
20594        tx.update(RefUpdate {
20595            name: "HEAD".into(),
20596            expected: None,
20597            new: RefTarget::Direct(commit),
20598            reflog: None,
20599        });
20600        tx.commit().expect("test operation should succeed");
20601
20602        // Make the worktree match the committed (LF) blob so the tree is clean
20603        // for checkout; `short_status`/`worktree_entries` compare by content
20604        // hash and are not filter-aware. Checkout will then smudge it to CRLF.
20605        fs::write(root.join("crlf.txt"), b"alpha\nbeta\n").expect("test operation should succeed");
20606        checkout_detached_filtered(
20607            &root,
20608            &git_dir,
20609            ObjectFormat::Sha1,
20610            &commit,
20611            b"T <t@e> 0 +0000".to_vec(),
20612            b"co".to_vec(),
20613            &config,
20614        )
20615        .expect("test operation should succeed");
20616        assert_eq!(
20617            fs::read(root.join("crlf.txt")).expect("test operation should succeed"),
20618            b"alpha\r\nbeta\r\n",
20619            "checkout must restore CRLF line endings"
20620        );
20621        fs::remove_dir_all(root).expect("test operation should succeed");
20622    }
20623
20624    #[test]
20625    fn driver_filter_clean_and_smudge_transform_both_directions() {
20626        // filter=case: clean upper-cases (worktree -> blob), smudge lower-cases
20627        // (blob -> worktree).
20628        let config =
20629            config_from("[filter \"case\"]\n\tclean = tr a-z A-Z\n\tsmudge = tr A-Z a-z\n");
20630        let checks = vec![AttributeCheck {
20631            attribute: b"filter".to_vec(),
20632            state: Some(AttributeState::Value(b"case".to_vec())),
20633        }];
20634        let blob = apply_clean_filter_with_attributes(&config, &checks, b"f.txt", b"Hello World")
20635            .expect("test operation should succeed");
20636        assert_eq!(blob, b"HELLO WORLD", "clean driver must upper-case");
20637        let worktree =
20638            apply_smudge_filter_with_attributes(&config, &checks, b"f.txt", b"HELLO WORLD")
20639                .expect("test operation should succeed");
20640        assert_eq!(worktree, b"hello world", "smudge driver must lower-case");
20641    }
20642
20643    #[test]
20644    fn driver_filter_resolved_from_gitattributes_file() {
20645        // The filter name is read from a real `.gitattributes`, the commands from
20646        // config; exercises the public worktree-rooted entry points.
20647        let root = temp_root();
20648        let git_dir = root.join(".git");
20649        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
20650        fs::write(root.join(".gitattributes"), b"*.dat filter=rot\n")
20651            .expect("test operation should succeed");
20652        let config =
20653            config_from("[filter \"rot\"]\n\tclean = sed s/a/b/g\n\tsmudge = sed s/b/a/g\n");
20654        // Clean reads attributes from the live worktree `.gitattributes`.
20655        let blob = apply_clean_filter(&root, &git_dir, &config, b"x.dat", b"banana")
20656            .expect("test operation should succeed");
20657        assert_eq!(blob, b"bbnbnb");
20658        // Smudge reads attributes from the index (the worktree file may not
20659        // exist yet during checkout), so stage `.gitattributes` first.
20660        add_paths_to_index(
20661            &root,
20662            &git_dir,
20663            ObjectFormat::Sha1,
20664            &[PathBuf::from(".gitattributes")],
20665        )
20666        .expect("test operation should succeed");
20667        let smudged = apply_smudge_filter(
20668            &root,
20669            &git_dir,
20670            ObjectFormat::Sha1,
20671            &config,
20672            b"x.dat",
20673            &blob,
20674        )
20675        .expect("test operation should succeed");
20676        // sed s/b/a/g is not a perfect inverse, but verifies the smudge command
20677        // ran on the blob bytes.
20678        assert_eq!(smudged, b"aanana");
20679        fs::remove_dir_all(root).expect("test operation should succeed");
20680    }
20681
20682    #[test]
20683    fn required_filter_failure_is_fatal() {
20684        // A required filter whose command fails must surface an error.
20685        let config = config_from("[filter \"boom\"]\n\tclean = false\n\trequired = true\n");
20686        let checks = vec![AttributeCheck {
20687            attribute: b"filter".to_vec(),
20688            state: Some(AttributeState::Value(b"boom".to_vec())),
20689        }];
20690        let err = apply_clean_filter_with_attributes(&config, &checks, b"f", b"data")
20691            .expect_err("required filter failure must error");
20692        assert!(matches!(err, GitError::Command(_)), "got {err:?}");
20693    }
20694
20695    #[test]
20696    fn required_filter_missing_command_is_fatal() {
20697        // required=true but no clean command for this direction is also fatal.
20698        let config = config_from("[filter \"need\"]\n\tsmudge = cat\n\trequired = true\n");
20699        let checks = vec![AttributeCheck {
20700            attribute: b"filter".to_vec(),
20701            state: Some(AttributeState::Value(b"need".to_vec())),
20702        }];
20703        let err = apply_clean_filter_with_attributes(&config, &checks, b"f", b"data")
20704            .expect_err("required filter without a clean command must error");
20705        assert!(matches!(err, GitError::Exit(128)), "got {err:?}");
20706    }
20707
20708    #[test]
20709    fn non_required_filter_failure_passes_through() {
20710        // A non-required filter that fails must pass the content through
20711        // unchanged rather than erroring.
20712        let config = config_from("[filter \"opt\"]\n\tclean = false\n");
20713        let checks = vec![AttributeCheck {
20714            attribute: b"filter".to_vec(),
20715            state: Some(AttributeState::Value(b"opt".to_vec())),
20716        }];
20717        let out = apply_clean_filter_with_attributes(&config, &checks, b"f", b"keepme")
20718            .expect("test operation should succeed");
20719        assert_eq!(
20720            out, b"keepme",
20721            "optional filter failure passes content through"
20722        );
20723    }
20724
20725    #[test]
20726    fn filter_with_no_command_is_noop() {
20727        // filter=name with no configured commands and not required is ignored.
20728        let config = config_from("");
20729        let checks = vec![AttributeCheck {
20730            attribute: b"filter".to_vec(),
20731            state: Some(AttributeState::Value(b"ghost".to_vec())),
20732        }];
20733        let out = apply_clean_filter_with_attributes(&config, &checks, b"f", b"unchanged")
20734            .expect("test operation should succeed");
20735        assert_eq!(out, b"unchanged");
20736    }
20737
20738    #[test]
20739    fn driver_and_eol_compose_on_clean_and_smudge() {
20740        // filter=case + autocrlf=true: clean runs the driver then CRLF->LF;
20741        // smudge runs LF->CRLF then the driver.
20742        let config = config_from(
20743            "[core]\n\tautocrlf = true\n[filter \"case\"]\n\tclean = tr a-z A-Z\n\tsmudge = tr A-Z a-z\n",
20744        );
20745        let checks = vec![
20746            AttributeCheck {
20747                attribute: b"filter".to_vec(),
20748                state: Some(AttributeState::Value(b"case".to_vec())),
20749            },
20750            AttributeCheck {
20751                attribute: b"text".to_vec(),
20752                state: Some(AttributeState::Set),
20753            },
20754        ];
20755        let blob = apply_clean_filter_with_attributes(&config, &checks, b"f.txt", b"ab\r\ncd\r\n")
20756            .expect("test operation should succeed");
20757        assert_eq!(blob, b"AB\nCD\n", "clean: upper-case then CRLF->LF");
20758        let worktree = apply_smudge_filter_with_attributes(&config, &checks, b"f.txt", &blob)
20759            .expect("test operation should succeed");
20760        assert_eq!(
20761            worktree, b"ab\r\ncd\r\n",
20762            "smudge: LF->CRLF then lower-case"
20763        );
20764    }
20765
20766    #[test]
20767    fn attrs_helper_reads_filter_from_disk() {
20768        let root = temp_root();
20769        fs::write(root.join(".gitattributes"), b"*.txt text\n*.bin -text\n")
20770            .expect("test operation should succeed");
20771        let text = attrs(&root, b"a.txt");
20772        assert!(
20773            text.iter()
20774                .any(|c| c.attribute == b"text" && c.state == Some(AttributeState::Set))
20775        );
20776        let bin = attrs(&root, b"a.bin");
20777        assert!(
20778            bin.iter()
20779                .any(|c| c.attribute == b"text" && c.state == Some(AttributeState::Unset))
20780        );
20781        fs::remove_dir_all(root).expect("test operation should succeed");
20782    }
20783
20784    /// Builds a stat cache holding a single stage-0 entry whose size+mtime match
20785    /// `file`'s real metadata, with the index-file mtime placed strictly after
20786    /// the entry mtime so the entry reads as non-racy by default. The entry's oid
20787    /// is `oid` and its mode is `mode`.
20788    fn stat_cache_for(file: &Path, oid: ObjectId, mode: u32) -> (IndexStatCache, IndexEntry) {
20789        let metadata = fs::metadata(file).expect("test operation should succeed");
20790        let mut entry = index_entry_from_metadata(b"f.txt".to_vec(), oid, &metadata);
20791        entry.mode = mode;
20792        let index_mtime = Some((u64::from(entry.mtime_seconds) + 10, 0));
20793        let mut entries = HashMap::new();
20794        entries.insert(entry.path.as_bytes().to_vec(), entry.clone());
20795        (
20796            IndexStatCache {
20797                entries,
20798                index_mtime,
20799            },
20800            entry,
20801        )
20802    }
20803
20804    #[test]
20805    fn reuse_tracked_entry_only_reuses_clean_non_racy_match() {
20806        let root = temp_root();
20807        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
20808        let file = root.join("f.txt");
20809        let metadata = fs::metadata(&file).expect("test operation should succeed");
20810        let real_mode = file_mode(&metadata);
20811        let oid = EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec())
20812            .object_id(ObjectFormat::Sha1)
20813            .expect("test operation should succeed");
20814
20815        // Clean, non-racy, matching stat + mode -> reuse the cached oid.
20816        let (cache, _) = stat_cache_for(&file, oid, real_mode);
20817        let reused = cache.reuse_tracked_entry(b"f.txt", &metadata);
20818        assert_eq!(
20819            reused,
20820            Some(TrackedEntry {
20821                mode: real_mode,
20822                oid,
20823            }),
20824            "a clean non-racy stat+mode match must reuse the staged oid"
20825        );
20826
20827        // No stage-0 entry for the path -> must hash.
20828        assert_eq!(
20829            cache.reuse_tracked_entry(b"other.txt", &metadata),
20830            None,
20831            "a path with no cached entry must fall through to hashing"
20832        );
20833
20834        // Size differs from the file -> must hash.
20835        let (mut size_cache, mut shrunk) = stat_cache_for(&file, oid, real_mode);
20836        shrunk.size = shrunk.size.saturating_sub(1);
20837        size_cache.entries.insert(shrunk.path.to_vec(), shrunk);
20838        assert_eq!(
20839            size_cache.reuse_tracked_entry(b"f.txt", &metadata),
20840            None,
20841            "a size mismatch must fall through to hashing"
20842        );
20843
20844        // Mode differs (e.g. a chmod that did not move mtime) -> must hash.
20845        let (mode_cache, _) = stat_cache_for(&file, oid, 0o100755);
20846        assert_eq!(
20847            mode_cache.reuse_tracked_entry(b"f.txt", &metadata),
20848            None,
20849            "a mode mismatch must fall through to hashing"
20850        );
20851
20852        // Racily clean (index mtime not strictly after the entry mtime) -> hash.
20853        let (mut racy_cache, entry) = stat_cache_for(&file, oid, real_mode);
20854        racy_cache.index_mtime = Some((
20855            u64::from(entry.mtime_seconds),
20856            u64::from(entry.mtime_nanoseconds),
20857        ));
20858        assert_eq!(
20859            racy_cache.reuse_tracked_entry(b"f.txt", &metadata),
20860            None,
20861            "a racily-clean entry must always be re-hashed"
20862        );
20863
20864        // Unknown index mtime is treated as racy -> hash.
20865        let (mut unknown_cache, _) = stat_cache_for(
20866            &file,
20867            EncodedObject::new(ObjectType::Blob, b"hello\n".to_vec())
20868                .object_id(ObjectFormat::Sha1)
20869                .expect("test operation should succeed"),
20870            real_mode,
20871        );
20872        unknown_cache.index_mtime = None;
20873        assert_eq!(
20874            unknown_cache.reuse_tracked_entry(b"f.txt", &metadata),
20875            None,
20876            "an unknown index mtime must be treated conservatively as racy"
20877        );
20878
20879        fs::remove_dir_all(root).expect("test operation should succeed");
20880    }
20881
20882    #[test]
20883    fn index_stat_probe_cache_serves_many_paths_from_one_index_parse() {
20884        let root = temp_root();
20885        let git_dir = root.join(".git");
20886        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
20887        fs::write(root.join("a.txt"), b"alpha\n").expect("test operation should succeed");
20888        fs::write(root.join("b.txt"), b"bravo\n").expect("test operation should succeed");
20889        build_commit(&root, &git_dir, &["a.txt", "b.txt"]);
20890
20891        let cache = IndexStatProbeCache::from_repository_index(&git_dir, ObjectFormat::Sha1)
20892            .expect("probe cache");
20893        assert_eq!(cache.len(), 2);
20894        assert!(cache.contains_git_path(b"a.txt"));
20895        assert!(cache.contains_git_path(b"b.txt"));
20896        let a = cache.probe_for_git_path(b"a.txt").expect("a probe");
20897        let b = cache.probe_for_git_path(b"b.txt").expect("b probe");
20898        assert_eq!(a.entry().path, b"a.txt");
20899        assert_eq!(b.entry().path, b"b.txt");
20900        assert_eq!(a.index_mtime(), cache.index_mtime());
20901        assert_eq!(b.index_mtime(), cache.index_mtime());
20902        assert!(
20903            cache.probe_for_git_path(b"missing.txt").is_none(),
20904            "missing paths should not allocate probes"
20905        );
20906
20907        let one_shot =
20908            IndexStatProbe::from_repository_index(&git_dir, ObjectFormat::Sha1, b"a.txt")
20909                .expect("legacy one-shot probe")
20910                .expect("a probe");
20911        assert_eq!(one_shot.entry().path, b"a.txt");
20912        assert_eq!(one_shot.index_mtime(), cache.index_mtime());
20913
20914        fs::remove_dir_all(root).expect("test operation should succeed");
20915    }
20916
20917    #[test]
20918    fn short_status_detects_same_length_content_change() {
20919        let root = temp_root();
20920        let git_dir = root.join(".git");
20921        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
20922        fs::write(root.join("f.txt"), b"aaaa\n").expect("test operation should succeed");
20923        build_commit(&root, &git_dir, &["f.txt"]);
20924        // Overwrite with the SAME byte length but different content. Right after
20925        // staging the entry is racily clean (index mtime >= entry mtime), so the
20926        // stat shortcut must not be trusted and the change must surface as M.
20927        fs::write(root.join("f.txt"), b"bbbb\n").expect("test operation should succeed");
20928        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
20929            .expect("test operation should succeed");
20930        assert_eq!(
20931            status
20932                .iter()
20933                .map(ShortStatusEntry::line)
20934                .collect::<Vec<_>>(),
20935            vec![" M f.txt"],
20936            "a same-length content change must be reported modified"
20937        );
20938        fs::remove_dir_all(root).expect("test operation should succeed");
20939    }
20940
20941    #[test]
20942    fn short_status_clean_after_byte_identical_rewrite() {
20943        let root = temp_root();
20944        let git_dir = root.join(".git");
20945        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
20946        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
20947        build_commit(&root, &git_dir, &["f.txt"]);
20948        // Rewrite with byte-identical content; the mtime moves so the stat
20949        // shortcut declines to reuse and the fallback hash proves it clean.
20950        std::thread::sleep(std::time::Duration::from_millis(20));
20951        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
20952        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
20953            .expect("test operation should succeed");
20954        assert!(
20955            status.is_empty(),
20956            "a byte-identical rewrite must be clean via the fallback hash, got {status:?}"
20957        );
20958        fs::remove_dir_all(root).expect("test operation should succeed");
20959    }
20960
20961    #[test]
20962    fn short_status_trusts_stat_cache_and_skips_rehash() {
20963        let root = temp_root();
20964        let git_dir = root.join(".git");
20965        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
20966        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
20967        build_commit(&root, &git_dir, &["f.txt"]);
20968
20969        // Plant a BOGUS oid in the stage-0 entry while preserving its size+mtime,
20970        // so a real re-hash of the (unchanged) worktree file would NOT match it.
20971        let index_path = repository_index_path(&git_dir);
20972        let mut index = read_index(&git_dir);
20973        let bogus = ObjectId::from_hex(ObjectFormat::Sha1, &"0".repeat(40))
20974            .expect("test operation should succeed");
20975        let real_oid = index_entry_for(&index, b"f.txt").oid;
20976        assert_ne!(
20977            real_oid, bogus,
20978            "fixture oid should differ from the bogus oid"
20979        );
20980        index
20981            .entries
20982            .iter_mut()
20983            .find(|entry| entry.path == b"f.txt")
20984            .expect("test operation should succeed")
20985            .oid = bogus.clone();
20986        fs::write(
20987            &index_path,
20988            index
20989                .write(ObjectFormat::Sha1)
20990                .expect("test operation should succeed"),
20991        )
20992        .expect("test operation should succeed");
20993
20994        // Make the index file STRICTLY newer than the entry mtime (non-racy) by
20995        // waiting past one-second filesystem granularity and rewriting it, so the
20996        // racy-clean guard does not force a re-hash.
20997        std::thread::sleep(std::time::Duration::from_millis(1100));
20998        fs::write(
20999            &index_path,
21000            fs::read(&index_path).expect("test operation should succeed"),
21001        )
21002        .expect("test operation should succeed");
21003
21004        // The file is unchanged on disk, so a trusted stat reuses the bogus index
21005        // oid for the worktree entry: worktree-oid == index-oid == bogus, so the
21006        // WORKTREE column is clean. Had status re-hashed the file, the real oid
21007        // would differ from the bogus index oid and the worktree column would be
21008        // 'M'. (The index-vs-HEAD column is 'M' because we corrupted the index
21009        // oid away from HEAD; that is expected and not what this test asserts.)
21010        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
21011            .expect("test operation should succeed");
21012        let entry = status
21013            .iter()
21014            .find(|entry| entry.path == b"f.txt")
21015            .expect("f.txt should appear (its index oid now differs from HEAD)");
21016        assert_eq!(
21017            entry.worktree, b' ',
21018            "non-racy stat match must trust the cached oid (no re-hash); worktree column was {}",
21019            entry.worktree as char
21020        );
21021        assert_eq!(
21022            entry.index_oid.as_ref(),
21023            Some(&bogus),
21024            "the worktree entry must have reused the planted bogus index oid, not the real hash"
21025        );
21026
21027        fs::remove_dir_all(root).expect("test operation should succeed");
21028    }
21029
21030    #[test]
21031    fn worktree_entry_state_detects_same_size_content_change() {
21032        let root = temp_root();
21033        let git_dir = root.join(".git");
21034        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
21035        fs::write(root.join("f.txt"), b"aaaa\n").expect("test operation should succeed");
21036        build_commit(&root, &git_dir, &["f.txt"]);
21037        let index = read_index(&git_dir);
21038        let entry = index_entry_for(&index, b"f.txt").clone();
21039        let probe = IndexStatProbe::from_index_entry_and_index_path(
21040            entry.clone(),
21041            repository_index_path(&git_dir),
21042        );
21043
21044        fs::write(root.join("f.txt"), b"bbbb\n").expect("test operation should succeed");
21045        let state = worktree_entry_state(
21046            &root,
21047            &git_dir,
21048            ObjectFormat::Sha1,
21049            Path::new("f.txt"),
21050            &entry.oid,
21051            entry.mode,
21052            Some(&probe),
21053        )
21054        .expect("test operation should succeed");
21055        assert_eq!(state, WorktreeEntryState::Modified);
21056
21057        fs::remove_dir_all(root).expect("test operation should succeed");
21058    }
21059
21060    #[test]
21061    fn worktree_entry_state_reports_deleted_for_missing_and_parent_not_directory() {
21062        let root = temp_root();
21063        let git_dir = root.join(".git");
21064        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
21065        fs::create_dir_all(root.join("dir")).expect("test operation should succeed");
21066        fs::write(root.join("dir").join("f.txt"), b"hello\n")
21067            .expect("test operation should succeed");
21068        build_commit(&root, &git_dir, &["dir/f.txt"]);
21069        let index = read_index(&git_dir);
21070        let entry = index_entry_for(&index, b"dir/f.txt").clone();
21071
21072        fs::remove_file(root.join("dir").join("f.txt")).expect("test operation should succeed");
21073        let missing = worktree_entry_state_by_git_path(
21074            &root,
21075            &git_dir,
21076            ObjectFormat::Sha1,
21077            b"dir/f.txt",
21078            &entry.oid,
21079            entry.mode,
21080            None,
21081        )
21082        .expect("test operation should succeed");
21083        assert_eq!(missing, WorktreeEntryState::Deleted);
21084
21085        fs::remove_dir(root.join("dir")).expect("test operation should succeed");
21086        fs::write(root.join("dir"), b"not a directory").expect("test operation should succeed");
21087        let parent_not_directory = worktree_entry_state_by_git_path(
21088            &root,
21089            &git_dir,
21090            ObjectFormat::Sha1,
21091            b"dir/f.txt",
21092            &entry.oid,
21093            entry.mode,
21094            None,
21095        )
21096        .expect("test operation should succeed");
21097        assert_eq!(parent_not_directory, WorktreeEntryState::Deleted);
21098
21099        fs::remove_dir_all(root).expect("test operation should succeed");
21100    }
21101
21102    #[test]
21103    fn worktree_entry_state_trusts_clean_non_racy_probe() {
21104        let root = temp_root();
21105        let git_dir = root.join(".git");
21106        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
21107        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
21108        build_commit(&root, &git_dir, &["f.txt"]);
21109        let index_path = repository_index_path(&git_dir);
21110        let mut index = read_index(&git_dir);
21111        let bogus = ObjectId::from_hex(ObjectFormat::Sha1, &"1".repeat(40))
21112            .expect("test operation should succeed");
21113        index
21114            .entries
21115            .iter_mut()
21116            .find(|entry| entry.path == b"f.txt")
21117            .expect("test operation should succeed")
21118            .oid = bogus;
21119        fs::write(
21120            &index_path,
21121            index
21122                .write(ObjectFormat::Sha1)
21123                .expect("test operation should succeed"),
21124        )
21125        .expect("test operation should succeed");
21126        std::thread::sleep(std::time::Duration::from_millis(1100));
21127        fs::write(
21128            &index_path,
21129            fs::read(&index_path).expect("test operation should succeed"),
21130        )
21131        .expect("test operation should succeed");
21132        let index = read_index(&git_dir);
21133        let entry = index_entry_for(&index, b"f.txt").clone();
21134        let probe = IndexStatProbe::from_index_entry_and_index_path(
21135            entry.clone(),
21136            repository_index_path(&git_dir),
21137        );
21138
21139        let state = worktree_entry_state(
21140            &root,
21141            &git_dir,
21142            ObjectFormat::Sha1,
21143            Path::new("f.txt"),
21144            &entry.oid,
21145            entry.mode,
21146            Some(&probe),
21147        )
21148        .expect("test operation should succeed");
21149        assert_eq!(
21150            state,
21151            WorktreeEntryState::Clean,
21152            "a non-racy stat match must be enough to prove this path clean"
21153        );
21154
21155        fs::remove_dir_all(root).expect("test operation should succeed");
21156    }
21157
21158    #[test]
21159    fn worktree_entry_state_rehashes_racy_probe() {
21160        let root = temp_root();
21161        let git_dir = root.join(".git");
21162        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
21163        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
21164        build_commit(&root, &git_dir, &["f.txt"]);
21165        let index = read_index(&git_dir);
21166        let mut entry = index_entry_for(&index, b"f.txt").clone();
21167        entry.oid = ObjectId::from_hex(ObjectFormat::Sha1, &"2".repeat(40))
21168            .expect("test operation should succeed");
21169        let probe = IndexStatProbe::from_index_entry(
21170            entry.clone(),
21171            Some((
21172                u64::from(entry.mtime_seconds),
21173                u64::from(entry.mtime_nanoseconds),
21174            )),
21175        );
21176
21177        let state = worktree_entry_state(
21178            &root,
21179            &git_dir,
21180            ObjectFormat::Sha1,
21181            Path::new("f.txt"),
21182            &entry.oid,
21183            entry.mode,
21184            Some(&probe),
21185        )
21186        .expect("test operation should succeed");
21187        assert_eq!(
21188            state,
21189            WorktreeEntryState::Modified,
21190            "a racily-clean stat match must fall through to hashing"
21191        );
21192
21193        fs::remove_dir_all(root).expect("test operation should succeed");
21194    }
21195
21196    #[cfg(unix)]
21197    #[test]
21198    fn worktree_entry_state_detects_chmod_only_change() {
21199        use std::os::unix::fs::PermissionsExt;
21200
21201        let root = temp_root();
21202        let git_dir = root.join(".git");
21203        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
21204        fs::write(root.join("f.txt"), b"hello\n").expect("test operation should succeed");
21205        build_commit(&root, &git_dir, &["f.txt"]);
21206        let index = read_index(&git_dir);
21207        let entry = index_entry_for(&index, b"f.txt").clone();
21208
21209        let file = root.join("f.txt");
21210        let mut permissions = fs::metadata(&file)
21211            .expect("test operation should succeed")
21212            .permissions();
21213        permissions.set_mode(permissions.mode() | 0o111);
21214        fs::set_permissions(&file, permissions).expect("test operation should succeed");
21215        let state = worktree_entry_state(
21216            &root,
21217            &git_dir,
21218            ObjectFormat::Sha1,
21219            Path::new("f.txt"),
21220            &entry.oid,
21221            entry.mode,
21222            None,
21223        )
21224        .expect("test operation should succeed");
21225        assert_eq!(state, WorktreeEntryState::Modified);
21226
21227        fs::remove_dir_all(root).expect("test operation should succeed");
21228    }
21229
21230    #[cfg(unix)]
21231    #[test]
21232    fn worktree_entry_state_detects_symlink_target_change() {
21233        use std::os::unix::fs::symlink;
21234
21235        let root = temp_root();
21236        let git_dir = root.join(".git");
21237        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
21238        symlink("one", root.join("link")).expect("test operation should succeed");
21239        build_commit(&root, &git_dir, &["link"]);
21240        let index = read_index(&git_dir);
21241        let entry = index_entry_for(&index, b"link").clone();
21242
21243        fs::remove_file(root.join("link")).expect("test operation should succeed");
21244        symlink("two", root.join("link")).expect("test operation should succeed");
21245        let state = worktree_entry_state(
21246            &root,
21247            &git_dir,
21248            ObjectFormat::Sha1,
21249            Path::new("link"),
21250            &entry.oid,
21251            entry.mode,
21252            None,
21253        )
21254        .expect("test operation should succeed");
21255        assert_eq!(state, WorktreeEntryState::Modified);
21256
21257        fs::remove_dir_all(root).expect("test operation should succeed");
21258    }
21259
21260    #[test]
21261    fn worktree_entry_state_treats_present_unpopulated_gitlink_directory_as_clean() {
21262        let root = temp_root();
21263        let git_dir = root.join(".git");
21264        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
21265        fs::create_dir_all(root.join("submodule")).expect("test operation should succeed");
21266        let oid = ObjectId::from_hex(ObjectFormat::Sha1, &"3".repeat(40))
21267            .expect("test operation should succeed");
21268
21269        let state = worktree_entry_state(
21270            &root,
21271            &git_dir,
21272            ObjectFormat::Sha1,
21273            Path::new("submodule"),
21274            &oid,
21275            sley_index::GITLINK_MODE,
21276            None,
21277        )
21278        .expect("test operation should succeed");
21279        assert_eq!(state, WorktreeEntryState::Clean);
21280
21281        fs::remove_dir_all(root).expect("test operation should succeed");
21282    }
21283
21284    #[test]
21285    fn short_status_empty_on_unborn_repository() {
21286        let root = temp_root();
21287        let git_dir = root.join(".git");
21288        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
21289        fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n")
21290            .expect("test operation should succeed");
21291        let status = short_status(&root, &git_dir, ObjectFormat::Sha1)
21292            .expect("test operation should succeed");
21293        assert!(
21294            status.is_empty(),
21295            "an unborn repository with an empty worktree must be clean, got {status:?}"
21296        );
21297        fs::remove_dir_all(root).expect("test operation should succeed");
21298    }
21299
21300    #[test]
21301    fn untracked_paths_skips_embedded_git_internals() {
21302        let root = temp_root();
21303        let git_dir = root.join(".git");
21304        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
21305        fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n")
21306            .expect("test operation should succeed");
21307        let nested = root.join("not-a-submodule");
21308        fs::create_dir_all(nested.join(".git")).expect("test operation should succeed");
21309        fs::write(nested.join(".git/HEAD"), "ref: refs/heads/main\n")
21310            .expect("test operation should succeed");
21311        fs::write(nested.join("file.txt"), b"inside\n").expect("test operation should succeed");
21312        let paths = untracked_paths(&root, &git_dir, ObjectFormat::Sha1)
21313            .expect("test operation should succeed");
21314        assert!(
21315            paths.iter().any(|path| path == b"not-a-submodule/"),
21316            "embedded repository directory should be listed, got {paths:?}"
21317        );
21318        assert!(
21319            !paths
21320                .iter()
21321                .any(|path| path.starts_with(b"not-a-submodule/.git")),
21322            "embedded .git internals must not be listed, got {paths:?}"
21323        );
21324        fs::remove_dir_all(root).expect("test operation should succeed");
21325    }
21326
21327    #[cfg(unix)]
21328    #[test]
21329    fn untracked_paths_lists_symlink() {
21330        use std::os::unix::fs::symlink;
21331
21332        let root = temp_root();
21333        let git_dir = root.join(".git");
21334        fs::create_dir_all(git_dir.join("objects")).expect("test operation should succeed");
21335        fs::write(git_dir.join("HEAD"), "ref: refs/heads/main\n")
21336            .expect("test operation should succeed");
21337        fs::write(root.join("target.txt"), b"target\n").expect("test operation should succeed");
21338        symlink(root.join("target.txt"), root.join("path1")).expect("create symlink");
21339        let paths = untracked_paths(&root, &git_dir, ObjectFormat::Sha1)
21340            .expect("test operation should succeed");
21341        assert!(
21342            paths.contains(&b"path1".to_vec()),
21343            "untracked symlink must be listed, got {paths:?}"
21344        );
21345        fs::remove_dir_all(root).expect("test operation should succeed");
21346    }
21347}