Skip to main content

grit_lib/
index.rs

1//! Git index (staging area) reading and writing.
2//!
3//! The index file (`.git/index`) stores the current state of the staging area.
4//! It uses a binary format with a 12-byte header, fixed-size index entries,
5//! and optional extensions, followed by a trailing SHA-1 over the whole file.
6//!
7//! # Format version
8//!
9//! This implementation supports index versions 2 and 3. Requests for version 4
10//! currently fall back to a non-compressed index on write because path
11//! compression is not yet implemented.
12//!
13//! # References
14//!
15//! See `Documentation/technical/index-format.txt` in the Git source tree for
16//! the authoritative format specification.
17
18use std::collections::{BTreeMap, BTreeSet};
19use std::fs;
20use std::io::{self, Write};
21use std::path::Path;
22
23use sha1::{Digest, Sha1};
24
25use crate::config::ConfigSet;
26use crate::error::{Error, Result};
27use crate::objects::{parse_tree, ObjectId, ObjectKind, TreeEntry};
28use crate::odb::Odb;
29use crate::repo::Repository;
30use crate::resolve_undo::{self, write_resolve_undo_payload, ResolveUndoRecord};
31use crate::rev_parse;
32use crate::untracked_cache;
33
34/// File mode for a regular (non-executable) file.
35pub const MODE_REGULAR: u32 = 0o100644;
36/// File mode for an executable file.
37pub const MODE_EXECUTABLE: u32 = 0o100755;
38/// File mode for a symbolic link.
39pub const MODE_SYMLINK: u32 = 0o120000;
40/// File mode for a gitlink (submodule).
41pub const MODE_GITLINK: u32 = 0o160000;
42/// File mode for a directory (tree) entry — only used in tree objects, not index.
43pub const MODE_TREE: u32 = 0o040000;
44
45/// Git index extension signature `sdir` (sparse directory entries present).
46const INDEX_EXT_SPARSE_DIRECTORIES: u32 = u32::from_be_bytes(*b"sdir");
47/// Git index extension signature `UNTR` (untracked cache).
48const INDEX_EXT_UNTRACKED: u32 = u32::from_be_bytes(*b"UNTR");
49/// Git index extension signature `FSMN` (fsmonitor).
50const INDEX_EXT_FSMONITOR: u32 = u32::from_be_bytes(*b"FSMN");
51/// Git index extension signature `REUC` (resolve undo).
52const INDEX_EXT_RESOLVE_UNDO: u32 = u32::from_be_bytes(*b"REUC");
53/// Git index extension signature `link` (split index).
54const INDEX_EXT_LINK: u32 = u32::from_be_bytes(*b"link");
55
56/// A single entry in the Git index.
57#[derive(Debug, Clone, PartialEq, Eq)]
58pub struct IndexEntry {
59    /// Time the file metadata last changed (seconds since epoch).
60    pub ctime_sec: u32,
61    /// Nanosecond fraction of `ctime_sec`.
62    pub ctime_nsec: u32,
63    /// Time the file data last changed (seconds since epoch).
64    pub mtime_sec: u32,
65    /// Nanosecond fraction of `mtime_sec`.
66    pub mtime_nsec: u32,
67    /// Device number.
68    pub dev: u32,
69    /// Inode number.
70    pub ino: u32,
71    /// Unix file mode (`MODE_REGULAR`, `MODE_EXECUTABLE`, `MODE_SYMLINK`, …).
72    pub mode: u32,
73    /// Owner UID.
74    pub uid: u32,
75    /// Owner GID.
76    pub gid: u32,
77    /// File size in bytes (truncated to 32 bits).
78    pub size: u32,
79    /// SHA-1 of the blob object.
80    pub oid: ObjectId,
81    /// Entry flags (stage, assume-valid, extended, …).
82    pub flags: u16,
83    /// Extended flags (v3+ only).
84    pub flags_extended: Option<u16>,
85    /// Path relative to the repository root.  May contain `/` separators.
86    pub path: Vec<u8>,
87    /// Split index: position in shared base (1-based), or 0 if not from shared index.
88    pub base_index_pos: u32,
89}
90
91impl IndexEntry {
92    /// Merge stage (0 = normal, 1–3 = conflict stages).
93    #[must_use]
94    pub fn stage(&self) -> u8 {
95        ((self.flags >> 12) & 0x3) as u8
96    }
97
98    pub(crate) fn set_stage(&mut self, stage: u8) {
99        self.flags = (self.flags & 0x0FFF) | ((stage as u16 & 0x3) << 12);
100    }
101
102    /// Whether the assume-unchanged bit is set.
103    #[must_use]
104    pub fn assume_unchanged(&self) -> bool {
105        self.flags & 0x8000 != 0
106    }
107
108    /// Whether the skip-worktree bit is set (extended flags, v3+).
109    #[must_use]
110    pub fn skip_worktree(&self) -> bool {
111        self.flags_extended
112            .map(|f| f & 0x4000 != 0)
113            .unwrap_or(false)
114    }
115
116    /// Set the assume-unchanged bit.
117    pub fn set_assume_unchanged(&mut self, value: bool) {
118        if value {
119            self.flags |= 0x8000;
120        } else {
121            self.flags &= !0x8000;
122        }
123    }
124
125    /// Set the skip-worktree bit (promotes entry to v3).
126    pub fn set_skip_worktree(&mut self, value: bool) {
127        let fe = self.flags_extended.get_or_insert(0);
128        if value {
129            *fe |= 0x4000;
130        } else {
131            *fe &= !0x4000;
132            if *fe == 0 {
133                self.flags_extended = None;
134            }
135        }
136    }
137
138    /// Whether the intent-to-add bit is set (extended flags, v3+).
139    #[must_use]
140    pub fn intent_to_add(&self) -> bool {
141        self.flags_extended
142            .map(|f| f & 0x2000 != 0)
143            .unwrap_or(false)
144    }
145
146    /// Set the intent-to-add bit (promotes entry to v3).
147    pub fn set_intent_to_add(&mut self, value: bool) {
148        let fe = self.flags_extended.get_or_insert(0);
149        if value {
150            *fe |= 0x2000;
151        } else {
152            *fe &= !0x2000;
153            if *fe == 0 {
154                self.flags_extended = None;
155            }
156        }
157    }
158
159    /// Sparse-index placeholder: tree mode, stage 0, and `SKIP_WORKTREE` set.
160    #[must_use]
161    pub fn is_sparse_directory_placeholder(&self) -> bool {
162        self.mode == MODE_TREE && self.stage() == 0 && self.skip_worktree()
163    }
164
165    /// In-memory only: `ls-files --with-tree` hides stage-1 overlay rows that duplicate stage 0.
166    const FLAG_EXT_OVERLAY_TREE_SKIP: u16 = 0x8000;
167    /// In-memory and on-disk compatibility bit for fsmonitor validity (`git ls-files -f`).
168    const FLAG_EXT_FSMONITOR_VALID: u16 = 0x1000;
169    /// Extended flags Git persists in index v3 (`CE_EXTENDED_FLAGS` in `read-cache-ll.h`).
170    const FLAG_EXT_ON_DISK: u16 = Self::FLAG_EXT_FSMONITOR_VALID | 0x2000 | 0x4000;
171
172    /// Extended flag bits safe to write to a Git-compatible on-disk index.
173    fn disk_flags_extended(fe: u16) -> u16 {
174        fe & Self::FLAG_EXT_ON_DISK
175    }
176
177    #[must_use]
178    pub fn overlay_tree_skip_output(&self) -> bool {
179        self.flags_extended
180            .is_some_and(|fe| fe & Self::FLAG_EXT_OVERLAY_TREE_SKIP != 0)
181    }
182
183    fn set_overlay_tree_skip_output(&mut self, value: bool) {
184        let fe = self.flags_extended.get_or_insert(0);
185        if value {
186            *fe |= Self::FLAG_EXT_OVERLAY_TREE_SKIP;
187        } else {
188            *fe &= !Self::FLAG_EXT_OVERLAY_TREE_SKIP;
189            if *fe == 0 {
190                self.flags_extended = None;
191            }
192        }
193    }
194
195    /// Whether the fsmonitor-valid bit is set.
196    #[must_use]
197    pub fn fsmonitor_valid(&self) -> bool {
198        self.flags_extended
199            .is_some_and(|fe| fe & Self::FLAG_EXT_FSMONITOR_VALID != 0)
200    }
201
202    /// Set or clear the fsmonitor-valid bit.
203    pub fn set_fsmonitor_valid(&mut self, value: bool) {
204        let fe = self.flags_extended.get_or_insert(0);
205        if value {
206            *fe |= Self::FLAG_EXT_FSMONITOR_VALID;
207        } else {
208            *fe &= !Self::FLAG_EXT_FSMONITOR_VALID;
209            if *fe == 0 {
210                self.flags_extended = None;
211            }
212        }
213    }
214}
215
216/// The in-memory representation of the Git index file.
217#[derive(Debug, Clone, Default)]
218pub struct Index {
219    /// Index format version (2 or 3).
220    pub version: u32,
221    /// Index entries, sorted by (path, stage).
222    pub entries: Vec<IndexEntry>,
223    /// When true, the on-disk index includes the `sdir` extension (sparse index).
224    pub sparse_directories: bool,
225    /// Optional untracked-cache extension (`UNTR`), matching Git's `istate->untracked`.
226    pub untracked_cache: Option<untracked_cache::UntrackedCache>,
227    /// Optional fsmonitor token extension (`FSMN`).
228    pub fsmonitor_last_update: Option<String>,
229    /// Optional `REUC` resolve-undo extension (paths that were unmerged before a resolution).
230    pub resolve_undo: Option<BTreeMap<Vec<u8>, ResolveUndoRecord>>,
231    /// Split index `link` extension (bitmaps cleared after load merge).
232    pub(crate) split_link: Option<crate::split_index::SplitIndexLink>,
233    /// Root tree OID from a valid `TREE` index extension (`cache_tree`), when present.
234    pub cache_tree_root: Option<ObjectId>,
235    /// Parsed `TREE` index extension (`cache-tree`) preserving invalid and subtree nodes.
236    pub cache_tree: Option<CacheTreeNode>,
237}
238
239/// One node from Git's `TREE` index extension.
240#[derive(Debug, Clone, PartialEq, Eq)]
241pub struct CacheTreeNode {
242    /// Path component for this node. The root node stores an empty name.
243    pub name: Vec<u8>,
244    /// Number of index entries covered by this node, or `-1` when invalid.
245    pub entry_count: i32,
246    /// Tree object ID for valid nodes. Invalid nodes do not store an object ID.
247    pub oid: Option<ObjectId>,
248    /// Immediate child cache-tree nodes.
249    pub children: Vec<CacheTreeNode>,
250}
251
252impl CacheTreeNode {
253    /// Create a valid cache-tree node.
254    #[must_use]
255    pub fn valid(
256        name: Vec<u8>,
257        entry_count: i32,
258        oid: ObjectId,
259        children: Vec<CacheTreeNode>,
260    ) -> Self {
261        Self {
262            name,
263            entry_count,
264            oid: Some(oid),
265            children,
266        }
267    }
268
269    /// Mark this node as invalid while preserving its children.
270    pub fn invalidate(&mut self) {
271        self.entry_count = -1;
272        self.oid = None;
273    }
274
275    /// Returns whether this node has a valid cached tree object ID.
276    #[must_use]
277    pub fn is_valid(&self) -> bool {
278        self.entry_count >= 0 && self.oid.is_some()
279    }
280}
281
282/// Options for loading an index from disk.
283#[derive(Debug, Clone, Copy)]
284pub struct IndexLoadOptions {
285    /// If the index contains sparse directory placeholders, expand them to full file entries.
286    pub expand_sparse_directories: bool,
287}
288
289impl Default for IndexLoadOptions {
290    fn default() -> Self {
291        Self {
292            expand_sparse_directories: true,
293        }
294    }
295}
296
297/// Version used after an invalid `GIT_INDEX_VERSION` value (matches Git stderr: "Using version 3").
298const INDEX_ENV_INVALID_FALLBACK: u32 = 3;
299/// Version used after an invalid `index.version` config value (same message as env).
300const INDEX_CONFIG_INVALID_FALLBACK: u32 = 3;
301/// Minimum supported index version.
302const INDEX_FORMAT_LB: u32 = 2;
303/// Maximum supported index version (version 4 requests are accepted and
304/// downgraded on write).
305const INDEX_FORMAT_UB: u32 = 4;
306/// Index extension signature `TREE` (cache-tree).
307const INDEX_EXT_CACHE_TREE: u32 = 0x5452_4545;
308
309/// Best-effort read of Git's `TREE` index extension (`cache_tree_read`).
310fn parse_cache_tree(data: &[u8]) -> Option<CacheTreeNode> {
311    let (node, pos) = parse_cache_tree_node(data, 0)?;
312    if pos == data.len() {
313        Some(node)
314    } else {
315        None
316    }
317}
318
319fn parse_cache_tree_node(data: &[u8], mut pos: usize) -> Option<(CacheTreeNode, usize)> {
320    let name_end = data.get(pos..)?.iter().position(|&b| b == 0)? + pos;
321    let name = data[pos..name_end].to_vec();
322    pos = name_end + 1;
323
324    let (entry_count, consumed) = parse_signed_int_prefix(&data[pos..])?;
325    pos += consumed;
326    if data.get(pos) != Some(&b' ') {
327        return None;
328    }
329    pos += 1;
330    let (subtree_count, consumed) = parse_signed_int_prefix(&data[pos..])?;
331    if subtree_count < 0 {
332        return None;
333    }
334    pos += consumed;
335    if data.get(pos) != Some(&b'\n') {
336        return None;
337    }
338    pos += 1;
339
340    let oid = if entry_count >= 0 {
341        if data.len().saturating_sub(pos) < 20 {
342            return None;
343        }
344        let oid = ObjectId::from_bytes(&data[pos..pos + 20]).ok()?;
345        pos += 20;
346        Some(oid)
347    } else {
348        None
349    };
350
351    let mut children = Vec::with_capacity(subtree_count as usize);
352    for _ in 0..subtree_count {
353        let (child, next) = parse_cache_tree_node(data, pos)?;
354        children.push(child);
355        pos = next;
356    }
357
358    Some((
359        CacheTreeNode {
360            name,
361            entry_count,
362            oid,
363            children,
364        },
365        pos,
366    ))
367}
368
369fn parse_signed_int_prefix(data: &[u8]) -> Option<(i32, usize)> {
370    let mut j = 0usize;
371    while j < data.len() && data[j] == b' ' {
372        j += 1;
373    }
374    let start = j;
375    if j < data.len() && data[j] == b'-' {
376        j += 1;
377    }
378    let digit_start = j;
379    while j < data.len() && data[j].is_ascii_digit() {
380        j += 1;
381    }
382    if j == digit_start {
383        return None;
384    }
385    let s = std::str::from_utf8(&data[start..j]).ok()?;
386    let v: i32 = s.parse().ok()?;
387    Some((v, j))
388}
389
390fn serialize_cache_tree_node(node: &CacheTreeNode, out: &mut Vec<u8>) {
391    out.extend_from_slice(&node.name);
392    out.push(0);
393    out.extend_from_slice(node.entry_count.to_string().as_bytes());
394    out.push(b' ');
395    out.extend_from_slice(node.children.len().to_string().as_bytes());
396    out.push(b'\n');
397    if node.entry_count >= 0 {
398        if let Some(oid) = node.oid {
399            out.extend_from_slice(oid.as_bytes());
400        }
401    }
402    for child in &node.children {
403        serialize_cache_tree_node(child, out);
404    }
405}
406
407fn format_cache_tree_node(node: &CacheTreeNode, parent_path: &str, out: &mut String) {
408    let path = if node.name.is_empty() {
409        String::new()
410    } else {
411        let name = String::from_utf8_lossy(&node.name);
412        format!("{parent_path}{name}/")
413    };
414    if node.is_valid() {
415        if let Some(oid) = node.oid {
416            out.push_str(&format!(
417                "{} {} ({} entries, {} subtrees)\n",
418                oid,
419                path,
420                node.entry_count,
421                node.children.len()
422            ));
423        }
424    } else {
425        out.push_str(&format!(
426            "{:<40} {} ({} subtrees)\n",
427            "invalid",
428            path,
429            node.children.len()
430        ));
431    }
432    for child in &node.children {
433        format_cache_tree_node(child, &path, out);
434    }
435}
436
437/// Emit a single cache-tree node line in `test-tool dump-cache-tree` format.
438fn dump_one_line(node: &CacheTreeNode, pfx: &str, out: &mut String) {
439    if node.entry_count < 0 {
440        out.push_str(&format!(
441            "{:<40} {} ({} subtrees)\n",
442            "invalid",
443            pfx,
444            node.children.len()
445        ));
446    } else {
447        let oid = node.oid.unwrap_or_else(ObjectId::zero);
448        out.push_str(&format!(
449            "{} {} ({} entries, {} subtrees)\n",
450            oid,
451            pfx,
452            node.entry_count,
453            node.children.len()
454        ));
455    }
456}
457
458/// Walk the stored cache-tree (`it`) against a freshly built reference (`ref`),
459/// emitting only the nodes that exist in both — matching Git's `dump_cache_tree`.
460fn dump_cache_tree_pair(
461    it: &CacheTreeNode,
462    reference: &CacheTreeNode,
463    pfx: &str,
464    out: &mut String,
465) {
466    dump_one_line(it, pfx, out);
467
468    for child in &it.children {
469        let Some(ref_child) = reference.children.iter().find(|c| c.name == child.name) else {
470            continue;
471        };
472        let name = String::from_utf8_lossy(&child.name);
473        let child_pfx = format!("{pfx}{name}/");
474        dump_cache_tree_pair(child, ref_child, &child_pfx, out);
475    }
476}
477
478/// Read `GIT_INDEX_VERSION` and return the requested version.
479///
480/// If the environment variable is unset, returns `None`.
481/// If it is set but invalid (non-numeric or out of range 2..=4), prints a
482/// warning to stderr and returns the default version.
483pub fn get_index_format_from_env() -> Option<u32> {
484    let val = std::env::var("GIT_INDEX_VERSION").ok()?;
485    if val.is_empty() {
486        return None;
487    }
488    match val.parse::<u32>() {
489        Ok(v) if (INDEX_FORMAT_LB..=INDEX_FORMAT_UB).contains(&v) => Some(v),
490        _ => {
491            eprintln!(
492                "warning: GIT_INDEX_VERSION set, but the value is invalid.\n\
493                 Using version {INDEX_ENV_INVALID_FALLBACK}"
494            );
495            Some(INDEX_ENV_INVALID_FALLBACK)
496        }
497    }
498}
499
500impl Index {
501    /// Create a new, empty index.
502    ///
503    /// Respects `GIT_INDEX_VERSION` if set, otherwise defaults to version 2.
504    #[must_use]
505    pub fn new() -> Self {
506        let version = get_index_format_from_env().unwrap_or(2);
507        Self {
508            version,
509            entries: Vec::new(),
510            sparse_directories: false,
511            untracked_cache: None,
512            fsmonitor_last_update: None,
513            resolve_undo: None,
514            split_link: None,
515            cache_tree_root: None,
516            cache_tree: None,
517        }
518    }
519
520    /// Create a new, empty index at a fixed version without consulting
521    /// `GIT_INDEX_VERSION` or config.
522    ///
523    /// Unlike [`Self::new`], this never reads the environment and therefore never
524    /// emits the "GIT_INDEX_VERSION set, but the value is invalid" warning. Use it
525    /// for throwaway baseline indexes (e.g. sparse-index diff references) where the
526    /// version is irrelevant and Git resolves the format only once per operation.
527    #[must_use]
528    pub fn empty(version: u32) -> Self {
529        Self {
530            version,
531            entries: Vec::new(),
532            sparse_directories: false,
533            untracked_cache: None,
534            fsmonitor_last_update: None,
535            resolve_undo: None,
536            split_link: None,
537            cache_tree_root: None,
538            cache_tree: None,
539        }
540    }
541
542    /// Create a new empty index, respecting config values for version.
543    ///
544    /// Priority matches Git's `prepare_repo_settings`: `GIT_INDEX_VERSION` env, then
545    /// `feature.manyFiles` (implies version 4), then `index.version` (overrides version).
546    pub fn new_with_config(
547        config_index_version: Option<&str>,
548        config_many_files: Option<&str>,
549    ) -> Self {
550        if let Some(v) = get_index_format_from_env() {
551            return Self {
552                version: v,
553                entries: Vec::new(),
554                sparse_directories: false,
555                untracked_cache: None,
556                fsmonitor_last_update: None,
557                resolve_undo: None,
558                split_link: None,
559                cache_tree_root: None,
560                cache_tree: None,
561            };
562        }
563
564        let many_files = config_truthy(config_many_files);
565        let mut version = if many_files { 4 } else { 2 };
566
567        if let Some(val) = config_index_version {
568            let trimmed = val.trim();
569            if !trimmed.is_empty() {
570                match trimmed.parse::<u32>() {
571                    Ok(v) if (INDEX_FORMAT_LB..=INDEX_FORMAT_UB).contains(&v) => {
572                        version = v;
573                    }
574                    _ => {
575                        eprintln!(
576                            "warning: index.version set, but the value is invalid.\n\
577                             Using version {INDEX_CONFIG_INVALID_FALLBACK}"
578                        );
579                        version = INDEX_CONFIG_INVALID_FALLBACK;
580                    }
581                }
582            }
583        }
584
585        Self {
586            version,
587            entries: Vec::new(),
588            sparse_directories: false,
589            untracked_cache: None,
590            fsmonitor_last_update: None,
591            resolve_undo: None,
592            split_link: None,
593            cache_tree_root: None,
594            cache_tree: None,
595        }
596    }
597
598    /// New empty index using a loaded [`ConfigSet`] (includes `-c` / `GIT_CONFIG_PARAMETERS`).
599    ///
600    /// Same precedence as [`Self::new_with_config`], but reads `feature.manyFiles` and
601    /// `index.version` from `config`.
602    #[must_use]
603    pub fn new_from_config(config: &ConfigSet) -> Self {
604        if let Some(v) = get_index_format_from_env() {
605            return Self {
606                version: v,
607                entries: Vec::new(),
608                sparse_directories: false,
609                untracked_cache: None,
610                fsmonitor_last_update: None,
611                resolve_undo: None,
612                split_link: None,
613                cache_tree_root: None,
614                cache_tree: None,
615            };
616        }
617
618        let many_files = config
619            .get_bool("feature.manyFiles")
620            .and_then(|r| r.ok())
621            .unwrap_or(false);
622        let mut version = if many_files { 4 } else { 2 };
623
624        if let Some(val) = config.get("index.version") {
625            let trimmed = val.trim();
626            if !trimmed.is_empty() {
627                match trimmed.parse::<u32>() {
628                    Ok(v) if (INDEX_FORMAT_LB..=INDEX_FORMAT_UB).contains(&v) => {
629                        version = v;
630                    }
631                    _ => {
632                        eprintln!(
633                            "warning: index.version set, but the value is invalid.\n\
634                             Using version {INDEX_CONFIG_INVALID_FALLBACK}"
635                        );
636                        version = INDEX_CONFIG_INVALID_FALLBACK;
637                    }
638                }
639            }
640        }
641
642        Self {
643            version,
644            entries: Vec::new(),
645            sparse_directories: false,
646            untracked_cache: None,
647            fsmonitor_last_update: None,
648            resolve_undo: None,
649            split_link: None,
650            cache_tree_root: None,
651            cache_tree: None,
652        }
653    }
654
655    /// Load an index from the given file path without expanding sparse-directory placeholders.
656    ///
657    /// Returns an empty index if the file does not exist.
658    ///
659    /// # Errors
660    ///
661    /// Returns [`Error::IndexError`] if the file is present but corrupt.
662    pub fn load(path: &Path) -> Result<Self> {
663        match fs::read(path) {
664            Ok(data) => Self::parse(&data),
665            Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(Self {
666                sparse_directories: false,
667                ..Self::new()
668            }),
669            Err(e) => Err(Error::Io(e)),
670        }
671    }
672
673    /// Load an index and expand sparse-directory placeholders using the object database.
674    ///
675    /// After a successful return, [`Index::sparse_directories`] is cleared and every
676    /// placeholder is replaced by the blob entries from the referenced tree.
677    pub fn load_expand_sparse(path: &Path, odb: &Odb) -> Result<Self> {
678        let mut idx = Self::load(path)?;
679        idx.expand_sparse_directory_placeholders(odb)?;
680        Ok(idx)
681    }
682
683    /// Like [`Index::load_expand_sparse`], but treats a missing index or Git's
684    /// `"file too short"` placeholder as an empty index.
685    pub fn load_expand_sparse_optional(path: &Path, odb: &Odb) -> Result<Self> {
686        let mut idx = match fs::read(path) {
687            Ok(data) => Self::parse(&data).or_else(|e| match e {
688                Error::IndexError(msg) if msg == "file too short" => Ok(Self::new()),
689                other => Err(other),
690            })?,
691            Err(e) if e.kind() == io::ErrorKind::NotFound => Self::new(),
692            Err(e) => return Err(Error::Io(e)),
693        };
694        idx.expand_sparse_directory_placeholders(odb)?;
695        Ok(idx)
696    }
697
698    /// Returns true if the index contains sparse-index tree placeholders (`MODE_TREE` + skip-worktree).
699    #[must_use]
700    pub fn has_sparse_directory_placeholders(&self) -> bool {
701        self.entries
702            .iter()
703            .any(IndexEntry::is_sparse_directory_placeholder)
704    }
705
706    /// Replace sparse-directory placeholder entries with all blob paths from their trees.
707    ///
708    /// Each placeholder must reference a tree object. New entries are marked skip-worktree like Git's
709    /// expanded index, except we keep `sparse_directories` false in memory after expansion.
710    pub fn expand_sparse_directory_placeholders(&mut self, odb: &Odb) -> Result<()> {
711        if !self.has_sparse_directory_placeholders() {
712            return Ok(());
713        }
714        let mut out: Vec<IndexEntry> = Vec::with_capacity(self.entries.len());
715        for entry in self.entries.drain(..) {
716            if entry.is_sparse_directory_placeholder() {
717                let prefix = trim_trailing_slash_bytes(&entry.path);
718                let blobs = flatten_tree_blobs(odb, &entry.oid, prefix)?;
719                out.extend(blobs);
720            } else {
721                out.push(entry);
722            }
723        }
724        self.entries = out;
725        self.sparse_directories = false;
726        self.sort();
727        Ok(())
728    }
729
730    /// Collapse consecutive skip-worktree subtrees into sparse-directory placeholders when
731    /// `cone_mode` is true and each directory is outside the sparse cone.
732    ///
733    /// `head_tree` is the tree OID at `HEAD`. When `enable_sparse_index` is false, clears
734    /// [`Index::sparse_directories`] and returns without collapsing.
735    pub fn try_collapse_sparse_directories(
736        &mut self,
737        odb: &Odb,
738        head_tree: &ObjectId,
739        patterns: &[String],
740        cone_mode: bool,
741        enable_sparse_index: bool,
742    ) -> Result<()> {
743        if !enable_sparse_index || !cone_mode {
744            self.sparse_directories = false;
745            return Ok(());
746        }
747
748        let mut prefixes = BTreeSet::<Vec<u8>>::new();
749        for e in &self.entries {
750            if e.stage() != 0 || e.mode == MODE_TREE || !e.skip_worktree() {
751                continue;
752            }
753            collect_directory_prefixes(&e.path, &mut prefixes);
754        }
755
756        let mut collapsed_any = false;
757        // Deepest prefixes first so nested dirs collapse before parents.
758        let mut ordered: Vec<Vec<u8>> = prefixes.into_iter().collect();
759        ordered.sort_by_key(|p| std::cmp::Reverse(p.len()));
760
761        for pref in ordered {
762            let pref_str = String::from_utf8_lossy(&pref);
763            if directory_in_cone(&pref_str, patterns, cone_mode) {
764                continue;
765            }
766            let Some(subtree_oid) = tree_oid_for_prefix(odb, head_tree, &pref)? else {
767                continue;
768            };
769            let expected = collect_sparse_aware_expected_blobs(
770                odb,
771                &subtree_oid,
772                &pref,
773                patterns,
774                cone_mode,
775                &self.entries,
776            )?;
777            if expected.is_empty() {
778                continue;
779            }
780            let mut matched = Vec::new();
781            for e in &self.entries {
782                if e.stage() != 0 {
783                    continue;
784                }
785                if path_under_prefix(&e.path, &pref) && e.mode != MODE_TREE {
786                    matched.push(e.clone());
787                }
788            }
789            if matched.len() != expected.len() {
790                continue;
791            }
792            matched.sort_by(|a, b| a.path.cmp(&b.path));
793            let mut exp_sorted = expected;
794            exp_sorted.sort_by(|a, b| a.path.cmp(&b.path));
795            if !matched
796                .iter()
797                .zip(exp_sorted.iter())
798                .all(|(a, b)| a.path == b.path && a.oid == b.oid && a.mode == b.mode)
799            {
800                continue;
801            }
802            if !matched.iter().all(|e| e.skip_worktree()) {
803                continue;
804            }
805            // Git's convert_to_sparse_rec refuses to collapse a directory that contains a
806            // submodule (gitlink); the sparse-directory entry could not faithfully represent
807            // the gitlink's committed OID. Leave such directories expanded.
808            if matched.iter().any(|e| e.mode == MODE_GITLINK)
809                || exp_sorted.iter().any(|e| e.mode == MODE_GITLINK)
810            {
811                continue;
812            }
813
814            let mut path_with_slash = pref.clone();
815            if !path_with_slash.ends_with(b"/") {
816                path_with_slash.push(b'/');
817            }
818            self.entries
819                .retain(|e| e.stage() != 0 || !path_under_prefix(&e.path, &pref));
820            let mut placeholder = IndexEntry {
821                ctime_sec: 0,
822                ctime_nsec: 0,
823                mtime_sec: 0,
824                mtime_nsec: 0,
825                dev: 0,
826                ino: 0,
827                mode: MODE_TREE,
828                uid: 0,
829                gid: 0,
830                size: 0,
831                oid: subtree_oid,
832                flags: path_with_slash.len().min(0xFFF) as u16,
833                flags_extended: Some(0),
834                path: path_with_slash,
835                base_index_pos: 0,
836            };
837            placeholder.set_skip_worktree(true);
838            self.add_or_replace(placeholder);
839            collapsed_any = true;
840        }
841
842        if collapsed_any {
843            self.sort();
844            self.sparse_directories = true;
845        } else {
846            self.sparse_directories = false;
847        }
848        Ok(())
849    }
850
851    /// Parse index bytes (the whole file including trailing SHA-1).
852    ///
853    /// # Errors
854    ///
855    /// Returns [`Error::IndexError`] on structural problems.
856    pub fn parse(data: &[u8]) -> Result<Self> {
857        if data.len() < 12 {
858            return Err(Error::IndexError("file too short".to_owned()));
859        }
860
861        // Trailing SHA-1: normal index is a hash of the body; Git may write all zeros when
862        // `index.skipHash` / `feature.manyFiles` skips computing the checksum.
863        let (body, checksum) = data.split_at(data.len() - 20);
864        if !checksum.iter().all(|&b| b == 0) {
865            let mut hasher = Sha1::new();
866            hasher.update(body);
867            let computed = hasher.finalize();
868            if computed.as_slice() != checksum {
869                return Err(Error::IndexError("SHA-1 checksum mismatch".to_owned()));
870            }
871        }
872
873        // Header
874        let magic = &body[..4];
875        if magic != b"DIRC" {
876            return Err(Error::IndexError("bad magic: expected DIRC".to_owned()));
877        }
878        let version = u32::from_be_bytes(
879            body[4..8]
880                .try_into()
881                .map_err(|_| Error::IndexError("cannot read version".to_owned()))?,
882        );
883        if version != 2 && version != 3 && version != 4 {
884            return Err(Error::IndexError(format!(
885                "unsupported index version {version}"
886            )));
887        }
888        let count = u32::from_be_bytes(
889            body[8..12]
890                .try_into()
891                .map_err(|_| Error::IndexError("cannot read entry count".to_owned()))?,
892        );
893
894        let mut pos = 12usize;
895        let mut entries = Vec::with_capacity(count as usize);
896
897        let mut prev_path: Vec<u8> = Vec::new();
898        for _ in 0..count {
899            let (entry, consumed) = parse_entry(&body[pos..], version, &prev_path)?;
900            prev_path = entry.path.clone();
901            entries.push(entry);
902            pos += consumed;
903        }
904
905        let mut sparse_directories = false;
906        let mut untracked_cache = None;
907        let mut fsmonitor_last_update = None;
908        let mut resolve_undo = None;
909        let mut split_link = None;
910        let mut cache_tree_root = None;
911        let mut cache_tree = None;
912        while pos + 8 <= body.len() {
913            let sig = u32::from_be_bytes(
914                body[pos..pos + 4]
915                    .try_into()
916                    .map_err(|_| Error::IndexError("truncated extension sig".to_owned()))?,
917            );
918            let ext_sz = u32::from_be_bytes(
919                body[pos + 4..pos + 8]
920                    .try_into()
921                    .map_err(|_| Error::IndexError("truncated extension size".to_owned()))?,
922            ) as usize;
923            pos += 8;
924            if pos + ext_sz > body.len() {
925                return Err(Error::IndexError(
926                    "extension overruns index body".to_owned(),
927                ));
928            }
929            if sig == INDEX_EXT_SPARSE_DIRECTORIES {
930                sparse_directories = true;
931            } else if sig == INDEX_EXT_UNTRACKED {
932                let ext_data = &body[pos..pos + ext_sz];
933                untracked_cache = untracked_cache::parse_untracked_extension(ext_data);
934            } else if sig == INDEX_EXT_FSMONITOR {
935                let ext_data = &body[pos..pos + ext_sz];
936                let token_bytes = if let Some(nul) = ext_data.iter().position(|&b| b == 0) {
937                    &ext_data[..nul]
938                } else {
939                    ext_data
940                };
941                fsmonitor_last_update = Some(String::from_utf8_lossy(token_bytes).into_owned());
942            } else if sig == INDEX_EXT_RESOLVE_UNDO {
943                let ext_data = &body[pos..pos + ext_sz];
944                resolve_undo = Some(resolve_undo::parse_resolve_undo_payload(ext_data)?);
945            } else if sig == INDEX_EXT_LINK {
946                let ext_data = &body[pos..pos + ext_sz];
947                split_link = Some(crate::split_index::parse_link_extension(ext_data)?);
948            } else if sig == INDEX_EXT_CACHE_TREE {
949                let ext_data = &body[pos..pos + ext_sz];
950                cache_tree = parse_cache_tree(ext_data);
951                cache_tree_root = cache_tree
952                    .as_ref()
953                    .and_then(|node| node.oid.filter(|_| node.entry_count >= 0));
954            }
955            pos += ext_sz;
956        }
957        if pos != body.len() {
958            return Err(Error::IndexError("junk after index extensions".to_owned()));
959        }
960
961        Ok(Self {
962            version,
963            entries,
964            sparse_directories,
965            untracked_cache,
966            fsmonitor_last_update,
967            resolve_undo,
968            split_link,
969            cache_tree_root,
970            cache_tree,
971        })
972    }
973
974    /// Write the index to a file, computing and appending the trailing SHA-1.
975    ///
976    /// # Errors
977    ///
978    /// Returns [`Error::Io`] on filesystem errors.
979    pub fn write(&self, path: &Path) -> Result<()> {
980        let git_dir = path.parent();
981        let config = git_dir.and_then(|d| ConfigSet::load(Some(d), true).ok());
982        let skip_hash = index_skip_hash_for_write(config.as_ref());
983        self.write_to_path(path, skip_hash)
984    }
985
986    /// Write this index to `path` with an explicit trailing-checksum policy.
987    ///
988    /// When `skip_hash` is true, the trailing SHA-1 is written as all zeros (Git `index.skipHash`).
989    pub fn write_to_path(&self, path: &Path, skip_hash: bool) -> Result<()> {
990        let mut body = Vec::new();
991        // Fast path: entries loaded from disk (or maintained via `add_or_replace`) are already in
992        // canonical order; serializing from `&self` skips a full clone of every entry. The
993        // comparator must stay identical to [`Index::sort`] (path, then stage) — format v4 path
994        // compression depends on it.
995        let already_sorted = self
996            .entries
997            .is_sorted_by(|a, b| (&a.path, a.stage()) <= (&b.path, b.stage()));
998        if already_sorted {
999            self.serialize_into(&mut body)?;
1000        } else {
1001            let mut sorted = self.clone();
1002            sorted.sort();
1003            sorted.serialize_into(&mut body)?;
1004        }
1005
1006        let checksum: [u8; 20] = if skip_hash {
1007            [0u8; 20]
1008        } else {
1009            let mut hasher = Sha1::new();
1010            hasher.update(&body);
1011            hasher.finalize().into()
1012        };
1013
1014        let tmp_path = path.with_extension("lock");
1015        let pid_path = pid_path_for_lock(&tmp_path);
1016        let lockfile_pid_enabled = lockfile_pid_enabled(path);
1017
1018        let mut lock_file = match fs::OpenOptions::new()
1019            .write(true)
1020            .create_new(true)
1021            .open(&tmp_path)
1022        {
1023            Ok(file) => file,
1024            Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {
1025                let message = build_lock_exists_message(&tmp_path, &pid_path, &e);
1026                return Err(Error::Io(io::Error::new(
1027                    io::ErrorKind::AlreadyExists,
1028                    message,
1029                )));
1030            }
1031            Err(e) => return Err(Error::Io(e)),
1032        };
1033
1034        let mut wrote_pid_file = false;
1035        if lockfile_pid_enabled {
1036            if let Err(e) = write_lock_pid_file(&pid_path) {
1037                let _ = fs::remove_file(&tmp_path);
1038                return Err(Error::Io(e));
1039            }
1040            wrote_pid_file = true;
1041        }
1042
1043        if let Err(e) = (|| -> io::Result<()> {
1044            lock_file.write_all(&body)?;
1045            lock_file.write_all(&checksum)?;
1046            Ok(())
1047        })() {
1048            let _ = fs::remove_file(&tmp_path);
1049            if wrote_pid_file {
1050                let _ = fs::remove_file(&pid_path);
1051            }
1052            return Err(Error::Io(e));
1053        }
1054        drop(lock_file);
1055
1056        if let Err(e) = fs::rename(&tmp_path, path) {
1057            let _ = fs::remove_file(&tmp_path);
1058            if wrote_pid_file {
1059                let _ = fs::remove_file(&pid_path);
1060            }
1061            return Err(Error::Io(e));
1062        }
1063        {
1064            if wrote_pid_file {
1065                let _ = fs::remove_file(&pid_path);
1066            }
1067        }
1068        Ok(())
1069    }
1070
1071    /// Serialise the index body (without trailing checksum) into `out`.
1072    ///
1073    /// Callers must have sorted entries when using format 4 (path compression depends on order).
1074    pub(crate) fn serialize_into(&self, out: &mut Vec<u8>) -> Result<()> {
1075        let has_extended_flags = self.entries.iter().any(|e| e.flags_extended.is_some());
1076        let write_version = if self.version >= 4 {
1077            4
1078        } else if has_extended_flags {
1079            3
1080        } else if self.version >= 3 {
1081            2
1082        } else {
1083            self.version
1084        };
1085        // Header
1086        out.extend_from_slice(b"DIRC");
1087        out.extend_from_slice(&write_version.to_be_bytes());
1088        out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
1089
1090        if write_version == 4 {
1091            let mut previous_path: Vec<u8> = Vec::new();
1092            for entry in &self.entries {
1093                serialize_entry_v4(entry, &mut previous_path, out);
1094            }
1095        } else {
1096            for entry in &self.entries {
1097                serialize_entry(entry, write_version, out);
1098            }
1099        }
1100        if self.sparse_directories {
1101            out.extend_from_slice(&INDEX_EXT_SPARSE_DIRECTORIES.to_be_bytes());
1102            out.extend_from_slice(&0u32.to_be_bytes());
1103        }
1104        if let Some(uc) = &self.untracked_cache {
1105            let payload = untracked_cache::write_untracked_extension(uc);
1106            out.extend_from_slice(&INDEX_EXT_UNTRACKED.to_be_bytes());
1107            out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
1108            out.extend_from_slice(&payload);
1109        }
1110        if let Some(token) = &self.fsmonitor_last_update {
1111            let mut payload = token.as_bytes().to_vec();
1112            payload.push(0);
1113            out.extend_from_slice(&INDEX_EXT_FSMONITOR.to_be_bytes());
1114            out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
1115            out.extend_from_slice(&payload);
1116        }
1117        if let Some(ru) = &self.resolve_undo {
1118            let payload = write_resolve_undo_payload(ru);
1119            if !payload.is_empty() {
1120                out.extend_from_slice(&INDEX_EXT_RESOLVE_UNDO.to_be_bytes());
1121                out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
1122                out.extend_from_slice(&payload);
1123            }
1124        }
1125        if let Some(sl) = &self.split_link {
1126            use crate::ewah_bitmap::EwahBitmap;
1127            let del = sl
1128                .delete_bitmap
1129                .as_ref()
1130                .cloned()
1131                .unwrap_or_else(EwahBitmap::new);
1132            let rep = sl
1133                .replace_bitmap
1134                .as_ref()
1135                .cloned()
1136                .unwrap_or_else(EwahBitmap::new);
1137            let payload =
1138                crate::split_index::serialize_link_extension_payload(&sl.base_oid, &del, &rep);
1139            out.extend_from_slice(&INDEX_EXT_LINK.to_be_bytes());
1140            out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
1141            out.extend_from_slice(&payload);
1142        }
1143        if let Some(cache_tree) = &self.cache_tree {
1144            let mut payload = Vec::new();
1145            serialize_cache_tree_node(cache_tree, &mut payload);
1146            out.extend_from_slice(&INDEX_EXT_CACHE_TREE.to_be_bytes());
1147            out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
1148            out.extend_from_slice(&payload);
1149        }
1150        Ok(())
1151    }
1152
1153    /// Add or replace an entry (matched by path + stage).
1154    pub fn add_or_replace(&mut self, entry: IndexEntry) {
1155        let path = entry.path.clone();
1156        let stage = entry.stage();
1157        let mut inserted_stage0 = false;
1158        // Binary search for the insertion point by (path, stage)
1159        let result = self.entries.binary_search_by(|e| {
1160            e.path
1161                .as_slice()
1162                .cmp(path.as_slice())
1163                .then_with(|| e.stage().cmp(&stage))
1164        });
1165        match result {
1166            Ok(pos) => {
1167                // Preserve split-index row binding across refresh/replace (Git `ce->index`).
1168                let mut e = entry;
1169                e.base_index_pos = self.entries[pos].base_index_pos;
1170                self.entries[pos] = e;
1171            }
1172            Err(pos) => {
1173                // Not found — insert at sorted position
1174                self.entries.insert(pos, entry);
1175                inserted_stage0 = stage == 0;
1176            }
1177        }
1178        if inserted_stage0 {
1179            if let Ok(p) = std::str::from_utf8(&path) {
1180                self.invalidate_untracked_cache_for_path(p);
1181            }
1182        }
1183        if stage == 0 {
1184            self.invalidate_cache_tree_for_path(&path);
1185        }
1186    }
1187
1188    /// Stage a file at stage 0, removing any conflict stage entries (1, 2, 3)
1189    /// for the same path. This is the correct behavior for `git add` on a
1190    /// conflicted file during merge/cherry-pick resolution.
1191    pub fn stage_file(&mut self, entry: IndexEntry) {
1192        let path = entry.path.clone();
1193        for e in &self.entries {
1194            if e.path == path && e.stage() != 0 {
1195                resolve_undo::record_resolve_undo_for_entry(&mut self.resolve_undo, e);
1196            }
1197        }
1198        // Remove conflict stages first
1199        self.entries.retain(|e| e.path != path || e.stage() == 0);
1200        // Then add/replace stage-0 entry
1201        self.add_or_replace(entry);
1202    }
1203
1204    /// Drop all resolve-undo records (matches Git `resolve_undo_clear_index`).
1205    pub fn clear_resolve_undo(&mut self) {
1206        self.resolve_undo = None;
1207    }
1208
1209    /// Remove and return the resolve-undo record for `path`, if any.
1210    pub fn take_resolve_undo_record(&mut self, path: &[u8]) -> Option<ResolveUndoRecord> {
1211        let map = self.resolve_undo.as_mut()?;
1212        let ru = map.remove(path)?;
1213        if map.is_empty() {
1214            self.resolve_undo = None;
1215        }
1216        Some(ru)
1217    }
1218
1219    /// Replace all index entries for `path` with unmerged stages from `record`.
1220    pub fn install_unmerged_from_resolve_undo(&mut self, path: &[u8], record: &ResolveUndoRecord) {
1221        self.entries.retain(|e| e.path != path);
1222        for stage in 1u8..=3u8 {
1223            let i = (stage - 1) as usize;
1224            if record.modes[i] == 0 {
1225                continue;
1226            }
1227            let entry = IndexEntry {
1228                ctime_sec: 0,
1229                ctime_nsec: 0,
1230                mtime_sec: 0,
1231                mtime_nsec: 0,
1232                dev: 0,
1233                ino: 0,
1234                mode: record.modes[i],
1235                uid: 0,
1236                gid: 0,
1237                size: 0,
1238                oid: record.oids[i],
1239                flags: path.len().min(0xFFF) as u16 | ((stage as u16) << 12),
1240                flags_extended: None,
1241                path: path.to_vec(),
1242                base_index_pos: 0,
1243            };
1244            self.add_or_replace(entry);
1245        }
1246        self.sort();
1247    }
1248
1249    /// Re-create unmerged index entries for `path` from the resolve-undo extension.
1250    ///
1251    /// Returns `true` when a resolve-undo record existed and was consumed (Git `unmerge_one`).
1252    pub fn unmerge_path_from_resolve_undo(&mut self, path: &[u8]) -> bool {
1253        let Some(record) = self.take_resolve_undo_record(path) else {
1254            return false;
1255        };
1256        self.install_unmerged_from_resolve_undo(path, &record);
1257        // Git's `unmerge_index_entry` swaps the resolved stage-0 entry for unmerged
1258        // stages 1/2/3 via `remove_index_entry_at` + `add_index_entry`, both of which
1259        // call `cache_tree_invalidate_path`. After committing the resolution the index
1260        // carries a valid TREE extension whose stage-0 entry for `path` no longer
1261        // matches; without invalidating it, the next write-tree trips
1262        // `verify_cache_tree` ("corrupted cache-tree has entries not present in index").
1263        if let Ok(p) = std::str::from_utf8(path) {
1264            self.invalidate_untracked_cache_for_path(p);
1265        }
1266        self.invalidate_cache_tree_for_path(path);
1267        true
1268    }
1269
1270    /// Remove all entries matching the given path (all stages).
1271    ///
1272    /// Returns `true` if at least one entry was removed.
1273    pub fn remove(&mut self, path: &[u8]) -> bool {
1274        let mut removed_any = false;
1275        for e in &self.entries {
1276            if e.path == path {
1277                if e.stage() != 0 {
1278                    resolve_undo::record_resolve_undo_for_entry(&mut self.resolve_undo, e);
1279                }
1280                removed_any = true;
1281            }
1282        }
1283        if !removed_any {
1284            return false;
1285        }
1286        self.entries.retain(|e| e.path != path);
1287        if let Ok(p) = std::str::from_utf8(path) {
1288            self.invalidate_untracked_cache_for_path(p);
1289        }
1290        self.invalidate_cache_tree_for_path(path);
1291        true
1292    }
1293
1294    /// Remove every index entry for `path` (all merge stages), like `remove_file_from_index`.
1295    ///
1296    /// Returns whether any entry was removed.
1297    pub fn remove_path_all_stages(&mut self, path: &[u8]) -> bool {
1298        self.remove(path)
1299    }
1300
1301    /// Invalidate UNTR nodes affected by an index change (Git `untracked_cache_*_index`).
1302    pub fn invalidate_untracked_cache_for_path(&mut self, path: &str) {
1303        if let Some(uc) = self.untracked_cache.as_mut() {
1304            untracked_cache::invalidate_path(uc, path);
1305        }
1306    }
1307
1308    /// Remove every index entry whose path lies strictly under `path` (all stages).
1309    ///
1310    /// Used when staging a file at `path` that replaces a former directory: Git removes
1311    /// tracked paths like `path/child` from the index so they do not remain alongside
1312    /// the new blob entry.
1313    pub fn remove_descendants_under_path(&mut self, path: &str) {
1314        let prefix = path.as_bytes();
1315        if prefix.is_empty() {
1316            return;
1317        }
1318        let plen = prefix.len();
1319        let had_descendant = self.entries.iter().any(|e| {
1320            let ep = e.path.as_slice();
1321            ep.len() > plen && ep.starts_with(prefix) && ep[plen] == b'/'
1322        });
1323        for e in self.entries.iter() {
1324            let ep = e.path.as_slice();
1325            if ep.len() > plen && ep.starts_with(prefix) && ep[plen] == b'/' && e.stage() != 0 {
1326                resolve_undo::record_resolve_undo_for_entry(&mut self.resolve_undo, e);
1327            }
1328        }
1329        self.entries.retain(|e| {
1330            let ep = e.path.as_slice();
1331            if ep.len() <= plen {
1332                return true;
1333            }
1334            if !ep.starts_with(prefix) {
1335                return true;
1336            }
1337            // Drop paths strictly under `prefix/` (keep same-length prefix matches like "d-other").
1338            ep[plen] != b'/'
1339        });
1340        if had_descendant {
1341            self.invalidate_untracked_cache_for_path(path);
1342            self.invalidate_cache_tree_for_path(path.as_bytes());
1343        }
1344    }
1345
1346    /// Replace the cache-tree extension with a valid tree.
1347    pub fn set_cache_tree(&mut self, cache_tree: CacheTreeNode) {
1348        self.cache_tree_root = cache_tree.oid.filter(|_| cache_tree.entry_count >= 0);
1349        self.cache_tree = Some(cache_tree);
1350    }
1351
1352    /// Remove the cache-tree extension.
1353    pub fn clear_cache_tree(&mut self) {
1354        self.cache_tree_root = None;
1355        self.cache_tree = None;
1356    }
1357
1358    /// Mark cache-tree nodes affected by an index path change as invalid.
1359    ///
1360    /// Mirrors Git's `do_invalidate_path` (`cache-tree.c`): each ancestor node along `path`
1361    /// is invalidated (`entry_count = -1`), and when the final path component names an existing
1362    /// **subtree** node, that subtree is removed entirely. The removal is essential for the
1363    /// directory→file transition (e.g. tracked dir `a/b/` replaced by file `a/b`): without it,
1364    /// stale descendant nodes (`a/b/c`, …) keep their positive `entry_count` and later trip
1365    /// [`crate::write_tree::verify_cache_tree`] with "corrupted cache-tree has entries not present
1366    /// in index".
1367    pub fn invalidate_cache_tree_for_path(&mut self, path: &[u8]) {
1368        let Some(root) = self.cache_tree.as_mut() else {
1369            self.cache_tree_root = None;
1370            return;
1371        };
1372        self.cache_tree_root = None;
1373        Self::do_invalidate_cache_tree_path(root, path);
1374    }
1375
1376    /// Recursive worker for [`Self::invalidate_cache_tree_for_path`], mirroring
1377    /// Git's `do_invalidate_path`.
1378    fn do_invalidate_cache_tree_path(node: &mut CacheTreeNode, path: &[u8]) {
1379        node.invalidate();
1380        let slash = path.iter().position(|&b| b == b'/');
1381        match slash {
1382            // Final component: drop the matching subtree node, if any.
1383            None => {
1384                if !path.is_empty() {
1385                    node.children.retain(|child| child.name != path);
1386                }
1387            }
1388            // Interior component: descend into the named subtree and recurse on the rest.
1389            Some(idx) => {
1390                let (component, rest) = (&path[..idx], &path[idx + 1..]);
1391                if let Some(child) = node
1392                    .children
1393                    .iter_mut()
1394                    .find(|child| child.name == component)
1395                {
1396                    Self::do_invalidate_cache_tree_path(child, rest);
1397                }
1398            }
1399        }
1400    }
1401
1402    /// Format the parsed cache-tree extension like Git's `test-tool dump-cache-tree`.
1403    #[must_use]
1404    pub fn format_cache_tree_dump(&self) -> String {
1405        let Some(root) = self.cache_tree.as_ref() else {
1406            return String::new();
1407        };
1408        let mut out = String::new();
1409        format_cache_tree_node(root, "", &mut out);
1410        out
1411    }
1412
1413    /// Produce `test-tool dump-cache-tree` output for this index.
1414    ///
1415    /// Mirrors Git's `cmd__dump_cache_tree`: the stored cache-tree (`it`) is
1416    /// compared against a freshly computed reference (`ref`) built from the
1417    /// current index entries with `WRITE_TREE_DRY_RUN`. Only nodes present in
1418    /// both trees are dumped; the `#(ref)` divergence lines that Git would emit
1419    /// are filtered out by the harness, so they are intentionally omitted here.
1420    ///
1421    /// If the index has no stored cache-tree, output is empty (Git dumps nothing
1422    /// because `it` is NULL).
1423    ///
1424    /// # Errors
1425    ///
1426    /// Returns an error if the reference cache-tree cannot be built (for example,
1427    /// if a tree object cannot be written to the object database).
1428    pub fn dump_cache_tree(&self, odb: &Odb) -> Result<String> {
1429        let Some(it_root) = self.cache_tree.as_ref() else {
1430            return Ok(String::new());
1431        };
1432        let ref_root = crate::write_tree::build_cache_tree_from_index(odb, self)?;
1433        let mut out = String::new();
1434        dump_cache_tree_pair(it_root, &ref_root, "", &mut out);
1435        Ok(out)
1436    }
1437
1438    /// Sort entries in Git's canonical order: by path, then by stage.
1439    pub fn sort(&mut self) {
1440        self.entries
1441            .sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
1442    }
1443
1444    /// Collapse duplicate `(path, stage)` entries, keeping the **last** one in current order.
1445    ///
1446    /// A valid Git index never holds two entries with the same path and stage; `add_index_entry`
1447    /// replaces an existing same-name entry in place. When grit builds an index by flattening a
1448    /// tree that has duplicate path entries (see `t4058-diff-duplicates`), the naive flatten yields
1449    /// several identical-path entries. This restores Git's invariant by keeping the last entry for
1450    /// each `(path, stage)` — matching `add_index_entry`'s replace-on-collision semantics where the
1451    /// final tree entry for a path wins.
1452    pub fn dedup_paths_keep_last(&mut self) {
1453        let mut seen: std::collections::HashSet<(Vec<u8>, u8)> = std::collections::HashSet::new();
1454        let mut kept: Vec<IndexEntry> = Vec::with_capacity(self.entries.len());
1455        // Walk in reverse so the *last* occurrence of each (path, stage) is the one retained.
1456        for entry in self.entries.iter().rev() {
1457            if seen.insert((entry.path.clone(), entry.stage())) {
1458                kept.push(entry.clone());
1459            }
1460        }
1461        kept.reverse();
1462        self.entries = kept;
1463    }
1464
1465    /// OID of the shared index when this index uses split-index mode (`link` extension).
1466    #[must_use]
1467    pub fn split_index_base_oid(&self) -> Option<ObjectId> {
1468        self.split_link.as_ref().map(|l| l.base_oid)
1469    }
1470
1471    /// Find an entry by path and stage (0 for normal entries).
1472    #[must_use]
1473    pub fn get(&self, path: &[u8], stage: u8) -> Option<&IndexEntry> {
1474        self.entries
1475            .iter()
1476            .find(|e| e.path == path && e.stage() == stage)
1477    }
1478
1479    /// Find a mutable entry by path and stage.
1480    pub fn get_mut(&mut self, path: &[u8], stage: u8) -> Option<&mut IndexEntry> {
1481        self.entries
1482            .iter_mut()
1483            .find(|e| e.path == path && e.stage() == stage)
1484    }
1485
1486    /// Merge tree contents from `treeish` into this index as virtual stage-1 entries, matching
1487    /// Git's `overlay_tree_on_index` used by `git ls-files --with-tree`.
1488    ///
1489    /// Existing unmerged entries (stages 1–3) are shifted to stage 3 so stage 1 is free for the
1490    /// overlay. Stage-1 paths that already exist at stage 0 are marked so `ls-files` can skip
1491    /// them (Git's `CE_UPDATE` on the stage-1 entry).
1492    ///
1493    /// # Parameters
1494    ///
1495    /// - `repo` — repository whose object database is used to read the tree.
1496    /// - `treeish` — revision or tree OID string (`HEAD`, `HEAD~1`, full SHA, etc.).
1497    /// - `prefix` — optional path prefix (bytes, no trailing slash except empty); only paths under
1498    ///   this prefix are considered from the tree. Pass empty slice for the full tree.
1499    ///
1500    /// # Errors
1501    ///
1502    /// Returns [`Error`] if `treeish` cannot be resolved, the tree cannot be read, or an object is
1503    /// missing from the ODB.
1504    pub fn overlay_tree_on_index(
1505        &mut self,
1506        repo: &Repository,
1507        treeish: &str,
1508        prefix: &[u8],
1509    ) -> Result<()> {
1510        let oid = rev_parse::resolve_revision(repo, treeish)?;
1511        let tree_oid = peel_to_tree_oid(repo, oid)?;
1512        for e in self.entries.iter_mut() {
1513            if e.stage() != 0 {
1514                e.set_stage(3);
1515            }
1516        }
1517        self.sort();
1518        let has_stage1 = self.entries.iter().any(|e| e.stage() == 1);
1519        let mut appended: Vec<IndexEntry> = Vec::new();
1520        read_tree_into_overlay(repo, &tree_oid, prefix, &[], has_stage1, &mut appended)?;
1521        for e in appended {
1522            self.add_or_replace(e);
1523        }
1524        if !has_stage1 {
1525            self.sort();
1526        }
1527        let mut last_stage0: Option<&[u8]> = None;
1528        for e in &mut self.entries {
1529            match e.stage() {
1530                0 => {
1531                    last_stage0 = Some(e.path.as_slice());
1532                }
1533                1 if last_stage0.is_some_and(|p| p == e.path.as_slice()) => {
1534                    e.set_overlay_tree_skip_output(true);
1535                }
1536                _ => {}
1537            }
1538        }
1539        Ok(())
1540    }
1541}
1542
1543fn peel_to_tree_oid(repo: &Repository, oid: ObjectId) -> Result<ObjectId> {
1544    let obj = repo.odb.read(&oid)?;
1545    match obj.kind {
1546        ObjectKind::Tree => Ok(oid),
1547        ObjectKind::Commit => {
1548            let commit = crate::objects::parse_commit(&obj.data)?;
1549            Ok(commit.tree)
1550        }
1551        ObjectKind::Tag => {
1552            let tag = crate::objects::parse_tag(&obj.data)?;
1553            peel_to_tree_oid(repo, tag.object)
1554        }
1555        _ => Err(Error::ObjectNotFound(format!(
1556            "cannot peel {oid} to tree for --with-tree"
1557        ))),
1558    }
1559}
1560
1561fn read_tree_into_overlay(
1562    repo: &Repository,
1563    tree_oid: &ObjectId,
1564    prefix: &[u8],
1565    rel_base: &[u8],
1566    use_replace_path: bool,
1567    out: &mut Vec<IndexEntry>,
1568) -> Result<()> {
1569    let obj = repo.odb.read(tree_oid)?;
1570    if obj.kind != ObjectKind::Tree {
1571        return Err(Error::ObjectNotFound(format!(
1572            "object {tree_oid} is not a tree"
1573        )));
1574    }
1575    let entries = parse_tree(&obj.data)?;
1576    for TreeEntry { mode, name, oid } in entries {
1577        if mode == MODE_TREE {
1578            let mut path = rel_base.to_vec();
1579            if !path.is_empty() {
1580                path.push(b'/');
1581            }
1582            path.extend_from_slice(&name);
1583            if !prefix_under_or_equal(prefix, &path) {
1584                continue;
1585            }
1586            read_tree_into_overlay(repo, &oid, prefix, &path, use_replace_path, out)?;
1587            continue;
1588        }
1589        if mode == MODE_GITLINK {
1590            continue;
1591        }
1592        let mut path = rel_base.to_vec();
1593        if !path.is_empty() {
1594            path.push(b'/');
1595        }
1596        path.extend_from_slice(&name);
1597        if !prefix_under_or_equal(prefix, &path) {
1598            continue;
1599        }
1600        let entry = synthetic_stage1_index_entry(mode, &path, oid);
1601        if use_replace_path {
1602            if let Some(pos) = out.iter().position(|e| e.path == path && e.stage() == 1) {
1603                out[pos] = entry;
1604            } else {
1605                out.push(entry);
1606            }
1607        } else {
1608            out.push(entry);
1609        }
1610    }
1611    Ok(())
1612}
1613
1614fn prefix_under_or_equal(prefix: &[u8], path: &[u8]) -> bool {
1615    if prefix.is_empty() {
1616        return true;
1617    }
1618    if path == prefix {
1619        return true;
1620    }
1621    path.len() > prefix.len() && path.starts_with(prefix) && path[prefix.len()] == b'/'
1622}
1623
1624fn synthetic_stage1_index_entry(mode: u32, path: &[u8], oid: ObjectId) -> IndexEntry {
1625    let path_len = path.len().min(0xFFF) as u16;
1626    let flags = (1u16 << 12) | path_len;
1627    IndexEntry {
1628        ctime_sec: 0,
1629        ctime_nsec: 0,
1630        mtime_sec: 0,
1631        mtime_nsec: 0,
1632        dev: 0,
1633        ino: 0,
1634        mode,
1635        uid: 0,
1636        gid: 0,
1637        size: 0,
1638        oid,
1639        flags,
1640        flags_extended: None,
1641        path: path.to_vec(),
1642        base_index_pos: 0,
1643    }
1644}
1645
1646fn config_truthy(raw: Option<&str>) -> bool {
1647    let Some(val) = raw else {
1648        return false;
1649    };
1650    let lowered = val.trim().to_lowercase();
1651    matches!(lowered.as_str(), "true" | "yes" | "1" | "on")
1652}
1653
1654/// Whether to write 20 zero bytes instead of the SHA-1 of the index body.
1655///
1656/// Mirrors Git `prepare_repo_settings`: `feature.manyFiles` enables skip-hash unless
1657/// `index.skipHash` / `index.skiphash` is explicitly false; otherwise honor true `index.skipHash`.
1658pub(crate) fn index_skip_hash_for_write(config: Option<&ConfigSet>) -> bool {
1659    let Some(config) = config else {
1660        return false;
1661    };
1662    let many_files = config
1663        .get_bool("feature.manyFiles")
1664        .and_then(|r| r.ok())
1665        .unwrap_or(false);
1666    if many_files {
1667        if let Some(Ok(false)) = config.get_bool("index.skipHash") {
1668            return false;
1669        }
1670        if let Some(Ok(false)) = config.get_bool("index.skiphash") {
1671            return false;
1672        }
1673        return true;
1674    }
1675    for key in ["index.skipHash", "index.skiphash"] {
1676        if let Some(Ok(true)) = config.get_bool(key) {
1677            return true;
1678        }
1679    }
1680    false
1681}
1682
1683fn trim_trailing_slash_bytes(path: &[u8]) -> &[u8] {
1684    path.strip_suffix(b"/").unwrap_or(path)
1685}
1686
1687fn path_under_prefix(path: &[u8], prefix: &[u8]) -> bool {
1688    if path == prefix {
1689        return true;
1690    }
1691    if prefix.is_empty() {
1692        return true;
1693    }
1694    path.len() > prefix.len() && path.starts_with(prefix) && path[prefix.len()] == b'/'
1695}
1696
1697fn directory_in_cone(dir_path: &str, patterns: &[String], cone_mode: bool) -> bool {
1698    // Match Git `path_in_cone_mode_sparse_checkout`: a *directory* is in the cone when its
1699    // contents are recursively included. `path_matches_sparse_patterns` distinguishes a
1700    // directory from a file by a trailing slash (expanded-cone matching uses dtype the way
1701    // Git does), so pass the directory with a trailing slash. Collapse prefixes arrive
1702    // without one (e.g. `before`, `folder1`), so append it to avoid a top-level directory
1703    // being mistaken for an always-in-cone top-level file. The root (empty) directory is
1704    // always in the cone (`/*` + `!/*/` excludes every top-level directory, so e.g. `deep`
1705    // collapses to a single placeholder instead of leaving `deep/deeper1/` etc.).
1706    let dir = dir_path.trim_end_matches('/');
1707    if dir.is_empty() {
1708        return true;
1709    }
1710    let with_slash = format!("{dir}/");
1711    crate::sparse_checkout::path_matches_sparse_patterns(&with_slash, patterns, cone_mode)
1712}
1713
1714fn collect_directory_prefixes(path: &[u8], out: &mut BTreeSet<Vec<u8>>) {
1715    for (i, &b) in path.iter().enumerate() {
1716        if b == b'/' {
1717            out.insert(path[..i].to_vec());
1718        }
1719    }
1720}
1721
1722fn tree_oid_for_prefix(odb: &Odb, root_tree: &ObjectId, prefix: &[u8]) -> Result<Option<ObjectId>> {
1723    if prefix.is_empty() {
1724        return Ok(Some(*root_tree));
1725    }
1726    let pref_str = String::from_utf8_lossy(prefix);
1727    let components: Vec<&str> = pref_str.split('/').filter(|c| !c.is_empty()).collect();
1728    let mut current = *root_tree;
1729    for comp in components {
1730        let obj = odb.read(&current)?;
1731        if obj.kind != ObjectKind::Tree {
1732            return Ok(None);
1733        }
1734        let entries = parse_tree(&obj.data)?;
1735        let mut next = None;
1736        for e in entries {
1737            if e.name == comp.as_bytes() {
1738                if e.mode == MODE_TREE {
1739                    next = Some(e.oid);
1740                }
1741                break;
1742            }
1743        }
1744        current = match next {
1745            Some(o) => o,
1746            None => return Ok(None),
1747        };
1748    }
1749    Ok(Some(current))
1750}
1751
1752/// Build the list of blob index entries under `prefix` that match `HEAD` at `tree_oid`,
1753/// treating existing sparse-directory placeholders in `entries` as opaque subtrees (like Git).
1754fn collect_sparse_aware_expected_blobs(
1755    odb: &Odb,
1756    tree_oid: &ObjectId,
1757    prefix: &[u8],
1758    patterns: &[String],
1759    cone_mode: bool,
1760    entries: &[IndexEntry],
1761) -> Result<Vec<IndexEntry>> {
1762    let mut out = Vec::new();
1763    walk_sparse_aware(
1764        odb, tree_oid, prefix, patterns, cone_mode, entries, &mut out,
1765    )?;
1766    Ok(out)
1767}
1768
1769fn walk_sparse_aware(
1770    odb: &Odb,
1771    tree_oid: &ObjectId,
1772    prefix: &[u8],
1773    patterns: &[String],
1774    cone_mode: bool,
1775    entries: &[IndexEntry],
1776    out: &mut Vec<IndexEntry>,
1777) -> Result<()> {
1778    let obj = odb.read(tree_oid)?;
1779    if obj.kind != ObjectKind::Tree {
1780        return Err(Error::IndexError(format!("expected tree at {}", tree_oid)));
1781    }
1782    let tree_entries = parse_tree(&obj.data)?;
1783    for te in tree_entries {
1784        let path = if prefix.is_empty() {
1785            te.name.clone()
1786        } else {
1787            let mut p = prefix.to_vec();
1788            p.push(b'/');
1789            p.extend_from_slice(&te.name);
1790            p
1791        };
1792        if te.mode == MODE_TREE {
1793            let path_slash = {
1794                let mut p = path.clone();
1795                p.push(b'/');
1796                p
1797            };
1798            if entries.iter().any(|e| {
1799                e.stage() == 0
1800                    && e.is_sparse_directory_placeholder()
1801                    && e.path == path_slash
1802                    && e.oid == te.oid
1803            }) {
1804                continue;
1805            }
1806            walk_sparse_aware(odb, &te.oid, &path, patterns, cone_mode, entries, out)?;
1807        } else {
1808            let path_len = path.len().min(0xFFF) as u16;
1809            let path_str = String::from_utf8_lossy(&path);
1810            if crate::sparse_checkout::path_matches_sparse_patterns(&path_str, patterns, cone_mode)
1811            {
1812                continue;
1813            }
1814            let mut e = IndexEntry {
1815                ctime_sec: 0,
1816                ctime_nsec: 0,
1817                mtime_sec: 0,
1818                mtime_nsec: 0,
1819                dev: 0,
1820                ino: 0,
1821                mode: te.mode,
1822                uid: 0,
1823                gid: 0,
1824                size: 0,
1825                oid: te.oid,
1826                flags: path_len,
1827                flags_extended: Some(0),
1828                path,
1829                base_index_pos: 0,
1830            };
1831            e.set_skip_worktree(true);
1832            out.push(e);
1833        }
1834    }
1835    Ok(())
1836}
1837
1838fn flatten_tree_blobs(odb: &Odb, tree_oid: &ObjectId, prefix: &[u8]) -> Result<Vec<IndexEntry>> {
1839    let obj = odb.read(tree_oid)?;
1840    if obj.kind != ObjectKind::Tree {
1841        return Err(Error::IndexError(format!("expected tree at {}", tree_oid)));
1842    }
1843    let entries = parse_tree(&obj.data)?;
1844    let mut out = Vec::new();
1845    for te in entries {
1846        let path = if prefix.is_empty() {
1847            te.name.clone()
1848        } else {
1849            let mut p = prefix.to_vec();
1850            p.push(b'/');
1851            p.extend_from_slice(&te.name);
1852            p
1853        };
1854        if te.mode == MODE_TREE {
1855            let sub = flatten_tree_blobs(odb, &te.oid, &path)?;
1856            out.extend(sub);
1857        } else {
1858            let path_len = path.len().min(0xFFF) as u16;
1859            let mut e = IndexEntry {
1860                ctime_sec: 0,
1861                ctime_nsec: 0,
1862                mtime_sec: 0,
1863                mtime_nsec: 0,
1864                dev: 0,
1865                ino: 0,
1866                mode: te.mode,
1867                uid: 0,
1868                gid: 0,
1869                size: 0,
1870                oid: te.oid,
1871                flags: path_len,
1872                flags_extended: Some(0),
1873                path,
1874                base_index_pos: 0,
1875            };
1876            e.set_skip_worktree(true);
1877            out.push(e);
1878        }
1879    }
1880    Ok(out)
1881}
1882
1883fn lockfile_pid_enabled(index_path: &Path) -> bool {
1884    let git_dir = match index_path.parent() {
1885        Some(dir) => dir,
1886        None => return false,
1887    };
1888
1889    ConfigSet::load(Some(git_dir), true)
1890        .ok()
1891        .and_then(|cfg| cfg.get_bool("core.lockfilepid"))
1892        .and_then(|res| res.ok())
1893        .unwrap_or(false)
1894}
1895
1896fn pid_path_for_lock(lock_path: &Path) -> std::path::PathBuf {
1897    let file_name = lock_path
1898        .file_name()
1899        .map(|s| s.to_string_lossy().to_string())
1900        .unwrap_or_else(|| "index.lock".to_owned());
1901    let pid_name = if let Some(base) = file_name.strip_suffix(".lock") {
1902        format!("{base}~pid.lock")
1903    } else {
1904        format!("{file_name}~pid.lock")
1905    };
1906    lock_path.with_file_name(pid_name)
1907}
1908
1909fn write_lock_pid_file(pid_path: &Path) -> io::Result<()> {
1910    use std::io::Write as _;
1911    let mut file = fs::OpenOptions::new()
1912        .write(true)
1913        .create(true)
1914        .truncate(true)
1915        .open(pid_path)?;
1916    writeln!(file, "pid {}", std::process::id())?;
1917    Ok(())
1918}
1919
1920/// Detail lines Git prints when the index lock file already exists (used by stash and similar).
1921pub fn format_index_lock_blocked_detail(index_path: &Path) -> String {
1922    let lock_path = index_path.with_extension("lock");
1923    let pid_path = pid_path_for_lock(&lock_path);
1924    let err = io::Error::new(io::ErrorKind::AlreadyExists, "File exists");
1925    build_lock_exists_message(&lock_path, &pid_path, &err)
1926}
1927
1928fn build_lock_exists_message(lock_path: &Path, pid_path: &Path, err: &io::Error) -> String {
1929    let mut msg = format!("Unable to create '{}': {}.\n\n", lock_path.display(), err);
1930
1931    if let Some(pid) = read_lock_pid(pid_path) {
1932        if is_process_running(pid) {
1933            msg.push_str(&format!(
1934                "Lock is held by process {pid}; if no git process is running, the lock file may be stale (PIDs can be reused)"
1935            ));
1936        } else {
1937            msg.push_str(&format!(
1938                "Lock was held by process {pid}, which is no longer running; the lock file appears to be stale"
1939            ));
1940        }
1941    } else {
1942        msg.push_str(
1943            "Another git process seems to be running in this repository, or the lock file may be stale",
1944        );
1945    }
1946
1947    msg
1948}
1949
1950fn read_lock_pid(pid_path: &Path) -> Option<u64> {
1951    let raw = fs::read_to_string(pid_path).ok()?;
1952    let trimmed = raw.trim();
1953    if let Some(v) = trimmed.strip_prefix("pid ") {
1954        return v.trim().parse::<u64>().ok();
1955    }
1956    trimmed.parse::<u64>().ok()
1957}
1958
1959fn is_process_running(pid: u64) -> bool {
1960    #[cfg(target_os = "linux")]
1961    {
1962        let proc_path = std::path::PathBuf::from(format!("/proc/{pid}"));
1963        proc_path.exists()
1964    }
1965
1966    #[cfg(not(target_os = "linux"))]
1967    {
1968        let status = std::process::Command::new("kill")
1969            .arg("-0")
1970            .arg(pid.to_string())
1971            .status();
1972        status.map(|s| s.success()).unwrap_or(false)
1973    }
1974}
1975
1976/// Parse a single index entry from `data`, returning `(entry, bytes_consumed)`.
1977fn parse_entry(data: &[u8], version: u32, prev_path: &[u8]) -> Result<(IndexEntry, usize)> {
1978    if data.len() < 62 {
1979        return Err(Error::IndexError("entry too short".to_owned()));
1980    }
1981
1982    let mut pos = 0;
1983
1984    macro_rules! read_u32 {
1985        () => {{
1986            let v = u32::from_be_bytes(
1987                data[pos..pos + 4]
1988                    .try_into()
1989                    .map_err(|_| Error::IndexError("truncated u32".to_owned()))?,
1990            );
1991            pos += 4;
1992            v
1993        }};
1994    }
1995
1996    let ctime_sec = read_u32!();
1997    let ctime_nsec = read_u32!();
1998    let mtime_sec = read_u32!();
1999    let mtime_nsec = read_u32!();
2000    let dev = read_u32!();
2001    let ino = read_u32!();
2002    let mode = read_u32!();
2003    let uid = read_u32!();
2004    let gid = read_u32!();
2005    let size = read_u32!();
2006
2007    let oid = ObjectId::from_bytes(&data[pos..pos + 20])?;
2008    pos += 20;
2009
2010    let flags = u16::from_be_bytes(
2011        data[pos..pos + 2]
2012            .try_into()
2013            .map_err(|_| Error::IndexError("truncated flags".to_owned()))?,
2014    );
2015    pos += 2;
2016
2017    let flags_extended = if version >= 3 && flags & 0x4000 != 0 {
2018        let fe = u16::from_be_bytes(
2019            data[pos..pos + 2]
2020                .try_into()
2021                .map_err(|_| Error::IndexError("truncated extended flags".to_owned()))?,
2022        );
2023        pos += 2;
2024        Some(fe)
2025    } else {
2026        None
2027    };
2028
2029    let path;
2030    if version == 4 {
2031        // V4: prefix-compressed path
2032        let (strip_len, varint_bytes) = read_varint(&data[pos..]);
2033        pos += varint_bytes;
2034        let nul = data[pos..]
2035            .iter()
2036            .position(|&b| b == 0)
2037            .ok_or_else(|| Error::IndexError("v4 entry path missing NUL".to_owned()))?;
2038        let suffix = &data[pos..pos + nul];
2039        pos += nul + 1;
2040        let keep = prev_path.len().saturating_sub(strip_len);
2041        let mut full_path = prev_path[..keep].to_vec();
2042        full_path.extend_from_slice(suffix);
2043        path = full_path;
2044    } else {
2045        // V2/V3: NUL-terminated full path + padding
2046        let nul = data[pos..]
2047            .iter()
2048            .position(|&b| b == 0)
2049            .ok_or_else(|| Error::IndexError("entry path missing NUL terminator".to_owned()))?;
2050        path = data[pos..pos + nul].to_vec();
2051        pos += nul + 1;
2052        let entry_start = 0usize;
2053        let entry_len = pos - entry_start;
2054        let padded = (entry_len + 7) & !7;
2055        let padding = padded.saturating_sub(entry_len);
2056        pos += padding;
2057    }
2058
2059    Ok((
2060        IndexEntry {
2061            ctime_sec,
2062            ctime_nsec,
2063            mtime_sec,
2064            mtime_nsec,
2065            dev,
2066            ino,
2067            mode,
2068            uid,
2069            gid,
2070            size,
2071            oid,
2072            flags,
2073            flags_extended,
2074            path,
2075            base_index_pos: 0,
2076        },
2077        pos,
2078    ))
2079}
2080
2081/// Serialise a single index entry into `out`.
2082/// Read a variable-length integer (git's index v4 varint encoding).
2083/// Returns (value, bytes_consumed).
2084fn write_varint(out: &mut Vec<u8>, mut value: usize) {
2085    loop {
2086        let mut b = (value & 0x7F) as u8;
2087        value >>= 7;
2088        if value != 0 {
2089            b |= 0x80;
2090        }
2091        out.push(b);
2092        if value == 0 {
2093            break;
2094        }
2095    }
2096}
2097
2098fn read_varint(data: &[u8]) -> (usize, usize) {
2099    let mut value: usize = 0;
2100    let mut shift = 0usize;
2101    let mut pos = 0;
2102    loop {
2103        if pos >= data.len() {
2104            break;
2105        }
2106        let byte = data[pos] as usize;
2107        pos += 1;
2108        value |= (byte & 0x7F) << shift;
2109        if byte & 0x80 == 0 {
2110            break;
2111        }
2112        shift += 7;
2113        // Prevent infinite loops on malformed data
2114        if shift > 28 {
2115            break;
2116        }
2117    }
2118    (value, pos)
2119}
2120
2121fn serialize_entry_v4(entry: &IndexEntry, previous_path: &mut Vec<u8>, out: &mut Vec<u8>) {
2122    let write_u32 = |out: &mut Vec<u8>, v: u32| out.extend_from_slice(&v.to_be_bytes());
2123
2124    write_u32(out, entry.ctime_sec);
2125    write_u32(out, entry.ctime_nsec);
2126    write_u32(out, entry.mtime_sec);
2127    write_u32(out, entry.mtime_nsec);
2128    write_u32(out, entry.dev);
2129    write_u32(out, entry.ino);
2130    write_u32(out, entry.mode);
2131    write_u32(out, entry.uid);
2132    write_u32(out, entry.gid);
2133    write_u32(out, entry.size);
2134    out.extend_from_slice(entry.oid.as_bytes());
2135
2136    let mut flags = entry.flags;
2137    let disk_ext = entry
2138        .flags_extended
2139        .map(IndexEntry::disk_flags_extended)
2140        .filter(|fe| *fe != 0);
2141    if disk_ext.is_some() {
2142        flags |= 0x4000;
2143    } else {
2144        flags &= !0x4000;
2145    }
2146    let path_len = entry.path.len().min(0xFFF) as u16;
2147    flags = (flags & 0xF000) | path_len;
2148    out.extend_from_slice(&flags.to_be_bytes());
2149
2150    if let Some(fe) = disk_ext {
2151        out.extend_from_slice(&fe.to_be_bytes());
2152    }
2153
2154    let common = previous_path
2155        .iter()
2156        .zip(entry.path.iter())
2157        .take_while(|(a, b)| a == b)
2158        .count();
2159    let to_remove = previous_path.len().saturating_sub(common);
2160    write_varint(out, to_remove);
2161    out.extend_from_slice(&entry.path[common..]);
2162    out.push(0);
2163
2164    previous_path.clear();
2165    previous_path.extend_from_slice(&entry.path);
2166}
2167
2168fn serialize_entry(entry: &IndexEntry, version: u32, out: &mut Vec<u8>) {
2169    let start = out.len();
2170
2171    let write_u32 = |out: &mut Vec<u8>, v: u32| out.extend_from_slice(&v.to_be_bytes());
2172
2173    write_u32(out, entry.ctime_sec);
2174    write_u32(out, entry.ctime_nsec);
2175    write_u32(out, entry.mtime_sec);
2176    write_u32(out, entry.mtime_nsec);
2177    write_u32(out, entry.dev);
2178    write_u32(out, entry.ino);
2179    write_u32(out, entry.mode);
2180    write_u32(out, entry.uid);
2181    write_u32(out, entry.gid);
2182    write_u32(out, entry.size);
2183    out.extend_from_slice(entry.oid.as_bytes());
2184
2185    // Set or clear the extended-flags bit in flags
2186    let mut flags = entry.flags;
2187    let disk_ext = entry
2188        .flags_extended
2189        .map(IndexEntry::disk_flags_extended)
2190        .filter(|fe| *fe != 0);
2191    if version >= 3 && disk_ext.is_some() {
2192        flags |= 0x4000;
2193    } else {
2194        flags &= !0x4000;
2195    }
2196    // Overwrite path length bits (bottom 12)
2197    let path_len = entry.path.len().min(0xFFF) as u16;
2198    flags = (flags & 0xF000) | path_len;
2199    out.extend_from_slice(&flags.to_be_bytes());
2200
2201    if version >= 3 {
2202        if let Some(fe) = disk_ext {
2203            out.extend_from_slice(&fe.to_be_bytes());
2204        }
2205    }
2206
2207    out.extend_from_slice(&entry.path);
2208    out.push(0);
2209
2210    // Pad to 8-byte boundary
2211    let entry_len = out.len() - start;
2212    let padded = (entry_len + 7) & !7;
2213    let padding = padded - entry_len;
2214    for _ in 0..padding {
2215        out.push(0);
2216    }
2217}
2218
2219/// Build an [`IndexEntry`] by stat-ing a file on disk.
2220///
2221/// # Parameters
2222///
2223/// - `path` — absolute path to the file.
2224/// - `rel_path` — path relative to the repo root (stored in the index).
2225/// - `oid` — the object ID of the file's blob.
2226/// - `mode` — file mode (use [`MODE_REGULAR`], [`MODE_EXECUTABLE`], etc.).
2227///
2228/// # Errors
2229///
2230/// Returns [`Error::Io`] if `stat` fails.
2231pub fn entry_from_stat(
2232    path: &Path,
2233    rel_path: &[u8],
2234    oid: ObjectId,
2235    mode: u32,
2236) -> Result<IndexEntry> {
2237    let meta = fs::symlink_metadata(path)?;
2238    Ok(entry_from_metadata(&meta, rel_path, oid, mode))
2239}
2240
2241/// Build an [`IndexEntry`] from already-obtained metadata.
2242///
2243/// This avoids a redundant `stat()` call when the caller already has
2244/// filesystem metadata (e.g. from `symlink_metadata`).
2245#[must_use]
2246pub fn entry_from_metadata(
2247    meta: &fs::Metadata,
2248    rel_path: &[u8],
2249    oid: ObjectId,
2250    mode: u32,
2251) -> IndexEntry {
2252    #[cfg(unix)]
2253    {
2254        use std::os::unix::fs::MetadataExt;
2255        IndexEntry {
2256            ctime_sec: meta.ctime() as u32,
2257            ctime_nsec: meta.ctime_nsec() as u32,
2258            mtime_sec: meta.mtime() as u32,
2259            mtime_nsec: meta.mtime_nsec() as u32,
2260            dev: meta.dev() as u32,
2261            ino: meta.ino() as u32,
2262            mode,
2263            uid: meta.uid(),
2264            gid: meta.gid(),
2265            size: meta.size() as u32,
2266            oid,
2267            flags: rel_path.len().min(0xFFF) as u16,
2268            flags_extended: None,
2269            path: rel_path.to_vec(),
2270            base_index_pos: 0,
2271        }
2272    }
2273    #[cfg(not(unix))]
2274    {
2275        use std::time::UNIX_EPOCH;
2276        let mtime = meta
2277            .modified()
2278            .ok()
2279            .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
2280            .unwrap_or_default();
2281        IndexEntry {
2282            ctime_sec: mtime.as_secs() as u32,
2283            ctime_nsec: mtime.subsec_nanos(),
2284            mtime_sec: mtime.as_secs() as u32,
2285            mtime_nsec: mtime.subsec_nanos(),
2286            dev: 0,
2287            ino: 0,
2288            mode,
2289            uid: 0,
2290            gid: 0,
2291            size: meta.len() as u32,
2292            oid,
2293            flags: rel_path.len().min(0xFFF) as u16,
2294            flags_extended: None,
2295            path: rel_path.to_vec(),
2296            base_index_pos: 0,
2297        }
2298    }
2299}
2300
2301/// Convert a `stat` mode to the Git index mode, normalised to one of the
2302/// known constants ([`MODE_REGULAR`], [`MODE_EXECUTABLE`], [`MODE_SYMLINK`]).
2303///
2304/// Only the `S_IFMT` and execute bits are inspected; all other permission bits
2305/// are discarded (Git stores only 644 or 755 for regular files).
2306///
2307/// # Parameters
2308///
2309/// - `raw_mode` — the raw `st_mode` value from `stat(2)`.
2310#[must_use]
2311pub fn normalize_mode(raw_mode: u32) -> u32 {
2312    const S_IFMT: u32 = 0o170000;
2313    const S_IFLNK: u32 = 0o120000;
2314    const S_IFREG: u32 = 0o100000;
2315
2316    let fmt = raw_mode & S_IFMT;
2317    if fmt == S_IFLNK {
2318        return MODE_SYMLINK;
2319    }
2320    if fmt == S_IFREG {
2321        // Executable if any execute bit is set
2322        if raw_mode & 0o111 != 0 {
2323            return MODE_EXECUTABLE;
2324        }
2325        return MODE_REGULAR;
2326    }
2327    // Fallback for everything else (devices, etc.) — treat as regular
2328    MODE_REGULAR
2329}
2330
2331#[cfg(test)]
2332mod tests {
2333    #![allow(clippy::expect_used, clippy::unwrap_used)]
2334
2335    use super::*;
2336    use tempfile::TempDir;
2337
2338    fn dummy_oid() -> ObjectId {
2339        ObjectId::from_bytes(&[0u8; 20]).unwrap()
2340    }
2341
2342    fn make_entry(path: &str) -> IndexEntry {
2343        IndexEntry {
2344            ctime_sec: 0,
2345            ctime_nsec: 0,
2346            mtime_sec: 0,
2347            mtime_nsec: 0,
2348            dev: 0,
2349            ino: 0,
2350            mode: MODE_REGULAR,
2351            uid: 0,
2352            gid: 0,
2353            size: 0,
2354            oid: dummy_oid(),
2355            flags: path.len().min(0xFFF) as u16,
2356            flags_extended: None,
2357            path: path.as_bytes().to_vec(),
2358            base_index_pos: 0,
2359        }
2360    }
2361
2362    #[test]
2363    fn round_trip_empty_index() {
2364        let dir = TempDir::new().unwrap();
2365        let path = dir.path().join("index");
2366
2367        let idx = Index::new();
2368        idx.write(&path).unwrap();
2369
2370        let loaded = Index::load(&path).unwrap();
2371        assert_eq!(loaded.entries.len(), 0);
2372    }
2373
2374    #[test]
2375    fn round_trip_with_entries() {
2376        let dir = TempDir::new().unwrap();
2377        let path = dir.path().join("index");
2378
2379        let mut idx = Index::new();
2380        idx.add_or_replace(make_entry("foo.txt"));
2381        idx.add_or_replace(make_entry("bar/baz.txt"));
2382        idx.write(&path).unwrap();
2383
2384        let loaded = Index::load(&path).unwrap();
2385        assert_eq!(loaded.entries.len(), 2);
2386        assert_eq!(loaded.entries[0].path, b"bar/baz.txt");
2387        assert_eq!(loaded.entries[1].path, b"foo.txt");
2388    }
2389
2390    #[test]
2391    fn remove_descendants_under_path_drops_nested_only() {
2392        let mut idx = Index::new();
2393        idx.add_or_replace(make_entry("d/e"));
2394        idx.add_or_replace(make_entry("d-other"));
2395        idx.add_or_replace(make_entry("prefix/d"));
2396        idx.remove_descendants_under_path("d");
2397        let paths: Vec<_> = idx.entries.iter().map(|e| e.path.as_slice()).collect();
2398        assert_eq!(paths, vec![b"d-other".as_slice(), b"prefix/d".as_slice()]);
2399    }
2400
2401    #[test]
2402    fn requested_v4_writes_v4_on_disk() {
2403        let dir = TempDir::new().unwrap();
2404        let path = dir.path().join("index");
2405
2406        let mut idx = Index {
2407            version: 4,
2408            ..Index::default()
2409        };
2410        idx.add_or_replace(make_entry("one"));
2411        idx.add_or_replace(make_entry("two/one"));
2412        idx.write(&path).unwrap();
2413
2414        let data = fs::read(&path).unwrap();
2415        assert_eq!(&data[4..8], &4u32.to_be_bytes());
2416
2417        let loaded = Index::load(&path).unwrap();
2418        assert_eq!(loaded.version, 4);
2419        assert_eq!(loaded.entries[0].path, b"one");
2420        assert_eq!(loaded.entries[1].path, b"two/one");
2421    }
2422}