Skip to main content

grit_lib/
index.rs

1//! Git index (staging area) reading and writing.
2//!
3//! The index file (`.git/index`) stores the current state of the staging area.
4//! It uses a binary format with a 12-byte header, fixed-size index entries,
5//! and optional extensions, followed by a trailing SHA-1 over the whole file.
6//!
7//! # Format version
8//!
9//! This implementation supports index versions 2 and 3. Requests for version 4
10//! currently fall back to a non-compressed index on write because path
11//! compression is not yet implemented.
12//!
13//! # References
14//!
15//! See `Documentation/technical/index-format.txt` in the Git source tree for
16//! the authoritative format specification.
17
18use std::collections::{BTreeMap, BTreeSet};
19use std::fs;
20use std::io::{self, Write};
21use std::path::Path;
22
23use sha1::{Digest, Sha1};
24use sha2::Sha256;
25
26use crate::config::ConfigSet;
27use crate::error::{Error, Result};
28use crate::objects::{parse_tree, HashAlgo, ObjectId, ObjectKind, TreeEntry};
29use crate::odb::Odb;
30use crate::repo::Repository;
31use crate::resolve_undo::{self, write_resolve_undo_payload, ResolveUndoRecord};
32use crate::rev_parse;
33use crate::untracked_cache;
34
35/// File mode for a regular (non-executable) file.
36pub const MODE_REGULAR: u32 = 0o100644;
37/// File mode for an executable file.
38pub const MODE_EXECUTABLE: u32 = 0o100755;
39/// File mode for a symbolic link.
40pub const MODE_SYMLINK: u32 = 0o120000;
41/// File mode for a gitlink (submodule).
42pub const MODE_GITLINK: u32 = 0o160000;
43/// File mode for a directory (tree) entry — only used in tree objects, not index.
44pub const MODE_TREE: u32 = 0o040000;
45
46/// Git index extension signature `sdir` (sparse directory entries present).
47const INDEX_EXT_SPARSE_DIRECTORIES: u32 = u32::from_be_bytes(*b"sdir");
48/// Git index extension signature `UNTR` (untracked cache).
49const INDEX_EXT_UNTRACKED: u32 = u32::from_be_bytes(*b"UNTR");
50/// Git index extension signature `FSMN` (fsmonitor).
51const INDEX_EXT_FSMONITOR: u32 = u32::from_be_bytes(*b"FSMN");
52/// Git index extension signature `REUC` (resolve undo).
53const INDEX_EXT_RESOLVE_UNDO: u32 = u32::from_be_bytes(*b"REUC");
54/// Git index extension signature `link` (split index).
55const INDEX_EXT_LINK: u32 = u32::from_be_bytes(*b"link");
56
57/// A single entry in the Git index.
58#[derive(Debug, Clone, PartialEq, Eq)]
59pub struct IndexEntry {
60    /// Time the file metadata last changed (seconds since epoch).
61    pub ctime_sec: u32,
62    /// Nanosecond fraction of `ctime_sec`.
63    pub ctime_nsec: u32,
64    /// Time the file data last changed (seconds since epoch).
65    pub mtime_sec: u32,
66    /// Nanosecond fraction of `mtime_sec`.
67    pub mtime_nsec: u32,
68    /// Device number.
69    pub dev: u32,
70    /// Inode number.
71    pub ino: u32,
72    /// Unix file mode (`MODE_REGULAR`, `MODE_EXECUTABLE`, `MODE_SYMLINK`, …).
73    pub mode: u32,
74    /// Owner UID.
75    pub uid: u32,
76    /// Owner GID.
77    pub gid: u32,
78    /// File size in bytes (truncated to 32 bits).
79    pub size: u32,
80    /// SHA-1 of the blob object.
81    pub oid: ObjectId,
82    /// Entry flags (stage, assume-valid, extended, …).
83    pub flags: u16,
84    /// Extended flags (v3+ only).
85    pub flags_extended: Option<u16>,
86    /// Path relative to the repository root.  May contain `/` separators.
87    pub path: Vec<u8>,
88    /// Split index: position in shared base (1-based), or 0 if not from shared index.
89    pub base_index_pos: u32,
90}
91
92impl IndexEntry {
93    /// Merge stage (0 = normal, 1–3 = conflict stages).
94    #[must_use]
95    pub fn stage(&self) -> u8 {
96        ((self.flags >> 12) & 0x3) as u8
97    }
98
99    pub(crate) fn set_stage(&mut self, stage: u8) {
100        self.flags = (self.flags & 0x0FFF) | ((stage as u16 & 0x3) << 12);
101    }
102
103    /// Whether the assume-unchanged bit is set.
104    #[must_use]
105    pub fn assume_unchanged(&self) -> bool {
106        self.flags & 0x8000 != 0
107    }
108
109    /// Whether the skip-worktree bit is set (extended flags, v3+).
110    #[must_use]
111    pub fn skip_worktree(&self) -> bool {
112        self.flags_extended
113            .map(|f| f & 0x4000 != 0)
114            .unwrap_or(false)
115    }
116
117    /// Set the assume-unchanged bit.
118    pub fn set_assume_unchanged(&mut self, value: bool) {
119        if value {
120            self.flags |= 0x8000;
121        } else {
122            self.flags &= !0x8000;
123        }
124    }
125
126    /// Set the skip-worktree bit (promotes entry to v3).
127    pub fn set_skip_worktree(&mut self, value: bool) {
128        let fe = self.flags_extended.get_or_insert(0);
129        if value {
130            *fe |= 0x4000;
131        } else {
132            *fe &= !0x4000;
133            if *fe == 0 {
134                self.flags_extended = None;
135            }
136        }
137    }
138
139    /// Whether the intent-to-add bit is set (extended flags, v3+).
140    #[must_use]
141    pub fn intent_to_add(&self) -> bool {
142        self.flags_extended
143            .map(|f| f & 0x2000 != 0)
144            .unwrap_or(false)
145    }
146
147    /// Set the intent-to-add bit (promotes entry to v3).
148    pub fn set_intent_to_add(&mut self, value: bool) {
149        let fe = self.flags_extended.get_or_insert(0);
150        if value {
151            *fe |= 0x2000;
152        } else {
153            *fe &= !0x2000;
154            if *fe == 0 {
155                self.flags_extended = None;
156            }
157        }
158    }
159
160    /// Sparse-index placeholder: tree mode, stage 0, and `SKIP_WORKTREE` set.
161    #[must_use]
162    pub fn is_sparse_directory_placeholder(&self) -> bool {
163        self.mode == MODE_TREE && self.stage() == 0 && self.skip_worktree()
164    }
165
166    /// In-memory only: `ls-files --with-tree` hides stage-1 overlay rows that duplicate stage 0.
167    const FLAG_EXT_OVERLAY_TREE_SKIP: u16 = 0x8000;
168    /// In-memory and on-disk compatibility bit for fsmonitor validity (`git ls-files -f`).
169    const FLAG_EXT_FSMONITOR_VALID: u16 = 0x1000;
170    /// Extended flags Git persists in index v3 (`CE_EXTENDED_FLAGS` in `read-cache-ll.h`).
171    const FLAG_EXT_ON_DISK: u16 = Self::FLAG_EXT_FSMONITOR_VALID | 0x2000 | 0x4000;
172
173    /// Extended flag bits safe to write to a Git-compatible on-disk index.
174    fn disk_flags_extended(fe: u16) -> u16 {
175        fe & Self::FLAG_EXT_ON_DISK
176    }
177
178    #[must_use]
179    pub fn overlay_tree_skip_output(&self) -> bool {
180        self.flags_extended
181            .is_some_and(|fe| fe & Self::FLAG_EXT_OVERLAY_TREE_SKIP != 0)
182    }
183
184    fn set_overlay_tree_skip_output(&mut self, value: bool) {
185        let fe = self.flags_extended.get_or_insert(0);
186        if value {
187            *fe |= Self::FLAG_EXT_OVERLAY_TREE_SKIP;
188        } else {
189            *fe &= !Self::FLAG_EXT_OVERLAY_TREE_SKIP;
190            if *fe == 0 {
191                self.flags_extended = None;
192            }
193        }
194    }
195
196    /// Whether the fsmonitor-valid bit is set.
197    #[must_use]
198    pub fn fsmonitor_valid(&self) -> bool {
199        self.flags_extended
200            .is_some_and(|fe| fe & Self::FLAG_EXT_FSMONITOR_VALID != 0)
201    }
202
203    /// Set or clear the fsmonitor-valid bit.
204    pub fn set_fsmonitor_valid(&mut self, value: bool) {
205        let fe = self.flags_extended.get_or_insert(0);
206        if value {
207            *fe |= Self::FLAG_EXT_FSMONITOR_VALID;
208        } else {
209            *fe &= !Self::FLAG_EXT_FSMONITOR_VALID;
210            if *fe == 0 {
211                self.flags_extended = None;
212            }
213        }
214    }
215}
216
217/// The in-memory representation of the Git index file.
218#[derive(Debug, Clone, Default)]
219pub struct Index {
220    /// Index format version (2 or 3).
221    pub version: u32,
222    /// Index entries, sorted by (path, stage).
223    pub entries: Vec<IndexEntry>,
224    /// When true, the on-disk index includes the `sdir` extension (sparse index).
225    pub sparse_directories: bool,
226    /// Optional untracked-cache extension (`UNTR`), matching Git's `istate->untracked`.
227    pub untracked_cache: Option<untracked_cache::UntrackedCache>,
228    /// Optional fsmonitor token extension (`FSMN`).
229    pub fsmonitor_last_update: Option<String>,
230    /// Optional `REUC` resolve-undo extension (paths that were unmerged before a resolution).
231    pub resolve_undo: Option<BTreeMap<Vec<u8>, ResolveUndoRecord>>,
232    /// Split index `link` extension (bitmaps cleared after load merge).
233    pub(crate) split_link: Option<crate::split_index::SplitIndexLink>,
234    /// Root tree OID from a valid `TREE` index extension (`cache_tree`), when present.
235    pub cache_tree_root: Option<ObjectId>,
236    /// Parsed `TREE` index extension (`cache-tree`) preserving invalid and subtree nodes.
237    pub cache_tree: Option<CacheTreeNode>,
238    /// The repository's object hash algorithm. Determines the width of entry
239    /// object IDs and the trailing checksum on disk. Set authoritatively from
240    /// the object database when loaded via the repository; defaults to SHA-1.
241    pub hash_algo: HashAlgo,
242}
243
244/// One node from Git's `TREE` index extension.
245#[derive(Debug, Clone, PartialEq, Eq)]
246pub struct CacheTreeNode {
247    /// Path component for this node. The root node stores an empty name.
248    pub name: Vec<u8>,
249    /// Number of index entries covered by this node, or `-1` when invalid.
250    pub entry_count: i32,
251    /// Tree object ID for valid nodes. Invalid nodes do not store an object ID.
252    pub oid: Option<ObjectId>,
253    /// Immediate child cache-tree nodes.
254    pub children: Vec<CacheTreeNode>,
255}
256
257impl CacheTreeNode {
258    /// Create a valid cache-tree node.
259    #[must_use]
260    pub fn valid(
261        name: Vec<u8>,
262        entry_count: i32,
263        oid: ObjectId,
264        children: Vec<CacheTreeNode>,
265    ) -> Self {
266        Self {
267            name,
268            entry_count,
269            oid: Some(oid),
270            children,
271        }
272    }
273
274    /// Mark this node as invalid while preserving its children.
275    pub fn invalidate(&mut self) {
276        self.entry_count = -1;
277        self.oid = None;
278    }
279
280    /// Returns whether this node has a valid cached tree object ID.
281    #[must_use]
282    pub fn is_valid(&self) -> bool {
283        self.entry_count >= 0 && self.oid.is_some()
284    }
285}
286
287/// Options for loading an index from disk.
288#[derive(Debug, Clone, Copy)]
289pub struct IndexLoadOptions {
290    /// If the index contains sparse directory placeholders, expand them to full file entries.
291    pub expand_sparse_directories: bool,
292}
293
294impl Default for IndexLoadOptions {
295    fn default() -> Self {
296        Self {
297            expand_sparse_directories: true,
298        }
299    }
300}
301
302/// Version used after an invalid `GIT_INDEX_VERSION` value (matches Git stderr: "Using version 3").
303const INDEX_ENV_INVALID_FALLBACK: u32 = 3;
304/// Version used after an invalid `index.version` config value (same message as env).
305const INDEX_CONFIG_INVALID_FALLBACK: u32 = 3;
306/// Minimum supported index version.
307const INDEX_FORMAT_LB: u32 = 2;
308/// Maximum supported index version (version 4 requests are accepted and
309/// downgraded on write).
310const INDEX_FORMAT_UB: u32 = 4;
311/// Index extension signature `TREE` (cache-tree).
312const INDEX_EXT_CACHE_TREE: u32 = 0x5452_4545;
313
314/// Best-effort read of Git's `TREE` index extension (`cache_tree_read`).
315fn parse_cache_tree(data: &[u8], hash_len: usize) -> Option<CacheTreeNode> {
316    let (node, pos) = parse_cache_tree_node(data, 0, hash_len)?;
317    if pos == data.len() {
318        Some(node)
319    } else {
320        None
321    }
322}
323
324fn parse_cache_tree_node(
325    data: &[u8],
326    mut pos: usize,
327    hash_len: usize,
328) -> Option<(CacheTreeNode, usize)> {
329    let name_end = data.get(pos..)?.iter().position(|&b| b == 0)? + pos;
330    let name = data[pos..name_end].to_vec();
331    pos = name_end + 1;
332
333    let (entry_count, consumed) = parse_signed_int_prefix(&data[pos..])?;
334    pos += consumed;
335    if data.get(pos) != Some(&b' ') {
336        return None;
337    }
338    pos += 1;
339    let (subtree_count, consumed) = parse_signed_int_prefix(&data[pos..])?;
340    if subtree_count < 0 {
341        return None;
342    }
343    pos += consumed;
344    if data.get(pos) != Some(&b'\n') {
345        return None;
346    }
347    pos += 1;
348
349    let oid = if entry_count >= 0 {
350        if data.len().saturating_sub(pos) < hash_len {
351            return None;
352        }
353        let oid = ObjectId::from_bytes(&data[pos..pos + hash_len]).ok()?;
354        pos += hash_len;
355        Some(oid)
356    } else {
357        None
358    };
359
360    let mut children = Vec::with_capacity(subtree_count as usize);
361    for _ in 0..subtree_count {
362        let (child, next) = parse_cache_tree_node(data, pos, hash_len)?;
363        children.push(child);
364        pos = next;
365    }
366
367    Some((
368        CacheTreeNode {
369            name,
370            entry_count,
371            oid,
372            children,
373        },
374        pos,
375    ))
376}
377
378fn parse_signed_int_prefix(data: &[u8]) -> Option<(i32, usize)> {
379    let mut j = 0usize;
380    while j < data.len() && data[j] == b' ' {
381        j += 1;
382    }
383    let start = j;
384    if j < data.len() && data[j] == b'-' {
385        j += 1;
386    }
387    let digit_start = j;
388    while j < data.len() && data[j].is_ascii_digit() {
389        j += 1;
390    }
391    if j == digit_start {
392        return None;
393    }
394    let s = std::str::from_utf8(&data[start..j]).ok()?;
395    let v: i32 = s.parse().ok()?;
396    Some((v, j))
397}
398
399fn serialize_cache_tree_node(node: &CacheTreeNode, out: &mut Vec<u8>) {
400    out.extend_from_slice(&node.name);
401    out.push(0);
402    out.extend_from_slice(node.entry_count.to_string().as_bytes());
403    out.push(b' ');
404    out.extend_from_slice(node.children.len().to_string().as_bytes());
405    out.push(b'\n');
406    if node.entry_count >= 0 {
407        if let Some(oid) = node.oid {
408            out.extend_from_slice(oid.as_bytes());
409        }
410    }
411    for child in &node.children {
412        serialize_cache_tree_node(child, out);
413    }
414}
415
416fn format_cache_tree_node(node: &CacheTreeNode, parent_path: &str, out: &mut String) {
417    let path = if node.name.is_empty() {
418        String::new()
419    } else {
420        let name = String::from_utf8_lossy(&node.name);
421        format!("{parent_path}{name}/")
422    };
423    if node.is_valid() {
424        if let Some(oid) = node.oid {
425            out.push_str(&format!(
426                "{} {} ({} entries, {} subtrees)\n",
427                oid,
428                path,
429                node.entry_count,
430                node.children.len()
431            ));
432        }
433    } else {
434        out.push_str(&format!(
435            "{:<40} {} ({} subtrees)\n",
436            "invalid",
437            path,
438            node.children.len()
439        ));
440    }
441    for child in &node.children {
442        format_cache_tree_node(child, &path, out);
443    }
444}
445
446/// Emit a single cache-tree node line in `test-tool dump-cache-tree` format.
447fn dump_one_line(node: &CacheTreeNode, pfx: &str, out: &mut String) {
448    if node.entry_count < 0 {
449        out.push_str(&format!(
450            "{:<40} {} ({} subtrees)\n",
451            "invalid",
452            pfx,
453            node.children.len()
454        ));
455    } else {
456        let oid = node.oid.unwrap_or_else(ObjectId::zero);
457        out.push_str(&format!(
458            "{} {} ({} entries, {} subtrees)\n",
459            oid,
460            pfx,
461            node.entry_count,
462            node.children.len()
463        ));
464    }
465}
466
467/// Walk the stored cache-tree (`it`) against a freshly built reference (`ref`),
468/// emitting only the nodes that exist in both — matching Git's `dump_cache_tree`.
469fn dump_cache_tree_pair(
470    it: &CacheTreeNode,
471    reference: &CacheTreeNode,
472    pfx: &str,
473    out: &mut String,
474) {
475    dump_one_line(it, pfx, out);
476
477    for child in &it.children {
478        let Some(ref_child) = reference.children.iter().find(|c| c.name == child.name) else {
479            continue;
480        };
481        let name = String::from_utf8_lossy(&child.name);
482        let child_pfx = format!("{pfx}{name}/");
483        dump_cache_tree_pair(child, ref_child, &child_pfx, out);
484    }
485}
486
487/// Read `GIT_INDEX_VERSION` and return the requested version.
488///
489/// If the environment variable is unset, returns `None`.
490/// If it is set but invalid (non-numeric or out of range 2..=4), prints a
491/// warning to stderr and returns the default version.
492pub fn get_index_format_from_env() -> Option<u32> {
493    let val = std::env::var("GIT_INDEX_VERSION").ok()?;
494    if val.is_empty() {
495        return None;
496    }
497    match val.parse::<u32>() {
498        Ok(v) if (INDEX_FORMAT_LB..=INDEX_FORMAT_UB).contains(&v) => Some(v),
499        _ => {
500            eprintln!(
501                "warning: GIT_INDEX_VERSION set, but the value is invalid.\n\
502                 Using version {INDEX_ENV_INVALID_FALLBACK}"
503            );
504            Some(INDEX_ENV_INVALID_FALLBACK)
505        }
506    }
507}
508
509impl Index {
510    /// Create a new, empty index.
511    ///
512    /// Respects `GIT_INDEX_VERSION` if set, otherwise defaults to version 2.
513    #[must_use]
514    pub fn new() -> Self {
515        let version = get_index_format_from_env().unwrap_or(2);
516        Self {
517            version,
518            entries: Vec::new(),
519            sparse_directories: false,
520            untracked_cache: None,
521            fsmonitor_last_update: None,
522            resolve_undo: None,
523            split_link: None,
524            cache_tree_root: None,
525            cache_tree: None,
526            hash_algo: HashAlgo::Sha1,
527        }
528    }
529
530    /// Create a new, empty index at a fixed version without consulting
531    /// `GIT_INDEX_VERSION` or config.
532    ///
533    /// Unlike [`Self::new`], this never reads the environment and therefore never
534    /// emits the "GIT_INDEX_VERSION set, but the value is invalid" warning. Use it
535    /// for throwaway baseline indexes (e.g. sparse-index diff references) where the
536    /// version is irrelevant and Git resolves the format only once per operation.
537    #[must_use]
538    pub fn empty(version: u32) -> Self {
539        Self {
540            version,
541            entries: Vec::new(),
542            sparse_directories: false,
543            untracked_cache: None,
544            fsmonitor_last_update: None,
545            resolve_undo: None,
546            split_link: None,
547            cache_tree_root: None,
548            cache_tree: None,
549            hash_algo: HashAlgo::Sha1,
550        }
551    }
552
553    /// Create a new empty index, respecting config values for version.
554    ///
555    /// Priority matches Git's `prepare_repo_settings`: `GIT_INDEX_VERSION` env, then
556    /// `feature.manyFiles` (implies version 4), then `index.version` (overrides version).
557    pub fn new_with_config(
558        config_index_version: Option<&str>,
559        config_many_files: Option<&str>,
560    ) -> Self {
561        if let Some(v) = get_index_format_from_env() {
562            return Self {
563                version: v,
564                entries: Vec::new(),
565                sparse_directories: false,
566                untracked_cache: None,
567                fsmonitor_last_update: None,
568                resolve_undo: None,
569                split_link: None,
570                cache_tree_root: None,
571                cache_tree: None,
572                hash_algo: HashAlgo::default(),
573            };
574        }
575
576        let many_files = config_truthy(config_many_files);
577        let mut version = if many_files { 4 } else { 2 };
578
579        if let Some(val) = config_index_version {
580            let trimmed = val.trim();
581            if !trimmed.is_empty() {
582                match trimmed.parse::<u32>() {
583                    Ok(v) if (INDEX_FORMAT_LB..=INDEX_FORMAT_UB).contains(&v) => {
584                        version = v;
585                    }
586                    _ => {
587                        eprintln!(
588                            "warning: index.version set, but the value is invalid.\n\
589                             Using version {INDEX_CONFIG_INVALID_FALLBACK}"
590                        );
591                        version = INDEX_CONFIG_INVALID_FALLBACK;
592                    }
593                }
594            }
595        }
596
597        Self {
598            version,
599            entries: Vec::new(),
600            sparse_directories: false,
601            untracked_cache: None,
602            fsmonitor_last_update: None,
603            resolve_undo: None,
604            split_link: None,
605            cache_tree_root: None,
606            cache_tree: None,
607            hash_algo: HashAlgo::default(),
608        }
609    }
610
611    /// New empty index using a loaded [`ConfigSet`] (includes `-c` / `GIT_CONFIG_PARAMETERS`).
612    ///
613    /// Same precedence as [`Self::new_with_config`], but reads `feature.manyFiles` and
614    /// `index.version` from `config`.
615    #[must_use]
616    pub fn new_from_config(config: &ConfigSet) -> Self {
617        if let Some(v) = get_index_format_from_env() {
618            return Self {
619                version: v,
620                entries: Vec::new(),
621                sparse_directories: false,
622                untracked_cache: None,
623                fsmonitor_last_update: None,
624                resolve_undo: None,
625                split_link: None,
626                cache_tree_root: None,
627                cache_tree: None,
628                hash_algo: HashAlgo::default(),
629            };
630        }
631
632        let many_files = config
633            .get_bool("feature.manyFiles")
634            .and_then(|r| r.ok())
635            .unwrap_or(false);
636        let mut version = if many_files { 4 } else { 2 };
637
638        if let Some(val) = config.get("index.version") {
639            let trimmed = val.trim();
640            if !trimmed.is_empty() {
641                match trimmed.parse::<u32>() {
642                    Ok(v) if (INDEX_FORMAT_LB..=INDEX_FORMAT_UB).contains(&v) => {
643                        version = v;
644                    }
645                    _ => {
646                        eprintln!(
647                            "warning: index.version set, but the value is invalid.\n\
648                             Using version {INDEX_CONFIG_INVALID_FALLBACK}"
649                        );
650                        version = INDEX_CONFIG_INVALID_FALLBACK;
651                    }
652                }
653            }
654        }
655
656        Self {
657            version,
658            entries: Vec::new(),
659            sparse_directories: false,
660            untracked_cache: None,
661            fsmonitor_last_update: None,
662            resolve_undo: None,
663            split_link: None,
664            cache_tree_root: None,
665            cache_tree: None,
666            hash_algo: HashAlgo::default(),
667        }
668    }
669
670    /// Load an index from the given file path without expanding sparse-directory placeholders.
671    ///
672    /// Returns an empty index if the file does not exist.
673    ///
674    /// # Errors
675    ///
676    /// Returns [`Error::IndexError`] if the file is present but corrupt.
677    pub fn load(path: &Path) -> Result<Self> {
678        match fs::read(path) {
679            Ok(data) => Self::parse(&data),
680            Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(Self {
681                sparse_directories: false,
682                ..Self::new()
683            }),
684            Err(e) => Err(Error::Io(e)),
685        }
686    }
687
688    /// Load an index and expand sparse-directory placeholders using the object database.
689    ///
690    /// After a successful return, [`Index::sparse_directories`] is cleared and every
691    /// placeholder is replaced by the blob entries from the referenced tree.
692    pub fn load_expand_sparse(path: &Path, odb: &Odb) -> Result<Self> {
693        let mut idx = match fs::read(path) {
694            Ok(data) => Self::parse_with_algo(&data, odb.hash_algo())?,
695            Err(e) if e.kind() == io::ErrorKind::NotFound => Self {
696                sparse_directories: false,
697                hash_algo: odb.hash_algo(),
698                ..Self::new()
699            },
700            Err(e) => return Err(Error::Io(e)),
701        };
702        idx.expand_sparse_directory_placeholders(odb)?;
703        Ok(idx)
704    }
705
706    /// Like [`Index::load_expand_sparse`], but treats a missing index or Git's
707    /// `"file too short"` placeholder as an empty index.
708    pub fn load_expand_sparse_optional(path: &Path, odb: &Odb) -> Result<Self> {
709        let algo = odb.hash_algo();
710        let mut idx = match fs::read(path) {
711            Ok(data) => Self::parse_with_algo(&data, algo).or_else(|e| match e {
712                Error::IndexError(msg) if msg == "file too short" => Ok(Self {
713                    hash_algo: algo,
714                    ..Self::new()
715                }),
716                other => Err(other),
717            })?,
718            Err(e) if e.kind() == io::ErrorKind::NotFound => Self {
719                hash_algo: algo,
720                ..Self::new()
721            },
722            Err(e) => return Err(Error::Io(e)),
723        };
724        idx.expand_sparse_directory_placeholders(odb)?;
725        Ok(idx)
726    }
727
728    /// Returns true if the index contains sparse-index tree placeholders (`MODE_TREE` + skip-worktree).
729    #[must_use]
730    pub fn has_sparse_directory_placeholders(&self) -> bool {
731        self.entries
732            .iter()
733            .any(IndexEntry::is_sparse_directory_placeholder)
734    }
735
736    /// Replace sparse-directory placeholder entries with all blob paths from their trees.
737    ///
738    /// Each placeholder must reference a tree object. New entries are marked skip-worktree like Git's
739    /// expanded index, except we keep `sparse_directories` false in memory after expansion.
740    pub fn expand_sparse_directory_placeholders(&mut self, odb: &Odb) -> Result<()> {
741        if !self.has_sparse_directory_placeholders() {
742            return Ok(());
743        }
744        let mut out: Vec<IndexEntry> = Vec::with_capacity(self.entries.len());
745        for entry in self.entries.drain(..) {
746            if entry.is_sparse_directory_placeholder() {
747                let prefix = trim_trailing_slash_bytes(&entry.path);
748                let blobs = flatten_tree_blobs(odb, &entry.oid, prefix)?;
749                out.extend(blobs);
750            } else {
751                out.push(entry);
752            }
753        }
754        self.entries = out;
755        self.sparse_directories = false;
756        self.sort();
757        Ok(())
758    }
759
760    /// Collapse consecutive skip-worktree subtrees into sparse-directory placeholders when
761    /// `cone_mode` is true and each directory is outside the sparse cone.
762    ///
763    /// `head_tree` is the tree OID at `HEAD`. When `enable_sparse_index` is false, clears
764    /// [`Index::sparse_directories`] and returns without collapsing.
765    pub fn try_collapse_sparse_directories(
766        &mut self,
767        odb: &Odb,
768        head_tree: &ObjectId,
769        patterns: &[String],
770        cone_mode: bool,
771        enable_sparse_index: bool,
772    ) -> Result<()> {
773        if !enable_sparse_index || !cone_mode {
774            self.sparse_directories = false;
775            return Ok(());
776        }
777
778        let mut prefixes = BTreeSet::<Vec<u8>>::new();
779        for e in &self.entries {
780            if e.stage() != 0 || e.mode == MODE_TREE || !e.skip_worktree() {
781                continue;
782            }
783            collect_directory_prefixes(&e.path, &mut prefixes);
784        }
785
786        let mut collapsed_any = false;
787        // Deepest prefixes first so nested dirs collapse before parents.
788        let mut ordered: Vec<Vec<u8>> = prefixes.into_iter().collect();
789        ordered.sort_by_key(|p| std::cmp::Reverse(p.len()));
790
791        for pref in ordered {
792            let pref_str = String::from_utf8_lossy(&pref);
793            if directory_in_cone(&pref_str, patterns, cone_mode) {
794                continue;
795            }
796            let Some(subtree_oid) = tree_oid_for_prefix(odb, head_tree, &pref)? else {
797                continue;
798            };
799            let expected = collect_sparse_aware_expected_blobs(
800                odb,
801                &subtree_oid,
802                &pref,
803                patterns,
804                cone_mode,
805                &self.entries,
806            )?;
807            if expected.is_empty() {
808                continue;
809            }
810            let mut matched = Vec::new();
811            for e in &self.entries {
812                if e.stage() != 0 {
813                    continue;
814                }
815                if path_under_prefix(&e.path, &pref) && e.mode != MODE_TREE {
816                    matched.push(e.clone());
817                }
818            }
819            if matched.len() != expected.len() {
820                continue;
821            }
822            matched.sort_by(|a, b| a.path.cmp(&b.path));
823            let mut exp_sorted = expected;
824            exp_sorted.sort_by(|a, b| a.path.cmp(&b.path));
825            if !matched
826                .iter()
827                .zip(exp_sorted.iter())
828                .all(|(a, b)| a.path == b.path && a.oid == b.oid && a.mode == b.mode)
829            {
830                continue;
831            }
832            if !matched.iter().all(|e| e.skip_worktree()) {
833                continue;
834            }
835            // Git's convert_to_sparse_rec refuses to collapse a directory that contains a
836            // submodule (gitlink); the sparse-directory entry could not faithfully represent
837            // the gitlink's committed OID. Leave such directories expanded.
838            if matched.iter().any(|e| e.mode == MODE_GITLINK)
839                || exp_sorted.iter().any(|e| e.mode == MODE_GITLINK)
840            {
841                continue;
842            }
843
844            let mut path_with_slash = pref.clone();
845            if !path_with_slash.ends_with(b"/") {
846                path_with_slash.push(b'/');
847            }
848            self.entries
849                .retain(|e| e.stage() != 0 || !path_under_prefix(&e.path, &pref));
850            let mut placeholder = IndexEntry {
851                ctime_sec: 0,
852                ctime_nsec: 0,
853                mtime_sec: 0,
854                mtime_nsec: 0,
855                dev: 0,
856                ino: 0,
857                mode: MODE_TREE,
858                uid: 0,
859                gid: 0,
860                size: 0,
861                oid: subtree_oid,
862                flags: path_with_slash.len().min(0xFFF) as u16,
863                flags_extended: Some(0),
864                path: path_with_slash,
865                base_index_pos: 0,
866            };
867            placeholder.set_skip_worktree(true);
868            self.add_or_replace(placeholder);
869            collapsed_any = true;
870        }
871
872        if collapsed_any {
873            self.sort();
874            self.sparse_directories = true;
875        } else {
876            self.sparse_directories = false;
877        }
878        Ok(())
879    }
880
881    /// Parse index bytes (the whole file including trailing SHA-1).
882    ///
883    /// # Errors
884    ///
885    /// Returns [`Error::IndexError`] on structural problems.
886    pub fn parse(data: &[u8]) -> Result<Self> {
887        // The index file does not record its hash width; it is fixed by the
888        // repository. Detect it from the trailing checksum (callers with a
889        // repository should prefer [`Self::parse_with_algo`]).
890        Self::parse_with_algo(data, detect_index_hash_algo(data))
891    }
892
893    /// Parse an index using a known hash algorithm (e.g. from the repository's
894    /// `extensions.objectformat`), which determines the entry OID width and the
895    /// trailing checksum width.
896    pub fn parse_with_algo(data: &[u8], hash_algo: HashAlgo) -> Result<Self> {
897        if data.len() < 12 {
898            return Err(Error::IndexError("file too short".to_owned()));
899        }
900
901        let hash_len = hash_algo.len();
902        // Trailing checksum: normal index is a hash of the body; Git may write
903        // all zeros when `index.skipHash` / `feature.manyFiles` skips it.
904        let (body, checksum) = data.split_at(data.len() - hash_len);
905        if !checksum.iter().all(|&b| b == 0) {
906            let computed = hash_index_body(hash_algo, body);
907            if computed != checksum {
908                return Err(Error::IndexError(format!(
909                    "{} checksum mismatch",
910                    hash_algo.name().to_uppercase()
911                )));
912            }
913        }
914
915        // Header
916        let magic = &body[..4];
917        if magic != b"DIRC" {
918            return Err(Error::IndexError("bad magic: expected DIRC".to_owned()));
919        }
920        let version = u32::from_be_bytes(
921            body[4..8]
922                .try_into()
923                .map_err(|_| Error::IndexError("cannot read version".to_owned()))?,
924        );
925        if version != 2 && version != 3 && version != 4 {
926            return Err(Error::IndexError(format!(
927                "unsupported index version {version}"
928            )));
929        }
930        let count = u32::from_be_bytes(
931            body[8..12]
932                .try_into()
933                .map_err(|_| Error::IndexError("cannot read entry count".to_owned()))?,
934        );
935
936        let mut pos = 12usize;
937        let mut entries = Vec::with_capacity(count as usize);
938
939        for _ in 0..count {
940            // Only v4 prefix-compresses paths against the previous entry;
941            // borrow it from `entries` instead of cloning every path.
942            let prev_path: &[u8] = if version == 4 {
943                entries.last().map_or(&[][..], |e: &IndexEntry| &e.path)
944            } else {
945                &[]
946            };
947            let (entry, consumed) = parse_entry(&body[pos..], version, prev_path, hash_len)?;
948            entries.push(entry);
949            pos += consumed;
950        }
951
952        let mut sparse_directories = false;
953        let mut untracked_cache = None;
954        let mut fsmonitor_last_update = None;
955        let mut resolve_undo = None;
956        let mut split_link = None;
957        let mut cache_tree_root = None;
958        let mut cache_tree = None;
959        while pos + 8 <= body.len() {
960            let sig = u32::from_be_bytes(
961                body[pos..pos + 4]
962                    .try_into()
963                    .map_err(|_| Error::IndexError("truncated extension sig".to_owned()))?,
964            );
965            let ext_sz = u32::from_be_bytes(
966                body[pos + 4..pos + 8]
967                    .try_into()
968                    .map_err(|_| Error::IndexError("truncated extension size".to_owned()))?,
969            ) as usize;
970            pos += 8;
971            if pos + ext_sz > body.len() {
972                return Err(Error::IndexError(
973                    "extension overruns index body".to_owned(),
974                ));
975            }
976            if sig == INDEX_EXT_SPARSE_DIRECTORIES {
977                sparse_directories = true;
978            } else if sig == INDEX_EXT_UNTRACKED {
979                let ext_data = &body[pos..pos + ext_sz];
980                untracked_cache = untracked_cache::parse_untracked_extension(ext_data);
981            } else if sig == INDEX_EXT_FSMONITOR {
982                let ext_data = &body[pos..pos + ext_sz];
983                let token_bytes = if let Some(nul) = ext_data.iter().position(|&b| b == 0) {
984                    &ext_data[..nul]
985                } else {
986                    ext_data
987                };
988                fsmonitor_last_update = Some(String::from_utf8_lossy(token_bytes).into_owned());
989            } else if sig == INDEX_EXT_RESOLVE_UNDO {
990                let ext_data = &body[pos..pos + ext_sz];
991                resolve_undo = Some(resolve_undo::parse_resolve_undo_payload(ext_data)?);
992            } else if sig == INDEX_EXT_LINK {
993                let ext_data = &body[pos..pos + ext_sz];
994                split_link = Some(crate::split_index::parse_link_extension(ext_data, hash_algo)?);
995            } else if sig == INDEX_EXT_CACHE_TREE {
996                let ext_data = &body[pos..pos + ext_sz];
997                cache_tree = parse_cache_tree(ext_data, hash_len);
998                cache_tree_root = cache_tree
999                    .as_ref()
1000                    .and_then(|node| node.oid.filter(|_| node.entry_count >= 0));
1001            }
1002            pos += ext_sz;
1003        }
1004        if pos != body.len() {
1005            return Err(Error::IndexError("junk after index extensions".to_owned()));
1006        }
1007
1008        Ok(Self {
1009            version,
1010            entries,
1011            sparse_directories,
1012            untracked_cache,
1013            fsmonitor_last_update,
1014            resolve_undo,
1015            split_link,
1016            cache_tree_root,
1017            cache_tree,
1018            hash_algo,
1019        })
1020    }
1021
1022    /// Write the index to a file, computing and appending the trailing SHA-1.
1023    ///
1024    /// # Errors
1025    ///
1026    /// Returns [`Error::Io`] on filesystem errors.
1027    pub fn write(&self, path: &Path) -> Result<()> {
1028        let git_dir = path.parent();
1029        let config = git_dir.and_then(|d| ConfigSet::load(Some(d), true).ok());
1030        let skip_hash = index_skip_hash_for_write(config.as_ref());
1031        self.write_to_path(path, skip_hash)
1032    }
1033
1034    /// Write this index to `path` with an explicit trailing-checksum policy.
1035    ///
1036    /// When `skip_hash` is true, the trailing SHA-1 is written as all zeros (Git `index.skipHash`).
1037    pub fn write_to_path(&self, path: &Path, skip_hash: bool) -> Result<()> {
1038        let mut body = Vec::new();
1039        // Fast path: entries loaded from disk (or maintained via `add_or_replace`) are already in
1040        // canonical order; serializing from `&self` skips a full clone of every entry. The
1041        // comparator must stay identical to [`Index::sort`] (path, then stage) — format v4 path
1042        // compression depends on it.
1043        let already_sorted = self
1044            .entries
1045            .is_sorted_by(|a, b| (&a.path, a.stage()) <= (&b.path, b.stage()));
1046        if already_sorted {
1047            self.serialize_into(&mut body)?;
1048        } else {
1049            let mut sorted = self.clone();
1050            sorted.sort();
1051            sorted.serialize_into(&mut body)?;
1052        }
1053
1054        let checksum: Vec<u8> = if skip_hash {
1055            vec![0u8; self.hash_algo.len()]
1056        } else {
1057            hash_index_body(self.hash_algo, &body)
1058        };
1059
1060        let tmp_path = path.with_extension("lock");
1061        let pid_path = pid_path_for_lock(&tmp_path);
1062        let lockfile_pid_enabled = lockfile_pid_enabled(path);
1063
1064        let mut lock_file = match fs::OpenOptions::new()
1065            .write(true)
1066            .create_new(true)
1067            .open(&tmp_path)
1068        {
1069            Ok(file) => file,
1070            Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {
1071                let message = build_lock_exists_message(&tmp_path, &pid_path, &e);
1072                return Err(Error::Io(io::Error::new(
1073                    io::ErrorKind::AlreadyExists,
1074                    message,
1075                )));
1076            }
1077            Err(e) => return Err(Error::Io(e)),
1078        };
1079
1080        let mut wrote_pid_file = false;
1081        if lockfile_pid_enabled {
1082            if let Err(e) = write_lock_pid_file(&pid_path) {
1083                let _ = fs::remove_file(&tmp_path);
1084                return Err(Error::Io(e));
1085            }
1086            wrote_pid_file = true;
1087        }
1088
1089        if let Err(e) = (|| -> io::Result<()> {
1090            lock_file.write_all(&body)?;
1091            lock_file.write_all(&checksum)?;
1092            Ok(())
1093        })() {
1094            let _ = fs::remove_file(&tmp_path);
1095            if wrote_pid_file {
1096                let _ = fs::remove_file(&pid_path);
1097            }
1098            return Err(Error::Io(e));
1099        }
1100        drop(lock_file);
1101
1102        if let Err(e) = fs::rename(&tmp_path, path) {
1103            let _ = fs::remove_file(&tmp_path);
1104            if wrote_pid_file {
1105                let _ = fs::remove_file(&pid_path);
1106            }
1107            return Err(Error::Io(e));
1108        }
1109        {
1110            if wrote_pid_file {
1111                let _ = fs::remove_file(&pid_path);
1112            }
1113        }
1114        Ok(())
1115    }
1116
1117    /// Serialise the index body (without trailing checksum) into `out`.
1118    ///
1119    /// Callers must have sorted entries when using format 4 (path compression depends on order).
1120    pub(crate) fn serialize_into(&self, out: &mut Vec<u8>) -> Result<()> {
1121        let has_extended_flags = self.entries.iter().any(|e| e.flags_extended.is_some());
1122        let write_version = if self.version >= 4 {
1123            4
1124        } else if has_extended_flags {
1125            3
1126        } else if self.version >= 3 {
1127            2
1128        } else {
1129            self.version
1130        };
1131        // Header
1132        out.extend_from_slice(b"DIRC");
1133        out.extend_from_slice(&write_version.to_be_bytes());
1134        out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
1135
1136        if write_version == 4 {
1137            let mut previous_path: Vec<u8> = Vec::new();
1138            for entry in &self.entries {
1139                serialize_entry_v4(entry, &mut previous_path, out);
1140            }
1141        } else {
1142            for entry in &self.entries {
1143                serialize_entry(entry, write_version, out);
1144            }
1145        }
1146        if self.sparse_directories {
1147            out.extend_from_slice(&INDEX_EXT_SPARSE_DIRECTORIES.to_be_bytes());
1148            out.extend_from_slice(&0u32.to_be_bytes());
1149        }
1150        if let Some(uc) = &self.untracked_cache {
1151            let payload = untracked_cache::write_untracked_extension(uc);
1152            out.extend_from_slice(&INDEX_EXT_UNTRACKED.to_be_bytes());
1153            out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
1154            out.extend_from_slice(&payload);
1155        }
1156        if let Some(token) = &self.fsmonitor_last_update {
1157            let mut payload = token.as_bytes().to_vec();
1158            payload.push(0);
1159            out.extend_from_slice(&INDEX_EXT_FSMONITOR.to_be_bytes());
1160            out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
1161            out.extend_from_slice(&payload);
1162        }
1163        if let Some(ru) = &self.resolve_undo {
1164            let payload = write_resolve_undo_payload(ru);
1165            if !payload.is_empty() {
1166                out.extend_from_slice(&INDEX_EXT_RESOLVE_UNDO.to_be_bytes());
1167                out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
1168                out.extend_from_slice(&payload);
1169            }
1170        }
1171        if let Some(sl) = &self.split_link {
1172            use crate::ewah_bitmap::EwahBitmap;
1173            let del = sl
1174                .delete_bitmap
1175                .as_ref()
1176                .cloned()
1177                .unwrap_or_else(EwahBitmap::new);
1178            let rep = sl
1179                .replace_bitmap
1180                .as_ref()
1181                .cloned()
1182                .unwrap_or_else(EwahBitmap::new);
1183            let payload =
1184                crate::split_index::serialize_link_extension_payload(&sl.base_oid, &del, &rep);
1185            out.extend_from_slice(&INDEX_EXT_LINK.to_be_bytes());
1186            out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
1187            out.extend_from_slice(&payload);
1188        }
1189        if let Some(cache_tree) = &self.cache_tree {
1190            let mut payload = Vec::new();
1191            serialize_cache_tree_node(cache_tree, &mut payload);
1192            out.extend_from_slice(&INDEX_EXT_CACHE_TREE.to_be_bytes());
1193            out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
1194            out.extend_from_slice(&payload);
1195        }
1196        Ok(())
1197    }
1198
1199    /// Add or replace an entry (matched by path + stage).
1200    pub fn add_or_replace(&mut self, entry: IndexEntry) {
1201        let path = entry.path.clone();
1202        let stage = entry.stage();
1203        let mut inserted_stage0 = false;
1204        // Binary search for the insertion point by (path, stage)
1205        let result = self.entries.binary_search_by(|e| {
1206            e.path
1207                .as_slice()
1208                .cmp(path.as_slice())
1209                .then_with(|| e.stage().cmp(&stage))
1210        });
1211        match result {
1212            Ok(pos) => {
1213                // Preserve split-index row binding across refresh/replace (Git `ce->index`).
1214                let mut e = entry;
1215                e.base_index_pos = self.entries[pos].base_index_pos;
1216                self.entries[pos] = e;
1217            }
1218            Err(pos) => {
1219                // Not found — insert at sorted position
1220                self.entries.insert(pos, entry);
1221                inserted_stage0 = stage == 0;
1222            }
1223        }
1224        if inserted_stage0 {
1225            if let Ok(p) = std::str::from_utf8(&path) {
1226                self.invalidate_untracked_cache_for_path(p);
1227            }
1228        }
1229        if stage == 0 {
1230            self.invalidate_cache_tree_for_path(&path);
1231        }
1232    }
1233
1234    /// Stage a file at stage 0, removing any conflict stage entries (1, 2, 3)
1235    /// for the same path. This is the correct behavior for `git add` on a
1236    /// conflicted file during merge/cherry-pick resolution.
1237    pub fn stage_file(&mut self, entry: IndexEntry) {
1238        let path = entry.path.clone();
1239        for e in &self.entries {
1240            if e.path == path && e.stage() != 0 {
1241                resolve_undo::record_resolve_undo_for_entry(&mut self.resolve_undo, e);
1242            }
1243        }
1244        // Remove conflict stages first
1245        self.entries.retain(|e| e.path != path || e.stage() == 0);
1246        // Then add/replace stage-0 entry
1247        self.add_or_replace(entry);
1248    }
1249
1250    /// Drop all resolve-undo records (matches Git `resolve_undo_clear_index`).
1251    pub fn clear_resolve_undo(&mut self) {
1252        self.resolve_undo = None;
1253    }
1254
1255    /// Remove and return the resolve-undo record for `path`, if any.
1256    pub fn take_resolve_undo_record(&mut self, path: &[u8]) -> Option<ResolveUndoRecord> {
1257        let map = self.resolve_undo.as_mut()?;
1258        let ru = map.remove(path)?;
1259        if map.is_empty() {
1260            self.resolve_undo = None;
1261        }
1262        Some(ru)
1263    }
1264
1265    /// Replace all index entries for `path` with unmerged stages from `record`.
1266    pub fn install_unmerged_from_resolve_undo(&mut self, path: &[u8], record: &ResolveUndoRecord) {
1267        self.entries.retain(|e| e.path != path);
1268        for stage in 1u8..=3u8 {
1269            let i = (stage - 1) as usize;
1270            if record.modes[i] == 0 {
1271                continue;
1272            }
1273            let entry = IndexEntry {
1274                ctime_sec: 0,
1275                ctime_nsec: 0,
1276                mtime_sec: 0,
1277                mtime_nsec: 0,
1278                dev: 0,
1279                ino: 0,
1280                mode: record.modes[i],
1281                uid: 0,
1282                gid: 0,
1283                size: 0,
1284                oid: record.oids[i],
1285                flags: path.len().min(0xFFF) as u16 | ((stage as u16) << 12),
1286                flags_extended: None,
1287                path: path.to_vec(),
1288                base_index_pos: 0,
1289            };
1290            self.add_or_replace(entry);
1291        }
1292        self.sort();
1293    }
1294
1295    /// Re-create unmerged index entries for `path` from the resolve-undo extension.
1296    ///
1297    /// Returns `true` when a resolve-undo record existed and was consumed (Git `unmerge_one`).
1298    pub fn unmerge_path_from_resolve_undo(&mut self, path: &[u8]) -> bool {
1299        let Some(record) = self.take_resolve_undo_record(path) else {
1300            return false;
1301        };
1302        self.install_unmerged_from_resolve_undo(path, &record);
1303        // Git's `unmerge_index_entry` swaps the resolved stage-0 entry for unmerged
1304        // stages 1/2/3 via `remove_index_entry_at` + `add_index_entry`, both of which
1305        // call `cache_tree_invalidate_path`. After committing the resolution the index
1306        // carries a valid TREE extension whose stage-0 entry for `path` no longer
1307        // matches; without invalidating it, the next write-tree trips
1308        // `verify_cache_tree` ("corrupted cache-tree has entries not present in index").
1309        if let Ok(p) = std::str::from_utf8(path) {
1310            self.invalidate_untracked_cache_for_path(p);
1311        }
1312        self.invalidate_cache_tree_for_path(path);
1313        true
1314    }
1315
1316    /// Remove all entries matching the given path (all stages).
1317    ///
1318    /// Returns `true` if at least one entry was removed.
1319    pub fn remove(&mut self, path: &[u8]) -> bool {
1320        let mut removed_any = false;
1321        for e in &self.entries {
1322            if e.path == path {
1323                if e.stage() != 0 {
1324                    resolve_undo::record_resolve_undo_for_entry(&mut self.resolve_undo, e);
1325                }
1326                removed_any = true;
1327            }
1328        }
1329        if !removed_any {
1330            return false;
1331        }
1332        self.entries.retain(|e| e.path != path);
1333        if let Ok(p) = std::str::from_utf8(path) {
1334            self.invalidate_untracked_cache_for_path(p);
1335        }
1336        self.invalidate_cache_tree_for_path(path);
1337        true
1338    }
1339
1340    /// Remove every index entry for `path` (all merge stages), like `remove_file_from_index`.
1341    ///
1342    /// Returns whether any entry was removed.
1343    pub fn remove_path_all_stages(&mut self, path: &[u8]) -> bool {
1344        self.remove(path)
1345    }
1346
1347    /// Invalidate UNTR nodes affected by an index change (Git `untracked_cache_*_index`).
1348    pub fn invalidate_untracked_cache_for_path(&mut self, path: &str) {
1349        if let Some(uc) = self.untracked_cache.as_mut() {
1350            untracked_cache::invalidate_path(uc, path);
1351        }
1352    }
1353
1354    /// Remove every index entry whose path lies strictly under `path` (all stages).
1355    ///
1356    /// Used when staging a file at `path` that replaces a former directory: Git removes
1357    /// tracked paths like `path/child` from the index so they do not remain alongside
1358    /// the new blob entry.
1359    pub fn remove_descendants_under_path(&mut self, path: &str) {
1360        let prefix = path.as_bytes();
1361        if prefix.is_empty() {
1362            return;
1363        }
1364        let plen = prefix.len();
1365        let had_descendant = self.entries.iter().any(|e| {
1366            let ep = e.path.as_slice();
1367            ep.len() > plen && ep.starts_with(prefix) && ep[plen] == b'/'
1368        });
1369        for e in self.entries.iter() {
1370            let ep = e.path.as_slice();
1371            if ep.len() > plen && ep.starts_with(prefix) && ep[plen] == b'/' && e.stage() != 0 {
1372                resolve_undo::record_resolve_undo_for_entry(&mut self.resolve_undo, e);
1373            }
1374        }
1375        self.entries.retain(|e| {
1376            let ep = e.path.as_slice();
1377            if ep.len() <= plen {
1378                return true;
1379            }
1380            if !ep.starts_with(prefix) {
1381                return true;
1382            }
1383            // Drop paths strictly under `prefix/` (keep same-length prefix matches like "d-other").
1384            ep[plen] != b'/'
1385        });
1386        if had_descendant {
1387            self.invalidate_untracked_cache_for_path(path);
1388            self.invalidate_cache_tree_for_path(path.as_bytes());
1389        }
1390    }
1391
1392    /// Replace the cache-tree extension with a valid tree.
1393    pub fn set_cache_tree(&mut self, cache_tree: CacheTreeNode) {
1394        self.cache_tree_root = cache_tree.oid.filter(|_| cache_tree.entry_count >= 0);
1395        self.cache_tree = Some(cache_tree);
1396    }
1397
1398    /// Remove the cache-tree extension.
1399    pub fn clear_cache_tree(&mut self) {
1400        self.cache_tree_root = None;
1401        self.cache_tree = None;
1402    }
1403
1404    /// Mark cache-tree nodes affected by an index path change as invalid.
1405    ///
1406    /// Mirrors Git's `do_invalidate_path` (`cache-tree.c`): each ancestor node along `path`
1407    /// is invalidated (`entry_count = -1`), and when the final path component names an existing
1408    /// **subtree** node, that subtree is removed entirely. The removal is essential for the
1409    /// directory→file transition (e.g. tracked dir `a/b/` replaced by file `a/b`): without it,
1410    /// stale descendant nodes (`a/b/c`, …) keep their positive `entry_count` and later trip
1411    /// [`crate::write_tree::verify_cache_tree`] with "corrupted cache-tree has entries not present
1412    /// in index".
1413    pub fn invalidate_cache_tree_for_path(&mut self, path: &[u8]) {
1414        let Some(root) = self.cache_tree.as_mut() else {
1415            self.cache_tree_root = None;
1416            return;
1417        };
1418        self.cache_tree_root = None;
1419        Self::do_invalidate_cache_tree_path(root, path);
1420    }
1421
1422    /// Recursive worker for [`Self::invalidate_cache_tree_for_path`], mirroring
1423    /// Git's `do_invalidate_path`.
1424    fn do_invalidate_cache_tree_path(node: &mut CacheTreeNode, path: &[u8]) {
1425        node.invalidate();
1426        let slash = path.iter().position(|&b| b == b'/');
1427        match slash {
1428            // Final component: drop the matching subtree node, if any.
1429            None => {
1430                if !path.is_empty() {
1431                    node.children.retain(|child| child.name != path);
1432                }
1433            }
1434            // Interior component: descend into the named subtree and recurse on the rest.
1435            Some(idx) => {
1436                let (component, rest) = (&path[..idx], &path[idx + 1..]);
1437                if let Some(child) = node
1438                    .children
1439                    .iter_mut()
1440                    .find(|child| child.name == component)
1441                {
1442                    Self::do_invalidate_cache_tree_path(child, rest);
1443                }
1444            }
1445        }
1446    }
1447
1448    /// Format the parsed cache-tree extension like Git's `test-tool dump-cache-tree`.
1449    #[must_use]
1450    pub fn format_cache_tree_dump(&self) -> String {
1451        let Some(root) = self.cache_tree.as_ref() else {
1452            return String::new();
1453        };
1454        let mut out = String::new();
1455        format_cache_tree_node(root, "", &mut out);
1456        out
1457    }
1458
1459    /// Produce `test-tool dump-cache-tree` output for this index.
1460    ///
1461    /// Mirrors Git's `cmd__dump_cache_tree`: the stored cache-tree (`it`) is
1462    /// compared against a freshly computed reference (`ref`) built from the
1463    /// current index entries with `WRITE_TREE_DRY_RUN`. Only nodes present in
1464    /// both trees are dumped; the `#(ref)` divergence lines that Git would emit
1465    /// are filtered out by the harness, so they are intentionally omitted here.
1466    ///
1467    /// If the index has no stored cache-tree, output is empty (Git dumps nothing
1468    /// because `it` is NULL).
1469    ///
1470    /// # Errors
1471    ///
1472    /// Returns an error if the reference cache-tree cannot be built (for example,
1473    /// if a tree object cannot be written to the object database).
1474    pub fn dump_cache_tree(&self, odb: &Odb) -> Result<String> {
1475        let Some(it_root) = self.cache_tree.as_ref() else {
1476            return Ok(String::new());
1477        };
1478        let ref_root = crate::write_tree::build_cache_tree_from_index(odb, self)?;
1479        let mut out = String::new();
1480        dump_cache_tree_pair(it_root, &ref_root, "", &mut out);
1481        Ok(out)
1482    }
1483
1484    /// Sort entries in Git's canonical order: by path, then by stage.
1485    pub fn sort(&mut self) {
1486        self.entries
1487            .sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
1488    }
1489
1490    /// Collapse duplicate `(path, stage)` entries, keeping the **last** one in current order.
1491    ///
1492    /// A valid Git index never holds two entries with the same path and stage; `add_index_entry`
1493    /// replaces an existing same-name entry in place. When grit builds an index by flattening a
1494    /// tree that has duplicate path entries (see `t4058-diff-duplicates`), the naive flatten yields
1495    /// several identical-path entries. This restores Git's invariant by keeping the last entry for
1496    /// each `(path, stage)` — matching `add_index_entry`'s replace-on-collision semantics where the
1497    /// final tree entry for a path wins.
1498    pub fn dedup_paths_keep_last(&mut self) {
1499        let mut seen: std::collections::HashSet<(Vec<u8>, u8)> = std::collections::HashSet::new();
1500        let mut kept: Vec<IndexEntry> = Vec::with_capacity(self.entries.len());
1501        // Walk in reverse so the *last* occurrence of each (path, stage) is the one retained.
1502        for entry in self.entries.iter().rev() {
1503            if seen.insert((entry.path.clone(), entry.stage())) {
1504                kept.push(entry.clone());
1505            }
1506        }
1507        kept.reverse();
1508        self.entries = kept;
1509    }
1510
1511    /// OID of the shared index when this index uses split-index mode (`link` extension).
1512    #[must_use]
1513    pub fn split_index_base_oid(&self) -> Option<ObjectId> {
1514        self.split_link.as_ref().map(|l| l.base_oid)
1515    }
1516
1517    /// Find an entry by path and stage (0 for normal entries).
1518    #[must_use]
1519    pub fn get(&self, path: &[u8], stage: u8) -> Option<&IndexEntry> {
1520        self.entries
1521            .iter()
1522            .find(|e| e.path == path && e.stage() == stage)
1523    }
1524
1525    /// Find a mutable entry by path and stage.
1526    pub fn get_mut(&mut self, path: &[u8], stage: u8) -> Option<&mut IndexEntry> {
1527        self.entries
1528            .iter_mut()
1529            .find(|e| e.path == path && e.stage() == stage)
1530    }
1531
1532    /// Merge tree contents from `treeish` into this index as virtual stage-1 entries, matching
1533    /// Git's `overlay_tree_on_index` used by `git ls-files --with-tree`.
1534    ///
1535    /// Existing unmerged entries (stages 1–3) are shifted to stage 3 so stage 1 is free for the
1536    /// overlay. Stage-1 paths that already exist at stage 0 are marked so `ls-files` can skip
1537    /// them (Git's `CE_UPDATE` on the stage-1 entry).
1538    ///
1539    /// # Parameters
1540    ///
1541    /// - `repo` — repository whose object database is used to read the tree.
1542    /// - `treeish` — revision or tree OID string (`HEAD`, `HEAD~1`, full SHA, etc.).
1543    /// - `prefix` — optional path prefix (bytes, no trailing slash except empty); only paths under
1544    ///   this prefix are considered from the tree. Pass empty slice for the full tree.
1545    ///
1546    /// # Errors
1547    ///
1548    /// Returns [`Error`] if `treeish` cannot be resolved, the tree cannot be read, or an object is
1549    /// missing from the ODB.
1550    pub fn overlay_tree_on_index(
1551        &mut self,
1552        repo: &Repository,
1553        treeish: &str,
1554        prefix: &[u8],
1555    ) -> Result<()> {
1556        let oid = rev_parse::resolve_revision(repo, treeish)?;
1557        let tree_oid = peel_to_tree_oid(repo, oid)?;
1558        for e in self.entries.iter_mut() {
1559            if e.stage() != 0 {
1560                e.set_stage(3);
1561            }
1562        }
1563        self.sort();
1564        let has_stage1 = self.entries.iter().any(|e| e.stage() == 1);
1565        let mut appended: Vec<IndexEntry> = Vec::new();
1566        read_tree_into_overlay(repo, &tree_oid, prefix, &[], has_stage1, &mut appended)?;
1567        for e in appended {
1568            self.add_or_replace(e);
1569        }
1570        if !has_stage1 {
1571            self.sort();
1572        }
1573        let mut last_stage0: Option<&[u8]> = None;
1574        for e in &mut self.entries {
1575            match e.stage() {
1576                0 => {
1577                    last_stage0 = Some(e.path.as_slice());
1578                }
1579                1 if last_stage0.is_some_and(|p| p == e.path.as_slice()) => {
1580                    e.set_overlay_tree_skip_output(true);
1581                }
1582                _ => {}
1583            }
1584        }
1585        Ok(())
1586    }
1587}
1588
1589fn peel_to_tree_oid(repo: &Repository, oid: ObjectId) -> Result<ObjectId> {
1590    let obj = repo.odb.read(&oid)?;
1591    match obj.kind {
1592        ObjectKind::Tree => Ok(oid),
1593        ObjectKind::Commit => {
1594            let commit = crate::objects::parse_commit(&obj.data)?;
1595            Ok(commit.tree)
1596        }
1597        ObjectKind::Tag => {
1598            let tag = crate::objects::parse_tag(&obj.data)?;
1599            peel_to_tree_oid(repo, tag.object)
1600        }
1601        _ => Err(Error::ObjectNotFound(format!(
1602            "cannot peel {oid} to tree for --with-tree"
1603        ))),
1604    }
1605}
1606
1607fn read_tree_into_overlay(
1608    repo: &Repository,
1609    tree_oid: &ObjectId,
1610    prefix: &[u8],
1611    rel_base: &[u8],
1612    use_replace_path: bool,
1613    out: &mut Vec<IndexEntry>,
1614) -> Result<()> {
1615    let obj = repo.odb.read(tree_oid)?;
1616    if obj.kind != ObjectKind::Tree {
1617        return Err(Error::ObjectNotFound(format!(
1618            "object {tree_oid} is not a tree"
1619        )));
1620    }
1621    let entries = parse_tree(&obj.data)?;
1622    for TreeEntry { mode, name, oid } in entries {
1623        if mode == MODE_TREE {
1624            let mut path = rel_base.to_vec();
1625            if !path.is_empty() {
1626                path.push(b'/');
1627            }
1628            path.extend_from_slice(&name);
1629            if !prefix_under_or_equal(prefix, &path) {
1630                continue;
1631            }
1632            read_tree_into_overlay(repo, &oid, prefix, &path, use_replace_path, out)?;
1633            continue;
1634        }
1635        if mode == MODE_GITLINK {
1636            continue;
1637        }
1638        let mut path = rel_base.to_vec();
1639        if !path.is_empty() {
1640            path.push(b'/');
1641        }
1642        path.extend_from_slice(&name);
1643        if !prefix_under_or_equal(prefix, &path) {
1644            continue;
1645        }
1646        let entry = synthetic_stage1_index_entry(mode, &path, oid);
1647        if use_replace_path {
1648            if let Some(pos) = out.iter().position(|e| e.path == path && e.stage() == 1) {
1649                out[pos] = entry;
1650            } else {
1651                out.push(entry);
1652            }
1653        } else {
1654            out.push(entry);
1655        }
1656    }
1657    Ok(())
1658}
1659
1660fn prefix_under_or_equal(prefix: &[u8], path: &[u8]) -> bool {
1661    if prefix.is_empty() {
1662        return true;
1663    }
1664    if path == prefix {
1665        return true;
1666    }
1667    path.len() > prefix.len() && path.starts_with(prefix) && path[prefix.len()] == b'/'
1668}
1669
1670fn synthetic_stage1_index_entry(mode: u32, path: &[u8], oid: ObjectId) -> IndexEntry {
1671    let path_len = path.len().min(0xFFF) as u16;
1672    let flags = (1u16 << 12) | path_len;
1673    IndexEntry {
1674        ctime_sec: 0,
1675        ctime_nsec: 0,
1676        mtime_sec: 0,
1677        mtime_nsec: 0,
1678        dev: 0,
1679        ino: 0,
1680        mode,
1681        uid: 0,
1682        gid: 0,
1683        size: 0,
1684        oid,
1685        flags,
1686        flags_extended: None,
1687        path: path.to_vec(),
1688        base_index_pos: 0,
1689    }
1690}
1691
1692fn config_truthy(raw: Option<&str>) -> bool {
1693    let Some(val) = raw else {
1694        return false;
1695    };
1696    let lowered = val.trim().to_lowercase();
1697    matches!(lowered.as_str(), "true" | "yes" | "1" | "on")
1698}
1699
1700/// Whether to write 20 zero bytes instead of the SHA-1 of the index body.
1701///
1702/// Mirrors Git `prepare_repo_settings`: `feature.manyFiles` enables skip-hash unless
1703/// `index.skipHash` / `index.skiphash` is explicitly false; otherwise honor true `index.skipHash`.
1704pub(crate) fn index_skip_hash_for_write(config: Option<&ConfigSet>) -> bool {
1705    let Some(config) = config else {
1706        return false;
1707    };
1708    let many_files = config
1709        .get_bool("feature.manyFiles")
1710        .and_then(|r| r.ok())
1711        .unwrap_or(false);
1712    if many_files {
1713        if let Some(Ok(false)) = config.get_bool("index.skipHash") {
1714            return false;
1715        }
1716        if let Some(Ok(false)) = config.get_bool("index.skiphash") {
1717            return false;
1718        }
1719        return true;
1720    }
1721    for key in ["index.skipHash", "index.skiphash"] {
1722        if let Some(Ok(true)) = config.get_bool(key) {
1723            return true;
1724        }
1725    }
1726    false
1727}
1728
1729fn trim_trailing_slash_bytes(path: &[u8]) -> &[u8] {
1730    path.strip_suffix(b"/").unwrap_or(path)
1731}
1732
1733fn path_under_prefix(path: &[u8], prefix: &[u8]) -> bool {
1734    if path == prefix {
1735        return true;
1736    }
1737    if prefix.is_empty() {
1738        return true;
1739    }
1740    path.len() > prefix.len() && path.starts_with(prefix) && path[prefix.len()] == b'/'
1741}
1742
1743fn directory_in_cone(dir_path: &str, patterns: &[String], cone_mode: bool) -> bool {
1744    // Match Git `path_in_cone_mode_sparse_checkout`: a *directory* is in the cone when its
1745    // contents are recursively included. `path_matches_sparse_patterns` distinguishes a
1746    // directory from a file by a trailing slash (expanded-cone matching uses dtype the way
1747    // Git does), so pass the directory with a trailing slash. Collapse prefixes arrive
1748    // without one (e.g. `before`, `folder1`), so append it to avoid a top-level directory
1749    // being mistaken for an always-in-cone top-level file. The root (empty) directory is
1750    // always in the cone (`/*` + `!/*/` excludes every top-level directory, so e.g. `deep`
1751    // collapses to a single placeholder instead of leaving `deep/deeper1/` etc.).
1752    let dir = dir_path.trim_end_matches('/');
1753    if dir.is_empty() {
1754        return true;
1755    }
1756    let with_slash = format!("{dir}/");
1757    crate::sparse_checkout::path_matches_sparse_patterns(&with_slash, patterns, cone_mode)
1758}
1759
1760fn collect_directory_prefixes(path: &[u8], out: &mut BTreeSet<Vec<u8>>) {
1761    for (i, &b) in path.iter().enumerate() {
1762        if b == b'/' {
1763            out.insert(path[..i].to_vec());
1764        }
1765    }
1766}
1767
1768fn tree_oid_for_prefix(odb: &Odb, root_tree: &ObjectId, prefix: &[u8]) -> Result<Option<ObjectId>> {
1769    if prefix.is_empty() {
1770        return Ok(Some(*root_tree));
1771    }
1772    let pref_str = String::from_utf8_lossy(prefix);
1773    let components: Vec<&str> = pref_str.split('/').filter(|c| !c.is_empty()).collect();
1774    let mut current = *root_tree;
1775    for comp in components {
1776        let obj = odb.read(&current)?;
1777        if obj.kind != ObjectKind::Tree {
1778            return Ok(None);
1779        }
1780        let entries = parse_tree(&obj.data)?;
1781        let mut next = None;
1782        for e in entries {
1783            if e.name == comp.as_bytes() {
1784                if e.mode == MODE_TREE {
1785                    next = Some(e.oid);
1786                }
1787                break;
1788            }
1789        }
1790        current = match next {
1791            Some(o) => o,
1792            None => return Ok(None),
1793        };
1794    }
1795    Ok(Some(current))
1796}
1797
1798/// Build the list of blob index entries under `prefix` that match `HEAD` at `tree_oid`,
1799/// treating existing sparse-directory placeholders in `entries` as opaque subtrees (like Git).
1800fn collect_sparse_aware_expected_blobs(
1801    odb: &Odb,
1802    tree_oid: &ObjectId,
1803    prefix: &[u8],
1804    patterns: &[String],
1805    cone_mode: bool,
1806    entries: &[IndexEntry],
1807) -> Result<Vec<IndexEntry>> {
1808    let mut out = Vec::new();
1809    walk_sparse_aware(
1810        odb, tree_oid, prefix, patterns, cone_mode, entries, &mut out,
1811    )?;
1812    Ok(out)
1813}
1814
1815fn walk_sparse_aware(
1816    odb: &Odb,
1817    tree_oid: &ObjectId,
1818    prefix: &[u8],
1819    patterns: &[String],
1820    cone_mode: bool,
1821    entries: &[IndexEntry],
1822    out: &mut Vec<IndexEntry>,
1823) -> Result<()> {
1824    let obj = odb.read(tree_oid)?;
1825    if obj.kind != ObjectKind::Tree {
1826        return Err(Error::IndexError(format!("expected tree at {}", tree_oid)));
1827    }
1828    let tree_entries = parse_tree(&obj.data)?;
1829    for te in tree_entries {
1830        let path = if prefix.is_empty() {
1831            te.name.clone()
1832        } else {
1833            let mut p = prefix.to_vec();
1834            p.push(b'/');
1835            p.extend_from_slice(&te.name);
1836            p
1837        };
1838        if te.mode == MODE_TREE {
1839            let path_slash = {
1840                let mut p = path.clone();
1841                p.push(b'/');
1842                p
1843            };
1844            if entries.iter().any(|e| {
1845                e.stage() == 0
1846                    && e.is_sparse_directory_placeholder()
1847                    && e.path == path_slash
1848                    && e.oid == te.oid
1849            }) {
1850                continue;
1851            }
1852            walk_sparse_aware(odb, &te.oid, &path, patterns, cone_mode, entries, out)?;
1853        } else {
1854            let path_len = path.len().min(0xFFF) as u16;
1855            let path_str = String::from_utf8_lossy(&path);
1856            if crate::sparse_checkout::path_matches_sparse_patterns(&path_str, patterns, cone_mode)
1857            {
1858                continue;
1859            }
1860            let mut e = IndexEntry {
1861                ctime_sec: 0,
1862                ctime_nsec: 0,
1863                mtime_sec: 0,
1864                mtime_nsec: 0,
1865                dev: 0,
1866                ino: 0,
1867                mode: te.mode,
1868                uid: 0,
1869                gid: 0,
1870                size: 0,
1871                oid: te.oid,
1872                flags: path_len,
1873                flags_extended: Some(0),
1874                path,
1875                base_index_pos: 0,
1876            };
1877            e.set_skip_worktree(true);
1878            out.push(e);
1879        }
1880    }
1881    Ok(())
1882}
1883
1884fn flatten_tree_blobs(odb: &Odb, tree_oid: &ObjectId, prefix: &[u8]) -> Result<Vec<IndexEntry>> {
1885    let obj = odb.read(tree_oid)?;
1886    if obj.kind != ObjectKind::Tree {
1887        return Err(Error::IndexError(format!("expected tree at {}", tree_oid)));
1888    }
1889    let entries = parse_tree(&obj.data)?;
1890    let mut out = Vec::new();
1891    for te in entries {
1892        let path = if prefix.is_empty() {
1893            te.name.clone()
1894        } else {
1895            let mut p = prefix.to_vec();
1896            p.push(b'/');
1897            p.extend_from_slice(&te.name);
1898            p
1899        };
1900        if te.mode == MODE_TREE {
1901            let sub = flatten_tree_blobs(odb, &te.oid, &path)?;
1902            out.extend(sub);
1903        } else {
1904            let path_len = path.len().min(0xFFF) as u16;
1905            let mut e = IndexEntry {
1906                ctime_sec: 0,
1907                ctime_nsec: 0,
1908                mtime_sec: 0,
1909                mtime_nsec: 0,
1910                dev: 0,
1911                ino: 0,
1912                mode: te.mode,
1913                uid: 0,
1914                gid: 0,
1915                size: 0,
1916                oid: te.oid,
1917                flags: path_len,
1918                flags_extended: Some(0),
1919                path,
1920                base_index_pos: 0,
1921            };
1922            e.set_skip_worktree(true);
1923            out.push(e);
1924        }
1925    }
1926    Ok(out)
1927}
1928
1929fn lockfile_pid_enabled(index_path: &Path) -> bool {
1930    let git_dir = match index_path.parent() {
1931        Some(dir) => dir,
1932        None => return false,
1933    };
1934
1935    ConfigSet::load(Some(git_dir), true)
1936        .ok()
1937        .and_then(|cfg| cfg.get_bool("core.lockfilepid"))
1938        .and_then(|res| res.ok())
1939        .unwrap_or(false)
1940}
1941
1942fn pid_path_for_lock(lock_path: &Path) -> std::path::PathBuf {
1943    let file_name = lock_path
1944        .file_name()
1945        .map(|s| s.to_string_lossy().to_string())
1946        .unwrap_or_else(|| "index.lock".to_owned());
1947    let pid_name = if let Some(base) = file_name.strip_suffix(".lock") {
1948        format!("{base}~pid.lock")
1949    } else {
1950        format!("{file_name}~pid.lock")
1951    };
1952    lock_path.with_file_name(pid_name)
1953}
1954
1955fn write_lock_pid_file(pid_path: &Path) -> io::Result<()> {
1956    use std::io::Write as _;
1957    let mut file = fs::OpenOptions::new()
1958        .write(true)
1959        .create(true)
1960        .truncate(true)
1961        .open(pid_path)?;
1962    writeln!(file, "pid {}", std::process::id())?;
1963    Ok(())
1964}
1965
1966/// Detail lines Git prints when the index lock file already exists (used by stash and similar).
1967pub fn format_index_lock_blocked_detail(index_path: &Path) -> String {
1968    let lock_path = index_path.with_extension("lock");
1969    let pid_path = pid_path_for_lock(&lock_path);
1970    let err = io::Error::new(io::ErrorKind::AlreadyExists, "File exists");
1971    build_lock_exists_message(&lock_path, &pid_path, &err)
1972}
1973
1974fn build_lock_exists_message(lock_path: &Path, pid_path: &Path, err: &io::Error) -> String {
1975    let mut msg = format!("Unable to create '{}': {}.\n\n", lock_path.display(), err);
1976
1977    if let Some(pid) = read_lock_pid(pid_path) {
1978        if is_process_running(pid) {
1979            msg.push_str(&format!(
1980                "Lock is held by process {pid}; if no git process is running, the lock file may be stale (PIDs can be reused)"
1981            ));
1982        } else {
1983            msg.push_str(&format!(
1984                "Lock was held by process {pid}, which is no longer running; the lock file appears to be stale"
1985            ));
1986        }
1987    } else {
1988        msg.push_str(
1989            "Another git process seems to be running in this repository, or the lock file may be stale",
1990        );
1991    }
1992
1993    msg
1994}
1995
1996fn read_lock_pid(pid_path: &Path) -> Option<u64> {
1997    let raw = fs::read_to_string(pid_path).ok()?;
1998    let trimmed = raw.trim();
1999    if let Some(v) = trimmed.strip_prefix("pid ") {
2000        return v.trim().parse::<u64>().ok();
2001    }
2002    trimmed.parse::<u64>().ok()
2003}
2004
2005fn is_process_running(pid: u64) -> bool {
2006    #[cfg(target_os = "linux")]
2007    {
2008        let proc_path = std::path::PathBuf::from(format!("/proc/{pid}"));
2009        proc_path.exists()
2010    }
2011
2012    #[cfg(not(target_os = "linux"))]
2013    {
2014        let status = std::process::Command::new("kill")
2015            .arg("-0")
2016            .arg(pid.to_string())
2017            .status();
2018        status.map(|s| s.success()).unwrap_or(false)
2019    }
2020}
2021
2022/// Hash an index body with the given algorithm, returning the raw checksum.
2023fn hash_index_body(algo: HashAlgo, body: &[u8]) -> Vec<u8> {
2024    match algo {
2025        HashAlgo::Sha1 => {
2026            let mut hasher = Sha1::new();
2027            hasher.update(body);
2028            hasher.finalize().to_vec()
2029        }
2030        HashAlgo::Sha256 => {
2031            let mut hasher = Sha256::new();
2032            hasher.update(body);
2033            hasher.finalize().to_vec()
2034        }
2035    }
2036}
2037
2038/// Best-effort detection of an index file's hash algorithm from its trailing
2039/// checksum. Used by [`Index::parse`] when no repository algorithm is known.
2040/// Callers with a repository should use [`Index::parse_with_algo`] instead,
2041/// which is authoritative even when the checksum is skipped (all zeros).
2042fn detect_index_hash_algo(data: &[u8]) -> HashAlgo {
2043    // A SHA-256 index ends with a 32-byte checksum over the body. If that
2044    // verifies, the file is SHA-256; otherwise assume SHA-1.
2045    let sha256_len = HashAlgo::Sha256.len();
2046    if data.len() > sha256_len + 12 {
2047        let (body, checksum) = data.split_at(data.len() - sha256_len);
2048        if checksum.iter().any(|&b| b != 0)
2049            && hash_index_body(HashAlgo::Sha256, body) == checksum
2050        {
2051            return HashAlgo::Sha256;
2052        }
2053    }
2054    HashAlgo::Sha1
2055}
2056
2057/// Parse a single index entry from `data`, returning `(entry, bytes_consumed)`.
2058///
2059/// `hash_len` is the raw object-id width (20 for SHA-1, 32 for SHA-256).
2060fn parse_entry(
2061    data: &[u8],
2062    version: u32,
2063    prev_path: &[u8],
2064    hash_len: usize,
2065) -> Result<(IndexEntry, usize)> {
2066    // 40 bytes of stat fields + the OID + 2 flag bytes.
2067    if data.len() < 40 + hash_len + 2 {
2068        return Err(Error::IndexError("entry too short".to_owned()));
2069    }
2070
2071    let mut pos = 0;
2072
2073    macro_rules! read_u32 {
2074        () => {{
2075            let v = u32::from_be_bytes(
2076                data[pos..pos + 4]
2077                    .try_into()
2078                    .map_err(|_| Error::IndexError("truncated u32".to_owned()))?,
2079            );
2080            pos += 4;
2081            v
2082        }};
2083    }
2084
2085    let ctime_sec = read_u32!();
2086    let ctime_nsec = read_u32!();
2087    let mtime_sec = read_u32!();
2088    let mtime_nsec = read_u32!();
2089    let dev = read_u32!();
2090    let ino = read_u32!();
2091    let mode = read_u32!();
2092    let uid = read_u32!();
2093    let gid = read_u32!();
2094    let size = read_u32!();
2095
2096    let oid = ObjectId::from_bytes(&data[pos..pos + hash_len])?;
2097    pos += hash_len;
2098
2099    let flags = u16::from_be_bytes(
2100        data[pos..pos + 2]
2101            .try_into()
2102            .map_err(|_| Error::IndexError("truncated flags".to_owned()))?,
2103    );
2104    pos += 2;
2105
2106    let flags_extended = if version >= 3 && flags & 0x4000 != 0 {
2107        let fe = u16::from_be_bytes(
2108            data[pos..pos + 2]
2109                .try_into()
2110                .map_err(|_| Error::IndexError("truncated extended flags".to_owned()))?,
2111        );
2112        pos += 2;
2113        Some(fe)
2114    } else {
2115        None
2116    };
2117
2118    let path;
2119    if version == 4 {
2120        // V4: prefix-compressed path
2121        let (strip_len, varint_bytes) = read_varint(&data[pos..]);
2122        pos += varint_bytes;
2123        let nul = data[pos..]
2124            .iter()
2125            .position(|&b| b == 0)
2126            .ok_or_else(|| Error::IndexError("v4 entry path missing NUL".to_owned()))?;
2127        let suffix = &data[pos..pos + nul];
2128        pos += nul + 1;
2129        let keep = prev_path.len().saturating_sub(strip_len);
2130        let mut full_path = Vec::with_capacity(keep + suffix.len());
2131        full_path.extend_from_slice(&prev_path[..keep]);
2132        full_path.extend_from_slice(suffix);
2133        path = full_path;
2134    } else {
2135        // V2/V3: NUL-terminated full path + padding
2136        let nul = data[pos..]
2137            .iter()
2138            .position(|&b| b == 0)
2139            .ok_or_else(|| Error::IndexError("entry path missing NUL terminator".to_owned()))?;
2140        path = data[pos..pos + nul].to_vec();
2141        pos += nul + 1;
2142        let entry_start = 0usize;
2143        let entry_len = pos - entry_start;
2144        let padded = (entry_len + 7) & !7;
2145        let padding = padded.saturating_sub(entry_len);
2146        pos += padding;
2147    }
2148
2149    Ok((
2150        IndexEntry {
2151            ctime_sec,
2152            ctime_nsec,
2153            mtime_sec,
2154            mtime_nsec,
2155            dev,
2156            ino,
2157            mode,
2158            uid,
2159            gid,
2160            size,
2161            oid,
2162            flags,
2163            flags_extended,
2164            path,
2165            base_index_pos: 0,
2166        },
2167        pos,
2168    ))
2169}
2170
2171/// Serialise a single index entry into `out`.
2172/// Read a variable-length integer (git's index v4 varint encoding).
2173/// Returns (value, bytes_consumed).
2174fn write_varint(out: &mut Vec<u8>, mut value: usize) {
2175    loop {
2176        let mut b = (value & 0x7F) as u8;
2177        value >>= 7;
2178        if value != 0 {
2179            b |= 0x80;
2180        }
2181        out.push(b);
2182        if value == 0 {
2183            break;
2184        }
2185    }
2186}
2187
2188fn read_varint(data: &[u8]) -> (usize, usize) {
2189    let mut value: usize = 0;
2190    let mut shift = 0usize;
2191    let mut pos = 0;
2192    loop {
2193        if pos >= data.len() {
2194            break;
2195        }
2196        let byte = data[pos] as usize;
2197        pos += 1;
2198        value |= (byte & 0x7F) << shift;
2199        if byte & 0x80 == 0 {
2200            break;
2201        }
2202        shift += 7;
2203        // Prevent infinite loops on malformed data
2204        if shift > 28 {
2205            break;
2206        }
2207    }
2208    (value, pos)
2209}
2210
2211fn serialize_entry_v4(entry: &IndexEntry, previous_path: &mut Vec<u8>, out: &mut Vec<u8>) {
2212    let write_u32 = |out: &mut Vec<u8>, v: u32| out.extend_from_slice(&v.to_be_bytes());
2213
2214    write_u32(out, entry.ctime_sec);
2215    write_u32(out, entry.ctime_nsec);
2216    write_u32(out, entry.mtime_sec);
2217    write_u32(out, entry.mtime_nsec);
2218    write_u32(out, entry.dev);
2219    write_u32(out, entry.ino);
2220    write_u32(out, entry.mode);
2221    write_u32(out, entry.uid);
2222    write_u32(out, entry.gid);
2223    write_u32(out, entry.size);
2224    out.extend_from_slice(entry.oid.as_bytes());
2225
2226    let mut flags = entry.flags;
2227    let disk_ext = entry
2228        .flags_extended
2229        .map(IndexEntry::disk_flags_extended)
2230        .filter(|fe| *fe != 0);
2231    if disk_ext.is_some() {
2232        flags |= 0x4000;
2233    } else {
2234        flags &= !0x4000;
2235    }
2236    let path_len = entry.path.len().min(0xFFF) as u16;
2237    flags = (flags & 0xF000) | path_len;
2238    out.extend_from_slice(&flags.to_be_bytes());
2239
2240    if let Some(fe) = disk_ext {
2241        out.extend_from_slice(&fe.to_be_bytes());
2242    }
2243
2244    let common = previous_path
2245        .iter()
2246        .zip(entry.path.iter())
2247        .take_while(|(a, b)| a == b)
2248        .count();
2249    let to_remove = previous_path.len().saturating_sub(common);
2250    write_varint(out, to_remove);
2251    out.extend_from_slice(&entry.path[common..]);
2252    out.push(0);
2253
2254    previous_path.clear();
2255    previous_path.extend_from_slice(&entry.path);
2256}
2257
2258fn serialize_entry(entry: &IndexEntry, version: u32, out: &mut Vec<u8>) {
2259    let start = out.len();
2260
2261    let write_u32 = |out: &mut Vec<u8>, v: u32| out.extend_from_slice(&v.to_be_bytes());
2262
2263    write_u32(out, entry.ctime_sec);
2264    write_u32(out, entry.ctime_nsec);
2265    write_u32(out, entry.mtime_sec);
2266    write_u32(out, entry.mtime_nsec);
2267    write_u32(out, entry.dev);
2268    write_u32(out, entry.ino);
2269    write_u32(out, entry.mode);
2270    write_u32(out, entry.uid);
2271    write_u32(out, entry.gid);
2272    write_u32(out, entry.size);
2273    out.extend_from_slice(entry.oid.as_bytes());
2274
2275    // Set or clear the extended-flags bit in flags
2276    let mut flags = entry.flags;
2277    let disk_ext = entry
2278        .flags_extended
2279        .map(IndexEntry::disk_flags_extended)
2280        .filter(|fe| *fe != 0);
2281    if version >= 3 && disk_ext.is_some() {
2282        flags |= 0x4000;
2283    } else {
2284        flags &= !0x4000;
2285    }
2286    // Overwrite path length bits (bottom 12)
2287    let path_len = entry.path.len().min(0xFFF) as u16;
2288    flags = (flags & 0xF000) | path_len;
2289    out.extend_from_slice(&flags.to_be_bytes());
2290
2291    if version >= 3 {
2292        if let Some(fe) = disk_ext {
2293            out.extend_from_slice(&fe.to_be_bytes());
2294        }
2295    }
2296
2297    out.extend_from_slice(&entry.path);
2298    out.push(0);
2299
2300    // Pad to 8-byte boundary
2301    let entry_len = out.len() - start;
2302    let padded = (entry_len + 7) & !7;
2303    let padding = padded - entry_len;
2304    for _ in 0..padding {
2305        out.push(0);
2306    }
2307}
2308
2309/// Build an [`IndexEntry`] by stat-ing a file on disk.
2310///
2311/// # Parameters
2312///
2313/// - `path` — absolute path to the file.
2314/// - `rel_path` — path relative to the repo root (stored in the index).
2315/// - `oid` — the object ID of the file's blob.
2316/// - `mode` — file mode (use [`MODE_REGULAR`], [`MODE_EXECUTABLE`], etc.).
2317///
2318/// # Errors
2319///
2320/// Returns [`Error::Io`] if `stat` fails.
2321pub fn entry_from_stat(
2322    path: &Path,
2323    rel_path: &[u8],
2324    oid: ObjectId,
2325    mode: u32,
2326) -> Result<IndexEntry> {
2327    let meta = fs::symlink_metadata(path)?;
2328    Ok(entry_from_metadata(&meta, rel_path, oid, mode))
2329}
2330
2331/// Build an [`IndexEntry`] from already-obtained metadata.
2332///
2333/// This avoids a redundant `stat()` call when the caller already has
2334/// filesystem metadata (e.g. from `symlink_metadata`).
2335#[must_use]
2336pub fn entry_from_metadata(
2337    meta: &fs::Metadata,
2338    rel_path: &[u8],
2339    oid: ObjectId,
2340    mode: u32,
2341) -> IndexEntry {
2342    #[cfg(unix)]
2343    {
2344        use std::os::unix::fs::MetadataExt;
2345        IndexEntry {
2346            ctime_sec: meta.ctime() as u32,
2347            ctime_nsec: meta.ctime_nsec() as u32,
2348            mtime_sec: meta.mtime() as u32,
2349            mtime_nsec: meta.mtime_nsec() as u32,
2350            dev: meta.dev() as u32,
2351            ino: meta.ino() as u32,
2352            mode,
2353            uid: meta.uid(),
2354            gid: meta.gid(),
2355            size: meta.size() as u32,
2356            oid,
2357            flags: rel_path.len().min(0xFFF) as u16,
2358            flags_extended: None,
2359            path: rel_path.to_vec(),
2360            base_index_pos: 0,
2361        }
2362    }
2363    #[cfg(not(unix))]
2364    {
2365        use std::time::UNIX_EPOCH;
2366        let mtime = meta
2367            .modified()
2368            .ok()
2369            .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
2370            .unwrap_or_default();
2371        IndexEntry {
2372            ctime_sec: mtime.as_secs() as u32,
2373            ctime_nsec: mtime.subsec_nanos(),
2374            mtime_sec: mtime.as_secs() as u32,
2375            mtime_nsec: mtime.subsec_nanos(),
2376            dev: 0,
2377            ino: 0,
2378            mode,
2379            uid: 0,
2380            gid: 0,
2381            size: meta.len() as u32,
2382            oid,
2383            flags: rel_path.len().min(0xFFF) as u16,
2384            flags_extended: None,
2385            path: rel_path.to_vec(),
2386            base_index_pos: 0,
2387        }
2388    }
2389}
2390
2391/// Convert a `stat` mode to the Git index mode, normalised to one of the
2392/// known constants ([`MODE_REGULAR`], [`MODE_EXECUTABLE`], [`MODE_SYMLINK`]).
2393///
2394/// Only the `S_IFMT` and execute bits are inspected; all other permission bits
2395/// are discarded (Git stores only 644 or 755 for regular files).
2396///
2397/// # Parameters
2398///
2399/// - `raw_mode` — the raw `st_mode` value from `stat(2)`.
2400#[must_use]
2401pub fn normalize_mode(raw_mode: u32) -> u32 {
2402    const S_IFMT: u32 = 0o170000;
2403    const S_IFLNK: u32 = 0o120000;
2404    const S_IFREG: u32 = 0o100000;
2405
2406    let fmt = raw_mode & S_IFMT;
2407    if fmt == S_IFLNK {
2408        return MODE_SYMLINK;
2409    }
2410    if fmt == S_IFREG {
2411        // Executable if any execute bit is set
2412        if raw_mode & 0o111 != 0 {
2413            return MODE_EXECUTABLE;
2414        }
2415        return MODE_REGULAR;
2416    }
2417    // Fallback for everything else (devices, etc.) — treat as regular
2418    MODE_REGULAR
2419}
2420
2421#[cfg(test)]
2422mod tests {
2423    #![allow(clippy::expect_used, clippy::unwrap_used)]
2424
2425    use super::*;
2426    use tempfile::TempDir;
2427
2428    fn dummy_oid() -> ObjectId {
2429        ObjectId::from_bytes(&[0u8; 20]).unwrap()
2430    }
2431
2432    fn make_entry(path: &str) -> IndexEntry {
2433        IndexEntry {
2434            ctime_sec: 0,
2435            ctime_nsec: 0,
2436            mtime_sec: 0,
2437            mtime_nsec: 0,
2438            dev: 0,
2439            ino: 0,
2440            mode: MODE_REGULAR,
2441            uid: 0,
2442            gid: 0,
2443            size: 0,
2444            oid: dummy_oid(),
2445            flags: path.len().min(0xFFF) as u16,
2446            flags_extended: None,
2447            path: path.as_bytes().to_vec(),
2448            base_index_pos: 0,
2449        }
2450    }
2451
2452    #[test]
2453    fn round_trip_empty_index() {
2454        let dir = TempDir::new().unwrap();
2455        let path = dir.path().join("index");
2456
2457        let idx = Index::new();
2458        idx.write(&path).unwrap();
2459
2460        let loaded = Index::load(&path).unwrap();
2461        assert_eq!(loaded.entries.len(), 0);
2462    }
2463
2464    #[test]
2465    fn round_trip_with_entries() {
2466        let dir = TempDir::new().unwrap();
2467        let path = dir.path().join("index");
2468
2469        let mut idx = Index::new();
2470        idx.add_or_replace(make_entry("foo.txt"));
2471        idx.add_or_replace(make_entry("bar/baz.txt"));
2472        idx.write(&path).unwrap();
2473
2474        let loaded = Index::load(&path).unwrap();
2475        assert_eq!(loaded.entries.len(), 2);
2476        assert_eq!(loaded.entries[0].path, b"bar/baz.txt");
2477        assert_eq!(loaded.entries[1].path, b"foo.txt");
2478    }
2479
2480    #[test]
2481    fn remove_descendants_under_path_drops_nested_only() {
2482        let mut idx = Index::new();
2483        idx.add_or_replace(make_entry("d/e"));
2484        idx.add_or_replace(make_entry("d-other"));
2485        idx.add_or_replace(make_entry("prefix/d"));
2486        idx.remove_descendants_under_path("d");
2487        let paths: Vec<_> = idx.entries.iter().map(|e| e.path.as_slice()).collect();
2488        assert_eq!(paths, vec![b"d-other".as_slice(), b"prefix/d".as_slice()]);
2489    }
2490
2491    #[test]
2492    fn requested_v4_writes_v4_on_disk() {
2493        let dir = TempDir::new().unwrap();
2494        let path = dir.path().join("index");
2495
2496        let mut idx = Index {
2497            version: 4,
2498            ..Index::default()
2499        };
2500        idx.add_or_replace(make_entry("one"));
2501        idx.add_or_replace(make_entry("two/one"));
2502        idx.write(&path).unwrap();
2503
2504        let data = fs::read(&path).unwrap();
2505        assert_eq!(&data[4..8], &4u32.to_be_bytes());
2506
2507        let loaded = Index::load(&path).unwrap();
2508        assert_eq!(loaded.version, 4);
2509        assert_eq!(loaded.entries[0].path, b"one");
2510        assert_eq!(loaded.entries[1].path, b"two/one");
2511    }
2512}