Skip to main content

grit_lib/
index.rs

1//! Git index (staging area) reading and writing.
2//!
3//! The index file (`.git/index`) stores the current state of the staging area.
4//! It uses a binary format with a 12-byte header, fixed-size index entries,
5//! and optional extensions, followed by a trailing SHA-1 over the whole file.
6//!
7//! # Format version
8//!
9//! This implementation supports index versions 2 and 3. Requests for version 4
10//! currently fall back to a non-compressed index on write because path
11//! compression is not yet implemented.
12//!
13//! # References
14//!
15//! See `Documentation/technical/index-format.txt` in the Git source tree for
16//! the authoritative format specification.
17
18use std::collections::{BTreeMap, BTreeSet};
19use std::fs;
20use std::io::{self, Write};
21use std::path::Path;
22
23use sha1::{Digest, Sha1};
24
25use crate::config::ConfigSet;
26use crate::error::{Error, Result};
27use crate::objects::{parse_tree, ObjectId, ObjectKind, TreeEntry};
28use crate::odb::Odb;
29use crate::repo::Repository;
30use crate::resolve_undo::{self, write_resolve_undo_payload, ResolveUndoRecord};
31use crate::rev_parse;
32use crate::untracked_cache;
33
34/// File mode for a regular (non-executable) file.
35pub const MODE_REGULAR: u32 = 0o100644;
36/// File mode for an executable file.
37pub const MODE_EXECUTABLE: u32 = 0o100755;
38/// File mode for a symbolic link.
39pub const MODE_SYMLINK: u32 = 0o120000;
40/// File mode for a gitlink (submodule).
41pub const MODE_GITLINK: u32 = 0o160000;
42/// File mode for a directory (tree) entry — only used in tree objects, not index.
43pub const MODE_TREE: u32 = 0o040000;
44
45/// Git index extension signature `sdir` (sparse directory entries present).
46const INDEX_EXT_SPARSE_DIRECTORIES: u32 = u32::from_be_bytes(*b"sdir");
47/// Git index extension signature `UNTR` (untracked cache).
48const INDEX_EXT_UNTRACKED: u32 = u32::from_be_bytes(*b"UNTR");
49/// Git index extension signature `FSMN` (fsmonitor).
50const INDEX_EXT_FSMONITOR: u32 = u32::from_be_bytes(*b"FSMN");
51/// Git index extension signature `REUC` (resolve undo).
52const INDEX_EXT_RESOLVE_UNDO: u32 = u32::from_be_bytes(*b"REUC");
53/// Git index extension signature `link` (split index).
54const INDEX_EXT_LINK: u32 = u32::from_be_bytes(*b"link");
55
56/// A single entry in the Git index.
57#[derive(Debug, Clone, PartialEq, Eq)]
58pub struct IndexEntry {
59    /// Time the file metadata last changed (seconds since epoch).
60    pub ctime_sec: u32,
61    /// Nanosecond fraction of `ctime_sec`.
62    pub ctime_nsec: u32,
63    /// Time the file data last changed (seconds since epoch).
64    pub mtime_sec: u32,
65    /// Nanosecond fraction of `mtime_sec`.
66    pub mtime_nsec: u32,
67    /// Device number.
68    pub dev: u32,
69    /// Inode number.
70    pub ino: u32,
71    /// Unix file mode (`MODE_REGULAR`, `MODE_EXECUTABLE`, `MODE_SYMLINK`, …).
72    pub mode: u32,
73    /// Owner UID.
74    pub uid: u32,
75    /// Owner GID.
76    pub gid: u32,
77    /// File size in bytes (truncated to 32 bits).
78    pub size: u32,
79    /// SHA-1 of the blob object.
80    pub oid: ObjectId,
81    /// Entry flags (stage, assume-valid, extended, …).
82    pub flags: u16,
83    /// Extended flags (v3+ only).
84    pub flags_extended: Option<u16>,
85    /// Path relative to the repository root.  May contain `/` separators.
86    pub path: Vec<u8>,
87    /// Split index: position in shared base (1-based), or 0 if not from shared index.
88    pub base_index_pos: u32,
89}
90
91impl IndexEntry {
92    /// Merge stage (0 = normal, 1–3 = conflict stages).
93    #[must_use]
94    pub fn stage(&self) -> u8 {
95        ((self.flags >> 12) & 0x3) as u8
96    }
97
98    pub(crate) fn set_stage(&mut self, stage: u8) {
99        self.flags = (self.flags & 0x0FFF) | ((stage as u16 & 0x3) << 12);
100    }
101
102    /// Whether the assume-unchanged bit is set.
103    #[must_use]
104    pub fn assume_unchanged(&self) -> bool {
105        self.flags & 0x8000 != 0
106    }
107
108    /// Whether the skip-worktree bit is set (extended flags, v3+).
109    #[must_use]
110    pub fn skip_worktree(&self) -> bool {
111        self.flags_extended
112            .map(|f| f & 0x4000 != 0)
113            .unwrap_or(false)
114    }
115
116    /// Set the assume-unchanged bit.
117    pub fn set_assume_unchanged(&mut self, value: bool) {
118        if value {
119            self.flags |= 0x8000;
120        } else {
121            self.flags &= !0x8000;
122        }
123    }
124
125    /// Set the skip-worktree bit (promotes entry to v3).
126    pub fn set_skip_worktree(&mut self, value: bool) {
127        let fe = self.flags_extended.get_or_insert(0);
128        if value {
129            *fe |= 0x4000;
130        } else {
131            *fe &= !0x4000;
132            if *fe == 0 {
133                self.flags_extended = None;
134            }
135        }
136    }
137
138    /// Whether the intent-to-add bit is set (extended flags, v3+).
139    #[must_use]
140    pub fn intent_to_add(&self) -> bool {
141        self.flags_extended
142            .map(|f| f & 0x2000 != 0)
143            .unwrap_or(false)
144    }
145
146    /// Set the intent-to-add bit (promotes entry to v3).
147    pub fn set_intent_to_add(&mut self, value: bool) {
148        let fe = self.flags_extended.get_or_insert(0);
149        if value {
150            *fe |= 0x2000;
151        } else {
152            *fe &= !0x2000;
153            if *fe == 0 {
154                self.flags_extended = None;
155            }
156        }
157    }
158
159    /// Sparse-index placeholder: tree mode, stage 0, and `SKIP_WORKTREE` set.
160    #[must_use]
161    pub fn is_sparse_directory_placeholder(&self) -> bool {
162        self.mode == MODE_TREE && self.stage() == 0 && self.skip_worktree()
163    }
164
165    /// In-memory only: `ls-files --with-tree` hides stage-1 overlay rows that duplicate stage 0.
166    const FLAG_EXT_OVERLAY_TREE_SKIP: u16 = 0x8000;
167    /// In-memory and on-disk compatibility bit for fsmonitor validity (`git ls-files -f`).
168    const FLAG_EXT_FSMONITOR_VALID: u16 = 0x1000;
169    /// Extended flags Git persists in index v3 (`CE_EXTENDED_FLAGS` in `read-cache-ll.h`).
170    const FLAG_EXT_ON_DISK: u16 = 0x2000 | 0x4000;
171
172    /// Extended flag bits safe to write to a Git-compatible on-disk index (excludes fsmonitor).
173    fn disk_flags_extended(fe: u16) -> u16 {
174        fe & Self::FLAG_EXT_ON_DISK
175    }
176
177    #[must_use]
178    pub fn overlay_tree_skip_output(&self) -> bool {
179        self.flags_extended
180            .is_some_and(|fe| fe & Self::FLAG_EXT_OVERLAY_TREE_SKIP != 0)
181    }
182
183    fn set_overlay_tree_skip_output(&mut self, value: bool) {
184        let fe = self.flags_extended.get_or_insert(0);
185        if value {
186            *fe |= Self::FLAG_EXT_OVERLAY_TREE_SKIP;
187        } else {
188            *fe &= !Self::FLAG_EXT_OVERLAY_TREE_SKIP;
189            if *fe == 0 {
190                self.flags_extended = None;
191            }
192        }
193    }
194
195    /// Whether the fsmonitor-valid bit is set.
196    #[must_use]
197    pub fn fsmonitor_valid(&self) -> bool {
198        self.flags_extended
199            .is_some_and(|fe| fe & Self::FLAG_EXT_FSMONITOR_VALID != 0)
200    }
201
202    /// Set or clear the fsmonitor-valid bit.
203    pub fn set_fsmonitor_valid(&mut self, value: bool) {
204        let fe = self.flags_extended.get_or_insert(0);
205        if value {
206            *fe |= Self::FLAG_EXT_FSMONITOR_VALID;
207        } else {
208            *fe &= !Self::FLAG_EXT_FSMONITOR_VALID;
209            if *fe == 0 {
210                self.flags_extended = None;
211            }
212        }
213    }
214}
215
216/// The in-memory representation of the Git index file.
217#[derive(Debug, Clone, Default)]
218pub struct Index {
219    /// Index format version (2 or 3).
220    pub version: u32,
221    /// Index entries, sorted by (path, stage).
222    pub entries: Vec<IndexEntry>,
223    /// When true, the on-disk index includes the `sdir` extension (sparse index).
224    pub sparse_directories: bool,
225    /// Optional untracked-cache extension (`UNTR`), matching Git's `istate->untracked`.
226    pub untracked_cache: Option<untracked_cache::UntrackedCache>,
227    /// Optional fsmonitor token extension (`FSMN`).
228    pub fsmonitor_last_update: Option<String>,
229    /// Optional `REUC` resolve-undo extension (paths that were unmerged before a resolution).
230    pub resolve_undo: Option<BTreeMap<Vec<u8>, ResolveUndoRecord>>,
231    /// Split index `link` extension (bitmaps cleared after load merge).
232    pub(crate) split_link: Option<crate::split_index::SplitIndexLink>,
233    /// Root tree OID from a valid `TREE` index extension (`cache_tree`), when present.
234    pub cache_tree_root: Option<ObjectId>,
235    /// Parsed `TREE` index extension (`cache-tree`) preserving invalid and subtree nodes.
236    pub cache_tree: Option<CacheTreeNode>,
237}
238
239/// One node from Git's `TREE` index extension.
240#[derive(Debug, Clone, PartialEq, Eq)]
241pub struct CacheTreeNode {
242    /// Path component for this node. The root node stores an empty name.
243    pub name: Vec<u8>,
244    /// Number of index entries covered by this node, or `-1` when invalid.
245    pub entry_count: i32,
246    /// Tree object ID for valid nodes. Invalid nodes do not store an object ID.
247    pub oid: Option<ObjectId>,
248    /// Immediate child cache-tree nodes.
249    pub children: Vec<CacheTreeNode>,
250}
251
252impl CacheTreeNode {
253    /// Create a valid cache-tree node.
254    #[must_use]
255    pub fn valid(
256        name: Vec<u8>,
257        entry_count: i32,
258        oid: ObjectId,
259        children: Vec<CacheTreeNode>,
260    ) -> Self {
261        Self {
262            name,
263            entry_count,
264            oid: Some(oid),
265            children,
266        }
267    }
268
269    /// Mark this node as invalid while preserving its children.
270    pub fn invalidate(&mut self) {
271        self.entry_count = -1;
272        self.oid = None;
273    }
274
275    /// Returns whether this node has a valid cached tree object ID.
276    #[must_use]
277    pub fn is_valid(&self) -> bool {
278        self.entry_count >= 0 && self.oid.is_some()
279    }
280}
281
282/// Options for loading an index from disk.
283#[derive(Debug, Clone, Copy)]
284pub struct IndexLoadOptions {
285    /// If the index contains sparse directory placeholders, expand them to full file entries.
286    pub expand_sparse_directories: bool,
287}
288
289impl Default for IndexLoadOptions {
290    fn default() -> Self {
291        Self {
292            expand_sparse_directories: true,
293        }
294    }
295}
296
297/// Version used after an invalid `GIT_INDEX_VERSION` value (matches Git stderr: "Using version 3").
298const INDEX_ENV_INVALID_FALLBACK: u32 = 3;
299/// Version used after an invalid `index.version` config value (same message as env).
300const INDEX_CONFIG_INVALID_FALLBACK: u32 = 3;
301/// Minimum supported index version.
302const INDEX_FORMAT_LB: u32 = 2;
303/// Maximum supported index version (version 4 requests are accepted and
304/// downgraded on write).
305const INDEX_FORMAT_UB: u32 = 4;
306/// Index extension signature `TREE` (cache-tree).
307const INDEX_EXT_CACHE_TREE: u32 = 0x5452_4545;
308
309/// Best-effort read of Git's `TREE` index extension (`cache_tree_read`).
310fn parse_cache_tree(data: &[u8]) -> Option<CacheTreeNode> {
311    let (node, pos) = parse_cache_tree_node(data, 0)?;
312    if pos == data.len() {
313        Some(node)
314    } else {
315        None
316    }
317}
318
319fn parse_cache_tree_node(data: &[u8], mut pos: usize) -> Option<(CacheTreeNode, usize)> {
320    let name_end = data.get(pos..)?.iter().position(|&b| b == 0)? + pos;
321    let name = data[pos..name_end].to_vec();
322    pos = name_end + 1;
323
324    let (entry_count, consumed) = parse_signed_int_prefix(&data[pos..])?;
325    pos += consumed;
326    if data.get(pos) != Some(&b' ') {
327        return None;
328    }
329    pos += 1;
330    let (subtree_count, consumed) = parse_signed_int_prefix(&data[pos..])?;
331    if subtree_count < 0 {
332        return None;
333    }
334    pos += consumed;
335    if data.get(pos) != Some(&b'\n') {
336        return None;
337    }
338    pos += 1;
339
340    let oid = if entry_count >= 0 {
341        if data.len().saturating_sub(pos) < 20 {
342            return None;
343        }
344        let oid = ObjectId::from_bytes(&data[pos..pos + 20]).ok()?;
345        pos += 20;
346        Some(oid)
347    } else {
348        None
349    };
350
351    let mut children = Vec::with_capacity(subtree_count as usize);
352    for _ in 0..subtree_count {
353        let (child, next) = parse_cache_tree_node(data, pos)?;
354        children.push(child);
355        pos = next;
356    }
357
358    Some((
359        CacheTreeNode {
360            name,
361            entry_count,
362            oid,
363            children,
364        },
365        pos,
366    ))
367}
368
369fn parse_signed_int_prefix(data: &[u8]) -> Option<(i32, usize)> {
370    let mut j = 0usize;
371    while j < data.len() && data[j] == b' ' {
372        j += 1;
373    }
374    let start = j;
375    if j < data.len() && data[j] == b'-' {
376        j += 1;
377    }
378    let digit_start = j;
379    while j < data.len() && data[j].is_ascii_digit() {
380        j += 1;
381    }
382    if j == digit_start {
383        return None;
384    }
385    let s = std::str::from_utf8(&data[start..j]).ok()?;
386    let v: i32 = s.parse().ok()?;
387    Some((v, j))
388}
389
390fn serialize_cache_tree_node(node: &CacheTreeNode, out: &mut Vec<u8>) {
391    out.extend_from_slice(&node.name);
392    out.push(0);
393    out.extend_from_slice(node.entry_count.to_string().as_bytes());
394    out.push(b' ');
395    out.extend_from_slice(node.children.len().to_string().as_bytes());
396    out.push(b'\n');
397    if node.entry_count >= 0 {
398        if let Some(oid) = node.oid {
399            out.extend_from_slice(oid.as_bytes());
400        }
401    }
402    for child in &node.children {
403        serialize_cache_tree_node(child, out);
404    }
405}
406
407fn format_cache_tree_node(node: &CacheTreeNode, parent_path: &str, out: &mut String) {
408    let path = if node.name.is_empty() {
409        String::new()
410    } else {
411        let name = String::from_utf8_lossy(&node.name);
412        format!("{parent_path}{name}/")
413    };
414    if node.is_valid() {
415        if let Some(oid) = node.oid {
416            out.push_str(&format!(
417                "{} {} ({} entries, {} subtrees)\n",
418                oid,
419                path,
420                node.entry_count,
421                node.children.len()
422            ));
423        }
424    } else {
425        out.push_str(&format!(
426            "{:<40} {} ({} subtrees)\n",
427            "invalid",
428            path,
429            node.children.len()
430        ));
431    }
432    for child in &node.children {
433        format_cache_tree_node(child, &path, out);
434    }
435}
436
437/// Emit a single cache-tree node line in `test-tool dump-cache-tree` format.
438fn dump_one_line(node: &CacheTreeNode, pfx: &str, out: &mut String) {
439    if node.entry_count < 0 {
440        out.push_str(&format!(
441            "{:<40} {} ({} subtrees)\n",
442            "invalid",
443            pfx,
444            node.children.len()
445        ));
446    } else {
447        let oid = node.oid.unwrap_or_else(ObjectId::zero);
448        out.push_str(&format!(
449            "{} {} ({} entries, {} subtrees)\n",
450            oid,
451            pfx,
452            node.entry_count,
453            node.children.len()
454        ));
455    }
456}
457
458/// Walk the stored cache-tree (`it`) against a freshly built reference (`ref`),
459/// emitting only the nodes that exist in both — matching Git's `dump_cache_tree`.
460fn dump_cache_tree_pair(
461    it: &CacheTreeNode,
462    reference: &CacheTreeNode,
463    pfx: &str,
464    out: &mut String,
465) {
466    dump_one_line(it, pfx, out);
467
468    for child in &it.children {
469        let Some(ref_child) = reference.children.iter().find(|c| c.name == child.name) else {
470            continue;
471        };
472        let name = String::from_utf8_lossy(&child.name);
473        let child_pfx = format!("{pfx}{name}/");
474        dump_cache_tree_pair(child, ref_child, &child_pfx, out);
475    }
476}
477
478/// Read `GIT_INDEX_VERSION` and return the requested version.
479///
480/// If the environment variable is unset, returns `None`.
481/// If it is set but invalid (non-numeric or out of range 2..=4), prints a
482/// warning to stderr and returns the default version.
483pub fn get_index_format_from_env() -> Option<u32> {
484    let val = std::env::var("GIT_INDEX_VERSION").ok()?;
485    if val.is_empty() {
486        return None;
487    }
488    match val.parse::<u32>() {
489        Ok(v) if (INDEX_FORMAT_LB..=INDEX_FORMAT_UB).contains(&v) => Some(v),
490        _ => {
491            eprintln!(
492                "warning: GIT_INDEX_VERSION set, but the value is invalid.\n\
493                 Using version {INDEX_ENV_INVALID_FALLBACK}"
494            );
495            Some(INDEX_ENV_INVALID_FALLBACK)
496        }
497    }
498}
499
500impl Index {
501    /// Create a new, empty index.
502    ///
503    /// Respects `GIT_INDEX_VERSION` if set, otherwise defaults to version 2.
504    #[must_use]
505    pub fn new() -> Self {
506        let version = get_index_format_from_env().unwrap_or(2);
507        Self {
508            version,
509            entries: Vec::new(),
510            sparse_directories: false,
511            untracked_cache: None,
512            fsmonitor_last_update: None,
513            resolve_undo: None,
514            split_link: None,
515            cache_tree_root: None,
516            cache_tree: None,
517        }
518    }
519
520    /// Create a new empty index, respecting config values for version.
521    ///
522    /// Priority matches Git's `prepare_repo_settings`: `GIT_INDEX_VERSION` env, then
523    /// `feature.manyFiles` (implies version 4), then `index.version` (overrides version).
524    pub fn new_with_config(
525        config_index_version: Option<&str>,
526        config_many_files: Option<&str>,
527    ) -> Self {
528        if let Some(v) = get_index_format_from_env() {
529            return Self {
530                version: v,
531                entries: Vec::new(),
532                sparse_directories: false,
533                untracked_cache: None,
534                fsmonitor_last_update: None,
535                resolve_undo: None,
536                split_link: None,
537                cache_tree_root: None,
538                cache_tree: None,
539            };
540        }
541
542        let many_files = config_truthy(config_many_files);
543        let mut version = if many_files { 4 } else { 2 };
544
545        if let Some(val) = config_index_version {
546            let trimmed = val.trim();
547            if !trimmed.is_empty() {
548                match trimmed.parse::<u32>() {
549                    Ok(v) if (INDEX_FORMAT_LB..=INDEX_FORMAT_UB).contains(&v) => {
550                        version = v;
551                    }
552                    _ => {
553                        eprintln!(
554                            "warning: index.version set, but the value is invalid.\n\
555                             Using version {INDEX_CONFIG_INVALID_FALLBACK}"
556                        );
557                        version = INDEX_CONFIG_INVALID_FALLBACK;
558                    }
559                }
560            }
561        }
562
563        Self {
564            version,
565            entries: Vec::new(),
566            sparse_directories: false,
567            untracked_cache: None,
568            fsmonitor_last_update: None,
569            resolve_undo: None,
570            split_link: None,
571            cache_tree_root: None,
572            cache_tree: None,
573        }
574    }
575
576    /// New empty index using a loaded [`ConfigSet`] (includes `-c` / `GIT_CONFIG_PARAMETERS`).
577    ///
578    /// Same precedence as [`Self::new_with_config`], but reads `feature.manyFiles` and
579    /// `index.version` from `config`.
580    #[must_use]
581    pub fn new_from_config(config: &ConfigSet) -> Self {
582        if let Some(v) = get_index_format_from_env() {
583            return Self {
584                version: v,
585                entries: Vec::new(),
586                sparse_directories: false,
587                untracked_cache: None,
588                fsmonitor_last_update: None,
589                resolve_undo: None,
590                split_link: None,
591                cache_tree_root: None,
592                cache_tree: None,
593            };
594        }
595
596        let many_files = config
597            .get_bool("feature.manyFiles")
598            .and_then(|r| r.ok())
599            .unwrap_or(false);
600        let mut version = if many_files { 4 } else { 2 };
601
602        if let Some(val) = config.get("index.version") {
603            let trimmed = val.trim();
604            if !trimmed.is_empty() {
605                match trimmed.parse::<u32>() {
606                    Ok(v) if (INDEX_FORMAT_LB..=INDEX_FORMAT_UB).contains(&v) => {
607                        version = v;
608                    }
609                    _ => {
610                        eprintln!(
611                            "warning: index.version set, but the value is invalid.\n\
612                             Using version {INDEX_CONFIG_INVALID_FALLBACK}"
613                        );
614                        version = INDEX_CONFIG_INVALID_FALLBACK;
615                    }
616                }
617            }
618        }
619
620        Self {
621            version,
622            entries: Vec::new(),
623            sparse_directories: false,
624            untracked_cache: None,
625            fsmonitor_last_update: None,
626            resolve_undo: None,
627            split_link: None,
628            cache_tree_root: None,
629            cache_tree: None,
630        }
631    }
632
633    /// Load an index from the given file path without expanding sparse-directory placeholders.
634    ///
635    /// Returns an empty index if the file does not exist.
636    ///
637    /// # Errors
638    ///
639    /// Returns [`Error::IndexError`] if the file is present but corrupt.
640    pub fn load(path: &Path) -> Result<Self> {
641        match fs::read(path) {
642            Ok(data) => Self::parse(&data),
643            Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(Self {
644                sparse_directories: false,
645                ..Self::new()
646            }),
647            Err(e) => Err(Error::Io(e)),
648        }
649    }
650
651    /// Load an index and expand sparse-directory placeholders using the object database.
652    ///
653    /// After a successful return, [`Index::sparse_directories`] is cleared and every
654    /// placeholder is replaced by the blob entries from the referenced tree.
655    pub fn load_expand_sparse(path: &Path, odb: &Odb) -> Result<Self> {
656        let mut idx = Self::load(path)?;
657        idx.expand_sparse_directory_placeholders(odb)?;
658        Ok(idx)
659    }
660
661    /// Like [`Index::load_expand_sparse`], but treats a missing index or Git's
662    /// `"file too short"` placeholder as an empty index.
663    pub fn load_expand_sparse_optional(path: &Path, odb: &Odb) -> Result<Self> {
664        let mut idx = match fs::read(path) {
665            Ok(data) => Self::parse(&data).or_else(|e| match e {
666                Error::IndexError(msg) if msg == "file too short" => Ok(Self::new()),
667                other => Err(other),
668            })?,
669            Err(e) if e.kind() == io::ErrorKind::NotFound => Self::new(),
670            Err(e) => return Err(Error::Io(e)),
671        };
672        idx.expand_sparse_directory_placeholders(odb)?;
673        Ok(idx)
674    }
675
676    /// Returns true if the index contains sparse-index tree placeholders (`MODE_TREE` + skip-worktree).
677    #[must_use]
678    pub fn has_sparse_directory_placeholders(&self) -> bool {
679        self.entries
680            .iter()
681            .any(IndexEntry::is_sparse_directory_placeholder)
682    }
683
684    /// Replace sparse-directory placeholder entries with all blob paths from their trees.
685    ///
686    /// Each placeholder must reference a tree object. New entries are marked skip-worktree like Git's
687    /// expanded index, except we keep `sparse_directories` false in memory after expansion.
688    pub fn expand_sparse_directory_placeholders(&mut self, odb: &Odb) -> Result<()> {
689        if !self.has_sparse_directory_placeholders() {
690            return Ok(());
691        }
692        let mut out: Vec<IndexEntry> = Vec::with_capacity(self.entries.len());
693        for entry in self.entries.drain(..) {
694            if entry.is_sparse_directory_placeholder() {
695                let prefix = trim_trailing_slash_bytes(&entry.path);
696                let blobs = flatten_tree_blobs(odb, &entry.oid, prefix)?;
697                out.extend(blobs);
698            } else {
699                out.push(entry);
700            }
701        }
702        self.entries = out;
703        self.sparse_directories = false;
704        self.sort();
705        Ok(())
706    }
707
708    /// Collapse consecutive skip-worktree subtrees into sparse-directory placeholders when
709    /// `cone_mode` is true and each directory is outside the sparse cone.
710    ///
711    /// `head_tree` is the tree OID at `HEAD`. When `enable_sparse_index` is false, clears
712    /// [`Index::sparse_directories`] and returns without collapsing.
713    pub fn try_collapse_sparse_directories(
714        &mut self,
715        odb: &Odb,
716        head_tree: &ObjectId,
717        patterns: &[String],
718        cone_mode: bool,
719        enable_sparse_index: bool,
720    ) -> Result<()> {
721        if !enable_sparse_index || !cone_mode {
722            self.sparse_directories = false;
723            return Ok(());
724        }
725
726        let mut prefixes = BTreeSet::<Vec<u8>>::new();
727        for e in &self.entries {
728            if e.stage() != 0 || e.mode == MODE_TREE || !e.skip_worktree() {
729                continue;
730            }
731            collect_directory_prefixes(&e.path, &mut prefixes);
732        }
733
734        let mut collapsed_any = false;
735        // Deepest prefixes first so nested dirs collapse before parents.
736        let mut ordered: Vec<Vec<u8>> = prefixes.into_iter().collect();
737        ordered.sort_by_key(|p| std::cmp::Reverse(p.len()));
738
739        for pref in ordered {
740            let pref_str = String::from_utf8_lossy(&pref);
741            if directory_in_cone(&pref_str, patterns, cone_mode) {
742                continue;
743            }
744            let Some(subtree_oid) = tree_oid_for_prefix(odb, head_tree, &pref)? else {
745                continue;
746            };
747            let expected = collect_sparse_aware_expected_blobs(
748                odb,
749                &subtree_oid,
750                &pref,
751                patterns,
752                cone_mode,
753                &self.entries,
754            )?;
755            if expected.is_empty() {
756                continue;
757            }
758            let mut matched = Vec::new();
759            for e in &self.entries {
760                if e.stage() != 0 {
761                    continue;
762                }
763                if path_under_prefix(&e.path, &pref) && e.mode != MODE_TREE {
764                    matched.push(e.clone());
765                }
766            }
767            if matched.len() != expected.len() {
768                continue;
769            }
770            matched.sort_by(|a, b| a.path.cmp(&b.path));
771            let mut exp_sorted = expected;
772            exp_sorted.sort_by(|a, b| a.path.cmp(&b.path));
773            if !matched
774                .iter()
775                .zip(exp_sorted.iter())
776                .all(|(a, b)| a.path == b.path && a.oid == b.oid && a.mode == b.mode)
777            {
778                continue;
779            }
780            if !matched.iter().all(|e| e.skip_worktree()) {
781                continue;
782            }
783            // Git's convert_to_sparse_rec refuses to collapse a directory that contains a
784            // submodule (gitlink); the sparse-directory entry could not faithfully represent
785            // the gitlink's committed OID. Leave such directories expanded.
786            if matched.iter().any(|e| e.mode == MODE_GITLINK)
787                || exp_sorted.iter().any(|e| e.mode == MODE_GITLINK)
788            {
789                continue;
790            }
791
792            let mut path_with_slash = pref.clone();
793            if !path_with_slash.ends_with(b"/") {
794                path_with_slash.push(b'/');
795            }
796            self.entries
797                .retain(|e| e.stage() != 0 || !path_under_prefix(&e.path, &pref));
798            let mut placeholder = IndexEntry {
799                ctime_sec: 0,
800                ctime_nsec: 0,
801                mtime_sec: 0,
802                mtime_nsec: 0,
803                dev: 0,
804                ino: 0,
805                mode: MODE_TREE,
806                uid: 0,
807                gid: 0,
808                size: 0,
809                oid: subtree_oid,
810                flags: path_with_slash.len().min(0xFFF) as u16,
811                flags_extended: Some(0),
812                path: path_with_slash,
813                base_index_pos: 0,
814            };
815            placeholder.set_skip_worktree(true);
816            self.add_or_replace(placeholder);
817            collapsed_any = true;
818        }
819
820        if collapsed_any {
821            self.sort();
822            self.sparse_directories = true;
823        } else {
824            self.sparse_directories = false;
825        }
826        Ok(())
827    }
828
829    /// Parse index bytes (the whole file including trailing SHA-1).
830    ///
831    /// # Errors
832    ///
833    /// Returns [`Error::IndexError`] on structural problems.
834    pub fn parse(data: &[u8]) -> Result<Self> {
835        if data.len() < 12 {
836            return Err(Error::IndexError("file too short".to_owned()));
837        }
838
839        // Trailing SHA-1: normal index is a hash of the body; Git may write all zeros when
840        // `index.skipHash` / `feature.manyFiles` skips computing the checksum.
841        let (body, checksum) = data.split_at(data.len() - 20);
842        if !checksum.iter().all(|&b| b == 0) {
843            let mut hasher = Sha1::new();
844            hasher.update(body);
845            let computed = hasher.finalize();
846            if computed.as_slice() != checksum {
847                return Err(Error::IndexError("SHA-1 checksum mismatch".to_owned()));
848            }
849        }
850
851        // Header
852        let magic = &body[..4];
853        if magic != b"DIRC" {
854            return Err(Error::IndexError("bad magic: expected DIRC".to_owned()));
855        }
856        let version = u32::from_be_bytes(
857            body[4..8]
858                .try_into()
859                .map_err(|_| Error::IndexError("cannot read version".to_owned()))?,
860        );
861        if version != 2 && version != 3 && version != 4 {
862            return Err(Error::IndexError(format!(
863                "unsupported index version {version}"
864            )));
865        }
866        let count = u32::from_be_bytes(
867            body[8..12]
868                .try_into()
869                .map_err(|_| Error::IndexError("cannot read entry count".to_owned()))?,
870        );
871
872        let mut pos = 12usize;
873        let mut entries = Vec::with_capacity(count as usize);
874
875        let mut prev_path: Vec<u8> = Vec::new();
876        for _ in 0..count {
877            let (entry, consumed) = parse_entry(&body[pos..], version, &prev_path)?;
878            prev_path = entry.path.clone();
879            entries.push(entry);
880            pos += consumed;
881        }
882
883        let mut sparse_directories = false;
884        let mut untracked_cache = None;
885        let mut fsmonitor_last_update = None;
886        let mut resolve_undo = None;
887        let mut split_link = None;
888        let mut cache_tree_root = None;
889        let mut cache_tree = None;
890        while pos + 8 <= body.len() {
891            let sig = u32::from_be_bytes(
892                body[pos..pos + 4]
893                    .try_into()
894                    .map_err(|_| Error::IndexError("truncated extension sig".to_owned()))?,
895            );
896            let ext_sz = u32::from_be_bytes(
897                body[pos + 4..pos + 8]
898                    .try_into()
899                    .map_err(|_| Error::IndexError("truncated extension size".to_owned()))?,
900            ) as usize;
901            pos += 8;
902            if pos + ext_sz > body.len() {
903                return Err(Error::IndexError(
904                    "extension overruns index body".to_owned(),
905                ));
906            }
907            if sig == INDEX_EXT_SPARSE_DIRECTORIES {
908                sparse_directories = true;
909            } else if sig == INDEX_EXT_UNTRACKED {
910                let ext_data = &body[pos..pos + ext_sz];
911                untracked_cache = untracked_cache::parse_untracked_extension(ext_data);
912            } else if sig == INDEX_EXT_FSMONITOR {
913                let ext_data = &body[pos..pos + ext_sz];
914                let token_bytes = if let Some(nul) = ext_data.iter().position(|&b| b == 0) {
915                    &ext_data[..nul]
916                } else {
917                    ext_data
918                };
919                fsmonitor_last_update = Some(String::from_utf8_lossy(token_bytes).into_owned());
920            } else if sig == INDEX_EXT_RESOLVE_UNDO {
921                let ext_data = &body[pos..pos + ext_sz];
922                resolve_undo = Some(resolve_undo::parse_resolve_undo_payload(ext_data)?);
923            } else if sig == INDEX_EXT_LINK {
924                let ext_data = &body[pos..pos + ext_sz];
925                split_link = Some(crate::split_index::parse_link_extension(ext_data)?);
926            } else if sig == INDEX_EXT_CACHE_TREE {
927                let ext_data = &body[pos..pos + ext_sz];
928                cache_tree = parse_cache_tree(ext_data);
929                cache_tree_root = cache_tree
930                    .as_ref()
931                    .and_then(|node| node.oid.filter(|_| node.entry_count >= 0));
932            }
933            pos += ext_sz;
934        }
935        if pos != body.len() {
936            return Err(Error::IndexError("junk after index extensions".to_owned()));
937        }
938
939        Ok(Self {
940            version,
941            entries,
942            sparse_directories,
943            untracked_cache,
944            fsmonitor_last_update,
945            resolve_undo,
946            split_link,
947            cache_tree_root,
948            cache_tree,
949        })
950    }
951
952    /// Write the index to a file, computing and appending the trailing SHA-1.
953    ///
954    /// # Errors
955    ///
956    /// Returns [`Error::Io`] on filesystem errors.
957    pub fn write(&self, path: &Path) -> Result<()> {
958        let git_dir = path.parent();
959        let config = git_dir.and_then(|d| ConfigSet::load(Some(d), true).ok());
960        let skip_hash = index_skip_hash_for_write(config.as_ref());
961        self.write_to_path(path, skip_hash)
962    }
963
964    /// Write this index to `path` with an explicit trailing-checksum policy.
965    ///
966    /// When `skip_hash` is true, the trailing SHA-1 is written as all zeros (Git `index.skipHash`).
967    pub fn write_to_path(&self, path: &Path, skip_hash: bool) -> Result<()> {
968        let mut body = Vec::new();
969        // Fast path: entries loaded from disk (or maintained via `add_or_replace`) are already in
970        // canonical order; serializing from `&self` skips a full clone of every entry. The
971        // comparator must stay identical to [`Index::sort`] (path, then stage) — format v4 path
972        // compression depends on it.
973        let already_sorted = self
974            .entries
975            .is_sorted_by(|a, b| (&a.path, a.stage()) <= (&b.path, b.stage()));
976        if already_sorted {
977            self.serialize_into(&mut body)?;
978        } else {
979            let mut sorted = self.clone();
980            sorted.sort();
981            sorted.serialize_into(&mut body)?;
982        }
983
984        let checksum: [u8; 20] = if skip_hash {
985            [0u8; 20]
986        } else {
987            let mut hasher = Sha1::new();
988            hasher.update(&body);
989            hasher.finalize().into()
990        };
991
992        let tmp_path = path.with_extension("lock");
993        let pid_path = pid_path_for_lock(&tmp_path);
994        let lockfile_pid_enabled = lockfile_pid_enabled(path);
995
996        let mut lock_file = match fs::OpenOptions::new()
997            .write(true)
998            .create_new(true)
999            .open(&tmp_path)
1000        {
1001            Ok(file) => file,
1002            Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {
1003                let message = build_lock_exists_message(&tmp_path, &pid_path, &e);
1004                return Err(Error::Io(io::Error::new(
1005                    io::ErrorKind::AlreadyExists,
1006                    message,
1007                )));
1008            }
1009            Err(e) => return Err(Error::Io(e)),
1010        };
1011
1012        let mut wrote_pid_file = false;
1013        if lockfile_pid_enabled {
1014            if let Err(e) = write_lock_pid_file(&pid_path) {
1015                let _ = fs::remove_file(&tmp_path);
1016                return Err(Error::Io(e));
1017            }
1018            wrote_pid_file = true;
1019        }
1020
1021        if let Err(e) = (|| -> io::Result<()> {
1022            lock_file.write_all(&body)?;
1023            lock_file.write_all(&checksum)?;
1024            Ok(())
1025        })() {
1026            let _ = fs::remove_file(&tmp_path);
1027            if wrote_pid_file {
1028                let _ = fs::remove_file(&pid_path);
1029            }
1030            return Err(Error::Io(e));
1031        }
1032        drop(lock_file);
1033
1034        if let Err(e) = fs::rename(&tmp_path, path) {
1035            let _ = fs::remove_file(&tmp_path);
1036            if wrote_pid_file {
1037                let _ = fs::remove_file(&pid_path);
1038            }
1039            return Err(Error::Io(e));
1040        }
1041        {
1042            if wrote_pid_file {
1043                let _ = fs::remove_file(&pid_path);
1044            }
1045        }
1046        Ok(())
1047    }
1048
1049    /// Serialise the index body (without trailing checksum) into `out`.
1050    ///
1051    /// Callers must have sorted entries when using format 4 (path compression depends on order).
1052    pub(crate) fn serialize_into(&self, out: &mut Vec<u8>) -> Result<()> {
1053        let has_extended_flags = self.entries.iter().any(|e| e.flags_extended.is_some());
1054        let write_version = if self.version >= 4 {
1055            4
1056        } else if has_extended_flags {
1057            3
1058        } else if self.version >= 3 {
1059            2
1060        } else {
1061            self.version
1062        };
1063        // Header
1064        out.extend_from_slice(b"DIRC");
1065        out.extend_from_slice(&write_version.to_be_bytes());
1066        out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
1067
1068        if write_version == 4 {
1069            let mut previous_path: Vec<u8> = Vec::new();
1070            for entry in &self.entries {
1071                serialize_entry_v4(entry, &mut previous_path, out);
1072            }
1073        } else {
1074            for entry in &self.entries {
1075                serialize_entry(entry, write_version, out);
1076            }
1077        }
1078        if self.sparse_directories {
1079            out.extend_from_slice(&INDEX_EXT_SPARSE_DIRECTORIES.to_be_bytes());
1080            out.extend_from_slice(&0u32.to_be_bytes());
1081        }
1082        if let Some(uc) = &self.untracked_cache {
1083            let payload = untracked_cache::write_untracked_extension(uc);
1084            out.extend_from_slice(&INDEX_EXT_UNTRACKED.to_be_bytes());
1085            out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
1086            out.extend_from_slice(&payload);
1087        }
1088        if let Some(token) = &self.fsmonitor_last_update {
1089            let mut payload = token.as_bytes().to_vec();
1090            payload.push(0);
1091            out.extend_from_slice(&INDEX_EXT_FSMONITOR.to_be_bytes());
1092            out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
1093            out.extend_from_slice(&payload);
1094        }
1095        if let Some(ru) = &self.resolve_undo {
1096            let payload = write_resolve_undo_payload(ru);
1097            if !payload.is_empty() {
1098                out.extend_from_slice(&INDEX_EXT_RESOLVE_UNDO.to_be_bytes());
1099                out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
1100                out.extend_from_slice(&payload);
1101            }
1102        }
1103        if let Some(sl) = &self.split_link {
1104            use crate::ewah_bitmap::EwahBitmap;
1105            let del = sl
1106                .delete_bitmap
1107                .as_ref()
1108                .cloned()
1109                .unwrap_or_else(EwahBitmap::new);
1110            let rep = sl
1111                .replace_bitmap
1112                .as_ref()
1113                .cloned()
1114                .unwrap_or_else(EwahBitmap::new);
1115            let payload =
1116                crate::split_index::serialize_link_extension_payload(&sl.base_oid, &del, &rep);
1117            out.extend_from_slice(&INDEX_EXT_LINK.to_be_bytes());
1118            out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
1119            out.extend_from_slice(&payload);
1120        }
1121        if let Some(cache_tree) = &self.cache_tree {
1122            let mut payload = Vec::new();
1123            serialize_cache_tree_node(cache_tree, &mut payload);
1124            out.extend_from_slice(&INDEX_EXT_CACHE_TREE.to_be_bytes());
1125            out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
1126            out.extend_from_slice(&payload);
1127        }
1128        Ok(())
1129    }
1130
1131    /// Add or replace an entry (matched by path + stage).
1132    pub fn add_or_replace(&mut self, entry: IndexEntry) {
1133        let path = entry.path.clone();
1134        let stage = entry.stage();
1135        let mut inserted_stage0 = false;
1136        // Binary search for the insertion point by (path, stage)
1137        let result = self.entries.binary_search_by(|e| {
1138            e.path
1139                .as_slice()
1140                .cmp(path.as_slice())
1141                .then_with(|| e.stage().cmp(&stage))
1142        });
1143        match result {
1144            Ok(pos) => {
1145                // Preserve split-index row binding across refresh/replace (Git `ce->index`).
1146                let mut e = entry;
1147                e.base_index_pos = self.entries[pos].base_index_pos;
1148                self.entries[pos] = e;
1149            }
1150            Err(pos) => {
1151                // Not found — insert at sorted position
1152                self.entries.insert(pos, entry);
1153                inserted_stage0 = stage == 0;
1154            }
1155        }
1156        if inserted_stage0 {
1157            if let Ok(p) = std::str::from_utf8(&path) {
1158                self.invalidate_untracked_cache_for_path(p);
1159            }
1160        }
1161        if stage == 0 {
1162            self.invalidate_cache_tree_for_path(&path);
1163        }
1164    }
1165
1166    /// Stage a file at stage 0, removing any conflict stage entries (1, 2, 3)
1167    /// for the same path. This is the correct behavior for `git add` on a
1168    /// conflicted file during merge/cherry-pick resolution.
1169    pub fn stage_file(&mut self, entry: IndexEntry) {
1170        let path = entry.path.clone();
1171        for e in &self.entries {
1172            if e.path == path && e.stage() != 0 {
1173                resolve_undo::record_resolve_undo_for_entry(&mut self.resolve_undo, e);
1174            }
1175        }
1176        // Remove conflict stages first
1177        self.entries.retain(|e| e.path != path || e.stage() == 0);
1178        // Then add/replace stage-0 entry
1179        self.add_or_replace(entry);
1180    }
1181
1182    /// Drop all resolve-undo records (matches Git `resolve_undo_clear_index`).
1183    pub fn clear_resolve_undo(&mut self) {
1184        self.resolve_undo = None;
1185    }
1186
1187    /// Remove and return the resolve-undo record for `path`, if any.
1188    pub fn take_resolve_undo_record(&mut self, path: &[u8]) -> Option<ResolveUndoRecord> {
1189        let map = self.resolve_undo.as_mut()?;
1190        let ru = map.remove(path)?;
1191        if map.is_empty() {
1192            self.resolve_undo = None;
1193        }
1194        Some(ru)
1195    }
1196
1197    /// Replace all index entries for `path` with unmerged stages from `record`.
1198    pub fn install_unmerged_from_resolve_undo(&mut self, path: &[u8], record: &ResolveUndoRecord) {
1199        self.entries.retain(|e| e.path != path);
1200        for stage in 1u8..=3u8 {
1201            let i = (stage - 1) as usize;
1202            if record.modes[i] == 0 {
1203                continue;
1204            }
1205            let entry = IndexEntry {
1206                ctime_sec: 0,
1207                ctime_nsec: 0,
1208                mtime_sec: 0,
1209                mtime_nsec: 0,
1210                dev: 0,
1211                ino: 0,
1212                mode: record.modes[i],
1213                uid: 0,
1214                gid: 0,
1215                size: 0,
1216                oid: record.oids[i],
1217                flags: path.len().min(0xFFF) as u16 | ((stage as u16) << 12),
1218                flags_extended: None,
1219                path: path.to_vec(),
1220                base_index_pos: 0,
1221            };
1222            self.add_or_replace(entry);
1223        }
1224        self.sort();
1225    }
1226
1227    /// Re-create unmerged index entries for `path` from the resolve-undo extension.
1228    ///
1229    /// Returns `true` when a resolve-undo record existed and was consumed (Git `unmerge_one`).
1230    pub fn unmerge_path_from_resolve_undo(&mut self, path: &[u8]) -> bool {
1231        let Some(record) = self.take_resolve_undo_record(path) else {
1232            return false;
1233        };
1234        self.install_unmerged_from_resolve_undo(path, &record);
1235        true
1236    }
1237
1238    /// Remove all entries matching the given path (all stages).
1239    ///
1240    /// Returns `true` if at least one entry was removed.
1241    pub fn remove(&mut self, path: &[u8]) -> bool {
1242        let mut removed_any = false;
1243        for e in &self.entries {
1244            if e.path == path {
1245                if e.stage() != 0 {
1246                    resolve_undo::record_resolve_undo_for_entry(&mut self.resolve_undo, e);
1247                }
1248                removed_any = true;
1249            }
1250        }
1251        if !removed_any {
1252            return false;
1253        }
1254        self.entries.retain(|e| e.path != path);
1255        if let Ok(p) = std::str::from_utf8(path) {
1256            self.invalidate_untracked_cache_for_path(p);
1257        }
1258        self.invalidate_cache_tree_for_path(path);
1259        true
1260    }
1261
1262    /// Remove every index entry for `path` (all merge stages), like `remove_file_from_index`.
1263    ///
1264    /// Returns whether any entry was removed.
1265    pub fn remove_path_all_stages(&mut self, path: &[u8]) -> bool {
1266        self.remove(path)
1267    }
1268
1269    /// Invalidate UNTR nodes affected by an index change (Git `untracked_cache_*_index`).
1270    pub fn invalidate_untracked_cache_for_path(&mut self, path: &str) {
1271        if let Some(uc) = self.untracked_cache.as_mut() {
1272            untracked_cache::invalidate_path(uc, path);
1273        }
1274    }
1275
1276    /// Remove every index entry whose path lies strictly under `path` (all stages).
1277    ///
1278    /// Used when staging a file at `path` that replaces a former directory: Git removes
1279    /// tracked paths like `path/child` from the index so they do not remain alongside
1280    /// the new blob entry.
1281    pub fn remove_descendants_under_path(&mut self, path: &str) {
1282        let prefix = path.as_bytes();
1283        if prefix.is_empty() {
1284            return;
1285        }
1286        let plen = prefix.len();
1287        let had_descendant = self.entries.iter().any(|e| {
1288            let ep = e.path.as_slice();
1289            ep.len() > plen && ep.starts_with(prefix) && ep[plen] == b'/'
1290        });
1291        for e in self.entries.iter() {
1292            let ep = e.path.as_slice();
1293            if ep.len() > plen && ep.starts_with(prefix) && ep[plen] == b'/' && e.stage() != 0 {
1294                resolve_undo::record_resolve_undo_for_entry(&mut self.resolve_undo, e);
1295            }
1296        }
1297        self.entries.retain(|e| {
1298            let ep = e.path.as_slice();
1299            if ep.len() <= plen {
1300                return true;
1301            }
1302            if !ep.starts_with(prefix) {
1303                return true;
1304            }
1305            // Drop paths strictly under `prefix/` (keep same-length prefix matches like "d-other").
1306            ep[plen] != b'/'
1307        });
1308        if had_descendant {
1309            self.invalidate_untracked_cache_for_path(path);
1310            self.invalidate_cache_tree_for_path(path.as_bytes());
1311        }
1312    }
1313
1314    /// Replace the cache-tree extension with a valid tree.
1315    pub fn set_cache_tree(&mut self, cache_tree: CacheTreeNode) {
1316        self.cache_tree_root = cache_tree.oid.filter(|_| cache_tree.entry_count >= 0);
1317        self.cache_tree = Some(cache_tree);
1318    }
1319
1320    /// Remove the cache-tree extension.
1321    pub fn clear_cache_tree(&mut self) {
1322        self.cache_tree_root = None;
1323        self.cache_tree = None;
1324    }
1325
1326    /// Mark cache-tree nodes affected by an index path change as invalid.
1327    pub fn invalidate_cache_tree_for_path(&mut self, path: &[u8]) {
1328        let Some(root) = self.cache_tree.as_mut() else {
1329            self.cache_tree_root = None;
1330            return;
1331        };
1332        root.invalidate();
1333        self.cache_tree_root = None;
1334
1335        let mut current = root;
1336        for component in path.split(|&b| b == b'/') {
1337            if component.is_empty() {
1338                break;
1339            }
1340            let Some(child) = current
1341                .children
1342                .iter_mut()
1343                .find(|child| child.name == component)
1344            else {
1345                break;
1346            };
1347            child.invalidate();
1348            current = child;
1349        }
1350    }
1351
1352    /// Format the parsed cache-tree extension like Git's `test-tool dump-cache-tree`.
1353    #[must_use]
1354    pub fn format_cache_tree_dump(&self) -> String {
1355        let Some(root) = self.cache_tree.as_ref() else {
1356            return String::new();
1357        };
1358        let mut out = String::new();
1359        format_cache_tree_node(root, "", &mut out);
1360        out
1361    }
1362
1363    /// Produce `test-tool dump-cache-tree` output for this index.
1364    ///
1365    /// Mirrors Git's `cmd__dump_cache_tree`: the stored cache-tree (`it`) is
1366    /// compared against a freshly computed reference (`ref`) built from the
1367    /// current index entries with `WRITE_TREE_DRY_RUN`. Only nodes present in
1368    /// both trees are dumped; the `#(ref)` divergence lines that Git would emit
1369    /// are filtered out by the harness, so they are intentionally omitted here.
1370    ///
1371    /// If the index has no stored cache-tree, output is empty (Git dumps nothing
1372    /// because `it` is NULL).
1373    ///
1374    /// # Errors
1375    ///
1376    /// Returns an error if the reference cache-tree cannot be built (for example,
1377    /// if a tree object cannot be written to the object database).
1378    pub fn dump_cache_tree(&self, odb: &Odb) -> Result<String> {
1379        let Some(it_root) = self.cache_tree.as_ref() else {
1380            return Ok(String::new());
1381        };
1382        let ref_root = crate::write_tree::build_cache_tree_from_index(odb, self)?;
1383        let mut out = String::new();
1384        dump_cache_tree_pair(it_root, &ref_root, "", &mut out);
1385        Ok(out)
1386    }
1387
1388    /// Sort entries in Git's canonical order: by path, then by stage.
1389    pub fn sort(&mut self) {
1390        self.entries
1391            .sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
1392    }
1393
1394    /// OID of the shared index when this index uses split-index mode (`link` extension).
1395    #[must_use]
1396    pub fn split_index_base_oid(&self) -> Option<ObjectId> {
1397        self.split_link.as_ref().map(|l| l.base_oid)
1398    }
1399
1400    /// Find an entry by path and stage (0 for normal entries).
1401    #[must_use]
1402    pub fn get(&self, path: &[u8], stage: u8) -> Option<&IndexEntry> {
1403        self.entries
1404            .iter()
1405            .find(|e| e.path == path && e.stage() == stage)
1406    }
1407
1408    /// Find a mutable entry by path and stage.
1409    pub fn get_mut(&mut self, path: &[u8], stage: u8) -> Option<&mut IndexEntry> {
1410        self.entries
1411            .iter_mut()
1412            .find(|e| e.path == path && e.stage() == stage)
1413    }
1414
1415    /// Merge tree contents from `treeish` into this index as virtual stage-1 entries, matching
1416    /// Git's `overlay_tree_on_index` used by `git ls-files --with-tree`.
1417    ///
1418    /// Existing unmerged entries (stages 1–3) are shifted to stage 3 so stage 1 is free for the
1419    /// overlay. Stage-1 paths that already exist at stage 0 are marked so `ls-files` can skip
1420    /// them (Git's `CE_UPDATE` on the stage-1 entry).
1421    ///
1422    /// # Parameters
1423    ///
1424    /// - `repo` — repository whose object database is used to read the tree.
1425    /// - `treeish` — revision or tree OID string (`HEAD`, `HEAD~1`, full SHA, etc.).
1426    /// - `prefix` — optional path prefix (bytes, no trailing slash except empty); only paths under
1427    ///   this prefix are considered from the tree. Pass empty slice for the full tree.
1428    ///
1429    /// # Errors
1430    ///
1431    /// Returns [`Error`] if `treeish` cannot be resolved, the tree cannot be read, or an object is
1432    /// missing from the ODB.
1433    pub fn overlay_tree_on_index(
1434        &mut self,
1435        repo: &Repository,
1436        treeish: &str,
1437        prefix: &[u8],
1438    ) -> Result<()> {
1439        let oid = rev_parse::resolve_revision(repo, treeish)?;
1440        let tree_oid = peel_to_tree_oid(repo, oid)?;
1441        for e in self.entries.iter_mut() {
1442            if e.stage() != 0 {
1443                e.set_stage(3);
1444            }
1445        }
1446        self.sort();
1447        let has_stage1 = self.entries.iter().any(|e| e.stage() == 1);
1448        let mut appended: Vec<IndexEntry> = Vec::new();
1449        read_tree_into_overlay(repo, &tree_oid, prefix, &[], has_stage1, &mut appended)?;
1450        for e in appended {
1451            self.add_or_replace(e);
1452        }
1453        if !has_stage1 {
1454            self.sort();
1455        }
1456        let mut last_stage0: Option<&[u8]> = None;
1457        for e in &mut self.entries {
1458            match e.stage() {
1459                0 => {
1460                    last_stage0 = Some(e.path.as_slice());
1461                }
1462                1 if last_stage0.is_some_and(|p| p == e.path.as_slice()) => {
1463                    e.set_overlay_tree_skip_output(true);
1464                }
1465                _ => {}
1466            }
1467        }
1468        Ok(())
1469    }
1470}
1471
1472fn peel_to_tree_oid(repo: &Repository, oid: ObjectId) -> Result<ObjectId> {
1473    let obj = repo.odb.read(&oid)?;
1474    match obj.kind {
1475        ObjectKind::Tree => Ok(oid),
1476        ObjectKind::Commit => {
1477            let commit = crate::objects::parse_commit(&obj.data)?;
1478            Ok(commit.tree)
1479        }
1480        ObjectKind::Tag => {
1481            let tag = crate::objects::parse_tag(&obj.data)?;
1482            peel_to_tree_oid(repo, tag.object)
1483        }
1484        _ => Err(Error::ObjectNotFound(format!(
1485            "cannot peel {oid} to tree for --with-tree"
1486        ))),
1487    }
1488}
1489
1490fn read_tree_into_overlay(
1491    repo: &Repository,
1492    tree_oid: &ObjectId,
1493    prefix: &[u8],
1494    rel_base: &[u8],
1495    use_replace_path: bool,
1496    out: &mut Vec<IndexEntry>,
1497) -> Result<()> {
1498    let obj = repo.odb.read(tree_oid)?;
1499    if obj.kind != ObjectKind::Tree {
1500        return Err(Error::ObjectNotFound(format!(
1501            "object {tree_oid} is not a tree"
1502        )));
1503    }
1504    let entries = parse_tree(&obj.data)?;
1505    for TreeEntry { mode, name, oid } in entries {
1506        if mode == MODE_TREE {
1507            let mut path = rel_base.to_vec();
1508            if !path.is_empty() {
1509                path.push(b'/');
1510            }
1511            path.extend_from_slice(&name);
1512            if !prefix_under_or_equal(prefix, &path) {
1513                continue;
1514            }
1515            read_tree_into_overlay(repo, &oid, prefix, &path, use_replace_path, out)?;
1516            continue;
1517        }
1518        if mode == MODE_GITLINK {
1519            continue;
1520        }
1521        let mut path = rel_base.to_vec();
1522        if !path.is_empty() {
1523            path.push(b'/');
1524        }
1525        path.extend_from_slice(&name);
1526        if !prefix_under_or_equal(prefix, &path) {
1527            continue;
1528        }
1529        let entry = synthetic_stage1_index_entry(mode, &path, oid);
1530        if use_replace_path {
1531            if let Some(pos) = out.iter().position(|e| e.path == path && e.stage() == 1) {
1532                out[pos] = entry;
1533            } else {
1534                out.push(entry);
1535            }
1536        } else {
1537            out.push(entry);
1538        }
1539    }
1540    Ok(())
1541}
1542
1543fn prefix_under_or_equal(prefix: &[u8], path: &[u8]) -> bool {
1544    if prefix.is_empty() {
1545        return true;
1546    }
1547    if path == prefix {
1548        return true;
1549    }
1550    path.len() > prefix.len() && path.starts_with(prefix) && path[prefix.len()] == b'/'
1551}
1552
1553fn synthetic_stage1_index_entry(mode: u32, path: &[u8], oid: ObjectId) -> IndexEntry {
1554    let path_len = path.len().min(0xFFF) as u16;
1555    let flags = (1u16 << 12) | path_len;
1556    IndexEntry {
1557        ctime_sec: 0,
1558        ctime_nsec: 0,
1559        mtime_sec: 0,
1560        mtime_nsec: 0,
1561        dev: 0,
1562        ino: 0,
1563        mode,
1564        uid: 0,
1565        gid: 0,
1566        size: 0,
1567        oid,
1568        flags,
1569        flags_extended: None,
1570        path: path.to_vec(),
1571        base_index_pos: 0,
1572    }
1573}
1574
1575fn config_truthy(raw: Option<&str>) -> bool {
1576    let Some(val) = raw else {
1577        return false;
1578    };
1579    let lowered = val.trim().to_lowercase();
1580    matches!(lowered.as_str(), "true" | "yes" | "1" | "on")
1581}
1582
1583/// Whether to write 20 zero bytes instead of the SHA-1 of the index body.
1584///
1585/// Mirrors Git `prepare_repo_settings`: `feature.manyFiles` enables skip-hash unless
1586/// `index.skipHash` / `index.skiphash` is explicitly false; otherwise honor true `index.skipHash`.
1587pub(crate) fn index_skip_hash_for_write(config: Option<&ConfigSet>) -> bool {
1588    let Some(config) = config else {
1589        return false;
1590    };
1591    let many_files = config
1592        .get_bool("feature.manyFiles")
1593        .and_then(|r| r.ok())
1594        .unwrap_or(false);
1595    if many_files {
1596        if let Some(Ok(false)) = config.get_bool("index.skipHash") {
1597            return false;
1598        }
1599        if let Some(Ok(false)) = config.get_bool("index.skiphash") {
1600            return false;
1601        }
1602        return true;
1603    }
1604    for key in ["index.skipHash", "index.skiphash"] {
1605        if let Some(Ok(true)) = config.get_bool(key) {
1606            return true;
1607        }
1608    }
1609    false
1610}
1611
1612fn trim_trailing_slash_bytes(path: &[u8]) -> &[u8] {
1613    path.strip_suffix(b"/").unwrap_or(path)
1614}
1615
1616fn path_under_prefix(path: &[u8], prefix: &[u8]) -> bool {
1617    if path == prefix {
1618        return true;
1619    }
1620    if prefix.is_empty() {
1621        return true;
1622    }
1623    path.len() > prefix.len() && path.starts_with(prefix) && path[prefix.len()] == b'/'
1624}
1625
1626fn directory_in_cone(dir_path: &str, patterns: &[String], cone_mode: bool) -> bool {
1627    // Match Git `path_in_cone_mode_sparse_checkout`: a *directory* is in the cone when its
1628    // contents are recursively included. `path_matches_sparse_patterns` distinguishes a
1629    // directory from a file by a trailing slash (expanded-cone matching uses dtype the way
1630    // Git does), so pass the directory with a trailing slash. Collapse prefixes arrive
1631    // without one (e.g. `before`, `folder1`), so append it to avoid a top-level directory
1632    // being mistaken for an always-in-cone top-level file. The root (empty) directory is
1633    // always in the cone (`/*` + `!/*/` excludes every top-level directory, so e.g. `deep`
1634    // collapses to a single placeholder instead of leaving `deep/deeper1/` etc.).
1635    let dir = dir_path.trim_end_matches('/');
1636    if dir.is_empty() {
1637        return true;
1638    }
1639    let with_slash = format!("{dir}/");
1640    crate::sparse_checkout::path_matches_sparse_patterns(&with_slash, patterns, cone_mode)
1641}
1642
1643fn collect_directory_prefixes(path: &[u8], out: &mut BTreeSet<Vec<u8>>) {
1644    for (i, &b) in path.iter().enumerate() {
1645        if b == b'/' {
1646            out.insert(path[..i].to_vec());
1647        }
1648    }
1649}
1650
1651fn tree_oid_for_prefix(odb: &Odb, root_tree: &ObjectId, prefix: &[u8]) -> Result<Option<ObjectId>> {
1652    if prefix.is_empty() {
1653        return Ok(Some(*root_tree));
1654    }
1655    let pref_str = String::from_utf8_lossy(prefix);
1656    let components: Vec<&str> = pref_str.split('/').filter(|c| !c.is_empty()).collect();
1657    let mut current = *root_tree;
1658    for comp in components {
1659        let obj = odb.read(&current)?;
1660        if obj.kind != ObjectKind::Tree {
1661            return Ok(None);
1662        }
1663        let entries = parse_tree(&obj.data)?;
1664        let mut next = None;
1665        for e in entries {
1666            if e.name == comp.as_bytes() {
1667                if e.mode == MODE_TREE {
1668                    next = Some(e.oid);
1669                }
1670                break;
1671            }
1672        }
1673        current = match next {
1674            Some(o) => o,
1675            None => return Ok(None),
1676        };
1677    }
1678    Ok(Some(current))
1679}
1680
1681/// Build the list of blob index entries under `prefix` that match `HEAD` at `tree_oid`,
1682/// treating existing sparse-directory placeholders in `entries` as opaque subtrees (like Git).
1683fn collect_sparse_aware_expected_blobs(
1684    odb: &Odb,
1685    tree_oid: &ObjectId,
1686    prefix: &[u8],
1687    patterns: &[String],
1688    cone_mode: bool,
1689    entries: &[IndexEntry],
1690) -> Result<Vec<IndexEntry>> {
1691    let mut out = Vec::new();
1692    walk_sparse_aware(
1693        odb, tree_oid, prefix, patterns, cone_mode, entries, &mut out,
1694    )?;
1695    Ok(out)
1696}
1697
1698fn walk_sparse_aware(
1699    odb: &Odb,
1700    tree_oid: &ObjectId,
1701    prefix: &[u8],
1702    patterns: &[String],
1703    cone_mode: bool,
1704    entries: &[IndexEntry],
1705    out: &mut Vec<IndexEntry>,
1706) -> Result<()> {
1707    let obj = odb.read(tree_oid)?;
1708    if obj.kind != ObjectKind::Tree {
1709        return Err(Error::IndexError(format!("expected tree at {}", tree_oid)));
1710    }
1711    let tree_entries = parse_tree(&obj.data)?;
1712    for te in tree_entries {
1713        let path = if prefix.is_empty() {
1714            te.name.clone()
1715        } else {
1716            let mut p = prefix.to_vec();
1717            p.push(b'/');
1718            p.extend_from_slice(&te.name);
1719            p
1720        };
1721        if te.mode == MODE_TREE {
1722            let path_slash = {
1723                let mut p = path.clone();
1724                p.push(b'/');
1725                p
1726            };
1727            if entries.iter().any(|e| {
1728                e.stage() == 0
1729                    && e.is_sparse_directory_placeholder()
1730                    && e.path == path_slash
1731                    && e.oid == te.oid
1732            }) {
1733                continue;
1734            }
1735            walk_sparse_aware(odb, &te.oid, &path, patterns, cone_mode, entries, out)?;
1736        } else {
1737            let path_len = path.len().min(0xFFF) as u16;
1738            let path_str = String::from_utf8_lossy(&path);
1739            if crate::sparse_checkout::path_matches_sparse_patterns(&path_str, patterns, cone_mode)
1740            {
1741                continue;
1742            }
1743            let mut e = IndexEntry {
1744                ctime_sec: 0,
1745                ctime_nsec: 0,
1746                mtime_sec: 0,
1747                mtime_nsec: 0,
1748                dev: 0,
1749                ino: 0,
1750                mode: te.mode,
1751                uid: 0,
1752                gid: 0,
1753                size: 0,
1754                oid: te.oid,
1755                flags: path_len,
1756                flags_extended: Some(0),
1757                path,
1758                base_index_pos: 0,
1759            };
1760            e.set_skip_worktree(true);
1761            out.push(e);
1762        }
1763    }
1764    Ok(())
1765}
1766
1767fn flatten_tree_blobs(odb: &Odb, tree_oid: &ObjectId, prefix: &[u8]) -> Result<Vec<IndexEntry>> {
1768    let obj = odb.read(tree_oid)?;
1769    if obj.kind != ObjectKind::Tree {
1770        return Err(Error::IndexError(format!("expected tree at {}", tree_oid)));
1771    }
1772    let entries = parse_tree(&obj.data)?;
1773    let mut out = Vec::new();
1774    for te in entries {
1775        let path = if prefix.is_empty() {
1776            te.name.clone()
1777        } else {
1778            let mut p = prefix.to_vec();
1779            p.push(b'/');
1780            p.extend_from_slice(&te.name);
1781            p
1782        };
1783        if te.mode == MODE_TREE {
1784            let sub = flatten_tree_blobs(odb, &te.oid, &path)?;
1785            out.extend(sub);
1786        } else {
1787            let path_len = path.len().min(0xFFF) as u16;
1788            let mut e = IndexEntry {
1789                ctime_sec: 0,
1790                ctime_nsec: 0,
1791                mtime_sec: 0,
1792                mtime_nsec: 0,
1793                dev: 0,
1794                ino: 0,
1795                mode: te.mode,
1796                uid: 0,
1797                gid: 0,
1798                size: 0,
1799                oid: te.oid,
1800                flags: path_len,
1801                flags_extended: Some(0),
1802                path,
1803                base_index_pos: 0,
1804            };
1805            e.set_skip_worktree(true);
1806            out.push(e);
1807        }
1808    }
1809    Ok(out)
1810}
1811
1812fn lockfile_pid_enabled(index_path: &Path) -> bool {
1813    let git_dir = match index_path.parent() {
1814        Some(dir) => dir,
1815        None => return false,
1816    };
1817
1818    ConfigSet::load(Some(git_dir), true)
1819        .ok()
1820        .and_then(|cfg| cfg.get_bool("core.lockfilepid"))
1821        .and_then(|res| res.ok())
1822        .unwrap_or(false)
1823}
1824
1825fn pid_path_for_lock(lock_path: &Path) -> std::path::PathBuf {
1826    let file_name = lock_path
1827        .file_name()
1828        .map(|s| s.to_string_lossy().to_string())
1829        .unwrap_or_else(|| "index.lock".to_owned());
1830    let pid_name = if let Some(base) = file_name.strip_suffix(".lock") {
1831        format!("{base}~pid.lock")
1832    } else {
1833        format!("{file_name}~pid.lock")
1834    };
1835    lock_path.with_file_name(pid_name)
1836}
1837
1838fn write_lock_pid_file(pid_path: &Path) -> io::Result<()> {
1839    use std::io::Write as _;
1840    let mut file = fs::OpenOptions::new()
1841        .write(true)
1842        .create(true)
1843        .truncate(true)
1844        .open(pid_path)?;
1845    writeln!(file, "pid {}", std::process::id())?;
1846    Ok(())
1847}
1848
1849/// Detail lines Git prints when the index lock file already exists (used by stash and similar).
1850pub fn format_index_lock_blocked_detail(index_path: &Path) -> String {
1851    let lock_path = index_path.with_extension("lock");
1852    let pid_path = pid_path_for_lock(&lock_path);
1853    let err = io::Error::new(io::ErrorKind::AlreadyExists, "File exists");
1854    build_lock_exists_message(&lock_path, &pid_path, &err)
1855}
1856
1857fn build_lock_exists_message(lock_path: &Path, pid_path: &Path, err: &io::Error) -> String {
1858    let mut msg = format!("Unable to create '{}': {}.\n\n", lock_path.display(), err);
1859
1860    if let Some(pid) = read_lock_pid(pid_path) {
1861        if is_process_running(pid) {
1862            msg.push_str(&format!(
1863                "Lock is held by process {pid}; if no git process is running, the lock file may be stale (PIDs can be reused)"
1864            ));
1865        } else {
1866            msg.push_str(&format!(
1867                "Lock was held by process {pid}, which is no longer running; the lock file appears to be stale"
1868            ));
1869        }
1870    } else {
1871        msg.push_str(
1872            "Another git process seems to be running in this repository, or the lock file may be stale",
1873        );
1874    }
1875
1876    msg
1877}
1878
1879fn read_lock_pid(pid_path: &Path) -> Option<u64> {
1880    let raw = fs::read_to_string(pid_path).ok()?;
1881    let trimmed = raw.trim();
1882    if let Some(v) = trimmed.strip_prefix("pid ") {
1883        return v.trim().parse::<u64>().ok();
1884    }
1885    trimmed.parse::<u64>().ok()
1886}
1887
1888fn is_process_running(pid: u64) -> bool {
1889    #[cfg(target_os = "linux")]
1890    {
1891        let proc_path = std::path::PathBuf::from(format!("/proc/{pid}"));
1892        proc_path.exists()
1893    }
1894
1895    #[cfg(not(target_os = "linux"))]
1896    {
1897        let status = std::process::Command::new("kill")
1898            .arg("-0")
1899            .arg(pid.to_string())
1900            .status();
1901        status.map(|s| s.success()).unwrap_or(false)
1902    }
1903}
1904
1905/// Parse a single index entry from `data`, returning `(entry, bytes_consumed)`.
1906fn parse_entry(data: &[u8], version: u32, prev_path: &[u8]) -> Result<(IndexEntry, usize)> {
1907    if data.len() < 62 {
1908        return Err(Error::IndexError("entry too short".to_owned()));
1909    }
1910
1911    let mut pos = 0;
1912
1913    macro_rules! read_u32 {
1914        () => {{
1915            let v = u32::from_be_bytes(
1916                data[pos..pos + 4]
1917                    .try_into()
1918                    .map_err(|_| Error::IndexError("truncated u32".to_owned()))?,
1919            );
1920            pos += 4;
1921            v
1922        }};
1923    }
1924
1925    let ctime_sec = read_u32!();
1926    let ctime_nsec = read_u32!();
1927    let mtime_sec = read_u32!();
1928    let mtime_nsec = read_u32!();
1929    let dev = read_u32!();
1930    let ino = read_u32!();
1931    let mode = read_u32!();
1932    let uid = read_u32!();
1933    let gid = read_u32!();
1934    let size = read_u32!();
1935
1936    let oid = ObjectId::from_bytes(&data[pos..pos + 20])?;
1937    pos += 20;
1938
1939    let flags = u16::from_be_bytes(
1940        data[pos..pos + 2]
1941            .try_into()
1942            .map_err(|_| Error::IndexError("truncated flags".to_owned()))?,
1943    );
1944    pos += 2;
1945
1946    let flags_extended = if version >= 3 && flags & 0x4000 != 0 {
1947        let fe = u16::from_be_bytes(
1948            data[pos..pos + 2]
1949                .try_into()
1950                .map_err(|_| Error::IndexError("truncated extended flags".to_owned()))?,
1951        );
1952        pos += 2;
1953        Some(fe)
1954    } else {
1955        None
1956    };
1957
1958    let path;
1959    if version == 4 {
1960        // V4: prefix-compressed path
1961        let (strip_len, varint_bytes) = read_varint(&data[pos..]);
1962        pos += varint_bytes;
1963        let nul = data[pos..]
1964            .iter()
1965            .position(|&b| b == 0)
1966            .ok_or_else(|| Error::IndexError("v4 entry path missing NUL".to_owned()))?;
1967        let suffix = &data[pos..pos + nul];
1968        pos += nul + 1;
1969        let keep = prev_path.len().saturating_sub(strip_len);
1970        let mut full_path = prev_path[..keep].to_vec();
1971        full_path.extend_from_slice(suffix);
1972        path = full_path;
1973    } else {
1974        // V2/V3: NUL-terminated full path + padding
1975        let nul = data[pos..]
1976            .iter()
1977            .position(|&b| b == 0)
1978            .ok_or_else(|| Error::IndexError("entry path missing NUL terminator".to_owned()))?;
1979        path = data[pos..pos + nul].to_vec();
1980        pos += nul + 1;
1981        let entry_start = 0usize;
1982        let entry_len = pos - entry_start;
1983        let padded = (entry_len + 7) & !7;
1984        let padding = padded.saturating_sub(entry_len);
1985        pos += padding;
1986    }
1987
1988    Ok((
1989        IndexEntry {
1990            ctime_sec,
1991            ctime_nsec,
1992            mtime_sec,
1993            mtime_nsec,
1994            dev,
1995            ino,
1996            mode,
1997            uid,
1998            gid,
1999            size,
2000            oid,
2001            flags,
2002            flags_extended,
2003            path,
2004            base_index_pos: 0,
2005        },
2006        pos,
2007    ))
2008}
2009
2010/// Serialise a single index entry into `out`.
2011/// Read a variable-length integer (git's index v4 varint encoding).
2012/// Returns (value, bytes_consumed).
2013fn write_varint(out: &mut Vec<u8>, mut value: usize) {
2014    loop {
2015        let mut b = (value & 0x7F) as u8;
2016        value >>= 7;
2017        if value != 0 {
2018            b |= 0x80;
2019        }
2020        out.push(b);
2021        if value == 0 {
2022            break;
2023        }
2024    }
2025}
2026
2027fn read_varint(data: &[u8]) -> (usize, usize) {
2028    let mut value: usize = 0;
2029    let mut shift = 0usize;
2030    let mut pos = 0;
2031    loop {
2032        if pos >= data.len() {
2033            break;
2034        }
2035        let byte = data[pos] as usize;
2036        pos += 1;
2037        value |= (byte & 0x7F) << shift;
2038        if byte & 0x80 == 0 {
2039            break;
2040        }
2041        shift += 7;
2042        // Prevent infinite loops on malformed data
2043        if shift > 28 {
2044            break;
2045        }
2046    }
2047    (value, pos)
2048}
2049
2050fn serialize_entry_v4(entry: &IndexEntry, previous_path: &mut Vec<u8>, out: &mut Vec<u8>) {
2051    let write_u32 = |out: &mut Vec<u8>, v: u32| out.extend_from_slice(&v.to_be_bytes());
2052
2053    write_u32(out, entry.ctime_sec);
2054    write_u32(out, entry.ctime_nsec);
2055    write_u32(out, entry.mtime_sec);
2056    write_u32(out, entry.mtime_nsec);
2057    write_u32(out, entry.dev);
2058    write_u32(out, entry.ino);
2059    write_u32(out, entry.mode);
2060    write_u32(out, entry.uid);
2061    write_u32(out, entry.gid);
2062    write_u32(out, entry.size);
2063    out.extend_from_slice(entry.oid.as_bytes());
2064
2065    let mut flags = entry.flags;
2066    let disk_ext = entry
2067        .flags_extended
2068        .map(IndexEntry::disk_flags_extended)
2069        .filter(|fe| *fe != 0);
2070    if disk_ext.is_some() {
2071        flags |= 0x4000;
2072    } else {
2073        flags &= !0x4000;
2074    }
2075    let path_len = entry.path.len().min(0xFFF) as u16;
2076    flags = (flags & 0xF000) | path_len;
2077    out.extend_from_slice(&flags.to_be_bytes());
2078
2079    if let Some(fe) = disk_ext {
2080        out.extend_from_slice(&fe.to_be_bytes());
2081    }
2082
2083    let common = previous_path
2084        .iter()
2085        .zip(entry.path.iter())
2086        .take_while(|(a, b)| a == b)
2087        .count();
2088    let to_remove = previous_path.len().saturating_sub(common);
2089    write_varint(out, to_remove);
2090    out.extend_from_slice(&entry.path[common..]);
2091    out.push(0);
2092
2093    previous_path.clear();
2094    previous_path.extend_from_slice(&entry.path);
2095}
2096
2097fn serialize_entry(entry: &IndexEntry, version: u32, out: &mut Vec<u8>) {
2098    let start = out.len();
2099
2100    let write_u32 = |out: &mut Vec<u8>, v: u32| out.extend_from_slice(&v.to_be_bytes());
2101
2102    write_u32(out, entry.ctime_sec);
2103    write_u32(out, entry.ctime_nsec);
2104    write_u32(out, entry.mtime_sec);
2105    write_u32(out, entry.mtime_nsec);
2106    write_u32(out, entry.dev);
2107    write_u32(out, entry.ino);
2108    write_u32(out, entry.mode);
2109    write_u32(out, entry.uid);
2110    write_u32(out, entry.gid);
2111    write_u32(out, entry.size);
2112    out.extend_from_slice(entry.oid.as_bytes());
2113
2114    // Set or clear the extended-flags bit in flags
2115    let mut flags = entry.flags;
2116    let disk_ext = entry
2117        .flags_extended
2118        .map(IndexEntry::disk_flags_extended)
2119        .filter(|fe| *fe != 0);
2120    if version >= 3 && disk_ext.is_some() {
2121        flags |= 0x4000;
2122    } else {
2123        flags &= !0x4000;
2124    }
2125    // Overwrite path length bits (bottom 12)
2126    let path_len = entry.path.len().min(0xFFF) as u16;
2127    flags = (flags & 0xF000) | path_len;
2128    out.extend_from_slice(&flags.to_be_bytes());
2129
2130    if version >= 3 {
2131        if let Some(fe) = disk_ext {
2132            out.extend_from_slice(&fe.to_be_bytes());
2133        }
2134    }
2135
2136    out.extend_from_slice(&entry.path);
2137    out.push(0);
2138
2139    // Pad to 8-byte boundary
2140    let entry_len = out.len() - start;
2141    let padded = (entry_len + 7) & !7;
2142    let padding = padded - entry_len;
2143    for _ in 0..padding {
2144        out.push(0);
2145    }
2146}
2147
2148/// Build an [`IndexEntry`] by stat-ing a file on disk.
2149///
2150/// # Parameters
2151///
2152/// - `path` — absolute path to the file.
2153/// - `rel_path` — path relative to the repo root (stored in the index).
2154/// - `oid` — the object ID of the file's blob.
2155/// - `mode` — file mode (use [`MODE_REGULAR`], [`MODE_EXECUTABLE`], etc.).
2156///
2157/// # Errors
2158///
2159/// Returns [`Error::Io`] if `stat` fails.
2160pub fn entry_from_stat(
2161    path: &Path,
2162    rel_path: &[u8],
2163    oid: ObjectId,
2164    mode: u32,
2165) -> Result<IndexEntry> {
2166    let meta = fs::symlink_metadata(path)?;
2167    Ok(entry_from_metadata(&meta, rel_path, oid, mode))
2168}
2169
2170/// Build an [`IndexEntry`] from already-obtained metadata.
2171///
2172/// This avoids a redundant `stat()` call when the caller already has
2173/// filesystem metadata (e.g. from `symlink_metadata`).
2174#[must_use]
2175pub fn entry_from_metadata(
2176    meta: &fs::Metadata,
2177    rel_path: &[u8],
2178    oid: ObjectId,
2179    mode: u32,
2180) -> IndexEntry {
2181    #[cfg(unix)]
2182    {
2183        use std::os::unix::fs::MetadataExt;
2184        IndexEntry {
2185            ctime_sec: meta.ctime() as u32,
2186            ctime_nsec: meta.ctime_nsec() as u32,
2187            mtime_sec: meta.mtime() as u32,
2188            mtime_nsec: meta.mtime_nsec() as u32,
2189            dev: meta.dev() as u32,
2190            ino: meta.ino() as u32,
2191            mode,
2192            uid: meta.uid(),
2193            gid: meta.gid(),
2194            size: meta.size() as u32,
2195            oid,
2196            flags: rel_path.len().min(0xFFF) as u16,
2197            flags_extended: None,
2198            path: rel_path.to_vec(),
2199            base_index_pos: 0,
2200        }
2201    }
2202    #[cfg(not(unix))]
2203    {
2204        use std::time::UNIX_EPOCH;
2205        let mtime = meta
2206            .modified()
2207            .ok()
2208            .and_then(|t| t.duration_since(UNIX_EPOCH).ok())
2209            .unwrap_or_default();
2210        IndexEntry {
2211            ctime_sec: mtime.as_secs() as u32,
2212            ctime_nsec: mtime.subsec_nanos(),
2213            mtime_sec: mtime.as_secs() as u32,
2214            mtime_nsec: mtime.subsec_nanos(),
2215            dev: 0,
2216            ino: 0,
2217            mode,
2218            uid: 0,
2219            gid: 0,
2220            size: meta.len() as u32,
2221            oid,
2222            flags: rel_path.len().min(0xFFF) as u16,
2223            flags_extended: None,
2224            path: rel_path.to_vec(),
2225            base_index_pos: 0,
2226        }
2227    }
2228}
2229
2230/// Convert a `stat` mode to the Git index mode, normalised to one of the
2231/// known constants ([`MODE_REGULAR`], [`MODE_EXECUTABLE`], [`MODE_SYMLINK`]).
2232///
2233/// Only the `S_IFMT` and execute bits are inspected; all other permission bits
2234/// are discarded (Git stores only 644 or 755 for regular files).
2235///
2236/// # Parameters
2237///
2238/// - `raw_mode` — the raw `st_mode` value from `stat(2)`.
2239#[must_use]
2240pub fn normalize_mode(raw_mode: u32) -> u32 {
2241    const S_IFMT: u32 = 0o170000;
2242    const S_IFLNK: u32 = 0o120000;
2243    const S_IFREG: u32 = 0o100000;
2244
2245    let fmt = raw_mode & S_IFMT;
2246    if fmt == S_IFLNK {
2247        return MODE_SYMLINK;
2248    }
2249    if fmt == S_IFREG {
2250        // Executable if any execute bit is set
2251        if raw_mode & 0o111 != 0 {
2252            return MODE_EXECUTABLE;
2253        }
2254        return MODE_REGULAR;
2255    }
2256    // Fallback for everything else (devices, etc.) — treat as regular
2257    MODE_REGULAR
2258}
2259
2260#[cfg(test)]
2261mod tests {
2262    #![allow(clippy::expect_used, clippy::unwrap_used)]
2263
2264    use super::*;
2265    use tempfile::TempDir;
2266
2267    fn dummy_oid() -> ObjectId {
2268        ObjectId::from_bytes(&[0u8; 20]).unwrap()
2269    }
2270
2271    fn make_entry(path: &str) -> IndexEntry {
2272        IndexEntry {
2273            ctime_sec: 0,
2274            ctime_nsec: 0,
2275            mtime_sec: 0,
2276            mtime_nsec: 0,
2277            dev: 0,
2278            ino: 0,
2279            mode: MODE_REGULAR,
2280            uid: 0,
2281            gid: 0,
2282            size: 0,
2283            oid: dummy_oid(),
2284            flags: path.len().min(0xFFF) as u16,
2285            flags_extended: None,
2286            path: path.as_bytes().to_vec(),
2287            base_index_pos: 0,
2288        }
2289    }
2290
2291    #[test]
2292    fn round_trip_empty_index() {
2293        let dir = TempDir::new().unwrap();
2294        let path = dir.path().join("index");
2295
2296        let idx = Index::new();
2297        idx.write(&path).unwrap();
2298
2299        let loaded = Index::load(&path).unwrap();
2300        assert_eq!(loaded.entries.len(), 0);
2301    }
2302
2303    #[test]
2304    fn round_trip_with_entries() {
2305        let dir = TempDir::new().unwrap();
2306        let path = dir.path().join("index");
2307
2308        let mut idx = Index::new();
2309        idx.add_or_replace(make_entry("foo.txt"));
2310        idx.add_or_replace(make_entry("bar/baz.txt"));
2311        idx.write(&path).unwrap();
2312
2313        let loaded = Index::load(&path).unwrap();
2314        assert_eq!(loaded.entries.len(), 2);
2315        assert_eq!(loaded.entries[0].path, b"bar/baz.txt");
2316        assert_eq!(loaded.entries[1].path, b"foo.txt");
2317    }
2318
2319    #[test]
2320    fn remove_descendants_under_path_drops_nested_only() {
2321        let mut idx = Index::new();
2322        idx.add_or_replace(make_entry("d/e"));
2323        idx.add_or_replace(make_entry("d-other"));
2324        idx.add_or_replace(make_entry("prefix/d"));
2325        idx.remove_descendants_under_path("d");
2326        let paths: Vec<_> = idx.entries.iter().map(|e| e.path.as_slice()).collect();
2327        assert_eq!(paths, vec![b"d-other".as_slice(), b"prefix/d".as_slice()]);
2328    }
2329
2330    #[test]
2331    fn requested_v4_writes_v4_on_disk() {
2332        let dir = TempDir::new().unwrap();
2333        let path = dir.path().join("index");
2334
2335        let mut idx = Index {
2336            version: 4,
2337            ..Index::default()
2338        };
2339        idx.add_or_replace(make_entry("one"));
2340        idx.add_or_replace(make_entry("two/one"));
2341        idx.write(&path).unwrap();
2342
2343        let data = fs::read(&path).unwrap();
2344        assert_eq!(&data[4..8], &4u32.to_be_bytes());
2345
2346        let loaded = Index::load(&path).unwrap();
2347        assert_eq!(loaded.version, 4);
2348        assert_eq!(loaded.entries[0].path, b"one");
2349        assert_eq!(loaded.entries[1].path, b"two/one");
2350    }
2351}