//! Git index (staging area) reading and writing.
//!
//! The index file (`.git/index`) stores the current state of the staging area.
//! It uses a binary format with a 12-byte header, fixed-size index entries,
//! and optional extensions, followed by a trailing SHA-1 over the whole file.
//!
//! # Format version
//!
//! This implementation supports index versions 2 and 3. Requests for version 4
//! currently fall back to a non-compressed index on write because path
//! compression is not yet implemented.
//!
//! # References
//!
//! See `Documentation/technical/index-format.txt` in the Git source tree for
//! the authoritative format specification.
17
18use std::collections::{BTreeMap, BTreeSet};
19use std::fs;
20use std::io::{self, Write};
21use std::path::Path;
22
23use sha1::{Digest, Sha1};
24
25use crate::config::ConfigSet;
26use crate::error::{Error, Result};
27use crate::objects::{parse_tree, ObjectId, ObjectKind, TreeEntry};
28use crate::odb::Odb;
29use crate::repo::Repository;
30use crate::resolve_undo::{self, write_resolve_undo_payload, ResolveUndoRecord};
31use crate::rev_parse;
32use crate::untracked_cache;
33
/// File mode for a regular (non-executable) file.
pub const MODE_REGULAR: u32 = 0o100644;
/// File mode for an executable file.
pub const MODE_EXECUTABLE: u32 = 0o100755;
/// File mode for a symbolic link.
pub const MODE_SYMLINK: u32 = 0o120000;
/// File mode for a gitlink (submodule).
pub const MODE_GITLINK: u32 = 0o160000;
/// File mode for a directory (tree) entry — only used in tree objects, not index.
/// Exception: sparse-index directory placeholders carry this mode in the index
/// (see `IndexEntry::is_sparse_directory_placeholder`).
pub const MODE_TREE: u32 = 0o040000;

// Extension signatures are the four ASCII bytes of the name, read big-endian,
// matching how they appear on disk in the extension header.

/// Git index extension signature `sdir` (sparse directory entries present).
const INDEX_EXT_SPARSE_DIRECTORIES: u32 = u32::from_be_bytes(*b"sdir");
/// Git index extension signature `UNTR` (untracked cache).
const INDEX_EXT_UNTRACKED: u32 = u32::from_be_bytes(*b"UNTR");
/// Git index extension signature `FSMN` (fsmonitor).
const INDEX_EXT_FSMONITOR: u32 = u32::from_be_bytes(*b"FSMN");
/// Git index extension signature `REUC` (resolve undo).
const INDEX_EXT_RESOLVE_UNDO: u32 = u32::from_be_bytes(*b"REUC");
/// Git index extension signature `link` (split index).
const INDEX_EXT_LINK: u32 = u32::from_be_bytes(*b"link");
55
/// A single entry in the Git index.
///
/// Mirrors one on-disk entry of `.git/index`: cached `stat(2)` metadata used
/// for change detection, the staged blob's object id, and per-entry flag bits.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IndexEntry {
    /// Time the file metadata last changed (seconds since epoch).
    pub ctime_sec: u32,
    /// Nanosecond fraction of `ctime_sec`.
    pub ctime_nsec: u32,
    /// Time the file data last changed (seconds since epoch).
    pub mtime_sec: u32,
    /// Nanosecond fraction of `mtime_sec`.
    pub mtime_nsec: u32,
    /// Device number.
    pub dev: u32,
    /// Inode number.
    pub ino: u32,
    /// Unix file mode (`MODE_REGULAR`, `MODE_EXECUTABLE`, `MODE_SYMLINK`, …).
    pub mode: u32,
    /// Owner UID.
    pub uid: u32,
    /// Owner GID.
    pub gid: u32,
    /// File size in bytes (truncated to 32 bits).
    pub size: u32,
    /// SHA-1 of the blob object.
    pub oid: ObjectId,
    /// Entry flags: bit 15 assume-valid, bit 14 "extended" (v3), bits 12–13
    /// merge stage, bits 0–11 path length capped at 0xFFF (see the accessor
    /// methods on this type).
    pub flags: u16,
    /// Extended flags (v3+ only); `None` means the entry is writable as v2.
    pub flags_extended: Option<u16>,
    /// Path relative to the repository root.  May contain `/` separators.
    pub path: Vec<u8>,
    /// Split index: position in shared base (1-based), or 0 if not from shared index.
    pub base_index_pos: u32,
}
90
91impl IndexEntry {
92    /// Merge stage (0 = normal, 1–3 = conflict stages).
93    #[must_use]
94    pub fn stage(&self) -> u8 {
95        ((self.flags >> 12) & 0x3) as u8
96    }
97
98    pub(crate) fn set_stage(&mut self, stage: u8) {
99        self.flags = (self.flags & 0x0FFF) | ((stage as u16 & 0x3) << 12);
100    }
101
102    /// Whether the assume-unchanged bit is set.
103    #[must_use]
104    pub fn assume_unchanged(&self) -> bool {
105        self.flags & 0x8000 != 0
106    }
107
108    /// Whether the skip-worktree bit is set (extended flags, v3+).
109    #[must_use]
110    pub fn skip_worktree(&self) -> bool {
111        self.flags_extended
112            .map(|f| f & 0x4000 != 0)
113            .unwrap_or(false)
114    }
115
116    /// Set the assume-unchanged bit.
117    pub fn set_assume_unchanged(&mut self, value: bool) {
118        if value {
119            self.flags |= 0x8000;
120        } else {
121            self.flags &= !0x8000;
122        }
123    }
124
125    /// Set the skip-worktree bit (promotes entry to v3).
126    pub fn set_skip_worktree(&mut self, value: bool) {
127        let fe = self.flags_extended.get_or_insert(0);
128        if value {
129            *fe |= 0x4000;
130        } else {
131            *fe &= !0x4000;
132            if *fe == 0 {
133                self.flags_extended = None;
134            }
135        }
136    }
137
138    /// Whether the intent-to-add bit is set (extended flags, v3+).
139    #[must_use]
140    pub fn intent_to_add(&self) -> bool {
141        self.flags_extended
142            .map(|f| f & 0x2000 != 0)
143            .unwrap_or(false)
144    }
145
146    /// Set the intent-to-add bit (promotes entry to v3).
147    pub fn set_intent_to_add(&mut self, value: bool) {
148        let fe = self.flags_extended.get_or_insert(0);
149        if value {
150            *fe |= 0x2000;
151        } else {
152            *fe &= !0x2000;
153            if *fe == 0 {
154                self.flags_extended = None;
155            }
156        }
157    }
158
159    /// Sparse-index placeholder: tree mode, stage 0, and `SKIP_WORKTREE` set.
160    #[must_use]
161    pub fn is_sparse_directory_placeholder(&self) -> bool {
162        self.mode == MODE_TREE && self.stage() == 0 && self.skip_worktree()
163    }
164
165    /// In-memory only: `ls-files --with-tree` hides stage-1 overlay rows that duplicate stage 0.
166    const FLAG_EXT_OVERLAY_TREE_SKIP: u16 = 0x8000;
167    /// In-memory and on-disk compatibility bit for fsmonitor validity (`git ls-files -f`).
168    const FLAG_EXT_FSMONITOR_VALID: u16 = 0x1000;
169
170    #[must_use]
171    pub fn overlay_tree_skip_output(&self) -> bool {
172        self.flags_extended
173            .is_some_and(|fe| fe & Self::FLAG_EXT_OVERLAY_TREE_SKIP != 0)
174    }
175
176    fn set_overlay_tree_skip_output(&mut self, value: bool) {
177        let fe = self.flags_extended.get_or_insert(0);
178        if value {
179            *fe |= Self::FLAG_EXT_OVERLAY_TREE_SKIP;
180        } else {
181            *fe &= !Self::FLAG_EXT_OVERLAY_TREE_SKIP;
182            if *fe == 0 {
183                self.flags_extended = None;
184            }
185        }
186    }
187
188    /// Whether the fsmonitor-valid bit is set.
189    #[must_use]
190    pub fn fsmonitor_valid(&self) -> bool {
191        self.flags_extended
192            .is_some_and(|fe| fe & Self::FLAG_EXT_FSMONITOR_VALID != 0)
193    }
194
195    /// Set or clear the fsmonitor-valid bit.
196    pub fn set_fsmonitor_valid(&mut self, value: bool) {
197        let fe = self.flags_extended.get_or_insert(0);
198        if value {
199            *fe |= Self::FLAG_EXT_FSMONITOR_VALID;
200        } else {
201            *fe &= !Self::FLAG_EXT_FSMONITOR_VALID;
202            if *fe == 0 {
203                self.flags_extended = None;
204            }
205        }
206    }
207}
208
/// The in-memory representation of the Git index file.
///
/// Built by [`Index::parse`] and written back by `Index::write` /
/// `Index::write_to_path`. Extension payloads the parser recognizes are kept
/// as typed fields; unknown extensions are skipped on read.
#[derive(Debug, Clone, Default)]
pub struct Index {
    /// Index format version (2 or 3; 4 is accepted on read, downgraded on write).
    pub version: u32,
    /// Index entries, sorted by (path, stage).
    pub entries: Vec<IndexEntry>,
    /// When true, the on-disk index includes the `sdir` extension (sparse index).
    pub sparse_directories: bool,
    /// Optional untracked-cache extension (`UNTR`), matching Git's `istate->untracked`.
    pub untracked_cache: Option<untracked_cache::UntrackedCache>,
    /// Optional fsmonitor token extension (`FSMN`).
    pub fsmonitor_last_update: Option<String>,
    /// Optional `REUC` resolve-undo extension (paths that were unmerged before a resolution).
    pub resolve_undo: Option<BTreeMap<Vec<u8>, ResolveUndoRecord>>,
    /// Split index `link` extension (bitmaps cleared after load merge).
    pub(crate) split_link: Option<crate::split_index::SplitIndexLink>,
    /// Root tree OID from a valid `TREE` index extension (`cache_tree`), when present.
    pub cache_tree_root: Option<ObjectId>,
}
229
/// Options for loading an index from disk.
#[derive(Debug, Clone, Copy)]
pub struct IndexLoadOptions {
    /// If the index contains sparse directory placeholders, expand them to full file entries.
    pub expand_sparse_directories: bool,
}

impl Default for IndexLoadOptions {
    /// Expansion defaults to on, so callers see a full-file view unless they opt out.
    fn default() -> Self {
        Self { expand_sparse_directories: true }
    }
}
244
/// Version used after an invalid `GIT_INDEX_VERSION` value (matches Git stderr: "Using version 3").
const INDEX_ENV_INVALID_FALLBACK: u32 = 3;
/// Version used after an invalid `index.version` config value (same message as env).
const INDEX_CONFIG_INVALID_FALLBACK: u32 = 3;
/// Minimum supported index version.
const INDEX_FORMAT_LB: u32 = 2;
/// Maximum supported index version (version 4 requests are accepted and
/// downgraded on write).
const INDEX_FORMAT_UB: u32 = 4;
/// Index extension signature `TREE` (cache-tree).
///
/// Spelled via `from_be_bytes` for consistency with the other extension
/// signatures (`sdir`, `UNTR`, `FSMN`, `REUC`, `link`); this equals the
/// former hex literal `0x5452_4545`.
const INDEX_EXT_CACHE_TREE: u32 = u32::from_be_bytes(*b"TREE");
256
257/// Best-effort read of the root OID from Git's `TREE` index extension (`cache_tree_read`).
258fn parse_cache_tree_root_oid(data: &[u8]) -> Option<ObjectId> {
259    // Root node: leading NUL name (`read_one` skips NUL-terminated name; empty name is one byte).
260    if data.first().copied()? != 0 {
261        return None;
262    }
263    let mut i = 1usize;
264    let (entry_count, ni) = parse_signed_int_prefix(&data[i..])?;
265    i += ni;
266    if data.get(i) != Some(&b' ') {
267        return None;
268    }
269    i += 1;
270    let (_subtree_nr, ni2) = parse_signed_int_prefix(&data[i..])?;
271    i += ni2;
272    if data.get(i) != Some(&b'\n') {
273        return None;
274    }
275    i += 1;
276    if entry_count < 0 {
277        return None;
278    }
279    if data.len().saturating_sub(i) < 20 {
280        return None;
281    }
282    let raw: [u8; 20] = data[i..i + 20].try_into().ok()?;
283    ObjectId::from_bytes(&raw).ok()
284}
285
/// Parse an optionally sign-prefixed decimal integer at the start of `data`,
/// skipping any leading spaces.
///
/// Returns the value and the number of bytes consumed (leading spaces and sign
/// included). Returns `None` when no digit follows, or on `i32` overflow.
fn parse_signed_int_prefix(data: &[u8]) -> Option<(i32, usize)> {
    let spaces = data.iter().take_while(|&&b| b == b' ').count();

    let mut end = spaces;
    if data.get(end) == Some(&b'-') {
        end += 1;
    }
    let digits_from = end;
    end += data[end..].iter().take_while(|b| b.is_ascii_digit()).count();

    // At least one digit is required after the optional sign.
    if end == digits_from {
        return None;
    }

    let text = std::str::from_utf8(&data[spaces..end]).ok()?;
    text.parse::<i32>().ok().map(|value| (value, end))
}
306
307/// Read `GIT_INDEX_VERSION` and return the requested version.
308///
309/// If the environment variable is unset, returns `None`.
310/// If it is set but invalid (non-numeric or out of range 2..=4), prints a
311/// warning to stderr and returns the default version.
312pub fn get_index_format_from_env() -> Option<u32> {
313    let val = std::env::var("GIT_INDEX_VERSION").ok()?;
314    if val.is_empty() {
315        return None;
316    }
317    match val.parse::<u32>() {
318        Ok(v) if (INDEX_FORMAT_LB..=INDEX_FORMAT_UB).contains(&v) => Some(v),
319        _ => {
320            eprintln!(
321                "warning: GIT_INDEX_VERSION set, but the value is invalid.\n\
322                 Using version {INDEX_ENV_INVALID_FALLBACK}"
323            );
324            Some(INDEX_ENV_INVALID_FALLBACK)
325        }
326    }
327}
328
329impl Index {
330    /// Create a new, empty index.
331    ///
332    /// Respects `GIT_INDEX_VERSION` if set, otherwise defaults to version 2.
333    #[must_use]
334    pub fn new() -> Self {
335        let version = get_index_format_from_env().unwrap_or(2);
336        Self {
337            version,
338            entries: Vec::new(),
339            sparse_directories: false,
340            untracked_cache: None,
341            fsmonitor_last_update: None,
342            resolve_undo: None,
343            split_link: None,
344            cache_tree_root: None,
345        }
346    }
347
348    /// Create a new empty index, respecting config values for version.
349    ///
350    /// Priority matches Git's `prepare_repo_settings`: `GIT_INDEX_VERSION` env, then
351    /// `feature.manyFiles` (implies version 4), then `index.version` (overrides version).
352    pub fn new_with_config(
353        config_index_version: Option<&str>,
354        config_many_files: Option<&str>,
355    ) -> Self {
356        if let Some(v) = get_index_format_from_env() {
357            return Self {
358                version: v,
359                entries: Vec::new(),
360                sparse_directories: false,
361                untracked_cache: None,
362                fsmonitor_last_update: None,
363                resolve_undo: None,
364                split_link: None,
365                cache_tree_root: None,
366            };
367        }
368
369        let many_files = config_truthy(config_many_files);
370        let mut version = if many_files { 4 } else { 2 };
371
372        if let Some(val) = config_index_version {
373            let trimmed = val.trim();
374            if !trimmed.is_empty() {
375                match trimmed.parse::<u32>() {
376                    Ok(v) if (INDEX_FORMAT_LB..=INDEX_FORMAT_UB).contains(&v) => {
377                        version = v;
378                    }
379                    _ => {
380                        eprintln!(
381                            "warning: index.version set, but the value is invalid.\n\
382                             Using version {INDEX_CONFIG_INVALID_FALLBACK}"
383                        );
384                        version = INDEX_CONFIG_INVALID_FALLBACK;
385                    }
386                }
387            }
388        }
389
390        Self {
391            version,
392            entries: Vec::new(),
393            sparse_directories: false,
394            untracked_cache: None,
395            fsmonitor_last_update: None,
396            resolve_undo: None,
397            split_link: None,
398            cache_tree_root: None,
399        }
400    }
401
402    /// New empty index using a loaded [`ConfigSet`] (includes `-c` / `GIT_CONFIG_PARAMETERS`).
403    ///
404    /// Same precedence as [`Self::new_with_config`], but reads `feature.manyFiles` and
405    /// `index.version` from `config`.
406    #[must_use]
407    pub fn new_from_config(config: &ConfigSet) -> Self {
408        if let Some(v) = get_index_format_from_env() {
409            return Self {
410                version: v,
411                entries: Vec::new(),
412                sparse_directories: false,
413                untracked_cache: None,
414                fsmonitor_last_update: None,
415                resolve_undo: None,
416                split_link: None,
417                cache_tree_root: None,
418            };
419        }
420
421        let many_files = config
422            .get_bool("feature.manyFiles")
423            .and_then(|r| r.ok())
424            .unwrap_or(false);
425        let mut version = if many_files { 4 } else { 2 };
426
427        if let Some(val) = config.get("index.version") {
428            let trimmed = val.trim();
429            if !trimmed.is_empty() {
430                match trimmed.parse::<u32>() {
431                    Ok(v) if (INDEX_FORMAT_LB..=INDEX_FORMAT_UB).contains(&v) => {
432                        version = v;
433                    }
434                    _ => {
435                        eprintln!(
436                            "warning: index.version set, but the value is invalid.\n\
437                             Using version {INDEX_CONFIG_INVALID_FALLBACK}"
438                        );
439                        version = INDEX_CONFIG_INVALID_FALLBACK;
440                    }
441                }
442            }
443        }
444
445        Self {
446            version,
447            entries: Vec::new(),
448            sparse_directories: false,
449            untracked_cache: None,
450            fsmonitor_last_update: None,
451            resolve_undo: None,
452            split_link: None,
453            cache_tree_root: None,
454        }
455    }
456
457    /// Load an index from the given file path without expanding sparse-directory placeholders.
458    ///
459    /// Returns an empty index if the file does not exist.
460    ///
461    /// # Errors
462    ///
463    /// Returns [`Error::IndexError`] if the file is present but corrupt.
464    pub fn load(path: &Path) -> Result<Self> {
465        match fs::read(path) {
466            Ok(data) => Self::parse(&data),
467            Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(Self {
468                sparse_directories: false,
469                ..Self::new()
470            }),
471            Err(e) => Err(Error::Io(e)),
472        }
473    }
474
475    /// Load an index and expand sparse-directory placeholders using the object database.
476    ///
477    /// After a successful return, [`Index::sparse_directories`] is cleared and every
478    /// placeholder is replaced by the blob entries from the referenced tree.
479    pub fn load_expand_sparse(path: &Path, odb: &Odb) -> Result<Self> {
480        let mut idx = Self::load(path)?;
481        idx.expand_sparse_directory_placeholders(odb)?;
482        Ok(idx)
483    }
484
485    /// Like [`Index::load_expand_sparse`], but treats a missing index or Git's
486    /// `"file too short"` placeholder as an empty index.
487    pub fn load_expand_sparse_optional(path: &Path, odb: &Odb) -> Result<Self> {
488        let mut idx = match fs::read(path) {
489            Ok(data) => Self::parse(&data).or_else(|e| match e {
490                Error::IndexError(msg) if msg == "file too short" => Ok(Self::new()),
491                other => Err(other),
492            })?,
493            Err(e) if e.kind() == io::ErrorKind::NotFound => Self::new(),
494            Err(e) => return Err(Error::Io(e)),
495        };
496        idx.expand_sparse_directory_placeholders(odb)?;
497        Ok(idx)
498    }
499
500    /// Returns true if the index contains sparse-index tree placeholders (`MODE_TREE` + skip-worktree).
501    #[must_use]
502    pub fn has_sparse_directory_placeholders(&self) -> bool {
503        self.entries
504            .iter()
505            .any(IndexEntry::is_sparse_directory_placeholder)
506    }
507
508    /// Replace sparse-directory placeholder entries with all blob paths from their trees.
509    ///
510    /// Each placeholder must reference a tree object. New entries are marked skip-worktree like Git's
511    /// expanded index, except we keep `sparse_directories` false in memory after expansion.
512    pub fn expand_sparse_directory_placeholders(&mut self, odb: &Odb) -> Result<()> {
513        if !self.has_sparse_directory_placeholders() {
514            return Ok(());
515        }
516        let mut out: Vec<IndexEntry> = Vec::with_capacity(self.entries.len());
517        for entry in self.entries.drain(..) {
518            if entry.is_sparse_directory_placeholder() {
519                let prefix = trim_trailing_slash_bytes(&entry.path);
520                let blobs = flatten_tree_blobs(odb, &entry.oid, prefix)?;
521                out.extend(blobs);
522            } else {
523                out.push(entry);
524            }
525        }
526        self.entries = out;
527        self.sparse_directories = false;
528        self.sort();
529        Ok(())
530    }
531
    /// Collapse consecutive skip-worktree subtrees into sparse-directory placeholders when
    /// `cone_mode` is true and each directory is outside the sparse cone.
    ///
    /// `head_tree` is the tree OID at `HEAD`. When `enable_sparse_index` is false, clears
    /// [`Index::sparse_directories`] and returns without collapsing.
    ///
    /// A directory is only collapsed when every one of its index entries is a
    /// stage-0, skip-worktree blob that exactly matches what `HEAD`'s tree
    /// contains for that prefix; otherwise the directory stays expanded.
    pub fn try_collapse_sparse_directories(
        &mut self,
        odb: &Odb,
        head_tree: &ObjectId,
        patterns: &[String],
        cone_mode: bool,
        enable_sparse_index: bool,
    ) -> Result<()> {
        // Sparse directories only make sense for cone-mode sparse checkouts
        // with the sparse index enabled; otherwise force the flag off.
        if !enable_sparse_index || !cone_mode {
            self.sparse_directories = false;
            return Ok(());
        }

        // Candidate prefixes: every ancestor directory of a stage-0,
        // skip-worktree blob entry (existing tree placeholders are skipped).
        let mut prefixes = BTreeSet::<Vec<u8>>::new();
        for e in &self.entries {
            if e.stage() != 0 || e.mode == MODE_TREE || !e.skip_worktree() {
                continue;
            }
            collect_directory_prefixes(&e.path, &mut prefixes);
        }

        let mut collapsed_any = false;
        // Deepest prefixes first so nested dirs collapse before parents.
        let mut ordered: Vec<Vec<u8>> = prefixes.into_iter().collect();
        ordered.sort_by_key(|p| std::cmp::Reverse(p.len()));

        for pref in ordered {
            // Directories inside the sparse cone must stay expanded.
            let pref_str = String::from_utf8_lossy(&pref);
            if directory_in_cone(&pref_str, patterns, cone_mode) {
                continue;
            }
            // The placeholder's OID is taken from HEAD's tree; directories not
            // present in HEAD cannot be collapsed.
            let Some(subtree_oid) = tree_oid_for_prefix(odb, head_tree, &pref)? else {
                continue;
            };
            // Expected blobs under this prefix according to HEAD, filtered by
            // the sparse patterns (see helper for exact semantics).
            let expected = collect_sparse_aware_expected_blobs(
                odb,
                &subtree_oid,
                &pref,
                patterns,
                cone_mode,
                &self.entries,
            )?;
            if expected.is_empty() {
                continue;
            }
            // Gather the stage-0 index entries actually under this prefix.
            let mut matched = Vec::new();
            for e in &self.entries {
                if e.stage() != 0 {
                    continue;
                }
                if path_under_prefix(&e.path, &pref) && e.mode != MODE_TREE {
                    matched.push(e.clone());
                }
            }
            // Index content must match HEAD exactly (same paths, OIDs, modes);
            // any difference means the directory is not safely collapsible.
            if matched.len() != expected.len() {
                continue;
            }
            matched.sort_by(|a, b| a.path.cmp(&b.path));
            let mut exp_sorted = expected;
            exp_sorted.sort_by(|a, b| a.path.cmp(&b.path));
            if !matched
                .iter()
                .zip(exp_sorted.iter())
                .all(|(a, b)| a.path == b.path && a.oid == b.oid && a.mode == b.mode)
            {
                continue;
            }
            // Every entry must already be skip-worktree.
            if !matched.iter().all(|e| e.skip_worktree()) {
                continue;
            }

            // Replace the matched entries with a single `dir/` placeholder:
            // tree mode, stage 0, skip-worktree set, zeroed stat fields.
            let mut path_with_slash = pref.clone();
            if !path_with_slash.ends_with(b"/") {
                path_with_slash.push(b'/');
            }
            self.entries
                .retain(|e| e.stage() != 0 || !path_under_prefix(&e.path, &pref));
            let mut placeholder = IndexEntry {
                ctime_sec: 0,
                ctime_nsec: 0,
                mtime_sec: 0,
                mtime_nsec: 0,
                dev: 0,
                ino: 0,
                mode: MODE_TREE,
                uid: 0,
                gid: 0,
                size: 0,
                oid: subtree_oid,
                // Name-length field of `flags`, capped at 0xFFF like Git.
                flags: path_with_slash.len().min(0xFFF) as u16,
                flags_extended: Some(0),
                path: path_with_slash,
                base_index_pos: 0,
            };
            placeholder.set_skip_worktree(true);
            self.add_or_replace(placeholder);
            collapsed_any = true;
        }

        // The `sdir` extension is only written when at least one placeholder exists.
        if collapsed_any {
            self.sort();
            self.sparse_directories = true;
        } else {
            self.sparse_directories = false;
        }
        Ok(())
    }
644
645    /// Parse index bytes (the whole file including trailing SHA-1).
646    ///
647    /// # Errors
648    ///
649    /// Returns [`Error::IndexError`] on structural problems.
650    pub fn parse(data: &[u8]) -> Result<Self> {
651        if data.len() < 12 {
652            return Err(Error::IndexError("file too short".to_owned()));
653        }
654
655        // Trailing SHA-1: normal index is a hash of the body; Git may write all zeros when
656        // `index.skipHash` / `feature.manyFiles` skips computing the checksum.
657        let (body, checksum) = data.split_at(data.len() - 20);
658        if !checksum.iter().all(|&b| b == 0) {
659            let mut hasher = Sha1::new();
660            hasher.update(body);
661            let computed = hasher.finalize();
662            if computed.as_slice() != checksum {
663                return Err(Error::IndexError("SHA-1 checksum mismatch".to_owned()));
664            }
665        }
666
667        // Header
668        let magic = &body[..4];
669        if magic != b"DIRC" {
670            return Err(Error::IndexError("bad magic: expected DIRC".to_owned()));
671        }
672        let version = u32::from_be_bytes(
673            body[4..8]
674                .try_into()
675                .map_err(|_| Error::IndexError("cannot read version".to_owned()))?,
676        );
677        if version != 2 && version != 3 && version != 4 {
678            return Err(Error::IndexError(format!(
679                "unsupported index version {version}"
680            )));
681        }
682        let count = u32::from_be_bytes(
683            body[8..12]
684                .try_into()
685                .map_err(|_| Error::IndexError("cannot read entry count".to_owned()))?,
686        );
687
688        let mut pos = 12usize;
689        let mut entries = Vec::with_capacity(count as usize);
690
691        let mut prev_path: Vec<u8> = Vec::new();
692        for _ in 0..count {
693            let (entry, consumed) = parse_entry(&body[pos..], version, &prev_path)?;
694            prev_path = entry.path.clone();
695            entries.push(entry);
696            pos += consumed;
697        }
698
699        let mut sparse_directories = false;
700        let mut untracked_cache = None;
701        let mut fsmonitor_last_update = None;
702        let mut resolve_undo = None;
703        let mut split_link = None;
704        let mut cache_tree_root = None;
705        while pos + 8 <= body.len() {
706            let sig = u32::from_be_bytes(
707                body[pos..pos + 4]
708                    .try_into()
709                    .map_err(|_| Error::IndexError("truncated extension sig".to_owned()))?,
710            );
711            let ext_sz = u32::from_be_bytes(
712                body[pos + 4..pos + 8]
713                    .try_into()
714                    .map_err(|_| Error::IndexError("truncated extension size".to_owned()))?,
715            ) as usize;
716            pos += 8;
717            if pos + ext_sz > body.len() {
718                return Err(Error::IndexError(
719                    "extension overruns index body".to_owned(),
720                ));
721            }
722            if sig == INDEX_EXT_SPARSE_DIRECTORIES {
723                sparse_directories = true;
724            } else if sig == INDEX_EXT_UNTRACKED {
725                let ext_data = &body[pos..pos + ext_sz];
726                untracked_cache = untracked_cache::parse_untracked_extension(ext_data);
727            } else if sig == INDEX_EXT_FSMONITOR {
728                let ext_data = &body[pos..pos + ext_sz];
729                let token_bytes = if let Some(nul) = ext_data.iter().position(|&b| b == 0) {
730                    &ext_data[..nul]
731                } else {
732                    ext_data
733                };
734                fsmonitor_last_update = Some(String::from_utf8_lossy(token_bytes).into_owned());
735            } else if sig == INDEX_EXT_RESOLVE_UNDO {
736                let ext_data = &body[pos..pos + ext_sz];
737                resolve_undo = Some(resolve_undo::parse_resolve_undo_payload(ext_data)?);
738            } else if sig == INDEX_EXT_LINK {
739                let ext_data = &body[pos..pos + ext_sz];
740                split_link = Some(crate::split_index::parse_link_extension(ext_data)?);
741            } else if sig == INDEX_EXT_CACHE_TREE {
742                let ext_data = &body[pos..pos + ext_sz];
743                cache_tree_root = parse_cache_tree_root_oid(ext_data);
744            }
745            pos += ext_sz;
746        }
747        if pos != body.len() {
748            return Err(Error::IndexError("junk after index extensions".to_owned()));
749        }
750
751        Ok(Self {
752            version,
753            entries,
754            sparse_directories,
755            untracked_cache,
756            fsmonitor_last_update,
757            resolve_undo,
758            split_link,
759            cache_tree_root,
760        })
761    }
762
763    /// Write the index to a file, computing and appending the trailing SHA-1.
764    ///
765    /// # Errors
766    ///
767    /// Returns [`Error::Io`] on filesystem errors.
768    pub fn write(&self, path: &Path) -> Result<()> {
769        let git_dir = path.parent();
770        let config = git_dir.and_then(|d| ConfigSet::load(Some(d), true).ok());
771        let skip_hash = index_skip_hash_for_write(config.as_ref());
772        self.write_to_path(path, skip_hash)
773    }
774
775    /// Write this index to `path` with an explicit trailing-checksum policy.
776    ///
777    /// When `skip_hash` is true, the trailing SHA-1 is written as all zeros (Git `index.skipHash`).
778    pub fn write_to_path(&self, path: &Path, skip_hash: bool) -> Result<()> {
779        let mut sorted = self.clone();
780        sorted.sort();
781
782        let mut body = Vec::new();
783        sorted.serialize_into(&mut body)?;
784
785        let checksum: [u8; 20] = if skip_hash {
786            [0u8; 20]
787        } else {
788            let mut hasher = Sha1::new();
789            hasher.update(&body);
790            hasher.finalize().into()
791        };
792
793        let tmp_path = path.with_extension("lock");
794        let pid_path = pid_path_for_lock(&tmp_path);
795        let lockfile_pid_enabled = lockfile_pid_enabled(path);
796
797        let mut lock_file = match fs::OpenOptions::new()
798            .write(true)
799            .create_new(true)
800            .open(&tmp_path)
801        {
802            Ok(file) => file,
803            Err(e) if e.kind() == io::ErrorKind::AlreadyExists => {
804                let message = build_lock_exists_message(&tmp_path, &pid_path, &e);
805                return Err(Error::Io(io::Error::new(
806                    io::ErrorKind::AlreadyExists,
807                    message,
808                )));
809            }
810            Err(e) => return Err(Error::Io(e)),
811        };
812
813        let mut wrote_pid_file = false;
814        if lockfile_pid_enabled {
815            if let Err(e) = write_lock_pid_file(&pid_path) {
816                let _ = fs::remove_file(&tmp_path);
817                return Err(Error::Io(e));
818            }
819            wrote_pid_file = true;
820        }
821
822        if let Err(e) = (|| -> io::Result<()> {
823            lock_file.write_all(&body)?;
824            lock_file.write_all(&checksum)?;
825            Ok(())
826        })() {
827            let _ = fs::remove_file(&tmp_path);
828            if wrote_pid_file {
829                let _ = fs::remove_file(&pid_path);
830            }
831            return Err(Error::Io(e));
832        }
833        drop(lock_file);
834
835        if let Err(e) = fs::rename(&tmp_path, path) {
836            let _ = fs::remove_file(&tmp_path);
837            if wrote_pid_file {
838                let _ = fs::remove_file(&pid_path);
839            }
840            return Err(Error::Io(e));
841        }
842        {
843            if wrote_pid_file {
844                let _ = fs::remove_file(&pid_path);
845            }
846        }
847        Ok(())
848    }
849
    /// Serialise the index body (without trailing checksum) into `out`.
    ///
    /// Callers must have sorted entries when using format 4 (path compression depends on order).
    ///
    /// Layout: 12-byte `DIRC` header (signature, version, entry count), then
    /// the serialised entries, then any extensions present on this index
    /// (sparse-directory marker, untracked cache, fsmonitor token,
    /// resolve-undo, split-index link), each as signature + length + payload.
    ///
    /// # Errors
    ///
    /// Currently always returns `Ok(())`; the `Result` keeps room for fallible
    /// extension serialisation.
    pub(crate) fn serialize_into(&self, out: &mut Vec<u8>) -> Result<()> {
        // Choose the on-disk version: a requested v4 is honoured; otherwise
        // entries carrying extended flags force v3, and a v3 request without
        // extended flags is downgraded to v2.
        let has_extended_flags = self.entries.iter().any(|e| e.flags_extended.is_some());
        let write_version = if self.version >= 4 {
            4
        } else if has_extended_flags {
            3
        } else if self.version >= 3 {
            2
        } else {
            self.version
        };
        // Header
        out.extend_from_slice(b"DIRC");
        out.extend_from_slice(&write_version.to_be_bytes());
        out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());

        if write_version == 4 {
            // v4 compresses each path against the previous entry's path.
            let mut previous_path: Vec<u8> = Vec::new();
            for entry in &self.entries {
                serialize_entry_v4(entry, &mut previous_path, out);
            }
        } else {
            for entry in &self.entries {
                serialize_entry(entry, write_version, out);
            }
        }
        // `sdir` marker: announces sparse directory entries; payload is empty.
        if self.sparse_directories {
            out.extend_from_slice(&INDEX_EXT_SPARSE_DIRECTORIES.to_be_bytes());
            out.extend_from_slice(&0u32.to_be_bytes());
        }
        // `UNTR`: untracked cache payload.
        if let Some(uc) = &self.untracked_cache {
            let payload = untracked_cache::write_untracked_extension(uc);
            out.extend_from_slice(&INDEX_EXT_UNTRACKED.to_be_bytes());
            out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
            out.extend_from_slice(&payload);
        }
        // `FSMN`: NUL-terminated fsmonitor token.
        if let Some(token) = &self.fsmonitor_last_update {
            let mut payload = token.as_bytes().to_vec();
            payload.push(0);
            out.extend_from_slice(&INDEX_EXT_FSMONITOR.to_be_bytes());
            out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
            out.extend_from_slice(&payload);
        }
        // Resolve-undo: only emitted when it serialises to a non-empty payload.
        if let Some(ru) = &self.resolve_undo {
            let payload = write_resolve_undo_payload(ru);
            if !payload.is_empty() {
                out.extend_from_slice(&INDEX_EXT_RESOLVE_UNDO.to_be_bytes());
                out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
                out.extend_from_slice(&payload);
            }
        }
        // Split-index link: absent bitmaps are written as empty EWAH bitmaps.
        if let Some(sl) = &self.split_link {
            use crate::ewah_bitmap::EwahBitmap;
            let del = sl
                .delete_bitmap
                .as_ref()
                .cloned()
                .unwrap_or_else(EwahBitmap::new);
            let rep = sl
                .replace_bitmap
                .as_ref()
                .cloned()
                .unwrap_or_else(EwahBitmap::new);
            let payload =
                crate::split_index::serialize_link_extension_payload(&sl.base_oid, &del, &rep);
            out.extend_from_slice(&INDEX_EXT_LINK.to_be_bytes());
            out.extend_from_slice(&(payload.len() as u32).to_be_bytes());
            out.extend_from_slice(&payload);
        }
        Ok(())
    }
924
925    /// Add or replace an entry (matched by path + stage).
926    pub fn add_or_replace(&mut self, entry: IndexEntry) {
927        let path = entry.path.clone();
928        let stage = entry.stage();
929        let mut inserted_stage0 = false;
930        // Binary search for the insertion point by (path, stage)
931        let result = self.entries.binary_search_by(|e| {
932            e.path
933                .as_slice()
934                .cmp(path.as_slice())
935                .then_with(|| e.stage().cmp(&stage))
936        });
937        match result {
938            Ok(pos) => {
939                // Preserve split-index row binding across refresh/replace (Git `ce->index`).
940                let mut e = entry;
941                e.base_index_pos = self.entries[pos].base_index_pos;
942                self.entries[pos] = e;
943            }
944            Err(pos) => {
945                // Not found — insert at sorted position
946                self.entries.insert(pos, entry);
947                inserted_stage0 = stage == 0;
948            }
949        }
950        if inserted_stage0 {
951            if let Ok(p) = std::str::from_utf8(&path) {
952                self.invalidate_untracked_cache_for_path(p);
953            }
954        }
955    }
956
957    /// Stage a file at stage 0, removing any conflict stage entries (1, 2, 3)
958    /// for the same path. This is the correct behavior for `git add` on a
959    /// conflicted file during merge/cherry-pick resolution.
960    pub fn stage_file(&mut self, entry: IndexEntry) {
961        let path = entry.path.clone();
962        for e in &self.entries {
963            if e.path == path && e.stage() != 0 {
964                resolve_undo::record_resolve_undo_for_entry(&mut self.resolve_undo, e);
965            }
966        }
967        // Remove conflict stages first
968        self.entries.retain(|e| e.path != path || e.stage() == 0);
969        // Then add/replace stage-0 entry
970        self.add_or_replace(entry);
971    }
972
    /// Drop all resolve-undo records (matches Git `resolve_undo_clear_index`).
    ///
    /// Afterwards [`Self::unmerge_path_from_resolve_undo`] can no longer
    /// re-create conflict stages for previously resolved paths.
    pub fn clear_resolve_undo(&mut self) {
        self.resolve_undo = None;
    }
977
978    /// Remove and return the resolve-undo record for `path`, if any.
979    pub fn take_resolve_undo_record(&mut self, path: &[u8]) -> Option<ResolveUndoRecord> {
980        let map = self.resolve_undo.as_mut()?;
981        let ru = map.remove(path)?;
982        if map.is_empty() {
983            self.resolve_undo = None;
984        }
985        Some(ru)
986    }
987
988    /// Replace all index entries for `path` with unmerged stages from `record`.
989    pub fn install_unmerged_from_resolve_undo(&mut self, path: &[u8], record: &ResolveUndoRecord) {
990        self.entries.retain(|e| e.path != path);
991        for stage in 1u8..=3u8 {
992            let i = (stage - 1) as usize;
993            if record.modes[i] == 0 {
994                continue;
995            }
996            let entry = IndexEntry {
997                ctime_sec: 0,
998                ctime_nsec: 0,
999                mtime_sec: 0,
1000                mtime_nsec: 0,
1001                dev: 0,
1002                ino: 0,
1003                mode: record.modes[i],
1004                uid: 0,
1005                gid: 0,
1006                size: 0,
1007                oid: record.oids[i],
1008                flags: path.len().min(0xFFF) as u16 | ((stage as u16) << 12),
1009                flags_extended: None,
1010                path: path.to_vec(),
1011                base_index_pos: 0,
1012            };
1013            self.add_or_replace(entry);
1014        }
1015        self.sort();
1016    }
1017
1018    /// Re-create unmerged index entries for `path` from the resolve-undo extension.
1019    ///
1020    /// Returns `true` when a resolve-undo record existed and was consumed (Git `unmerge_one`).
1021    pub fn unmerge_path_from_resolve_undo(&mut self, path: &[u8]) -> bool {
1022        let Some(record) = self.take_resolve_undo_record(path) else {
1023            return false;
1024        };
1025        self.install_unmerged_from_resolve_undo(path, &record);
1026        true
1027    }
1028
1029    /// Remove all entries matching the given path (all stages).
1030    ///
1031    /// Returns `true` if at least one entry was removed.
1032    pub fn remove(&mut self, path: &[u8]) -> bool {
1033        let mut removed_any = false;
1034        for e in &self.entries {
1035            if e.path == path {
1036                if e.stage() != 0 {
1037                    resolve_undo::record_resolve_undo_for_entry(&mut self.resolve_undo, e);
1038                }
1039                removed_any = true;
1040            }
1041        }
1042        if !removed_any {
1043            return false;
1044        }
1045        self.entries.retain(|e| e.path != path);
1046        if let Ok(p) = std::str::from_utf8(path) {
1047            self.invalidate_untracked_cache_for_path(p);
1048        }
1049        true
1050    }
1051
    /// Remove every index entry for `path` (all merge stages), like `remove_file_from_index`.
    ///
    /// Thin alias for [`Self::remove`], kept for parity with Git's naming.
    ///
    /// Returns whether any entry was removed.
    pub fn remove_path_all_stages(&mut self, path: &[u8]) -> bool {
        self.remove(path)
    }
1058
1059    /// Invalidate UNTR nodes affected by an index change (Git `untracked_cache_*_index`).
1060    pub fn invalidate_untracked_cache_for_path(&mut self, path: &str) {
1061        if let Some(uc) = self.untracked_cache.as_mut() {
1062            untracked_cache::invalidate_path(uc, path);
1063        }
1064    }
1065
1066    /// Remove every index entry whose path lies strictly under `path` (all stages).
1067    ///
1068    /// Used when staging a file at `path` that replaces a former directory: Git removes
1069    /// tracked paths like `path/child` from the index so they do not remain alongside
1070    /// the new blob entry.
1071    pub fn remove_descendants_under_path(&mut self, path: &str) {
1072        let prefix = path.as_bytes();
1073        if prefix.is_empty() {
1074            return;
1075        }
1076        let plen = prefix.len();
1077        let had_descendant = self.entries.iter().any(|e| {
1078            let ep = e.path.as_slice();
1079            ep.len() > plen && ep.starts_with(prefix) && ep[plen] == b'/'
1080        });
1081        for e in self.entries.iter() {
1082            let ep = e.path.as_slice();
1083            if ep.len() > plen && ep.starts_with(prefix) && ep[plen] == b'/' && e.stage() != 0 {
1084                resolve_undo::record_resolve_undo_for_entry(&mut self.resolve_undo, e);
1085            }
1086        }
1087        self.entries.retain(|e| {
1088            let ep = e.path.as_slice();
1089            if ep.len() <= plen {
1090                return true;
1091            }
1092            if !ep.starts_with(prefix) {
1093                return true;
1094            }
1095            // Drop paths strictly under `prefix/` (keep same-length prefix matches like "d-other").
1096            ep[plen] != b'/'
1097        });
1098        if had_descendant {
1099            self.invalidate_untracked_cache_for_path(path);
1100        }
1101    }
1102
1103    /// Sort entries in Git's canonical order: by path, then by stage.
1104    pub fn sort(&mut self) {
1105        self.entries
1106            .sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
1107    }
1108
1109    /// OID of the shared index when this index uses split-index mode (`link` extension).
1110    #[must_use]
1111    pub fn split_index_base_oid(&self) -> Option<ObjectId> {
1112        self.split_link.as_ref().map(|l| l.base_oid)
1113    }
1114
1115    /// Find an entry by path and stage (0 for normal entries).
1116    #[must_use]
1117    pub fn get(&self, path: &[u8], stage: u8) -> Option<&IndexEntry> {
1118        self.entries
1119            .iter()
1120            .find(|e| e.path == path && e.stage() == stage)
1121    }
1122
1123    /// Find a mutable entry by path and stage.
1124    pub fn get_mut(&mut self, path: &[u8], stage: u8) -> Option<&mut IndexEntry> {
1125        self.entries
1126            .iter_mut()
1127            .find(|e| e.path == path && e.stage() == stage)
1128    }
1129
    /// Merge tree contents from `treeish` into this index as virtual stage-1 entries, matching
    /// Git's `overlay_tree_on_index` used by `git ls-files --with-tree`.
    ///
    /// Existing unmerged entries (stages 1–3) are shifted to stage 3 so stage 1 is free for the
    /// overlay. Stage-1 paths that already exist at stage 0 are marked so `ls-files` can skip
    /// them (Git's `CE_UPDATE` on the stage-1 entry).
    ///
    /// # Parameters
    ///
    /// - `repo` — repository whose object database is used to read the tree.
    /// - `treeish` — revision or tree OID string (`HEAD`, `HEAD~1`, full SHA, etc.).
    /// - `prefix` — optional path prefix (bytes, no trailing slash except empty); only paths under
    ///   this prefix are considered from the tree. Pass empty slice for the full tree.
    ///
    /// # Errors
    ///
    /// Returns [`Error`] if `treeish` cannot be resolved, the tree cannot be read, or an object is
    /// missing from the ODB.
    pub fn overlay_tree_on_index(
        &mut self,
        repo: &Repository,
        treeish: &str,
        prefix: &[u8],
    ) -> Result<()> {
        let oid = rev_parse::resolve_revision(repo, treeish)?;
        let tree_oid = peel_to_tree_oid(repo, oid)?;
        // Hoist every unmerged entry to stage 3 so stage 1 is free for the overlay.
        for e in self.entries.iter_mut() {
            if e.stage() != 0 {
                e.set_stage(3);
            }
        }
        self.sort();
        // NOTE(review): computed AFTER the hoist above, so this is always false
        // (no stage-1 entries can remain at this point). Git performs the
        // equivalent check before hoisting — confirm whether the replace-path
        // walk below is ever meant to run.
        let has_stage1 = self.entries.iter().any(|e| e.stage() == 1);
        let mut appended: Vec<IndexEntry> = Vec::new();
        read_tree_into_overlay(repo, &tree_oid, prefix, &[], has_stage1, &mut appended)?;
        for e in appended {
            self.add_or_replace(e);
        }
        if !has_stage1 {
            self.sort();
        }
        // Entries are sorted, so a stage-0 twin directly precedes its stage-1
        // overlay entry; mark those stage-1 entries for output suppression.
        let mut last_stage0: Option<&[u8]> = None;
        for e in &mut self.entries {
            match e.stage() {
                0 => {
                    last_stage0 = Some(e.path.as_slice());
                }
                1 => {
                    if last_stage0.is_some_and(|p| p == e.path.as_slice()) {
                        e.set_overlay_tree_skip_output(true);
                    }
                }
                _ => {}
            }
        }
        Ok(())
    }
1187}
1188
1189fn peel_to_tree_oid(repo: &Repository, oid: ObjectId) -> Result<ObjectId> {
1190    let obj = repo.odb.read(&oid)?;
1191    match obj.kind {
1192        ObjectKind::Tree => Ok(oid),
1193        ObjectKind::Commit => {
1194            let commit = crate::objects::parse_commit(&obj.data)?;
1195            Ok(commit.tree)
1196        }
1197        ObjectKind::Tag => {
1198            let tag = crate::objects::parse_tag(&obj.data)?;
1199            peel_to_tree_oid(repo, tag.object)
1200        }
1201        _ => Err(Error::ObjectNotFound(format!(
1202            "cannot peel {oid} to tree for --with-tree"
1203        ))),
1204    }
1205}
1206
1207fn read_tree_into_overlay(
1208    repo: &Repository,
1209    tree_oid: &ObjectId,
1210    prefix: &[u8],
1211    rel_base: &[u8],
1212    use_replace_path: bool,
1213    out: &mut Vec<IndexEntry>,
1214) -> Result<()> {
1215    let obj = repo.odb.read(tree_oid)?;
1216    if obj.kind != ObjectKind::Tree {
1217        return Err(Error::ObjectNotFound(format!(
1218            "object {tree_oid} is not a tree"
1219        )));
1220    }
1221    let entries = parse_tree(&obj.data)?;
1222    for TreeEntry { mode, name, oid } in entries {
1223        if mode == MODE_TREE {
1224            let mut path = rel_base.to_vec();
1225            if !path.is_empty() {
1226                path.push(b'/');
1227            }
1228            path.extend_from_slice(&name);
1229            if !prefix_under_or_equal(prefix, &path) {
1230                continue;
1231            }
1232            read_tree_into_overlay(repo, &oid, prefix, &path, use_replace_path, out)?;
1233            continue;
1234        }
1235        if mode == MODE_GITLINK {
1236            continue;
1237        }
1238        let mut path = rel_base.to_vec();
1239        if !path.is_empty() {
1240            path.push(b'/');
1241        }
1242        path.extend_from_slice(&name);
1243        if !prefix_under_or_equal(prefix, &path) {
1244            continue;
1245        }
1246        let entry = synthetic_stage1_index_entry(mode, &path, oid);
1247        if use_replace_path {
1248            if let Some(pos) = out.iter().position(|e| e.path == path && e.stage() == 1) {
1249                out[pos] = entry;
1250            } else {
1251                out.push(entry);
1252            }
1253        } else {
1254            out.push(entry);
1255        }
1256    }
1257    Ok(())
1258}
1259
/// True when `path` equals `prefix` or lies beneath `prefix/`.
/// An empty `prefix` matches every path.
fn prefix_under_or_equal(prefix: &[u8], path: &[u8]) -> bool {
    if prefix.is_empty() || path == prefix {
        return true;
    }
    // Require a component boundary right after the prefix.
    path.starts_with(prefix) && path.get(prefix.len()) == Some(&b'/')
}
1269
1270fn synthetic_stage1_index_entry(mode: u32, path: &[u8], oid: ObjectId) -> IndexEntry {
1271    let path_len = path.len().min(0xFFF) as u16;
1272    let flags = (1u16 << 12) | path_len;
1273    IndexEntry {
1274        ctime_sec: 0,
1275        ctime_nsec: 0,
1276        mtime_sec: 0,
1277        mtime_nsec: 0,
1278        dev: 0,
1279        ino: 0,
1280        mode,
1281        uid: 0,
1282        gid: 0,
1283        size: 0,
1284        oid,
1285        flags,
1286        flags_extended: None,
1287        path: path.to_vec(),
1288        base_index_pos: 0,
1289    }
1290}
1291
/// Interpret a raw config value as a boolean-ish "true".
///
/// Accepts `true`, `yes`, `1`, and `on` (case-insensitive, surrounding
/// whitespace ignored); `None` and every other value are false.
fn config_truthy(raw: Option<&str>) -> bool {
    raw.is_some_and(|value| {
        matches!(
            value.trim().to_lowercase().as_str(),
            "true" | "yes" | "1" | "on"
        )
    })
}
1299
1300/// Whether to write 20 zero bytes instead of the SHA-1 of the index body.
1301///
1302/// Mirrors Git `prepare_repo_settings`: `feature.manyFiles` enables skip-hash unless
1303/// `index.skipHash` / `index.skiphash` is explicitly false; otherwise honor true `index.skipHash`.
1304pub(crate) fn index_skip_hash_for_write(config: Option<&ConfigSet>) -> bool {
1305    let Some(config) = config else {
1306        return false;
1307    };
1308    let many_files = config
1309        .get_bool("feature.manyFiles")
1310        .and_then(|r| r.ok())
1311        .unwrap_or(false);
1312    if many_files {
1313        if let Some(Ok(false)) = config.get_bool("index.skipHash") {
1314            return false;
1315        }
1316        if let Some(Ok(false)) = config.get_bool("index.skiphash") {
1317            return false;
1318        }
1319        return true;
1320    }
1321    for key in ["index.skipHash", "index.skiphash"] {
1322        if let Some(Ok(true)) = config.get_bool(key) {
1323            return true;
1324        }
1325    }
1326    false
1327}
1328
/// Return `path` with a single trailing `/` removed, if present.
fn trim_trailing_slash_bytes(path: &[u8]) -> &[u8] {
    match path {
        [head @ .., b'/'] => head,
        other => other,
    }
}
1332
/// True when `path` is `prefix` itself or lies below `prefix/`.
/// An empty `prefix` covers everything.
fn path_under_prefix(path: &[u8], prefix: &[u8]) -> bool {
    if path == prefix || prefix.is_empty() {
        return true;
    }
    // After stripping the prefix, the remainder must begin at a '/' boundary.
    path.strip_prefix(prefix)
        .is_some_and(|rest| rest.first() == Some(&b'/'))
}
1342
/// Whether `dir_path` matches the sparse-checkout `patterns`.
///
/// Thin wrapper over [`crate::sparse_checkout::path_matches_sparse_patterns`];
/// `cone_mode` selects cone-mode pattern semantics. NOTE(review): whether a
/// match means "inside the cone" or "excluded" depends on that helper's
/// convention — confirm at the call sites.
fn directory_in_cone(dir_path: &str, patterns: &[String], cone_mode: bool) -> bool {
    crate::sparse_checkout::path_matches_sparse_patterns(dir_path, patterns, cone_mode)
}
1346
/// Insert every proper directory prefix of `path` into `out`.
///
/// For `a/b/c` this inserts `a` and `a/b` (the bytes before each `/`), never
/// the full path itself.
fn collect_directory_prefixes(path: &[u8], out: &mut BTreeSet<Vec<u8>>) {
    let slash_positions = path
        .iter()
        .enumerate()
        .filter(|&(_, &byte)| byte == b'/')
        .map(|(idx, _)| idx);
    out.extend(slash_positions.map(|idx| path[..idx].to_vec()));
}
1354
1355fn tree_oid_for_prefix(odb: &Odb, root_tree: &ObjectId, prefix: &[u8]) -> Result<Option<ObjectId>> {
1356    if prefix.is_empty() {
1357        return Ok(Some(*root_tree));
1358    }
1359    let pref_str = String::from_utf8_lossy(prefix);
1360    let components: Vec<&str> = pref_str.split('/').filter(|c| !c.is_empty()).collect();
1361    let mut current = *root_tree;
1362    for comp in components {
1363        let obj = odb.read(&current)?;
1364        if obj.kind != ObjectKind::Tree {
1365            return Ok(None);
1366        }
1367        let entries = parse_tree(&obj.data)?;
1368        let mut next = None;
1369        for e in entries {
1370            if e.name == comp.as_bytes() {
1371                if e.mode == MODE_TREE {
1372                    next = Some(e.oid);
1373                }
1374                break;
1375            }
1376        }
1377        current = match next {
1378            Some(o) => o,
1379            None => return Ok(None),
1380        };
1381    }
1382    Ok(Some(current))
1383}
1384
/// Build the list of blob index entries under `prefix` that match `HEAD` at `tree_oid`,
/// treating existing sparse-directory placeholders in `entries` as opaque subtrees (like Git).
///
/// Delegates the recursive walk to [`walk_sparse_aware`]; see that function
/// for the skip rules applied to placeholders and sparse-pattern matches.
///
/// # Errors
///
/// Propagates object-database read/parse failures from the walk.
fn collect_sparse_aware_expected_blobs(
    odb: &Odb,
    tree_oid: &ObjectId,
    prefix: &[u8],
    patterns: &[String],
    cone_mode: bool,
    entries: &[IndexEntry],
) -> Result<Vec<IndexEntry>> {
    let mut out = Vec::new();
    walk_sparse_aware(
        odb, tree_oid, prefix, patterns, cone_mode, entries, &mut out,
    )?;
    Ok(out)
}
1401
/// Recursive worker for [`collect_sparse_aware_expected_blobs`].
///
/// Walks the tree at `tree_oid`, joining child names onto `prefix`, and pushes
/// a zero-stat, skip-worktree index entry for each blob. Two kinds of nodes
/// are skipped:
/// - a subtree already represented in `entries` by a stage-0 sparse-directory
///   placeholder with the same `path/` and OID (treated as opaque, like Git);
/// - a blob whose path matches `patterns`
///   (NOTE(review): assumes a match means the path is materialised in the
///   worktree and therefore not "expected" here — confirm against
///   `path_matches_sparse_patterns`).
fn walk_sparse_aware(
    odb: &Odb,
    tree_oid: &ObjectId,
    prefix: &[u8],
    patterns: &[String],
    cone_mode: bool,
    entries: &[IndexEntry],
    out: &mut Vec<IndexEntry>,
) -> Result<()> {
    let obj = odb.read(tree_oid)?;
    if obj.kind != ObjectKind::Tree {
        return Err(Error::IndexError(format!("expected tree at {}", tree_oid)));
    }
    let tree_entries = parse_tree(&obj.data)?;
    for te in tree_entries {
        // Full path of this child: "prefix/name" (or just "name" at the root).
        let path = if prefix.is_empty() {
            te.name.clone()
        } else {
            let mut p = prefix.to_vec();
            p.push(b'/');
            p.extend_from_slice(&te.name);
            p
        };
        if te.mode == MODE_TREE {
            // Placeholder entries store their path with a trailing slash.
            let path_slash = {
                let mut p = path.clone();
                p.push(b'/');
                p
            };
            if entries.iter().any(|e| {
                e.stage() == 0
                    && e.is_sparse_directory_placeholder()
                    && e.path == path_slash
                    && e.oid == te.oid
            }) {
                // Matching placeholder: keep the subtree collapsed, do not expand.
                continue;
            }
            walk_sparse_aware(odb, &te.oid, &path, patterns, cone_mode, entries, out)?;
        } else {
            // Low 12 bits of the flags word carry the (capped) path length.
            let path_len = path.len().min(0xFFF) as u16;
            let path_str = String::from_utf8_lossy(&path);
            if crate::sparse_checkout::path_matches_sparse_patterns(&path_str, patterns, cone_mode)
            {
                continue;
            }
            let mut e = IndexEntry {
                ctime_sec: 0,
                ctime_nsec: 0,
                mtime_sec: 0,
                mtime_nsec: 0,
                dev: 0,
                ino: 0,
                mode: te.mode,
                uid: 0,
                gid: 0,
                size: 0,
                oid: te.oid,
                flags: path_len,
                flags_extended: Some(0),
                path,
                base_index_pos: 0,
            };
            e.set_skip_worktree(true);
            out.push(e);
        }
    }
    Ok(())
}
1470
1471fn flatten_tree_blobs(odb: &Odb, tree_oid: &ObjectId, prefix: &[u8]) -> Result<Vec<IndexEntry>> {
1472    let obj = odb.read(tree_oid)?;
1473    if obj.kind != ObjectKind::Tree {
1474        return Err(Error::IndexError(format!("expected tree at {}", tree_oid)));
1475    }
1476    let entries = parse_tree(&obj.data)?;
1477    let mut out = Vec::new();
1478    for te in entries {
1479        let path = if prefix.is_empty() {
1480            te.name.clone()
1481        } else {
1482            let mut p = prefix.to_vec();
1483            p.push(b'/');
1484            p.extend_from_slice(&te.name);
1485            p
1486        };
1487        if te.mode == MODE_TREE {
1488            let sub = flatten_tree_blobs(odb, &te.oid, &path)?;
1489            out.extend(sub);
1490        } else {
1491            let path_len = path.len().min(0xFFF) as u16;
1492            let mut e = IndexEntry {
1493                ctime_sec: 0,
1494                ctime_nsec: 0,
1495                mtime_sec: 0,
1496                mtime_nsec: 0,
1497                dev: 0,
1498                ino: 0,
1499                mode: te.mode,
1500                uid: 0,
1501                gid: 0,
1502                size: 0,
1503                oid: te.oid,
1504                flags: path_len,
1505                flags_extended: Some(0),
1506                path,
1507                base_index_pos: 0,
1508            };
1509            e.set_skip_worktree(true);
1510            out.push(e);
1511        }
1512    }
1513    Ok(out)
1514}
1515
1516fn lockfile_pid_enabled(index_path: &Path) -> bool {
1517    let git_dir = match index_path.parent() {
1518        Some(dir) => dir,
1519        None => return false,
1520    };
1521
1522    ConfigSet::load(Some(git_dir), true)
1523        .ok()
1524        .and_then(|cfg| cfg.get_bool("core.lockfilepid"))
1525        .and_then(|res| res.ok())
1526        .unwrap_or(false)
1527}
1528
/// Path of the companion `~pid.lock` file for `lock_path`.
///
/// `…/index.lock` maps to `…/index~pid.lock`; a file name without a `.lock`
/// suffix simply gets `~pid.lock` appended. A path without a file name falls
/// back to the default `index.lock` name.
fn pid_path_for_lock(lock_path: &Path) -> std::path::PathBuf {
    let name = lock_path.file_name().map_or_else(
        || "index.lock".to_owned(),
        |s| s.to_string_lossy().into_owned(),
    );
    let stem = name.strip_suffix(".lock").unwrap_or(name.as_str());
    lock_path.with_file_name(format!("{stem}~pid.lock"))
}
1541
/// Create (or truncate) `pid_path` and record the current process as `pid <n>`.
///
/// # Errors
///
/// Propagates filesystem errors from creating or writing the file.
fn write_lock_pid_file(pid_path: &Path) -> io::Result<()> {
    use std::io::Write as _;
    // File::create is write + create + truncate, matching the previous options.
    let mut file = fs::File::create(pid_path)?;
    writeln!(file, "pid {}", std::process::id())
}
1552
1553/// Detail lines Git prints when the index lock file already exists (used by stash and similar).
1554pub fn format_index_lock_blocked_detail(index_path: &Path) -> String {
1555    let lock_path = index_path.with_extension("lock");
1556    let pid_path = pid_path_for_lock(&lock_path);
1557    let err = io::Error::new(io::ErrorKind::AlreadyExists, "File exists");
1558    build_lock_exists_message(&lock_path, &pid_path, &err)
1559}
1560
1561fn build_lock_exists_message(lock_path: &Path, pid_path: &Path, err: &io::Error) -> String {
1562    let mut msg = format!("Unable to create '{}': {}.\n\n", lock_path.display(), err);
1563
1564    if let Some(pid) = read_lock_pid(pid_path) {
1565        if is_process_running(pid) {
1566            msg.push_str(&format!(
1567                "Lock is held by process {pid}; if no git process is running, the lock file may be stale (PIDs can be reused)"
1568            ));
1569        } else {
1570            msg.push_str(&format!(
1571                "Lock was held by process {pid}, which is no longer running; the lock file appears to be stale"
1572            ));
1573        }
1574    } else {
1575        msg.push_str(
1576            "Another git process seems to be running in this repository, or the lock file may be stale",
1577        );
1578    }
1579
1580    msg
1581}
1582
/// Parse the pid recorded in a `~pid.lock` file.
///
/// Accepts either the `pid <n>` form written by [`write_lock_pid_file`] or a
/// bare integer; returns `None` when the file is unreadable or malformed.
fn read_lock_pid(pid_path: &Path) -> Option<u64> {
    let contents = fs::read_to_string(pid_path).ok()?;
    let line = contents.trim();
    line.strip_prefix("pid ")
        .map(str::trim)
        .unwrap_or(line)
        .parse()
        .ok()
}
1591
/// Best-effort liveness probe for `pid`.
///
/// On Linux this checks for `/proc/<pid>`; elsewhere it shells out to
/// `kill -0`. NOTE(review): the fallback assumes a POSIX `kill` binary exists
/// in `PATH` — on Windows it will always report "not running"; confirm that is
/// acceptable for lock staleness detection.
fn is_process_running(pid: u64) -> bool {
    #[cfg(target_os = "linux")]
    {
        Path::new(&format!("/proc/{pid}")).exists()
    }

    #[cfg(not(target_os = "linux"))]
    {
        std::process::Command::new("kill")
            .arg("-0")
            .arg(pid.to_string())
            .status()
            .map(|s| s.success())
            .unwrap_or(false)
    }
}
1608
1609/// Parse a single index entry from `data`, returning `(entry, bytes_consumed)`.
1610fn parse_entry(data: &[u8], version: u32, prev_path: &[u8]) -> Result<(IndexEntry, usize)> {
1611    if data.len() < 62 {
1612        return Err(Error::IndexError("entry too short".to_owned()));
1613    }
1614
1615    let mut pos = 0;
1616
1617    macro_rules! read_u32 {
1618        () => {{
1619            let v = u32::from_be_bytes(
1620                data[pos..pos + 4]
1621                    .try_into()
1622                    .map_err(|_| Error::IndexError("truncated u32".to_owned()))?,
1623            );
1624            pos += 4;
1625            v
1626        }};
1627    }
1628
1629    let ctime_sec = read_u32!();
1630    let ctime_nsec = read_u32!();
1631    let mtime_sec = read_u32!();
1632    let mtime_nsec = read_u32!();
1633    let dev = read_u32!();
1634    let ino = read_u32!();
1635    let mode = read_u32!();
1636    let uid = read_u32!();
1637    let gid = read_u32!();
1638    let size = read_u32!();
1639
1640    let oid = ObjectId::from_bytes(&data[pos..pos + 20])?;
1641    pos += 20;
1642
1643    let flags = u16::from_be_bytes(
1644        data[pos..pos + 2]
1645            .try_into()
1646            .map_err(|_| Error::IndexError("truncated flags".to_owned()))?,
1647    );
1648    pos += 2;
1649
1650    let flags_extended = if version >= 3 && flags & 0x4000 != 0 {
1651        let fe = u16::from_be_bytes(
1652            data[pos..pos + 2]
1653                .try_into()
1654                .map_err(|_| Error::IndexError("truncated extended flags".to_owned()))?,
1655        );
1656        pos += 2;
1657        Some(fe)
1658    } else {
1659        None
1660    };
1661
1662    let path;
1663    if version == 4 {
1664        // V4: prefix-compressed path
1665        let (strip_len, varint_bytes) = read_varint(&data[pos..]);
1666        pos += varint_bytes;
1667        let nul = data[pos..]
1668            .iter()
1669            .position(|&b| b == 0)
1670            .ok_or_else(|| Error::IndexError("v4 entry path missing NUL".to_owned()))?;
1671        let suffix = &data[pos..pos + nul];
1672        pos += nul + 1;
1673        let keep = prev_path.len().saturating_sub(strip_len);
1674        let mut full_path = prev_path[..keep].to_vec();
1675        full_path.extend_from_slice(suffix);
1676        path = full_path;
1677    } else {
1678        // V2/V3: NUL-terminated full path + padding
1679        let nul = data[pos..]
1680            .iter()
1681            .position(|&b| b == 0)
1682            .ok_or_else(|| Error::IndexError("entry path missing NUL terminator".to_owned()))?;
1683        path = data[pos..pos + nul].to_vec();
1684        pos += nul + 1;
1685        let entry_start = 0usize;
1686        let entry_len = pos - entry_start;
1687        let padded = (entry_len + 7) & !7;
1688        let padding = padded.saturating_sub(entry_len);
1689        pos += padding;
1690    }
1691
1692    Ok((
1693        IndexEntry {
1694            ctime_sec,
1695            ctime_nsec,
1696            mtime_sec,
1697            mtime_nsec,
1698            dev,
1699            ino,
1700            mode,
1701            uid,
1702            gid,
1703            size,
1704            oid,
1705            flags,
1706            flags_extended,
1707            path,
1708            base_index_pos: 0,
1709        },
1710        pos,
1711    ))
1712}
1713
/// Write `value` in git's variable-length integer encoding, as used for
/// index v4 path prefix compression (and pack ofs-delta offsets).
///
/// The encoding is big-endian, 7 data bits per byte, with the high bit set
/// on every byte except the last. Every continuation byte also carries an
/// implicit `+1` ("offset" varint), so each value has exactly one encoding:
/// 127 is `[0x7F]`, 128 is `[0x80, 0x00]` — not `[0x81, 0x00]`.
fn write_varint(out: &mut Vec<u8>, value: usize) {
    // Most-significant group goes first, so build the bytes back-to-front in
    // a small stack buffer and append the used tail in one go.
    // 16 bytes is ample: a 64-bit usize needs at most 10 groups of 7 bits.
    let mut buf = [0u8; 16];
    let mut pos = buf.len() - 1;
    buf[pos] = (value & 0x7F) as u8;
    let mut rest = value >> 7;
    while rest > 0 {
        // The implicit -1 per continuation step (mirrors git's encode_varint).
        rest -= 1;
        pos -= 1;
        buf[pos] = 0x80 | (rest & 0x7F) as u8;
        rest >>= 7;
    }
    out.extend_from_slice(&buf[pos..]);
}

/// Read a variable-length integer in git's varint encoding (see
/// [`write_varint`]). Returns `(value, bytes_consumed)`.
///
/// Malformed input is handled defensively: decoding stops at end of data, or
/// as soon as another 7-bit group would overflow `usize`, returning whatever
/// has been accumulated so far.
fn read_varint(data: &[u8]) -> (usize, usize) {
    let mut value: usize = 0;
    let mut pos = 0usize;
    while pos < data.len() {
        let byte = data[pos];
        pos += 1;
        value = (value << 7) | usize::from(byte & 0x7F);
        if byte & 0x80 == 0 {
            break;
        }
        // Bail out before the +1 / next shift could overflow on hostile input.
        if value >= usize::MAX >> 7 {
            break;
        }
        // The implicit +1 per continuation byte (see write_varint).
        value += 1;
    }
    (value, pos)
}
1753
1754fn serialize_entry_v4(entry: &IndexEntry, previous_path: &mut Vec<u8>, out: &mut Vec<u8>) {
1755    let write_u32 = |out: &mut Vec<u8>, v: u32| out.extend_from_slice(&v.to_be_bytes());
1756
1757    write_u32(out, entry.ctime_sec);
1758    write_u32(out, entry.ctime_nsec);
1759    write_u32(out, entry.mtime_sec);
1760    write_u32(out, entry.mtime_nsec);
1761    write_u32(out, entry.dev);
1762    write_u32(out, entry.ino);
1763    write_u32(out, entry.mode);
1764    write_u32(out, entry.uid);
1765    write_u32(out, entry.gid);
1766    write_u32(out, entry.size);
1767    out.extend_from_slice(entry.oid.as_bytes());
1768
1769    let mut flags = entry.flags;
1770    if entry.flags_extended.is_some() {
1771        flags |= 0x4000;
1772    } else {
1773        flags &= !0x4000;
1774    }
1775    let path_len = entry.path.len().min(0xFFF) as u16;
1776    flags = (flags & 0xF000) | path_len;
1777    out.extend_from_slice(&flags.to_be_bytes());
1778
1779    if let Some(fe) = entry.flags_extended {
1780        out.extend_from_slice(&fe.to_be_bytes());
1781    }
1782
1783    let common = previous_path
1784        .iter()
1785        .zip(entry.path.iter())
1786        .take_while(|(a, b)| a == b)
1787        .count();
1788    let to_remove = previous_path.len().saturating_sub(common);
1789    write_varint(out, to_remove);
1790    out.extend_from_slice(&entry.path[common..]);
1791    out.push(0);
1792
1793    previous_path.clear();
1794    previous_path.extend_from_slice(&entry.path);
1795}
1796
1797fn serialize_entry(entry: &IndexEntry, version: u32, out: &mut Vec<u8>) {
1798    let start = out.len();
1799
1800    let write_u32 = |out: &mut Vec<u8>, v: u32| out.extend_from_slice(&v.to_be_bytes());
1801
1802    write_u32(out, entry.ctime_sec);
1803    write_u32(out, entry.ctime_nsec);
1804    write_u32(out, entry.mtime_sec);
1805    write_u32(out, entry.mtime_nsec);
1806    write_u32(out, entry.dev);
1807    write_u32(out, entry.ino);
1808    write_u32(out, entry.mode);
1809    write_u32(out, entry.uid);
1810    write_u32(out, entry.gid);
1811    write_u32(out, entry.size);
1812    out.extend_from_slice(entry.oid.as_bytes());
1813
1814    // Set or clear the extended-flags bit in flags
1815    let mut flags = entry.flags;
1816    if version >= 3 && entry.flags_extended.is_some() {
1817        flags |= 0x4000;
1818    } else {
1819        flags &= !0x4000;
1820    }
1821    // Overwrite path length bits (bottom 12)
1822    let path_len = entry.path.len().min(0xFFF) as u16;
1823    flags = (flags & 0xF000) | path_len;
1824    out.extend_from_slice(&flags.to_be_bytes());
1825
1826    if version >= 3 {
1827        if let Some(fe) = entry.flags_extended {
1828            out.extend_from_slice(&fe.to_be_bytes());
1829        }
1830    }
1831
1832    out.extend_from_slice(&entry.path);
1833    out.push(0);
1834
1835    // Pad to 8-byte boundary
1836    let entry_len = out.len() - start;
1837    let padded = (entry_len + 7) & !7;
1838    let padding = padded - entry_len;
1839    for _ in 0..padding {
1840        out.push(0);
1841    }
1842}
1843
1844/// Build an [`IndexEntry`] by stat-ing a file on disk.
1845///
1846/// # Parameters
1847///
1848/// - `path` — absolute path to the file.
1849/// - `rel_path` — path relative to the repo root (stored in the index).
1850/// - `oid` — the object ID of the file's blob.
1851/// - `mode` — file mode (use [`MODE_REGULAR`], [`MODE_EXECUTABLE`], etc.).
1852///
1853/// # Errors
1854///
1855/// Returns [`Error::Io`] if `stat` fails.
1856pub fn entry_from_stat(
1857    path: &Path,
1858    rel_path: &[u8],
1859    oid: ObjectId,
1860    mode: u32,
1861) -> Result<IndexEntry> {
1862    let meta = fs::symlink_metadata(path)?;
1863    Ok(entry_from_metadata(&meta, rel_path, oid, mode))
1864}
1865
1866/// Build an [`IndexEntry`] from already-obtained metadata.
1867///
1868/// This avoids a redundant `stat()` call when the caller already has
1869/// filesystem metadata (e.g. from `symlink_metadata`).
1870#[must_use]
1871pub fn entry_from_metadata(
1872    meta: &fs::Metadata,
1873    rel_path: &[u8],
1874    oid: ObjectId,
1875    mode: u32,
1876) -> IndexEntry {
1877    use std::os::unix::fs::MetadataExt;
1878    IndexEntry {
1879        ctime_sec: meta.ctime() as u32,
1880        ctime_nsec: meta.ctime_nsec() as u32,
1881        mtime_sec: meta.mtime() as u32,
1882        mtime_nsec: meta.mtime_nsec() as u32,
1883        dev: meta.dev() as u32,
1884        ino: meta.ino() as u32,
1885        mode,
1886        uid: meta.uid(),
1887        gid: meta.gid(),
1888        size: meta.size() as u32,
1889        oid,
1890        flags: rel_path.len().min(0xFFF) as u16,
1891        flags_extended: None,
1892        path: rel_path.to_vec(),
1893        base_index_pos: 0,
1894    }
1895}
1896
1897/// Convert a `stat` mode to the Git index mode, normalised to one of the
1898/// known constants ([`MODE_REGULAR`], [`MODE_EXECUTABLE`], [`MODE_SYMLINK`]).
1899///
1900/// Only the `S_IFMT` and execute bits are inspected; all other permission bits
1901/// are discarded (Git stores only 644 or 755 for regular files).
1902///
1903/// # Parameters
1904///
1905/// - `raw_mode` — the raw `st_mode` value from `stat(2)`.
1906#[must_use]
1907pub fn normalize_mode(raw_mode: u32) -> u32 {
1908    const S_IFMT: u32 = 0o170000;
1909    const S_IFLNK: u32 = 0o120000;
1910    const S_IFREG: u32 = 0o100000;
1911
1912    let fmt = raw_mode & S_IFMT;
1913    if fmt == S_IFLNK {
1914        return MODE_SYMLINK;
1915    }
1916    if fmt == S_IFREG {
1917        // Executable if any execute bit is set
1918        if raw_mode & 0o111 != 0 {
1919            return MODE_EXECUTABLE;
1920        }
1921        return MODE_REGULAR;
1922    }
1923    // Fallback for everything else (devices, etc.) — treat as regular
1924    MODE_REGULAR
1925}
1926
#[cfg(test)]
mod tests {
    #![allow(clippy::expect_used, clippy::unwrap_used)]

    use super::*;
    use tempfile::TempDir;

    /// An all-zero object ID — fine for entries we never resolve.
    fn zero_oid() -> ObjectId {
        ObjectId::from_bytes(&[0u8; 20]).unwrap()
    }

    /// A minimal regular-file entry for `path` with zeroed stat data.
    fn entry_for(path: &str) -> IndexEntry {
        IndexEntry {
            ctime_sec: 0,
            ctime_nsec: 0,
            mtime_sec: 0,
            mtime_nsec: 0,
            dev: 0,
            ino: 0,
            mode: MODE_REGULAR,
            uid: 0,
            gid: 0,
            size: 0,
            oid: zero_oid(),
            flags: path.len().min(0xFFF) as u16,
            flags_extended: None,
            path: path.as_bytes().to_vec(),
            base_index_pos: 0,
        }
    }

    #[test]
    fn round_trip_empty_index() {
        let tmp = TempDir::new().unwrap();
        let file = tmp.path().join("index");

        Index::new().write(&file).unwrap();

        let reread = Index::load(&file).unwrap();
        assert_eq!(reread.entries.len(), 0);
    }

    #[test]
    fn round_trip_with_entries() {
        let tmp = TempDir::new().unwrap();
        let file = tmp.path().join("index");

        let mut index = Index::new();
        for p in ["foo.txt", "bar/baz.txt"] {
            index.add_or_replace(entry_for(p));
        }
        index.write(&file).unwrap();

        // Entries come back sorted by path.
        let reread = Index::load(&file).unwrap();
        assert_eq!(reread.entries.len(), 2);
        assert_eq!(reread.entries[0].path, b"bar/baz.txt");
        assert_eq!(reread.entries[1].path, b"foo.txt");
    }

    #[test]
    fn remove_descendants_under_path_drops_nested_only() {
        let mut index = Index::new();
        for p in ["d/e", "d-other", "prefix/d"] {
            index.add_or_replace(entry_for(p));
        }

        // Only entries strictly inside "d/" should be removed; siblings with
        // a "d" prefix and nested files named "d" must survive.
        index.remove_descendants_under_path("d");
        let remaining: Vec<_> = index.entries.iter().map(|e| e.path.as_slice()).collect();
        assert_eq!(remaining, vec![b"d-other".as_slice(), b"prefix/d".as_slice()]);
    }

    #[test]
    fn requested_v4_writes_v4_on_disk() {
        let tmp = TempDir::new().unwrap();
        let file = tmp.path().join("index");

        let mut index = Index {
            version: 4,
            ..Index::default()
        };
        index.add_or_replace(entry_for("one"));
        index.add_or_replace(entry_for("two/one"));
        index.write(&file).unwrap();

        // Bytes 4..8 of the header hold the big-endian format version.
        let raw = fs::read(&file).unwrap();
        assert_eq!(&raw[4..8], &4u32.to_be_bytes());

        let reread = Index::load(&file).unwrap();
        assert_eq!(reread.version, 4);
        assert_eq!(reread.entries[0].path, b"one");
        assert_eq!(reread.entries[1].path, b"two/one");
    }
}