Skip to main content

grit_lib/
index.rs

//! Git index (staging area) reading and writing.
//!
//! The index file (`.git/index`) stores the current state of the staging area.
//! It uses a binary format: a 12-byte header, a table of variable-length
//! entries (fixed-size stat and object-ID fields followed by a NUL-terminated,
//! padded path), optional extensions, and a trailing SHA-1 computed over all
//! preceding bytes.
//!
//! # Format version
//!
//! This implementation supports index versions 2 and 3.  Version 4 (path
//! compression) is not yet implemented.
//!
//! # References
//!
//! See `Documentation/technical/index-format.txt` in the Git source tree for
//! the authoritative format specification.
16
17use std::fs;
18use std::io::{self, Write};
19use std::path::Path;
20
21use sha1::{Digest, Sha1};
22
23use crate::error::{Error, Result};
24use crate::objects::ObjectId;
25
/// Index mode of a regular, non-executable file (file type `100000`, permissions `644`).
pub const MODE_REGULAR: u32 = 0o100_644;
/// Index mode of an executable regular file (file type `100000`, permissions `755`).
pub const MODE_EXECUTABLE: u32 = 0o100_755;
/// Index mode of a symbolic link (file type `120000`, no permission bits).
pub const MODE_SYMLINK: u32 = 0o120_000;
/// Index mode of a gitlink (submodule).
pub const MODE_GITLINK: u32 = 0o160_000;
/// Mode of a directory (tree) entry — appears in tree objects only, never in the index.
pub const MODE_TREE: u32 = 0o040_000;
36
37/// A single entry in the Git index.
38#[derive(Debug, Clone, PartialEq, Eq)]
39pub struct IndexEntry {
40    /// Time the file metadata last changed (seconds since epoch).
41    pub ctime_sec: u32,
42    /// Nanosecond fraction of `ctime_sec`.
43    pub ctime_nsec: u32,
44    /// Time the file data last changed (seconds since epoch).
45    pub mtime_sec: u32,
46    /// Nanosecond fraction of `mtime_sec`.
47    pub mtime_nsec: u32,
48    /// Device number.
49    pub dev: u32,
50    /// Inode number.
51    pub ino: u32,
52    /// Unix file mode (`MODE_REGULAR`, `MODE_EXECUTABLE`, `MODE_SYMLINK`, …).
53    pub mode: u32,
54    /// Owner UID.
55    pub uid: u32,
56    /// Owner GID.
57    pub gid: u32,
58    /// File size in bytes (truncated to 32 bits).
59    pub size: u32,
60    /// SHA-1 of the blob object.
61    pub oid: ObjectId,
62    /// Entry flags (stage, assume-valid, extended, …).
63    pub flags: u16,
64    /// Extended flags (v3+ only).
65    pub flags_extended: Option<u16>,
66    /// Path relative to the repository root.  May contain `/` separators.
67    pub path: Vec<u8>,
68}
69
70impl IndexEntry {
71    /// Merge stage (0 = normal, 1–3 = conflict stages).
72    #[must_use]
73    pub fn stage(&self) -> u8 {
74        ((self.flags >> 12) & 0x3) as u8
75    }
76
77    /// Whether the assume-unchanged bit is set.
78    #[must_use]
79    pub fn assume_unchanged(&self) -> bool {
80        self.flags & 0x8000 != 0
81    }
82
83    /// Whether the skip-worktree bit is set (extended flags, v3+).
84    #[must_use]
85    pub fn skip_worktree(&self) -> bool {
86        self.flags_extended
87            .map(|f| f & 0x4000 != 0)
88            .unwrap_or(false)
89    }
90
91    /// Set the assume-unchanged bit.
92    pub fn set_assume_unchanged(&mut self, value: bool) {
93        if value {
94            self.flags |= 0x8000;
95        } else {
96            self.flags &= !0x8000;
97        }
98    }
99
100    /// Set the skip-worktree bit (promotes entry to v3).
101    pub fn set_skip_worktree(&mut self, value: bool) {
102        let fe = self.flags_extended.get_or_insert(0);
103        if value {
104            *fe |= 0x4000;
105        } else {
106            *fe &= !0x4000;
107        }
108    }
109}
110
111/// The in-memory representation of the Git index file.
112#[derive(Debug, Clone, Default)]
113pub struct Index {
114    /// Index format version (2 or 3).
115    pub version: u32,
116    /// Index entries, sorted by (path, stage).
117    pub entries: Vec<IndexEntry>,
118}
119
120impl Index {
121    /// Create a new, empty index.
122    #[must_use]
123    pub fn new() -> Self {
124        Self {
125            version: 2,
126            entries: Vec::new(),
127        }
128    }
129
130    /// Load an index from the given file path.
131    ///
132    /// Returns an empty index if the file does not exist.
133    ///
134    /// # Errors
135    ///
136    /// Returns [`Error::IndexError`] if the file is present but corrupt.
137    pub fn load(path: &Path) -> Result<Self> {
138        match fs::read(path) {
139            Ok(data) => Self::parse(&data),
140            Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(Self::new()),
141            Err(e) => Err(Error::Io(e)),
142        }
143    }
144
145    /// Parse index bytes (the whole file including trailing SHA-1).
146    ///
147    /// # Errors
148    ///
149    /// Returns [`Error::IndexError`] on structural problems.
150    pub fn parse(data: &[u8]) -> Result<Self> {
151        if data.len() < 12 {
152            return Err(Error::IndexError("file too short".to_owned()));
153        }
154
155        // Verify trailing SHA-1 checksum
156        let (body, checksum) = data.split_at(data.len() - 20);
157        let mut hasher = Sha1::new();
158        hasher.update(body);
159        let computed = hasher.finalize();
160        if computed.as_slice() != checksum {
161            return Err(Error::IndexError("SHA-1 checksum mismatch".to_owned()));
162        }
163
164        // Header
165        let magic = &body[..4];
166        if magic != b"DIRC" {
167            return Err(Error::IndexError("bad magic: expected DIRC".to_owned()));
168        }
169        let version = u32::from_be_bytes(
170            body[4..8]
171                .try_into()
172                .map_err(|_| Error::IndexError("cannot read version".to_owned()))?,
173        );
174        if version != 2 && version != 3 {
175            return Err(Error::IndexError(format!(
176                "unsupported index version {version}"
177            )));
178        }
179        let count = u32::from_be_bytes(
180            body[8..12]
181                .try_into()
182                .map_err(|_| Error::IndexError("cannot read entry count".to_owned()))?,
183        );
184
185        let mut pos = 12usize;
186        let mut entries = Vec::with_capacity(count as usize);
187
188        for _ in 0..count {
189            let (entry, consumed) = parse_entry(&body[pos..], version)?;
190            entries.push(entry);
191            pos += consumed;
192        }
193
194        Ok(Self { version, entries })
195    }
196
197    /// Write the index to a file, computing and appending the trailing SHA-1.
198    ///
199    /// # Errors
200    ///
201    /// Returns [`Error::Io`] on filesystem errors.
202    pub fn write(&self, path: &Path) -> Result<()> {
203        let mut body = Vec::new();
204        self.serialize_into(&mut body)?;
205
206        let mut hasher = Sha1::new();
207        hasher.update(&body);
208        let checksum = hasher.finalize();
209
210        let tmp_path = path.with_extension("lock");
211        {
212            let mut f = fs::File::create(&tmp_path)?;
213            f.write_all(&body)?;
214            f.write_all(&checksum)?;
215        }
216        fs::rename(&tmp_path, path)?;
217        Ok(())
218    }
219
220    /// Serialise the index body (without trailing checksum) into `out`.
221    fn serialize_into(&self, out: &mut Vec<u8>) -> Result<()> {
222        // Header
223        out.extend_from_slice(b"DIRC");
224        out.extend_from_slice(&self.version.to_be_bytes());
225        out.extend_from_slice(&(self.entries.len() as u32).to_be_bytes());
226
227        for entry in &self.entries {
228            serialize_entry(entry, self.version, out);
229        }
230        Ok(())
231    }
232
233    /// Add or replace an entry (matched by path + stage).
234    pub fn add_or_replace(&mut self, entry: IndexEntry) {
235        let path = &entry.path;
236        let stage = entry.stage();
237        // Binary search for the insertion point by (path, stage)
238        let result = self.entries.binary_search_by(|e| {
239            e.path.as_slice().cmp(path.as_slice()).then_with(|| e.stage().cmp(&stage))
240        });
241        match result {
242            Ok(pos) => {
243                // Exact match — replace in place
244                self.entries[pos] = entry;
245            }
246            Err(pos) => {
247                // Not found — insert at sorted position
248                self.entries.insert(pos, entry);
249            }
250        }
251    }
252
253    /// Remove all entries matching the given path (all stages).
254    ///
255    /// Returns `true` if at least one entry was removed.
256    pub fn remove(&mut self, path: &[u8]) -> bool {
257        let before = self.entries.len();
258        self.entries.retain(|e| e.path != path);
259        self.entries.len() < before
260    }
261
262    /// Sort entries in Git's canonical order: by path, then by stage.
263    pub fn sort(&mut self) {
264        self.entries
265            .sort_by(|a, b| a.path.cmp(&b.path).then_with(|| a.stage().cmp(&b.stage())));
266    }
267
268    /// Find an entry by path and stage (0 for normal entries).
269    #[must_use]
270    pub fn get(&self, path: &[u8], stage: u8) -> Option<&IndexEntry> {
271        self.entries
272            .iter()
273            .find(|e| e.path == path && e.stage() == stage)
274    }
275
276    /// Find a mutable entry by path and stage.
277    pub fn get_mut(&mut self, path: &[u8], stage: u8) -> Option<&mut IndexEntry> {
278        self.entries
279            .iter_mut()
280            .find(|e| e.path == path && e.stage() == stage)
281    }
282}
283
284/// Parse a single index entry from `data`, returning `(entry, bytes_consumed)`.
285fn parse_entry(data: &[u8], version: u32) -> Result<(IndexEntry, usize)> {
286    if data.len() < 62 {
287        return Err(Error::IndexError("entry too short".to_owned()));
288    }
289
290    let mut pos = 0;
291
292    macro_rules! read_u32 {
293        () => {{
294            let v = u32::from_be_bytes(
295                data[pos..pos + 4]
296                    .try_into()
297                    .map_err(|_| Error::IndexError("truncated u32".to_owned()))?,
298            );
299            pos += 4;
300            v
301        }};
302    }
303
304    let ctime_sec = read_u32!();
305    let ctime_nsec = read_u32!();
306    let mtime_sec = read_u32!();
307    let mtime_nsec = read_u32!();
308    let dev = read_u32!();
309    let ino = read_u32!();
310    let mode = read_u32!();
311    let uid = read_u32!();
312    let gid = read_u32!();
313    let size = read_u32!();
314
315    let oid = ObjectId::from_bytes(&data[pos..pos + 20])?;
316    pos += 20;
317
318    let flags = u16::from_be_bytes(
319        data[pos..pos + 2]
320            .try_into()
321            .map_err(|_| Error::IndexError("truncated flags".to_owned()))?,
322    );
323    pos += 2;
324
325    let flags_extended = if version >= 3 && flags & 0x4000 != 0 {
326        let fe = u16::from_be_bytes(
327            data[pos..pos + 2]
328                .try_into()
329                .map_err(|_| Error::IndexError("truncated extended flags".to_owned()))?,
330        );
331        pos += 2;
332        Some(fe)
333    } else {
334        None
335    };
336
337    // Path: null-terminated
338    let nul = data[pos..]
339        .iter()
340        .position(|&b| b == 0)
341        .ok_or_else(|| Error::IndexError("entry path missing NUL terminator".to_owned()))?;
342    let path = data[pos..pos + nul].to_vec();
343    pos += nul + 1;
344
345    // Pad to 8-byte boundary (from start of entry)
346    let entry_start = 0usize;
347    let entry_len = pos - entry_start;
348    let padded = (entry_len + 7) & !7;
349    let padding = padded.saturating_sub(entry_len);
350    pos += padding;
351
352    Ok((
353        IndexEntry {
354            ctime_sec,
355            ctime_nsec,
356            mtime_sec,
357            mtime_nsec,
358            dev,
359            ino,
360            mode,
361            uid,
362            gid,
363            size,
364            oid,
365            flags,
366            flags_extended,
367            path,
368        },
369        pos,
370    ))
371}
372
373/// Serialise a single index entry into `out`.
374fn serialize_entry(entry: &IndexEntry, version: u32, out: &mut Vec<u8>) {
375    let start = out.len();
376
377    let write_u32 = |out: &mut Vec<u8>, v: u32| out.extend_from_slice(&v.to_be_bytes());
378
379    write_u32(out, entry.ctime_sec);
380    write_u32(out, entry.ctime_nsec);
381    write_u32(out, entry.mtime_sec);
382    write_u32(out, entry.mtime_nsec);
383    write_u32(out, entry.dev);
384    write_u32(out, entry.ino);
385    write_u32(out, entry.mode);
386    write_u32(out, entry.uid);
387    write_u32(out, entry.gid);
388    write_u32(out, entry.size);
389    out.extend_from_slice(entry.oid.as_bytes());
390
391    // Set or clear the extended-flags bit in flags
392    let mut flags = entry.flags;
393    if version >= 3 && entry.flags_extended.is_some() {
394        flags |= 0x4000;
395    } else {
396        flags &= !0x4000;
397    }
398    // Overwrite path length bits (bottom 12)
399    let path_len = entry.path.len().min(0xFFF) as u16;
400    flags = (flags & 0xF000) | path_len;
401    out.extend_from_slice(&flags.to_be_bytes());
402
403    if version >= 3 {
404        if let Some(fe) = entry.flags_extended {
405            out.extend_from_slice(&fe.to_be_bytes());
406        }
407    }
408
409    out.extend_from_slice(&entry.path);
410    out.push(0);
411
412    // Pad to 8-byte boundary
413    let entry_len = out.len() - start;
414    let padded = (entry_len + 7) & !7;
415    let padding = padded - entry_len;
416    for _ in 0..padding {
417        out.push(0);
418    }
419}
420
421/// Build an [`IndexEntry`] by stat-ing a file on disk.
422///
423/// # Parameters
424///
425/// - `path` — absolute path to the file.
426/// - `rel_path` — path relative to the repo root (stored in the index).
427/// - `oid` — the object ID of the file's blob.
428/// - `mode` — file mode (use [`MODE_REGULAR`], [`MODE_EXECUTABLE`], etc.).
429///
430/// # Errors
431///
432/// Returns [`Error::Io`] if `stat` fails.
433pub fn entry_from_stat(
434    path: &Path,
435    rel_path: &[u8],
436    oid: ObjectId,
437    mode: u32,
438) -> Result<IndexEntry> {
439    use std::os::unix::fs::MetadataExt;
440    let meta = fs::symlink_metadata(path)?;
441    Ok(entry_from_metadata(&meta, rel_path, oid, mode))
442}
443
444/// Build an [`IndexEntry`] from already-obtained metadata.
445///
446/// This avoids a redundant `stat()` call when the caller already has
447/// filesystem metadata (e.g. from `symlink_metadata`).
448#[must_use]
449pub fn entry_from_metadata(
450    meta: &fs::Metadata,
451    rel_path: &[u8],
452    oid: ObjectId,
453    mode: u32,
454) -> IndexEntry {
455    use std::os::unix::fs::MetadataExt;
456    IndexEntry {
457        ctime_sec: meta.ctime() as u32,
458        ctime_nsec: meta.ctime_nsec() as u32,
459        mtime_sec: meta.mtime() as u32,
460        mtime_nsec: meta.mtime_nsec() as u32,
461        dev: meta.dev() as u32,
462        ino: meta.ino() as u32,
463        mode,
464        uid: meta.uid(),
465        gid: meta.gid(),
466        size: meta.size() as u32,
467        oid,
468        flags: rel_path.len().min(0xFFF) as u16,
469        flags_extended: None,
470        path: rel_path.to_vec(),
471    }
472}
473
474/// Convert a `stat` mode to the Git index mode, normalised to one of the
475/// known constants ([`MODE_REGULAR`], [`MODE_EXECUTABLE`], [`MODE_SYMLINK`]).
476///
477/// Only the `S_IFMT` and execute bits are inspected; all other permission bits
478/// are discarded (Git stores only 644 or 755 for regular files).
479///
480/// # Parameters
481///
482/// - `raw_mode` — the raw `st_mode` value from `stat(2)`.
483#[must_use]
484pub fn normalize_mode(raw_mode: u32) -> u32 {
485    const S_IFMT: u32 = 0o170000;
486    const S_IFLNK: u32 = 0o120000;
487    const S_IFREG: u32 = 0o100000;
488
489    let fmt = raw_mode & S_IFMT;
490    if fmt == S_IFLNK {
491        return MODE_SYMLINK;
492    }
493    if fmt == S_IFREG {
494        // Executable if any execute bit is set
495        if raw_mode & 0o111 != 0 {
496            return MODE_EXECUTABLE;
497        }
498        return MODE_REGULAR;
499    }
500    // Fallback for everything else (devices, etc.) — treat as regular
501    MODE_REGULAR
502}
503
#[cfg(test)]
mod tests {
    #![allow(clippy::expect_used, clippy::unwrap_used)]

    use super::*;
    use tempfile::TempDir;

    /// An all-zero object ID; these tests never look at blob contents.
    fn dummy_oid() -> ObjectId {
        let zeros = [0u8; 20];
        ObjectId::from_bytes(&zeros).unwrap()
    }

    /// A stage-0 regular-file entry for `path` with zeroed stat data.
    fn make_entry(path: &str) -> IndexEntry {
        let name = path.as_bytes();
        IndexEntry {
            ctime_sec: 0,
            ctime_nsec: 0,
            mtime_sec: 0,
            mtime_nsec: 0,
            dev: 0,
            ino: 0,
            mode: MODE_REGULAR,
            uid: 0,
            gid: 0,
            size: 0,
            oid: dummy_oid(),
            flags: name.len().min(0xFFF) as u16,
            flags_extended: None,
            path: name.to_vec(),
        }
    }

    /// Writing an empty index and loading it again yields no entries.
    #[test]
    fn round_trip_empty_index() {
        let dir = TempDir::new().unwrap();
        let index_file = dir.path().join("index");

        Index::new().write(&index_file).unwrap();

        let reloaded = Index::load(&index_file).unwrap();
        assert_eq!(reloaded.entries.len(), 0);
    }

    /// Entries survive a write/load cycle and come back in sorted order.
    #[test]
    fn round_trip_with_entries() {
        let dir = TempDir::new().unwrap();
        let index_file = dir.path().join("index");

        let mut index = Index::new();
        for name in ["foo.txt", "bar/baz.txt"] {
            index.add_or_replace(make_entry(name));
        }
        index.write(&index_file).unwrap();

        let reloaded = Index::load(&index_file).unwrap();
        let paths: Vec<&[u8]> = reloaded.entries.iter().map(|e| e.path.as_slice()).collect();
        assert_eq!(paths, [&b"bar/baz.txt"[..], &b"foo.txt"[..]]);
    }
}
561}