Skip to main content

gix_index/
lib.rs

1//! ## Feature Flags
2#![cfg_attr(
3    all(doc, feature = "document-features"),
4    doc = ::document_features::document_features!()
5)]
6#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg))]
7#![deny(unsafe_code, missing_docs)]
8
9use std::{ops::Range, path::PathBuf};
10
11use bstr::{BStr, ByteSlice};
12use filetime::FileTime;
13/// `gix_hash` is made available as it's part of the public API in various places.
14pub use gix_hash as hash;
15/// A re-export to allow calling [`State::from_tree()`].
16pub use gix_validate as validate;
17
18///
19pub mod file;
20
21///
22pub mod extension;
23
24///
25pub mod entry;
26
27mod access;
28
29///
30pub mod init;
31
32///
33pub mod decode;
34
35///
36pub mod verify;
37
38///
39pub mod write;
40
41pub mod fs;
42
/// All known versions of a git index file.
///
/// Each variant's discriminant is set explicitly to the version number it names, so casting a variant
/// to an integer yields `2`, `3` or `4` respectively. The derived ordering follows declaration order,
/// i.e. `V2 < V3 < V4`.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Version {
    /// Supports entries and various extensions.
    V2 = 2,
    /// Adds support for additional flags for each entry, called extended entries.
    V3 = 3,
    /// Supports deltified entry paths.
    V4 = 4,
}
54
/// An entry in the index, identifying a non-tree item on disk.
///
/// The entry does not own its path: it only stores a byte range into a separate path backing.
/// `Debug` is implemented by hand (see the `impls` module) instead of being derived, so the path can
/// be resolved against a path backing when one is available.
#[derive(Clone, Eq, PartialEq)]
pub struct Entry {
    /// The filesystem stat information for the file on disk.
    pub stat: entry::Stat,
    /// The object id for this entry's ODB representation (assuming it's up-to-date with it).
    pub id: gix_hash::ObjectId,
    /// Additional flags for use in algorithms and for efficiently storing stage information.
    pub flags: entry::Flags,
    /// The kind of item this entry represents - it's not all blobs in the index anymore.
    pub mode: entry::Mode,
    /// The range to lookup in the path backing to obtain the entry path relative to the repository.
    /// This costs additional memory but is probably worth it given that paths can stay in one big allocation.
    ///
    /// The field is private; the range is only meaningful together with the path backing it was created for.
    path: Range<usize>,
}
70
/// An index file whose state was read from a file on disk.
///
/// It pairs the in-memory [`State`] with the location of the file and, optionally, the file's checksum.
#[derive(Clone)]
pub struct File {
    /// The state containing the actual index data.
    pub(crate) state: State,
    /// The path from which the index was read or to which it is supposed to be written.
    pub(crate) path: PathBuf,
    /// The checksum of all bytes prior to the checksum itself.
    ///
    /// NOTE(review): presumably `None` if the checksum is not (yet) known, e.g. before the file was
    /// read or written - confirm against the code that sets this field.
    pub(crate) checksum: Option<gix_hash::ObjectId>,
}
81
/// The type to use and store paths to all entries.
///
/// It is a plain byte buffer; individual entries refer to their path by a byte range into it.
pub type PathStorage = Vec<u8>;
/// The borrowed, unsized counterpart of [`PathStorage`], for use behind a reference.
pub type PathStorageRef = [u8];
86
/// A directory as stored in the `icase_dirs` table of [`AccelerateLookup`].
///
/// The directory path is not stored separately: it is the part of the path backing that starts where
/// the path of `entry` starts and ends at `dir_end`.
struct DirEntry<'a> {
    /// The first entry in the directory
    entry: &'a Entry,
    /// One past the last byte of the directory in the path-backing
    dir_end: usize,
}
93
94impl DirEntry<'_> {
95    fn path<'a>(&self, state: &'a State) -> &'a BStr {
96        let range = self.entry.path.start..self.dir_end;
97        state.path_backing[range].as_bstr()
98    }
99}
100
/// A backing store for accelerating lookups of entries in a case-sensitive and case-insensitive manner.
///
/// Both tables only borrow entries for the lifetime `'a`; no entry is copied into this structure.
pub struct AccelerateLookup<'a> {
    /// The entries themselves, hashed by their full icase path.
    /// Icase-clashes are handled in order of occurrence and are all available for iteration.
    icase_entries: hashbrown::HashTable<&'a Entry>,
    /// Each hash in this table corresponds to a directory containing one or more entries.
    icase_dirs: hashbrown::HashTable<DirEntry<'a>>,
}
109
/// An in-memory cache of a fully parsed git index file.
///
/// As opposed to a snapshot, it's meant to be altered and eventually be written back to disk or converted into a tree.
/// We treat index and its state synonymous.
///
/// # Path Format
///
/// All entry paths stored by [`State`], and all path-like arguments used to access entries, are repository-relative byte
/// strings with `/` as separator. They are not platform-native filesystem paths, must not be absolute, and must not use
/// separators like `\`; convert worktree or absolute paths to this representation before lookup or insertion.
///
/// # A note on safety
///
/// An index (i.e. [`State`]) created by hand is not guaranteed to have valid entry paths as they are entirely controlled
/// by the caller, without applying any level of validation.
///
/// This means that before using these paths to recreate files on disk, *they must be validated*.
///
/// It's notable that it's possible to manufacture tree objects which contain names like `.git/hooks/pre-commit`
/// which then will look like `.git/hooks/pre-commit` in the index, which doesn't care that the name came from a single
/// tree instead of from trees named `.git`, `hooks` and a blob named `pre-commit`. The effect is still the same - an invalid
/// path is presented in the index and its consumer must validate each path component before usage.
///
/// It's recommended to do that using `gix_worktree::Stack` which has it built-in if it's created `for_checkout()`. Alternatively
/// one can validate component names with `gix_validate::path::component()`.
#[derive(Clone)]
pub struct State {
    /// The kind of object hash used when storing the underlying file.
    ///
    /// Empty states for example won't have a single object id, so deduction of the hash used isn't always possible.
    object_hash: gix_hash::Kind,
    /// The time at which the state was created, indicating its freshness compared to other files on disk.
    ///
    /// Note that on platforms that only have a precisions of a second for this time, we will treat all entries with the
    /// same timestamp as this as potentially changed, checking more thoroughly if a change actually happened.
    timestamp: FileTime,
    /// The index file format version associated with this state.
    version: Version,
    /// All entries of the index; each one refers to its path by a range into `path_backing`.
    entries: Vec<Entry>,
    /// A memory area keeping all index paths, in full length, independently of the index version.
    ///
    /// Ranges into this storage are referred to by parts of `entries`.
    path_backing: PathStorage,
    /// True if one entry in the index has a special marker mode
    is_sparse: bool,

    // Extensions
    /// NOTE(review): presumably records whether the 'end of index entry' extension was present when the
    /// index was decoded - confirm against the decoder.
    end_of_index_at_decode_time: bool,
    /// NOTE(review): presumably records whether the 'index entry offset table' extension was present when
    /// the index was decoded - confirm against the decoder.
    offset_table_at_decode_time: bool,
    /// The tree extension, if available.
    tree: Option<extension::Tree>,
    /// The link extension, if available.
    link: Option<extension::Link>,
    /// The paths of the resolve-undo extension, if available.
    resolve_undo: Option<extension::resolve_undo::Paths>,
    /// The untracked-cache extension, if available.
    untracked: Option<extension::UntrackedCache>,
    /// The filesystem-monitor extension, if available.
    fs_monitor: Option<extension::FsMonitor>,
}
164
165mod impls {
166    use std::fmt::{Debug, Formatter};
167
168    use crate::{Entry, PathStorageRef, State, entry::Stage};
169
170    impl Entry {
171        pub(crate) fn fmt_debug(&self, f: &mut Formatter, path_backing: Option<&PathStorageRef>) -> std::fmt::Result {
172            if f.alternate() {
173                write!(
174                    f,
175                    "{} {}{:?} mtime: {:?} {} ",
176                    match self.flags.stage() {
177                        Stage::Unconflicted => "       ",
178                        Stage::Base => "BASE   ",
179                        Stage::Ours => "OURS   ",
180                        Stage::Theirs => "THEIRS ",
181                    },
182                    if self.flags.is_empty() {
183                        "".to_string()
184                    } else {
185                        format!("{:?} ", self.flags)
186                    },
187                    self.mode,
188                    self.stat.mtime,
189                    self.id,
190                )?;
191                return match path_backing {
192                    Some(path_backing) => write!(f, "{}", self.path_in(path_backing)),
193                    None => write!(f, "{:?}", self.path),
194                };
195            }
196
197            let mut entry = f.debug_struct("Entry");
198            entry
199                .field("stat", &self.stat)
200                .field("id", &self.id)
201                .field("flags", &self.flags)
202                .field("mode", &self.mode);
203            match path_backing {
204                Some(path_backing) => entry.field("path", &self.path_in(path_backing)),
205                None => entry.field("path", &self.path),
206            }
207            .finish()
208        }
209    }
210
    /// Formats an entry without access to a path backing, which means that the path is shown
    /// as its raw byte range instead of the actual path.
    impl Debug for Entry {
        fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
            // `None`: a lone entry has no path backing to resolve its path range against.
            self.fmt_debug(f, None)
        }
    }
216
217    impl Debug for State {
218        fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
219            for entry in &self.entries {
220                writeln!(
221                    f,
222                    "{} {}{:?} {} {}",
223                    match entry.flags.stage() {
224                        Stage::Unconflicted => "       ",
225                        Stage::Base => "BASE   ",
226                        Stage::Ours => "OURS   ",
227                        Stage::Theirs => "THEIRS ",
228                    },
229                    if entry.flags.is_empty() {
230                        "".to_string()
231                    } else {
232                        format!("{:?} ", entry.flags)
233                    },
234                    entry.mode,
235                    entry.id,
236                    entry.path(self)
237                )?;
238            }
239            Ok(())
240        }
241    }
242}
243
244pub(crate) mod util {
245    #[inline]
246    pub fn var_int(data: &[u8]) -> Option<(u64, &[u8])> {
247        let (num, consumed) = gix_features::decode::leb64_from_read(data).ok()?;
248        let data = &data[consumed..];
249        (num, data).into()
250    }
251
252    #[inline]
253    pub fn read_u32(data: &[u8]) -> Option<(u32, &[u8])> {
254        data.split_at_checked(4)
255            .map(|(num, data)| (u32::from_be_bytes(num.try_into().unwrap()), data))
256    }
257
258    #[inline]
259    pub fn read_u64(data: &[u8]) -> Option<(u64, &[u8])> {
260        data.split_at_checked(8)
261            .map(|(num, data)| (u64::from_be_bytes(num.try_into().unwrap()), data))
262    }
263
264    #[inline]
265    pub fn from_be_u32(b: &[u8]) -> u32 {
266        u32::from_be_bytes(b.try_into().unwrap())
267    }
268
269    #[inline]
270    pub fn split_at_byte_exclusive(data: &[u8], byte: u8) -> Option<(&[u8], &[u8])> {
271        if data.len() < 2 {
272            return None;
273        }
274        data.iter().enumerate().find_map(|(idx, b)| {
275            (*b == byte).then(|| {
276                if idx == 0 {
277                    (&[] as &[u8], &data[1..])
278                } else {
279                    let (a, b) = data.split_at(idx);
280                    (a, &b[1..])
281                }
282            })
283        })
284    }
285}