gix_index/access/mod.rs

use std::{cmp::Ordering, ops::Range};

use bstr::{BStr, ByteSlice, ByteVec};
use filetime::FileTime;

use crate::{
    entry,
    entry::{Stage, StageRaw},
    extension, AccelerateLookup, Entry, PathStorage, PathStorageRef, State, Version,
};

// TODO: integrate this somehow, somewhere, depending on later usage.
#[allow(dead_code)]
mod sparse;

/// General information and entries
impl State {
    /// Return the version used to store this state's information on disk.
    pub fn version(&self) -> Version {
        self.version
    }

    /// Returns the time at which the state was created, indicating its freshness compared to other files on disk.
    pub fn timestamp(&self) -> FileTime {
        self.timestamp
    }

    /// Updates the timestamp of this state, indicating its freshness compared to other files on disk.
    ///
    /// Be careful about using this, as setting a timestamp without correctly updating the index
    /// **will cause (file system) race conditions**; see `racy-git.txt` in the git documentation
    /// for more details.
    pub fn set_timestamp(&mut self, timestamp: FileTime) {
        self.timestamp = timestamp;
    }

    /// Return the kind of hashes used in this instance.
    pub fn object_hash(&self) -> gix_hash::Kind {
        self.object_hash
    }

    /// Return our entries.
    pub fn entries(&self) -> &[Entry] {
        &self.entries
    }
    /// Return our path backing, the place which keeps all paths one after another, with entries storing only the range to access them.
    pub fn path_backing(&self) -> &PathStorageRef {
        &self.path_backing
    }

    /// Runs `filter_map` on all entries, returning an iterator over all paths along with the result of `filter_map`.
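    ///
    /// A small sketch that pairs each conflicting path with its raw stage; the `state` binding
    /// is an assumption for illustration:
    ///
    /// ```ignore
    /// let conflicts: Vec<_> = state
    ///     .entries_with_paths_by_filter_map(|_path, entry| {
    ///         (entry.stage_raw() != 0).then_some(entry.stage_raw())
    ///     })
    ///     .collect();
    /// ```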
    pub fn entries_with_paths_by_filter_map<'a, T>(
        &'a self,
        mut filter_map: impl FnMut(&'a BStr, &Entry) -> Option<T> + 'a,
    ) -> impl Iterator<Item = (&'a BStr, T)> + 'a {
        self.entries.iter().filter_map(move |e| {
            let p = e.path(self);
            filter_map(p, e).map(|t| (p, t))
        })
    }
    /// Return mutable entries along with their path, as obtained from `backing`.
    pub fn entries_mut_with_paths_in<'state, 'backing>(
        &'state mut self,
        backing: &'backing PathStorageRef,
    ) -> impl Iterator<Item = (&'state mut Entry, &'backing BStr)> {
        self.entries.iter_mut().map(move |e| {
            let path = backing[e.path.clone()].as_bstr();
            (e, path)
        })
    }

    /// Find the entry index in [`entries()`][State::entries()] matching the given repository-relative
    /// `path` and `stage`, or `None`.
    ///
    /// Use the index for accessing multiple stages if they exist, but at least the single matching entry.
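    ///
    /// A minimal sketch of a lookup, assuming `state` is an existing [`State`] and that the path
    /// and stage used here (including the `Stage::Unconflicted` variant) exist as shown:
    ///
    /// ```ignore
    /// use bstr::BStr;
    ///
    /// let path: &BStr = "src/lib.rs".into();
    /// if let Some(idx) = state.entry_index_by_path_and_stage(path, gix_index::entry::Stage::Unconflicted) {
    ///     assert_eq!(state.entry(idx).path(&state), path);
    /// }
    /// ```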
    pub fn entry_index_by_path_and_stage(&self, path: &BStr, stage: entry::Stage) -> Option<usize> {
        let mut stage_cmp = Ordering::Equal;
        let idx = self
            .entries
            .binary_search_by(|e| {
                let res = e.path(self).cmp(path);
                if res.is_eq() {
                    stage_cmp = e.stage().cmp(&stage);
                }
                res
            })
            .ok()?;
        self.entry_index_by_idx_and_stage(path, idx, stage as StageRaw, stage_cmp)
    }

    /// Walk as far in `direction` as possible, with [`Ordering::Greater`] towards higher stages, and [`Ordering::Less`]
    /// towards lower stages, and return the lowest or highest seen stage.
    /// Return `None` if there is no greater or smaller stage.
    fn walk_entry_stages(&self, path: &BStr, base: usize, direction: Ordering) -> Option<usize> {
        match direction {
            Ordering::Greater => self
                .entries
                .get(base + 1..)?
                .iter()
                .enumerate()
                .take_while(|(_, e)| e.path(self) == path)
                .last()
                .map(|(idx, _)| base + 1 + idx),
            Ordering::Equal => Some(base),
            Ordering::Less => self.entries[..base]
                .iter()
                .enumerate()
                .rev()
                .take_while(|(_, e)| e.path(self) == path)
                .last()
                .map(|(idx, _)| idx),
        }
    }

    fn entry_index_by_idx_and_stage(
        &self,
        path: &BStr,
        idx: usize,
        wanted_stage: entry::StageRaw,
        stage_cmp: Ordering,
    ) -> Option<usize> {
        match stage_cmp {
            Ordering::Greater => self.entries[..idx]
                .iter()
                .enumerate()
                .rev()
                .take_while(|(_, e)| e.path(self) == path)
                .find_map(|(idx, e)| (e.stage_raw() == wanted_stage).then_some(idx)),
            Ordering::Equal => Some(idx),
            Ordering::Less => self
                .entries
                .get(idx + 1..)?
                .iter()
                .enumerate()
                .take_while(|(_, e)| e.path(self) == path)
                .find_map(|(ofs, e)| (e.stage_raw() == wanted_stage).then_some(idx + ofs + 1)),
        }
    }

    /// Return a data structure to help with case-insensitive lookups.
    ///
    /// It's required to perform any case-insensitive lookup.
    /// TODO: needs multi-threaded insertion, raw-table to have multiple locks depending on bucket.
    pub fn prepare_icase_backing(&self) -> AccelerateLookup<'_> {
        let _span = gix_features::trace::detail!("prepare_icase_backing", entries = self.entries.len());
        let mut out = AccelerateLookup::with_capacity(self.entries.len());
        for entry in &self.entries {
            let entry_path = entry.path(self);
            let hash = AccelerateLookup::icase_hash(entry_path);
            out.icase_entries
                .insert_unique(hash, entry, |e| AccelerateLookup::icase_hash(e.path(self)));

            let mut last_pos = entry_path.len();
            while let Some(slash_idx) = entry_path[..last_pos].rfind_byte(b'/') {
                let dir = entry_path[..slash_idx].as_bstr();
                last_pos = slash_idx;
                let dir_range = entry.path.start..(entry.path.start + dir.len());

                let hash = AccelerateLookup::icase_hash(dir);
                if out
                    .icase_dirs
                    .find(hash, |dir| {
                        dir.path(self) == self.path_backing[dir_range.clone()].as_bstr()
                    })
                    .is_none()
                {
                    out.icase_dirs.insert_unique(
                        hash,
                        crate::DirEntry {
                            entry,
                            dir_end: dir_range.end,
                        },
                        |dir| AccelerateLookup::icase_hash(dir.path(self)),
                    );
                } else {
                    break;
                }
            }
        }
        gix_features::trace::debug!(directories = out.icase_dirs.len(), "stored directories");
        out
    }

    /// Return the entry at `path` that is at the lowest available stage, using `lookup` for acceleration.
    /// `lookup` must have been created from this instance, and should ideally be kept up-to-date with it.
    ///
    /// If `ignore_case` is `true`, a case-insensitive (ASCII-folding only) search will be performed.
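    ///
    /// A brief sketch of the accelerated lookup; the `state` binding and the path literal are
    /// assumptions for illustration:
    ///
    /// ```ignore
    /// let lookup = state.prepare_icase_backing();
    /// // With `ignore_case = true` this also finds an entry stored as `src/lib.rs`.
    /// let entry = state.entry_by_path_icase("SRC/LIB.rs".into(), true, &lookup);
    /// ```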
    pub fn entry_by_path_icase<'a>(
        &'a self,
        path: &BStr,
        ignore_case: bool,
        lookup: &AccelerateLookup<'a>,
    ) -> Option<&'a Entry> {
        lookup
            .icase_entries
            .find(AccelerateLookup::icase_hash(path), |e| {
                let entry_path = e.path(self);
                if entry_path == path {
                    return true;
                }
                if !ignore_case {
                    return false;
                }
                entry_path.eq_ignore_ascii_case(path)
            })
            .copied()
    }

    /// Return the entry (at any stage) that is inside of `directory`, or `None`,
    /// using `lookup` for acceleration.
    /// Note that submodules are not detected as directories and the user should
    /// make another call to [`entry_by_path_icase()`](Self::entry_by_path_icase) to check for this
    /// possibility. Doing so might also reveal a sparse directory.
    ///
    /// If `ignore_case` is `true`, a case-insensitive (ASCII-folding only) search will be performed.
    pub fn entry_closest_to_directory_icase<'a>(
        &'a self,
        directory: &BStr,
        ignore_case: bool,
        lookup: &AccelerateLookup<'a>,
    ) -> Option<&'a Entry> {
        lookup
            .icase_dirs
            .find(AccelerateLookup::icase_hash(directory), |dir| {
                let dir_path = dir.path(self);
                if dir_path == directory {
                    return true;
                }
                if !ignore_case {
                    return false;
                }
                dir_path.eq_ignore_ascii_case(directory)
            })
            .map(|dir| dir.entry)
    }

    /// Return the entry (at any stage) that is inside of `directory`, or `None`.
    /// Note that submodules are not detected as directories and the user should
    /// make another call to [`entry_by_path_icase()`](Self::entry_by_path_icase) to check for this
    /// possibility. Doing so might also reveal a sparse directory.
    ///
    /// Note that this is a case-sensitive search.
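    ///
    /// For instance, given the sorted entries `a-sibling`, `a/b` and `a/c`, querying for the
    /// directory `a` yields the entry for `a/b`. A sketch, assuming `state` exists:
    ///
    /// ```ignore
    /// let entry_inside_a = state.entry_closest_to_directory("a".into());
    /// ```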
    pub fn entry_closest_to_directory(&self, directory: &BStr) -> Option<&Entry> {
        let idx = self.entry_index_by_path(directory).err()?;
        for entry in &self.entries[idx..] {
            let path = entry.path(self);
            if path.get(..directory.len())? != directory {
                break;
            }
            // Entries are sorted by path. A byte smaller than `/` right after the prefix belongs to a
            // sibling like `directory-other`, so keep scanning; a byte greater than `/` means no entry
            // within `directory` can follow anymore.
            let dir_char = path.get(directory.len())?;
            if *dir_char > b'/' {
                break;
            }
            if *dir_char < b'/' {
                continue;
            }
            return Some(entry);
        }
        None
    }

    /// Find the entry index in [`entries()[..upper_bound]`][State::entries()] matching the given repository-relative
    /// `path` and `stage`, or `None`.
    ///
    /// Use the index for accessing multiple stages if they exist, but at least the single matching entry.
    ///
    /// # Panics
    ///
    /// If `upper_bound` is out of bounds of our entries array.
    pub fn entry_index_by_path_and_stage_bounded(
        &self,
        path: &BStr,
        stage: entry::Stage,
        upper_bound: usize,
    ) -> Option<usize> {
        self.entries[..upper_bound]
            .binary_search_by(|e| e.path(self).cmp(path).then_with(|| e.stage().cmp(&stage)))
            .ok()
    }

    /// Like [`entry_index_by_path_and_stage()`](State::entry_index_by_path_and_stage()),
    /// but returns the entry instead of the index.
    pub fn entry_by_path_and_stage(&self, path: &BStr, stage: entry::Stage) -> Option<&Entry> {
        self.entry_index_by_path_and_stage(path, stage)
            .map(|idx| &self.entries[idx])
    }

    /// Return the entry at `path` that is either at stage 0, or at stage 2 (ours) in case of a merge conflict.
    ///
    /// Using this method is more efficient in comparison to doing two searches, one for stage 0 and one for stage 2.
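    ///
    /// A minimal sketch, assuming `state` is an existing [`State`]:
    ///
    /// ```ignore
    /// if let Some(entry) = state.entry_by_path("src/lib.rs".into()) {
    ///     // Either the unconflicted entry (stage 0) or, during a conflict, 'our' side (stage 2).
    ///     assert!(entry.stage_raw() == 0 || entry.stage_raw() == 2);
    /// }
    /// ```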
    pub fn entry_by_path(&self, path: &BStr) -> Option<&Entry> {
        let mut stage_at_index = 0;
        let idx = self
            .entries
            .binary_search_by(|e| {
                let res = e.path(self).cmp(path);
                if res.is_eq() {
                    stage_at_index = e.stage_raw();
                }
                res
            })
            .ok()?;
        let idx = if stage_at_index == 0 || stage_at_index == 2 {
            idx
        } else {
            self.entry_index_by_idx_and_stage(path, idx, Stage::Ours as StageRaw, stage_at_index.cmp(&2))?
        };
        Some(&self.entries[idx])
    }

    /// Return the index at `Ok(index)` where the entry matching `path` (in any stage) can be found, or return
    /// `Err(index)` to indicate the insertion position at which an entry with `path` would fit in.
    pub fn entry_index_by_path(&self, path: &BStr) -> Result<usize, usize> {
        self.entries.binary_search_by(|e| e.path(self).cmp(path))
    }

    /// Return the slice of entries which all share the same `prefix`, or `None` if there isn't a single such entry.
    ///
    /// If `prefix` is empty, all entries are returned.
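    ///
    /// A short sketch, assuming `state` contains entries below `src/`:
    ///
    /// ```ignore
    /// if let Some(entries) = state.prefixed_entries("src/".into()) {
    ///     for entry in entries {
    ///         println!("{}", entry.path(&state));
    ///     }
    /// }
    /// ```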
    pub fn prefixed_entries(&self, prefix: &BStr) -> Option<&[Entry]> {
        self.prefixed_entries_range(prefix).map(|range| &self.entries[range])
    }

    /// Return the range of entries which all share the same `prefix`, or `None` if there isn't a single such entry.
    ///
    /// If `prefix` is empty, the range will include all entries.
    pub fn prefixed_entries_range(&self, prefix: &BStr) -> Option<Range<usize>> {
        if prefix.is_empty() {
            return Some(0..self.entries.len());
        }
        let prefix_len = prefix.len();
        let mut low = self.entries.partition_point(|e| {
            e.path(self)
                .get(..prefix_len)
                .map_or_else(|| e.path(self) <= &prefix[..e.path.len()], |p| p < prefix)
        });
        let mut high =
            low + self.entries[low..].partition_point(|e| e.path(self).get(..prefix_len).is_some_and(|p| p <= prefix));

        let low_entry = &self.entries.get(low)?;
        if low_entry.stage_raw() != 0 {
            low = self
                .walk_entry_stages(low_entry.path(self), low, Ordering::Less)
                .unwrap_or(low);
        }
        if let Some(high_entry) = self.entries.get(high) {
            if high_entry.stage_raw() != 0 {
                high = self
                    .walk_entry_stages(high_entry.path(self), high, Ordering::Less)
                    .unwrap_or(high);
            }
        }
        (low != high).then_some(low..high)
    }

    /// Return the entry at `idx` or _panic_ if the index is out of bounds.
    ///
    /// The `idx` is typically returned by [`entry_by_path_and_stage()`][State::entry_by_path_and_stage()].
    pub fn entry(&self, idx: usize) -> &Entry {
        &self.entries[idx]
    }

    /// Returns a boolean value indicating whether the index is sparse or not.
    ///
    /// An index is sparse if it contains at least one [`Mode::DIR`][entry::Mode::DIR] entry.
    pub fn is_sparse(&self) -> bool {
        self.is_sparse
    }

    /// Return the range of entries that exactly match the given `path`, in all available stages, or `None` if no entry with such
    /// a path exists.
    ///
    /// The range can be used to access the respective entries via [`entries()`](Self::entries()) or [`entries_mut()`](Self::entries_mut()).
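    ///
    /// A small sketch that visits all stages of a conflicted path, assuming `state` exists:
    ///
    /// ```ignore
    /// if let Some(range) = state.entry_range("conflicting-file".into()) {
    ///     for entry in &state.entries()[range] {
    ///         println!("stage {}: {}", entry.stage_raw(), entry.path(&state));
    ///     }
    /// }
    /// ```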
    pub fn entry_range(&self, path: &BStr) -> Option<Range<usize>> {
        let mut stage_at_index = 0;
        let idx = self
            .entries
            .binary_search_by(|e| {
                let res = e.path(self).cmp(path);
                if res.is_eq() {
                    stage_at_index = e.stage_raw();
                }
                res
            })
            .ok()?;

        let (start, end) = (
            self.walk_entry_stages(path, idx, Ordering::Less).unwrap_or(idx),
            self.walk_entry_stages(path, idx, Ordering::Greater).unwrap_or(idx) + 1,
        );
        Some(start..end)
    }
}

impl AccelerateLookup<'_> {
    fn with_capacity(cap: usize) -> Self {
        let ratio_of_entries_to_dirs_in_webkit = 20; // 400k entries and 20k dirs
        Self {
            icase_entries: hashbrown::HashTable::with_capacity(cap),
            icase_dirs: hashbrown::HashTable::with_capacity(cap / ratio_of_entries_to_dirs_in_webkit),
        }
    }
    fn icase_hash(data: &BStr) -> u64 {
        use std::hash::Hasher;
        let mut hasher = fnv::FnvHasher::default();
        for b in data.as_bytes() {
            hasher.write_u8(b.to_ascii_lowercase());
        }
        hasher.finish()
    }
}

/// Mutation
impl State {
    /// After usage of the storage obtained by [`take_path_backing()`][Self::take_path_backing()], return it here.
    /// Note that it must not be empty.
    pub fn return_path_backing(&mut self, backing: PathStorage) {
        debug_assert!(
            self.path_backing.is_empty(),
            "BUG: return path backing only after taking it, once"
        );
        self.path_backing = backing;
    }

    /// Return mutable entries in a slice.
    pub fn entries_mut(&mut self) -> &mut [Entry] {
        &mut self.entries
    }

    /// Return a writable slice to entries and read-access to their path storage at the same time.
    pub fn entries_mut_and_pathbacking(&mut self) -> (&mut [Entry], &PathStorageRef) {
        (&mut self.entries, &self.path_backing)
    }

    /// Return mutable entries along with their paths in an iterator.
    pub fn entries_mut_with_paths(&mut self) -> impl Iterator<Item = (&mut Entry, &BStr)> {
        let paths = &self.path_backing;
        self.entries.iter_mut().map(move |e| {
            let path = paths[e.path.clone()].as_bstr();
            (e, path)
        })
    }

    /// Return all parts that relate to entries, which includes path storage.
    ///
    /// This can be useful for obtaining a standalone, boxable iterator.
    pub fn into_entries(self) -> (Vec<Entry>, PathStorage) {
        (self.entries, self.path_backing)
    }

    /// Sometimes it's necessary to remove the path backing to allow certain mutations of the state
    /// while still supporting reads of an entry's path.
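    ///
    /// The typical take/return round-trip looks like this sketch, with `state` assumed to exist:
    ///
    /// ```ignore
    /// let backing = state.take_path_backing();
    /// for entry in state.entries_mut() {
    ///     let path = entry.path_in(&backing);
    ///     // mutate `entry` while still being able to read `path`…
    /// }
    /// state.return_path_backing(backing);
    /// ```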
    pub fn take_path_backing(&mut self) -> PathStorage {
        assert_eq!(
            self.entries.is_empty(),
            self.path_backing.is_empty(),
            "BUG: cannot take out backing multiple times"
        );
        std::mem::take(&mut self.path_backing)
    }

    /// Like [`entry_index_by_path_and_stage()`][State::entry_index_by_path_and_stage()],
    /// but returns the mutable entry instead of the index.
    pub fn entry_mut_by_path_and_stage(&mut self, path: &BStr, stage: entry::Stage) -> Option<&mut Entry> {
        self.entry_index_by_path_and_stage(path, stage)
            .map(|idx| &mut self.entries[idx])
    }

    /// Push a new entry containing `stat`, `id`, `flags`, `mode` and `path` to the end of our storage, without performing
    /// any sanity checks. This means it's possible to push a new entry to the same path on the same stage, and even after sorting
    /// the entries lookups may still return the wrong one of them unless the correct binary search criterion is chosen.
    ///
    /// Note that this *is likely* to break invariants that will prevent further lookups by path unless
    /// [`entry_index_by_path_and_stage_bounded()`][State::entry_index_by_path_and_stage_bounded()] is used with
    /// the `upper_bound` being the number of entries before the first call to this method.
    ///
    /// Alternatively, make sure to call [`sort_entries()`][State::sort_entries()] before entry lookup by path to restore
    /// the invariant.
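    ///
    /// A sketch of the push-then-sort pattern; the stat, id, flags and mode values below are
    /// illustrative assumptions:
    ///
    /// ```ignore
    /// state.dangerously_push_entry(
    ///     gix_index::entry::Stat::default(),
    ///     gix_hash::Kind::Sha1.null(),
    ///     gix_index::entry::Flags::empty(),
    ///     gix_index::entry::Mode::FILE,
    ///     "new-file".into(),
    /// );
    /// // Restore the sorting invariant before looking up entries by path again.
    /// state.sort_entries();
    /// ```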
    pub fn dangerously_push_entry(
        &mut self,
        stat: entry::Stat,
        id: gix_hash::ObjectId,
        flags: entry::Flags,
        mode: entry::Mode,
        path: &BStr,
    ) {
        let path = {
            let path_start = self.path_backing.len();
            self.path_backing.push_str(path);
            path_start..self.path_backing.len()
        };

        self.entries.push(Entry {
            stat,
            id,
            flags,
            mode,
            path,
        });
    }

    /// Unconditionally sort entries as needed to perform lookups quickly.
    pub fn sort_entries(&mut self) {
        let path_backing = &self.path_backing;
        self.entries.sort_by(|a, b| {
            Entry::cmp_filepaths(a.path_in(path_backing), b.path_in(path_backing))
                .then_with(|| a.stage().cmp(&b.stage()))
        });
    }

    /// Similar to [`sort_entries()`][State::sort_entries()], but applies `compare` after comparing
    /// by path and stage as a third criterion.
    pub fn sort_entries_by(&mut self, mut compare: impl FnMut(&Entry, &Entry) -> Ordering) {
        let path_backing = &self.path_backing;
        self.entries.sort_by(|a, b| {
            Entry::cmp_filepaths(a.path_in(path_backing), b.path_in(path_backing))
                .then_with(|| a.stage().cmp(&b.stage()))
                .then_with(|| compare(a, b))
        });
    }

    /// Physically remove all entries for which `should_remove(idx, path, entry)` returns `true`, traversing them from first to last.
    ///
    /// Note that the memory used for the removed entries' paths is not freed, as it's append-only, and
    /// that some extensions might refer to paths which are now deleted.
    ///
    /// ### Performance
    ///
    /// Typically, one would rather implement this operation by adding [entry::Flags::REMOVE] to each entry, so it is
    /// removed when [writing the index](Self::write_to()).
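    ///
    /// For example, all conflicting entries could be removed like this, with `state` assumed to exist:
    ///
    /// ```ignore
    /// state.remove_entries(|_idx, _path, entry| entry.stage_raw() != 0);
    /// ```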
    pub fn remove_entries(&mut self, mut should_remove: impl FnMut(usize, &BStr, &mut Entry) -> bool) {
        let mut index = 0;
        let paths = &self.path_backing;
        self.entries.retain_mut(|e| {
            let path = e.path_in(paths);
            let res = !should_remove(index, path, e);
            index += 1;
            res
        });
    }

    /// Physically remove the entry at `index`, or panic if the entry didn't exist.
    ///
    /// This call is typically made after looking up `index`, so it's clear that it will not panic.
    ///
    /// Note that the memory used for the removed entry's path is not freed, as it's append-only, and
    /// that some extensions might refer to paths which are now deleted.
    pub fn remove_entry_at_index(&mut self, index: usize) -> Entry {
        self.entries.remove(index)
    }
}

/// Extensions
impl State {
    /// Access the `tree` extension.
    pub fn tree(&self) -> Option<&extension::Tree> {
        self.tree.as_ref()
    }
    /// Remove the `tree` extension.
    pub fn remove_tree(&mut self) -> Option<extension::Tree> {
        self.tree.take()
    }
    /// Access the `link` extension.
    pub fn link(&self) -> Option<&extension::Link> {
        self.link.as_ref()
    }
    /// Obtain the resolve-undo extension.
    pub fn resolve_undo(&self) -> Option<&extension::resolve_undo::Paths> {
        self.resolve_undo.as_ref()
    }
    /// Remove the resolve-undo extension.
    pub fn remove_resolve_undo(&mut self) -> Option<extension::resolve_undo::Paths> {
        self.resolve_undo.take()
    }
    /// Obtain the untracked extension.
    pub fn untracked(&self) -> Option<&extension::UntrackedCache> {
        self.untracked.as_ref()
    }
    /// Obtain the fsmonitor extension.
    pub fn fs_monitor(&self) -> Option<&extension::FsMonitor> {
        self.fs_monitor.as_ref()
    }
    /// Return `true` if the end-of-index extension was present when decoding this index.
    pub fn had_end_of_index_marker(&self) -> bool {
        self.end_of_index_at_decode_time
    }
    /// Return `true` if the offset-table extension was present when decoding this index.
    pub fn had_offset_table(&self) -> bool {
        self.offset_table_at_decode_time
    }
}

#[cfg(test)]
mod tests {
    use std::path::{Path, PathBuf};

    #[test]
    fn entry_by_path_with_conflicting_file() {
        let file = PathBuf::from("tests")
            .join("fixtures")
            .join(Path::new("loose_index").join("conflicting-file.git-index"));
        let file = crate::File::at(file, gix_hash::Kind::Sha1, false, Default::default()).expect("valid file");
        assert_eq!(
            file.entries().len(),
            3,
            "we have a set of conflict entries for a single file"
        );
        for idx in 0..3 {
            for wanted_stage in 1..=3 {
                let actual_idx = file
                    .entry_index_by_idx_and_stage(
                        "file".into(),
                        idx,
                        wanted_stage,
                        (idx + 1).cmp(&(wanted_stage as usize)),
                    )
                    .expect("found");
                assert_eq!(
                    actual_idx + 1,
                    wanted_stage as usize,
                    "the index and stage have a relation, and that is upheld if we search correctly"
                );
            }
        }
    }
}