// Source: git_odb/store_impls/dynamic/types.rs

1use std::{
2    path::{Path, PathBuf},
3    sync::{
4        atomic::{AtomicU16, AtomicU32, AtomicUsize, Ordering},
5        Arc,
6    },
7    time::SystemTime,
8};
9
10use arc_swap::ArcSwap;
11use git_features::hash;
12
/// Identifies a single index file or a multi-pack index file.
pub type IndexId = usize;
/// The integer type of state ids, as stored in `SlotIndexMarker::state_id`.
pub(crate) type StateId = u32;
/// The integer type of generation counters, as stored in `SlotIndexMarker::generation`.
pub(crate) type Generation = u32;
/// The atomic counterpart of `Generation`, as stored in `MutableIndexAndPack::generation`.
pub(crate) type AtomicGeneration = AtomicU32;
18
19/// A way to indicate which pack indices we have seen already and which of them are loaded, along with an idea
20/// of whether stored `PackId`s are still usable.
21#[derive(Default, Copy, Clone)]
22pub struct SlotIndexMarker {
23    /// The generation the `loaded_until_index` belongs to. Indices of different generations are completely incompatible.
24    /// This value changes once the internal representation is compacted, something that may happen only if there is no handle
25    /// requiring stable pack indices.
26    pub(crate) generation: Generation,
27    /// A unique id identifying the index state as well as all loose databases we have last observed.
28    /// If it changes in any way, the value is different.
29    pub(crate) state_id: StateId,
30}
31
32/// A way to load and refer to a pack uniquely, namespaced by their indexing mechanism, aka multi-pack or not.
33#[derive(Debug, Copy, Clone, Eq, PartialEq)]
34pub struct PackId {
35    /// This is the index in the slot map at which the packs index is located.
36    pub(crate) index: IndexId,
37    /// If the pack is in a multi-pack index, this additional index is the pack-index within the multi-pack index identified by `index`.
38    pub(crate) multipack_index: Option<git_pack::multi_index::PackIndex>,
39}
40
41impl PackId {
42    /// Returns the maximum of indices we can represent.
43    pub(crate) const fn max_indices() -> usize {
44        (1 << 15) - 1
45    }
46    /// Returns the maximum of packs we can represent if stored in a multi-index.
47    pub(crate) const fn max_packs_in_multi_index() -> git_pack::multi_index::PackIndex {
48        (1 << 16) - 1
49    }
50    /// Packs have a built-in identifier to make data structures simpler, and this method represents ourselves as such id
51    /// to be convertible back and forth. We essentially compress ourselves into a u32.
52    ///
53    /// Bit 16 is a marker to tell us if it's a multi-pack or not, the ones before are the index file itself, the ones after
54    /// are used to encode the pack index within the multi-pack.
55    pub(crate) fn to_intrinsic_pack_id(self) -> git_pack::data::Id {
56        assert!(self.index < (1 << 15), "There shouldn't be more than 2^15 indices");
57        match self.multipack_index {
58            None => self.index as git_pack::data::Id,
59            Some(midx) => {
60                assert!(
61                    midx <= Self::max_packs_in_multi_index(),
62                    "There shouldn't be more than 2^16 packs per multi-index"
63                );
64                ((self.index as git_pack::data::Id | 1 << 15) | midx << 16) as git_pack::data::Id
65            }
66        }
67    }
68
69    pub(crate) fn from_intrinsic_pack_id(pack_id: git_pack::data::Id) -> Self {
70        if pack_id & (1 << 15) == 0 {
71            PackId {
72                index: (pack_id & 0x7fff) as IndexId,
73                multipack_index: None,
74            }
75        } else {
76            PackId {
77                index: (pack_id & 0x7fff) as IndexId,
78                multipack_index: Some(pack_id >> 16),
79            }
80        }
81    }
82}
83
84/// An index that changes only if the packs directory changes and its contents is re-read.
85#[derive(Default)]
86pub struct SlotMapIndex {
87    /// The index into the slot map at which we expect an index or pack file. Neither of these might be loaded yet.
88    pub(crate) slot_indices: Vec<usize>,
89    /// A list of loose object databases as resolved by their alternates file in the `object_directory`. The first entry is this objects
90    /// directory loose file database. All other entries are the loose stores of alternates.
91    /// It's in an Arc to be shared to Handles, but not to be shared across SlotMapIndices.
92    pub(crate) loose_dbs: Arc<Vec<crate::loose::Store>>,
93
94    /// A static value that doesn't ever change for a particular clone of this index.
95    pub(crate) generation: Generation,
96    /// The number of indices loaded thus far when the index of the slot map was last examined, which can change as new indices are loaded
97    /// in parallel.
98    /// Shared across SlotMapIndex instances of the same generation.
99    pub(crate) next_index_to_load: Arc<AtomicUsize>,
100    /// Incremented by one up to `slot_indices.len()` once an attempt to load an index completed.
101    /// If a load failed, there will also be an increment.
102    /// Shared across SlotMapIndex instances of the same generation.
103    pub(crate) loaded_indices: Arc<AtomicUsize>,
104    /// The amount of indices that are currently being loaded.
105    /// Zero if no loading operation is currently happening, or more otherwise.
106    pub(crate) num_indices_currently_being_loaded: Arc<AtomicU16>,
107}
108
109impl SlotMapIndex {
110    pub(crate) fn state_id(self: &Arc<SlotMapIndex>) -> StateId {
111        // We let the loaded indices take part despite not being part of our own snapshot.
112        // This is to account for indices being loaded in parallel without actually changing the snapshot itself.
113        let hash = hash::crc32(&(Arc::as_ptr(self) as usize).to_be_bytes());
114        hash::crc32_update(hash, &self.loaded_indices.load(Ordering::SeqCst).to_be_bytes())
115    }
116
117    pub(crate) fn marker(self: &Arc<SlotMapIndex>) -> SlotIndexMarker {
118        SlotIndexMarker {
119            generation: self.generation,
120            state_id: self.state_id(),
121        }
122    }
123
124    /// Returns true if we already know at least one loose object db, a sign of being initialized
125    pub(crate) fn is_initialized(&self) -> bool {
126        !self.loose_dbs.is_empty()
127    }
128}
129
/// A file on disk along with what we know about it: its path, its modification time and whether its
/// content of type `T` is currently held in memory.
#[derive(Clone)]
pub(crate) struct OnDiskFile<T: Clone> {
    /// The path at which the file was last known to be.
    path: Arc<PathBuf>,
    /// The time at which the file was last modified.
    mtime: SystemTime,
    /// Whether the content is loaded, and if it is not, whether loading can be attempted.
    state: OnDiskFileState<T>,
}

/// The load state of an `OnDiskFile`.
#[derive(Clone)]
pub(crate) enum OnDiskFileState<T: Clone> {
    /// The file exists on disk and can be loaded from there.
    Unloaded,
    /// The file was loaded and its content is available.
    Loaded(T),
    /// The file was loaded, but turned out to be missing on disk when our state was reconciled with the disk.
    /// It had to be kept as there were handles requiring pack-id stability, which allows them to find the item
    /// in later lookups.
    Garbage(T),
    /// The file is missing on disk: either loading it failed when we tried, or it disappeared while our state
    /// was reconciled with the disk.
    Missing,
}

impl<T: Clone> OnDiskFile<T> {
    /// The last known path of the file.
    pub fn path(&self) -> &Path {
        self.path.as_path()
    }

    /// Return true if the content of the file is held in memory, which is also the case for garbage.
    pub fn is_loaded(&self) -> bool {
        self.loaded().is_some()
    }

    /// Return true if we are to be collected as garbage, that is if we are garbage or missing.
    pub fn is_disposable(&self) -> bool {
        match self.state {
            OnDiskFileState::Garbage(_) | OnDiskFileState::Missing => true,
            OnDiskFileState::Loaded(_) | OnDiskFileState::Unloaded => false,
        }
    }

    /// Make sure the content is loaded, calling `load` with our path if the file is unloaded or missing.
    /// Nothing happens if content is already present, garbage included.
    ///
    /// On error, the file is always declared missing and the error is returned.
    pub(crate) fn load_strict(&mut self, load: impl FnOnce(&Path) -> std::io::Result<T>) -> std::io::Result<()> {
        match self.state {
            OnDiskFileState::Loaded(_) | OnDiskFileState::Garbage(_) => return Ok(()),
            OnDiskFileState::Unloaded | OnDiskFileState::Missing => {}
        }
        match load(&self.path) {
            Ok(content) => {
                self.state = OnDiskFileState::Loaded(content);
                Ok(())
            }
            Err(err) => {
                // TODO: Should we provide more information? We don't even know what exactly failed right now,
                //       which loses information.
                self.state = OnDiskFileState::Missing;
                Err(err)
            }
        }
    }

    /// Return a clone of the content, calling `load` with our path first if the file is unloaded.
    ///
    /// A missing file is not considered a failure: `Ok(None)` is returned instead to allow recovery, both if
    /// the file is already known to be missing (`load` isn't called then) and if `load` fails with
    /// `NotFound`, which marks the file as missing. All other errors are returned without changing our state.
    /// Returning the content by value also works around the borrow checker, which is a nice coincidence.
    pub fn load_with_recovery(&mut self, load: impl FnOnce(&Path) -> std::io::Result<T>) -> std::io::Result<Option<T>> {
        match &self.state {
            OnDiskFileState::Loaded(content) | OnDiskFileState::Garbage(content) => {
                return Ok(Some(content.clone()))
            }
            OnDiskFileState::Missing => return Ok(None),
            OnDiskFileState::Unloaded => {}
        }
        match load(&self.path) {
            Ok(content) => {
                self.state = OnDiskFileState::Loaded(content.clone());
                Ok(Some(content))
            }
            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
                self.state = OnDiskFileState::Missing;
                Ok(None)
            }
            Err(err) => Err(err),
        }
    }

    /// Return the content if it is held in memory, no matter if it is considered garbage or not.
    pub fn loaded(&self) -> Option<&T> {
        if let OnDiskFileState::Loaded(content) | OnDiskFileState::Garbage(content) = &self.state {
            Some(content)
        } else {
            None
        }
    }

    /// Turn garbage back into loaded content and a missing file into an unloaded one.
    /// Loaded and unloaded files remain unchanged.
    pub fn put_back(&mut self) {
        let previous = std::mem::replace(&mut self.state, OnDiskFileState::Missing);
        self.state = match previous {
            OnDiskFileState::Garbage(content) => OnDiskFileState::Loaded(content),
            OnDiskFileState::Missing => OnDiskFileState::Unloaded,
            unchanged @ OnDiskFileState::Loaded(_) | unchanged @ OnDiskFileState::Unloaded => unchanged,
        };
    }

    /// Turn loaded content into garbage. All other states remain unchanged.
    pub fn trash(&mut self) {
        let previous = std::mem::replace(&mut self.state, OnDiskFileState::Missing);
        self.state = match previous {
            OnDiskFileState::Loaded(content) => OnDiskFileState::Garbage(content),
            unchanged @ OnDiskFileState::Garbage(_)
            | unchanged @ OnDiskFileState::Unloaded
            | unchanged @ OnDiskFileState::Missing => unchanged,
        };
    }
}
230
231#[derive(Clone)]
232pub(crate) struct IndexFileBundle {
233    pub index: OnDiskFile<Arc<git_pack::index::File>>,
234    pub data: OnDiskFile<Arc<git_pack::data::File>>,
235}
236
237#[derive(Clone)]
238pub(crate) struct MultiIndexFileBundle {
239    pub multi_index: OnDiskFile<Arc<git_pack::multi_index::File>>,
240    pub data: Vec<OnDiskFile<Arc<git_pack::data::File>>>,
241}
242
243#[derive(Clone)]
244pub(crate) enum IndexAndPacks {
245    Index(IndexFileBundle),
246    /// Note that there can only be one multi-pack file per repository, but thanks to git alternates, there can be multiple overall.
247    MultiIndex(MultiIndexFileBundle),
248}
249
250impl IndexAndPacks {
251    pub(crate) fn index_path(&self) -> &Path {
252        match self {
253            IndexAndPacks::Index(index) => &index.index.path,
254            IndexAndPacks::MultiIndex(index) => &index.multi_index.path,
255        }
256    }
257
258    pub(crate) fn mtime(&self) -> SystemTime {
259        match self {
260            IndexAndPacks::Index(index) => index.index.mtime,
261            IndexAndPacks::MultiIndex(index) => index.multi_index.mtime,
262        }
263    }
264
265    /// If we are garbage, put ourselves into the loaded state. Otherwise put ourselves back to unloaded.
266    pub(crate) fn put_back(&mut self) {
267        match self {
268            IndexAndPacks::Index(bundle) => {
269                bundle.index.put_back();
270                bundle.data.put_back();
271            }
272            IndexAndPacks::MultiIndex(bundle) => {
273                bundle.multi_index.put_back();
274                for data in &mut bundle.data {
275                    data.put_back();
276                }
277            }
278        }
279    }
280
281    // The inverse of `put_back()`, by trashing the content.
282    pub(crate) fn trash(&mut self) {
283        match self {
284            IndexAndPacks::Index(bundle) => {
285                bundle.index.trash();
286                bundle.data.trash();
287            }
288            IndexAndPacks::MultiIndex(bundle) => {
289                bundle.multi_index.trash();
290                for data in &mut bundle.data {
291                    data.trash();
292                }
293            }
294        }
295    }
296
297    pub(crate) fn index_is_loaded(&self) -> bool {
298        match self {
299            Self::Index(bundle) => bundle.index.is_loaded(),
300            Self::MultiIndex(bundle) => bundle.multi_index.is_loaded(),
301        }
302    }
303
304    pub(crate) fn is_disposable(&self) -> bool {
305        match self {
306            Self::Index(bundle) => bundle.index.is_disposable() || bundle.data.is_disposable(),
307            Self::MultiIndex(bundle) => {
308                bundle.multi_index.is_disposable() || bundle.data.iter().any(|odf| odf.is_disposable())
309            }
310        }
311    }
312
313    pub(crate) fn load_index(&mut self, object_hash: git_hash::Kind) -> std::io::Result<()> {
314        match self {
315            IndexAndPacks::Index(bundle) => bundle.index.load_strict(|path| {
316                git_pack::index::File::at(path, object_hash)
317                    .map(Arc::new)
318                    .map_err(|err| match err {
319                        git_pack::index::init::Error::Io { source, .. } => source,
320                        err => std::io::Error::new(std::io::ErrorKind::Other, err),
321                    })
322            }),
323            IndexAndPacks::MultiIndex(bundle) => {
324                bundle.multi_index.load_strict(|path| {
325                    git_pack::multi_index::File::at(path)
326                        .map(Arc::new)
327                        .map_err(|err| match err {
328                            git_pack::multi_index::init::Error::Io { source, .. } => source,
329                            err => std::io::Error::new(std::io::ErrorKind::Other, err),
330                        })
331                })?;
332                if let Some(multi_index) = bundle.multi_index.loaded() {
333                    bundle.data = Self::index_names_to_pack_paths(multi_index);
334                }
335                Ok(())
336            }
337        }
338    }
339
340    pub(crate) fn new_single(index_path: PathBuf, mtime: SystemTime) -> Self {
341        let data_path = index_path.with_extension("pack");
342        Self::Index(IndexFileBundle {
343            index: OnDiskFile {
344                path: index_path.into(),
345                state: OnDiskFileState::Unloaded,
346                mtime,
347            },
348            data: OnDiskFile {
349                path: data_path.into(),
350                state: OnDiskFileState::Unloaded,
351                mtime,
352            },
353        })
354    }
355
356    pub(crate) fn new_multi_from_open_file(multi_index: Arc<git_pack::multi_index::File>, mtime: SystemTime) -> Self {
357        let data = Self::index_names_to_pack_paths(&multi_index);
358        Self::MultiIndex(MultiIndexFileBundle {
359            multi_index: OnDiskFile {
360                path: Arc::new(multi_index.path().to_owned()),
361                state: OnDiskFileState::Loaded(multi_index),
362                mtime,
363            },
364            data,
365        })
366    }
367
368    fn index_names_to_pack_paths(
369        multi_index: &git_pack::multi_index::File,
370    ) -> Vec<OnDiskFile<Arc<git_pack::data::File>>> {
371        let parent_dir = multi_index.path().parent().expect("parent present");
372        let data = multi_index
373            .index_names()
374            .iter()
375            .map(|idx| OnDiskFile {
376                path: parent_dir.join(idx.with_extension("pack")).into(),
377                state: OnDiskFileState::Unloaded,
378                mtime: SystemTime::UNIX_EPOCH,
379            })
380            .collect();
381        data
382    }
383}
384
385#[derive(Default)]
386pub(crate) struct MutableIndexAndPack {
387    pub(crate) files: ArcSwap<Option<IndexAndPacks>>,
388    pub(crate) write: parking_lot::Mutex<()>,
389    /// The generation required at least to read this slot. If these mismatch, the caller is likely referring to a now changed slot
390    /// that has different content under the same id.
391    /// Must only be changed when the write lock is held.
392    pub(crate) generation: AtomicGeneration,
393}
394
/// A snapshot of resource usage.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
#[cfg_attr(feature = "serde1", derive(serde::Serialize, serde::Deserialize))]
pub struct Metrics {
    /// The total number of handles which can be used to access object information.
    pub num_handles: usize,
    /// The number of refreshes performed to reconcile our state with the state of the ODB on disk.
    pub num_refreshes: usize,
    /// The number of indices that are currently open and will be returned to handles.
    pub open_reachable_indices: usize,
    /// The number of known and reachable indices which aren't opened yet.
    pub known_reachable_indices: usize,
    /// The number of packs which are open in memory and will be returned to handles.
    pub open_reachable_packs: usize,
    /// The number of packs that are reachable and will be returned to handles, but which aren't open yet.
    pub known_packs: usize,
    /// The number of slots which are empty.
    ///
    /// They will fill over time, but can be emptied again as files are removed from disk.
    pub unused_slots: usize,
    /// Unreachable indices still occupy slots, but aren't returned to new handles anymore unless these happen to
    /// know their id.
    ///
    /// This keeps files available while they are still potentially required for operations like pack generation,
    /// despite the file on disk being removed or changed.
    pub unreachable_indices: usize,
    /// Like `unreachable_indices`, but for mapped pack data files.
    pub unreachable_packs: usize,
    /// The number of loose object databases currently available for object retrieval.
    ///
    /// There may be more than one if 'alternates' are used.
    pub loose_dbs: usize,
}
428
#[cfg(test)]
mod tests {
    use super::*;

    mod pack_id {
        use super::PackId;

        /// Encoding and decoding must be lossless at the upper and lower bounds of what can be represented.
        #[test]
        fn to_intrinsic_roundtrip() {
            let single = PackId {
                index: (1 << 15) - 1,
                multipack_index: None,
            };
            let multi = PackId {
                index: (1 << 15) - 1,
                multipack_index: Some((1 << 16) - 1),
            };
            assert_eq!(PackId::from_intrinsic_pack_id(single.to_intrinsic_pack_id()), single);
            assert_eq!(PackId::from_intrinsic_pack_id(multi.to_intrinsic_pack_id()), multi);

            let smallest_single = PackId {
                index: 0,
                multipack_index: None,
            };
            let smallest_multi = PackId {
                index: 0,
                multipack_index: Some(0),
            };
            assert_eq!(
                PackId::from_intrinsic_pack_id(smallest_single.to_intrinsic_pack_id()),
                smallest_single
            );
            assert_eq!(
                PackId::from_intrinsic_pack_id(smallest_multi.to_intrinsic_pack_id()),
                smallest_multi
            );
        }

        /// The first index that can't be represented is rejected.
        /// The expected message makes sure it is the range check that fires, not some unrelated panic.
        #[test]
        #[should_panic(expected = "There shouldn't be more than 2^15 indices")]
        fn max_supported_index_count() {
            PackId {
                index: 1 << 15,
                multipack_index: None,
            }
            .to_intrinsic_pack_id();
        }

        /// The first pack index within a multi-pack index that can't be represented is rejected.
        #[test]
        #[should_panic(expected = "There shouldn't be more than 2^16 packs per multi-index")]
        fn max_supported_pack_count_in_multi_index() {
            PackId {
                index: 0,
                multipack_index: Some(1 << 16),
            }
            .to_intrinsic_pack_id();
        }
    }
}
460}