gix_odb/lib.rs
1//! Git stores all of its data as _Objects_, which are data along with a hash over all data. Thus it's an
2//! object store indexed by the signature of data itself with inherent deduplication: the same data will have the same hash,
3//! and thus occupy the same space within the store.
4//!
5//! There is only one all-round object store, also known as the [`Store`], as it supports ~~everything~~ most of what git has to offer.
6//!
7//! * loose object reading and writing
8//! * access to packed objects
9//! * multiple loose objects and pack locations as gathered from `alternates` files.
10//!
11//! ## Write And Read Loose Objects
12//!
13//! ```
14//! # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
15//! # mod doctest { include!(concat!(env!("CARGO_MANIFEST_DIR"), "/tests/doctest.rs")); }
16//! use gix_object::{FindExt, Write};
17//!
18//! let (_dir, odb) = doctest::empty_store()?;
19//! let id = odb.write_buf(gix_object::Kind::Blob, b"hello")?;
20//!
21//! let mut buf = Vec::new();
22//! let object = odb.find(&id, &mut buf)?;
23//! assert_eq!(object.kind, gix_object::Kind::Blob);
24//! assert_eq!(object.data, b"hello");
25//! # Ok(()) }
26//! ```
27//!
28//! ## Inspect Headers Without Decoding The Object
29//!
30//! ```
31//! # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
32//! # mod doctest { include!(concat!(env!("CARGO_MANIFEST_DIR"), "/tests/doctest.rs")); }
33//! use gix_object::Write;
34//! use gix_odb::HeaderExt;
35//!
36//! let (_dir, odb) = doctest::empty_store()?;
37//! let id = odb.write_buf(gix_object::Kind::Blob, b"hello")?;
38//!
39//! let header = odb.header(&id)?;
40//! assert_eq!(header.kind(), gix_object::Kind::Blob);
41//! assert_eq!(header.size(), 5);
42//! # Ok(()) }
43//! ```
44//! ## Feature Flags
45#![cfg_attr(
46 all(doc, feature = "document-features"),
47 doc = ::document_features::document_features!()
48)]
49#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg))]
50#![deny(missing_docs, rust_2018_idioms, unsafe_code)]
51
52use std::{
53 cell::RefCell,
54 path::PathBuf,
55 sync::{atomic::AtomicUsize, Arc},
56};
57
58use arc_swap::ArcSwap;
59use gix_features::{threading::OwnShared, zlib::stream::deflate};
60pub use gix_pack as pack;
61
62mod store_impls;
63pub use store_impls::{dynamic as store, loose};
64
65pub mod alternate;
66
/// A way to access objects along with pre-configured thread-local caches for packed base objects as well as objects themselves.
///
/// By default, no cache will be used.
pub struct Cache<S> {
    /// The inner provider of trait implementations we use in conjunction with our caches.
    ///
    /// For calling methods on `inner`, prefer to make use of auto-dereferencing, i.e. `cache.inner_method()` instead of `cache.inner.inner_method()`.
    inner: S,
    // TODO: have single-threaded code-paths also for pack-creation (entries from counts) so that we can use OwnShared here
    //       instead of Arc. However, it's probably not that important as these aren't called often.
    /// An optional, shared [`cache::NewPackCacheFn`].
    // NOTE(review): presumably the factory used to create `pack_cache` instances — confirm in the `cache` module.
    new_pack_cache: Option<Arc<cache::NewPackCacheFn>>,
    /// An optional, shared [`cache::NewObjectCacheFn`].
    // NOTE(review): presumably the factory used to create `object_cache` instances — confirm in the `cache` module.
    new_object_cache: Option<Arc<cache::NewObjectCacheFn>>,
    /// The cache for packed base objects, or `None` if no such cache is in use.
    /// It sits in a `RefCell` so it can be borrowed mutably through a shared reference.
    pack_cache: Option<RefCell<Box<cache::PackCache>>>,
    /// The cache for objects themselves, or `None` if no such cache is in use.
    /// It sits in a `RefCell` so it can be borrowed mutably through a shared reference.
    object_cache: Option<RefCell<Box<cache::ObjectCache>>>,
}
82
83///
84pub mod cache;
85
/// An object database equivalent to `/dev/null`, dropping all objects stored into it.
///
/// It can optionally compress the content, similarly to what would happen when using a [`loose::Store`].
///
#[derive(Clone)]
pub struct Sink {
    /// The compressor writing into [`std::io::Sink`], or `None` if compression is disabled.
    compressor: Option<RefCell<deflate::Write<std::io::Sink>>>,
    /// The kind of hash this sink was created with.
    object_hash: gix_hash::Kind,
}
94
95/// Create a new [`Sink`] with compression disabled.
96pub fn sink(object_hash: gix_hash::Kind) -> Sink {
97 Sink {
98 compressor: None,
99 object_hash,
100 }
101}
102
103///
104pub mod memory;
105
106mod sink;
107
108///
109pub mod find;
110
/// Traits for reading object headers, re-exported below as [`Header`] and [`HeaderExt`].
112mod traits;
113
114pub use traits::{Header, HeaderExt};
115
/// A thread-local handle to access any object.
///
/// It is a [`Cache`] wrapping a [`store::Handle`] which shares the [`Store`] by means of `OwnShared`.
pub type Handle = Cache<store::Handle<OwnShared<Store>>>;
/// A thread-local handle to access any object, but thread-safe and independent of the actual type of `OwnShared` or feature toggles in `gix-features`.
///
/// It has the same shape as [`Handle`], except that the [`Store`] is always shared by means of an [`Arc`].
pub type HandleArc = Cache<store::Handle<Arc<Store>>>;
120
121use store::types;
122
/// The object store for use in any applications with support for auto-updates in the light of changes to the object database.
///
/// ### Features
///
/// - entirely lazy, creating an instance does no disk IO at all if [`Slots::Given`][store::init::Slots::Given] is used.
/// - multi-threaded lazy-loading of indices and packs
/// - per-thread pack and object caching avoiding cache thrashing.
/// - most-recently-used packs are always first for speedups if objects are stored in the same pack, typical for packs organized by
///   commit graph and object age.
/// - lock-free reading for perfect scaling across all cores, and changes to it don't affect readers as long as these don't want to
///   enter the same branch.
/// - sync with the state on disk if objects aren't found to catch up with changes if an object seems to be missing.
///     - turn off the behaviour above for all handles if objects are expected to be missing due to sparse checkouts.
pub struct Store {
    /// The central write lock without which the slotmap index can't be changed.
    write: parking_lot::Mutex<()>,

    /// The source directory from which all content is loaded.
    // NOTE(review): this comment previously also called the field "the central write lock for use when a
    // directory refresh is needed", which presumably describes `write` above — confirm.
    pub(crate) path: PathBuf,

    /// The current working directory at the time this store was instantiated. It becomes relevant when resolving alternate paths
    /// when re-reading the store configuration on updates when an object was missed.
    /// Keeping it here helps to assure consistency even while a process changes its CWD.
    pub(crate) current_dir: PathBuf,

    /// A set of replacements that given a source OID return a destination OID. The vector is sorted.
    pub(crate) replacements: Vec<(gix_hash::ObjectId, gix_hash::ObjectId)>,

    /// A list of indices keeping track of which slots are filled with data. These are usually, but not always, consecutive.
    pub(crate) index: ArcSwap<types::SlotMapIndex>,

    /// The below state acts like a slot-map where each slot is mutable when the write lock is held, but readable independently of it.
    /// This allows multiple files to be loaded concurrently if there are multiple handles requesting to load packs or additional indices.
    /// The map is static and cannot change.
    /// It's read often and changed rarely.
    pub(crate) files: Vec<types::MutableIndexAndPack>,

    /// The amount of handles that would prevent us from unloading packs or indices
    pub(crate) num_handles_stable: AtomicUsize,
    /// The amount of handles that don't affect our ability to compact our internal data structures or unload packs or indices.
    pub(crate) num_handles_unstable: AtomicUsize,

    /// The amount of times we re-read the disk state to consolidate our in-memory representation.
    pub(crate) num_disk_state_consolidation: AtomicUsize,
    /// If true, we are allowed to use multi-pack indices and they must have the `object_hash` or be ignored.
    use_multi_pack_index: bool,
    /// The hash kind to use for some operations
    object_hash: gix_hash::Kind,
    /// The maximum size of a single allocation caused by user-controlled on-disk pack data.
    /// `None` presumably means that no limit is enforced — TODO confirm where this is read.
    alloc_limit_bytes: Option<usize>,
}
174
175/// Create a new cached handle to the object store with support for additional options.
176///
177/// `replacements` is an iterator over pairs of old and new object ids for replacement support.
178/// This means that when asking for object `X`, one will receive object `X-replaced` given an iterator like `Some((X, X-replaced))`.
179pub fn at_opts(
180 objects_dir: impl Into<PathBuf>,
181 replacements: impl IntoIterator<Item = (gix_hash::ObjectId, gix_hash::ObjectId)>,
182 options: store::init::Options,
183) -> std::io::Result<Handle> {
184 let handle = OwnShared::new(Store::at_opts(
185 objects_dir.into(),
186 &mut replacements.into_iter(),
187 options,
188 )?)
189 .to_handle();
190 Ok(Cache::from(handle))
191}
192
193/// Create a new cached handle to the object store.
194pub fn at(objects_dir: impl Into<PathBuf>) -> std::io::Result<Handle> {
195 at_opts(objects_dir, Vec::new(), Default::default())
196}