1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
use std::{path::PathBuf, sync::Arc};
use arc_swap::ArcSwap;
use crate::{
store::types::{MutableIndexAndPack, SlotMapIndex},
Store,
};
/// Options for use in [`Store::at_opts()`].
#[derive(Clone, Debug)]
pub struct Options {
/// How to obtain a size for the slot map.
pub slots: Slots,
/// The kind of hash we expect in our packs and would use for loose object iteration and object writing.
pub object_hash: gix_hash::Kind,
/// If false, no multi-pack indices will be used. If true, they will be used if their hash matches `object_hash`.
pub use_multi_pack_index: bool,
/// The maximum size of a single allocation caused by user-controlled on-disk pack data.
///
/// If `None`, no additional limit is enforced.
pub alloc_limit_bytes: Option<usize>,
/// The current directory of the process at the time of instantiation.
/// If unset, it will be retrieved using `gix_fs::current_dir(false)`.
pub current_dir: Option<std::path::PathBuf>,
}
impl Default for Options {
fn default() -> Self {
Options {
slots: Default::default(),
object_hash: Default::default(),
use_multi_pack_index: true,
alloc_limit_bytes: None,
current_dir: None,
}
}
}
/// Determines how many slots the index slotmap receives; that number stays fixed for as long as the store exists.
#[derive(Clone, Copy, Debug)]
pub enum Slots {
    /// Use exactly this many slots, which is the total number of indices that can be held at a time.
    ///
    /// This avoids an initial directory listing of the repository and is recommended on the server side,
    /// where the repository setup is controlled.
    ///
    /// Packs are unaffected by this number, as each index can have one or more packs associated with it.
    Given(u16),
    /// Derive the slot count from what is on disk, which is probably the best choice on the client side
    /// where a variety of repositories is encountered.
    AsNeededByDiskState {
        /// Safety factor applied to the needed amount: 1.0 adds no headroom, 1.1 yields 10% more slots than needed.
        multiplier: f32,
        /// The lowest number of slots to assume.
        minimum: usize,
    },
}
impl Default for Slots {
fn default() -> Self {
Slots::AsNeededByDiskState {
multiplier: 1.1,
minimum: 32,
}
}
}
impl Store {
/// Open the store at `objects_dir` (containing loose objects and `packs/`), which must only be a directory for
/// the store to be created without any additional work being done.
/// `slots` defines how many multi-pack-indices as well as indices we can know about at a time, which includes
/// the allowance for all additional object databases coming in via `alternates` as well.
/// Note that the `slots` isn't used for packs, these are included with their multi-index or index respectively.
/// For example, In a repository with 250m objects and geometric packing one would expect 27 index/pack pairs,
/// or a single multi-pack index.
/// `replacements` is an iterator over pairs of old and new object ids for replacement support.
/// This means that when asking for object `X`, one will receive object `X-replaced` given an iterator like `Some((X, X-replaced))`.
pub fn at_opts(
objects_dir: PathBuf,
replacements: &mut dyn Iterator<Item = (gix_hash::ObjectId, gix_hash::ObjectId)>,
Options {
slots,
object_hash,
use_multi_pack_index,
alloc_limit_bytes,
current_dir,
}: Options,
) -> std::io::Result<Self> {
let _span = gix_features::trace::detail!("gix_odb::Store::at()");
let current_dir = current_dir.map_or_else(
|| {
// It's only used for real-pathing alternate paths and there it just needs to be consistent (enough).
gix_fs::current_dir(false)
},
Ok,
)?;
if !objects_dir.is_dir() {
return Err(std::io::Error::other(format!(
"'{}' wasn't a directory",
objects_dir.display()
)));
}
let slot_count = match slots {
Slots::Given(n) => n as usize,
Slots::AsNeededByDiskState { multiplier, minimum } => {
let mut db_paths =
crate::alternate::resolve(objects_dir.clone(), ¤t_dir).map_err(std::io::Error::other)?;
db_paths.insert(0, objects_dir.clone());
let num_slots =
Store::collect_indices_and_mtime_sorted_by_size(db_paths, None, None, alloc_limit_bytes)
.map_err(std::io::Error::other)?
.len();
let candidate = ((num_slots as f32 * multiplier) as usize).max(minimum);
if candidate > crate::store::types::PackId::max_indices() {
// A chance for this to work without 10% extra allocation - this already
// is an insane amount of packs.
num_slots
} else {
candidate
}
}
};
if slot_count > crate::store::types::PackId::max_indices() {
return Err(std::io::Error::other(format!(
"Cannot use more than 2^15-1 slots, got {slot_count}"
)));
}
let mut replacements: Vec<_> = replacements.collect();
replacements.sort_by_key(|a| a.0);
Ok(Store {
current_dir,
write: Default::default(),
replacements,
path: objects_dir,
files: Vec::from_iter(std::iter::repeat_with(MutableIndexAndPack::default).take(slot_count)),
index: ArcSwap::new(Arc::new(SlotMapIndex::default())),
use_multi_pack_index,
object_hash,
alloc_limit_bytes,
num_handles_stable: Default::default(),
num_handles_unstable: Default::default(),
num_disk_state_consolidation: Default::default(),
})
}
}