1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
use std::str::FromStr;
use serde_derive::{Deserialize, Serialize};
use serde_with::skip_serializing_none;
use crate::{
backend::FileType,
blob::BlobType,
define_new_id_struct,
error::{ErrorKind, RusticError, RusticResult},
impl_repofile,
repofile::RepoFile,
};
/// Default values used by `ConfigFile` accessors when a setting is not present in the config.
pub(super) mod constants {
/// One kibibyte (1024 bytes).
pub(super) const KB: u32 = 1024;
/// One mebibyte (1024 KiB).
pub(super) const MB: u32 = 1024 * KB;
/// Default tree pack size = 4 MiB.
pub(super) const DEFAULT_TREE_SIZE: u32 = 4 * MB;
/// Default data pack size = 32 MiB.
pub(super) const DEFAULT_DATA_SIZE: u32 = 32 * MB;
/// The default factor used for repo-size dependent pack size.
/// 32 * sqrt(reposize in bytes) = 1 MB * sqrt(reposize in GB)
pub(super) const DEFAULT_GROW_FACTOR: u32 = 32;
/// The default maximum targeted pack size.
pub(super) const DEFAULT_SIZE_LIMIT: u32 = u32::MAX;
/// The default minimum percentage of targeted pack size.
pub(super) const DEFAULT_MIN_PERCENTAGE: u32 = 30;
/// The default (average) size of a chunk = 1 MiB.
pub(super) const DEFAULT_CHUNK_SIZE: usize = 1024 * 1024;
/// The default minimum size of a chunk = 512 KiB.
pub(super) const DEFAULT_CHUNK_MIN_SIZE: usize = 512 * 1024;
/// The default maximum size of a chunk = 8 MiB.
pub(super) const DEFAULT_CHUNK_MAX_SIZE: usize = 8 * 1024 * 1024;
}
// Declares the `RepositoryId` type used by `ConfigFile::id` through the crate's
// `define_new_id_struct!` macro.
// NOTE(review): presumably an id newtype labelled "repository" - confirm against the macro.
define_new_id_struct!(RepositoryId, "repository");
// Registers `ConfigFile` with `ConfigId` and `FileType::Config` through `impl_repofile!`.
// NOTE(review): presumably this implements `RepoFile` for `ConfigFile` - confirm against the macro.
impl_repofile!(ConfigId, FileType::Config, ConfigFile);
#[skip_serializing_none]
#[derive(Serialize, Deserialize, Debug, Default, Clone, PartialEq, Eq)]
/// The config file describes all repository-wide information.
///
/// It is usually saved in the repository as `config`.
///
/// Most settings are optional; the accessor methods on `ConfigFile` supply the
/// defaults for settings which are not set.
pub struct ConfigFile {
/// Repository version. Currently 1 and 2 are supported
pub version: u32,
/// The [`RepositoryId`] identifying the repository
pub id: RepositoryId,
/// The chunker used to cut data into chunks.
///
/// If not set, the default [`Chunker`] (Rabin) is used.
pub chunker: Option<Chunker>,
/// The chunker polynomial used to chunk data in case of Rabin content defined chunking.
///
/// Stored as a hexadecimal string without prefix.
pub chunker_polynomial: String,
/// The (average) chunk size. For `FixedSize` chunking, this is the chunk size, for Rabin chunking, this size
/// will be reached on average for chunks.
///
/// If not set, defaults to 1 MiB
pub chunk_size: Option<usize>,
/// The minimum chunk size. For Rabin chunking, this defines the minimum chunk size before chunks are defined by
/// the Rabin fingerprint.
/// Has no effect for `FixedSize` chunking.
///
/// If not set, defaults to 512 KiB
pub chunk_min_size: Option<usize>,
/// The maximum chunk size. For Rabin chunking, this defines the maximum chunk size, i.e. the size when chunks are cut
/// even if no cut point has been identified by the Rabin fingerprint.
/// Has no effect for `FixedSize` chunking.
///
/// If not set, defaults to 8 MiB
pub chunk_max_size: Option<usize>,
/// Marker if this is a hot repository. If not set, this is no hot repository
///
/// # Note
///
/// When using hot/cold repositories, this is only set within the hot part of the repository.
pub is_hot: Option<bool>,
/// Marker if this is an append-only repository.
///
/// # Note
///
/// Commands which are not append-only won't run once this is set.
pub append_only: Option<bool>,
/// Compression level
///
/// # Note
///
/// `Some(0)` means no compression. If not set, use the default compression:
/// * for repository version 1, use no compression (as not supported)
/// * for repository version 2, use the zstd default compression
pub compression: Option<i32>,
/// Size of tree packs. This will be enhanced by the `treepack_growfactor` depending on the repository size
///
/// If not set, defaults to `4 MiB`
pub treepack_size: Option<u32>,
/// Grow factor to increase size of tree packs depending on the repository size
///
/// If not set, defaults to `32`
pub treepack_growfactor: Option<u32>,
/// Maximum targeted tree pack size.
///
/// If not set, defaults to `u32::MAX`
pub treepack_size_limit: Option<u32>,
/// Size of data packs. This will be enhanced by the `datapack_growfactor` depending on the repository size
///
/// If not set, defaults to `32 MiB`
pub datapack_size: Option<u32>,
/// Grow factor to increase size of data packs depending on the repository size
///
/// If not set, defaults to `32`
pub datapack_growfactor: Option<u32>,
/// Maximum targeted data pack size.
///
/// If not set, defaults to `u32::MAX`
pub datapack_size_limit: Option<u32>,
/// Tolerate pack sizes which are larger than given percentage of targeted pack size
///
/// If not set, defaults to `30`
pub min_packsize_tolerate_percent: Option<u32>,
/// Tolerate pack sizes which are smaller than given percentage of targeted pack size
///
/// If not set or set to `0` this is unlimited.
pub max_packsize_tolerate_percent: Option<u32>,
/// Do an extra verification by decompressing/decrypting all data before uploading to the repository
///
/// If not set, the extra verification is performed.
pub extra_verify: Option<bool>,
}
impl ConfigFile {
    /// Creates a new `ConfigFile` with every optional setting left unset.
    ///
    /// # Arguments
    ///
    /// * `version` - The version of the repository
    /// * `id` - The id of the repository
    /// * `poly` - The chunker polynomial; it is stored as a lowercase hexadecimal string
    #[must_use]
    pub fn new(version: u32, id: RepositoryId, poly: u64) -> Self {
        let chunker_polynomial = format!("{poly:x}");
        Self {
            version,
            id,
            chunker_polynomial,
            ..Self::default()
        }
    }

    /// Get the chunker polynomial as a number.
    ///
    /// # Errors
    ///
    /// * If the stored polynomial is not a valid hexadecimal `u64`
    pub fn poly(&self) -> RusticResult<u64> {
        let hex = &self.chunker_polynomial;
        // Builds the error lazily, only when parsing actually fails.
        let parse_error = |source: std::num::ParseIntError| {
            RusticError::with_source(
                ErrorKind::InvalidInput,
                "Parsing u64 from hex failed for polynomial `{polynomial}`, the value must be a valid hexadecimal string.",
                source,
            )
            .attach_context("polynomial", hex)
        };
        let polynomial = u64::from_str_radix(hex, 16).map_err(parse_error)?;
        Ok(polynomial)
    }

    /// Get the zstd compression level to use.
    ///
    /// Returns `Ok(None)` when no compression should be applied and
    /// `Ok(Some(level))` otherwise.
    ///
    /// # Errors
    ///
    /// * If the repository version is neither 1 nor 2
    pub fn zstd(&self) -> RusticResult<Option<i32>> {
        match self.version {
            // version 1 never compresses, whatever `compression` says
            1 => Ok(None),
            2 => Ok(match self.compression {
                // an explicit level of 0 switches compression off
                Some(0) => None,
                // nothing configured: use default (=0) zstd compression
                None => Some(0),
                level @ Some(_) => level,
            }),
            unsupported => Err(RusticError::new(
                ErrorKind::Unsupported,
                "Config version `{version}` not supported. Please make sure, that you use the correct version.",
            )
            .attach_context("version", unsupported.to_string())),
        }
    }

    /// Tells whether the extra verification (decompressing/decrypting data before
    /// writing to the repository) should be performed.
    ///
    /// The check is only skipped when it has been explicitly disabled.
    #[must_use]
    pub fn extra_verify(&self) -> bool {
        self.extra_verify != Some(false)
    }

    /// Get the pack size parameters for the given blob type.
    ///
    /// # Arguments
    ///
    /// * `blob` - The blob type to get the pack size parameters for
    ///
    /// # Returns
    ///
    /// A tuple containing the pack size, the grow factor and the size limit.
    /// Unset values are replaced by their defaults.
    #[must_use]
    pub fn packsize(&self, blob: BlobType) -> (u32, u32, u32) {
        // Only the base size has a blob-type specific default.
        let (size, grow_factor, size_limit, default_size) = match blob {
            BlobType::Tree => (
                self.treepack_size,
                self.treepack_growfactor,
                self.treepack_size_limit,
                constants::DEFAULT_TREE_SIZE,
            ),
            BlobType::Data => (
                self.datapack_size,
                self.datapack_growfactor,
                self.datapack_size_limit,
                constants::DEFAULT_DATA_SIZE,
            ),
        };
        (
            size.unwrap_or(default_size),
            grow_factor.unwrap_or(constants::DEFAULT_GROW_FACTOR),
            size_limit.unwrap_or(constants::DEFAULT_SIZE_LIMIT),
        )
    }

    /// Get the pack size toleration limits.
    ///
    /// # Returns
    ///
    /// A tuple `(min, max)` of percentages of the targeted pack size.
    /// `min` falls back to the default minimum percentage; `max` is `u32::MAX`
    /// (i.e. unlimited) if it is not set or set to `0`.
    #[must_use]
    pub fn packsize_ok_percents(&self) -> (u32, u32) {
        let min_percent = self
            .min_packsize_tolerate_percent
            .unwrap_or(constants::DEFAULT_MIN_PERCENTAGE);
        // `0` is treated exactly like "not set".
        let max_percent = self
            .max_packsize_tolerate_percent
            .filter(|&percent| percent != 0)
            .unwrap_or(u32::MAX);
        (min_percent, max_percent)
    }

    /// Get the chunker, falling back to the default [`Chunker`] if none is configured.
    #[must_use]
    pub fn chunker(&self) -> Chunker {
        self.chunker.unwrap_or_default()
    }

    /// Get the (average) chunk size, falling back to the default if not configured.
    #[must_use]
    pub fn chunk_size(&self) -> usize {
        self.chunk_size.unwrap_or(constants::DEFAULT_CHUNK_SIZE)
    }

    /// Get the minimum chunk size, falling back to the default if not configured.
    #[must_use]
    pub fn chunk_min_size(&self) -> usize {
        self.chunk_min_size
            .unwrap_or(constants::DEFAULT_CHUNK_MIN_SIZE)
    }

    /// Get the maximum chunk size, falling back to the default if not configured.
    #[must_use]
    pub fn chunk_max_size(&self) -> usize {
        self.chunk_max_size
            .unwrap_or(constants::DEFAULT_CHUNK_MAX_SIZE)
    }

    /// Determine if two [`ConfigFile`]s have compatible chunker parameters.
    ///
    /// Both must use the same chunker and the same (effective) chunk size. For the
    /// Rabin chunker the polynomial string and the min/max chunk sizes must match as well.
    #[must_use]
    pub fn has_same_chunker(&self, other: &Self) -> bool {
        let chunker = self.chunker();
        if chunker != other.chunker() {
            return false;
        }

        let same_chunk_size = self.chunk_size() == other.chunk_size();
        match chunker {
            Chunker::FixedSize => same_chunk_size,
            Chunker::Rabin => {
                same_chunk_size
                    && self.chunker_polynomial == other.chunker_polynomial
                    && self.chunk_min_size() == other.chunk_min_size()
                    && self.chunk_max_size() == other.chunk_max_size()
            }
        }
    }
}
// NOTE(review): no serde rename attribute is visible here, so the derived (de)serialization
// presumably uses the variant names `Rabin`/`FixedSize`, while `FromStr` accepts
// `rabin`/`fixed_size` - confirm that this difference is intended.
#[derive(Serialize, Deserialize, Debug, Default, Clone, Copy, PartialEq, Eq)]
/// Supported chunkers used to cut large files into Blobs.
pub enum Chunker {
#[default]
/// Rabin chunker - a content defined chunker (CDC) based on Rabin fingerprints.
/// This is the default chunker.
Rabin,
/// Fixed size chunker - makes chunks of a given fixed size
FixedSize,
}
/// Parses a [`Chunker`] from its textual name.
///
/// Accepted names are exactly `rabin` and `fixed_size`; any other input
/// is rejected with an `InvalidInput` error.
impl FromStr for Chunker {
    type Err = Box<RusticError>;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s == "rabin" {
            Ok(Self::Rabin)
        } else if s == "fixed_size" {
            Ok(Self::FixedSize)
        } else {
            Err(RusticError::new(
                ErrorKind::InvalidInput,
                "only ``rabin`` and ``fixed_size`` are valid chunkers",
            ))
        }
    }
}