1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
use std::io;
use std::path::PathBuf;
/// Configuration options for `Marble`.
///
/// `Config::default()` supplies a baseline value for every field.
/// `Config::validate` rejects a `target_file_size` of zero and a
/// `file_compaction_percent` of 100 or more.
#[derive(Debug, Clone)]
pub struct Config {
/// Storage files will be kept here.
///
/// The default is an empty path.
pub path: PathBuf,
/// The compression level to use when compressing each
/// batch of objects. A value of `None` disables
/// compression. This is one of the most important
/// parameters to experiment with while finding an
/// appropriate configuration for your system.
///
/// The default is `None`.
pub zstd_compression_level: Option<i32>,
/// Issue fsyncs on each new file and the containing
/// directory when it is created. This corresponds
/// to at least one call to fsync for each call to
/// `write_batch`.
///
/// The default is `false`.
pub fsync_each_batch: bool,
/// Garbage collection will try to keep storage
/// files around this size or smaller.
///
/// Must be non-zero to pass validation. The default
/// is `1 << 28` bytes (256 MiB).
pub target_file_size: usize,
/// Remaining live percentage of a file before
/// it's considered rewritable.
///
/// Must be less than 100 to pass validation. The
/// default is 66.
pub file_compaction_percent: u8,
/// The ceiling on the largest allocation this system
/// will ever attempt to perform in order to read an
/// object off of disk.
///
/// The default is 16 GiB on 64-bit targets.
pub max_object_size: usize,
/// The number of total files (of all sizes) that must
/// exist before "small files" are squished together
/// even if they are above the `file_compaction_percent`.
/// A "small file" is defined as a file whose uncompressed
/// size times `min_compaction_files` is below the
/// `target_file_size`.
///
/// The default is 64.
pub small_file_cleanup_threshold: usize,
/// A partitioning function for objects based on
/// object ID and object size. You may override this to
/// cause objects to be written into separate files so
/// that garbage collection may take advantage of
/// locality effects for your workload that are
/// correlated to object identifiers or the size of
/// data.
///
/// Ideally, you will colocate objects that have
/// similar expected lifespans. Doing so minimizes
/// the costs of copying live data over time during
/// storage file GC.
///
/// The default is `crate::default_partition_function`.
pub partition_function: fn(object_id: u64, object_size: usize) -> u8,
/// The minimum number of files within a generation to
/// collect if below the live compaction percent.
///
/// The default is 2.
pub min_compaction_files: usize,
}
impl Default for Config {
    /// Returns the baseline configuration.
    ///
    /// Compression and per-batch fsyncs are disabled and the storage path is
    /// empty. Files are targeted at 256 MiB, become eligible for rewriting at
    /// 66 percent live data, and objects are partitioned with
    /// `crate::default_partition_function`.
    ///
    /// `max_object_size` is 16 GiB wherever `usize` can hold that value, and
    /// `usize::MAX` on narrower targets.
    fn default() -> Config {
        // Spelled as a `u64` so that evaluating the constant cannot overflow
        // on targets whose `usize` is narrower than 64 bits. Written directly
        // as a `usize` expression this fails to compile on such targets.
        const MAX_OBJECT_SIZE_BYTES: u64 = 16 * 1024 * 1024 * 1024; // 16 GiB

        // Clamp to the largest representable `usize`. Both casts are lossless:
        // `usize` is at most 64 bits wide on supported targets, and the
        // clamped value never exceeds `usize::MAX`.
        let max_object_size = MAX_OBJECT_SIZE_BYTES.min(usize::MAX as u64) as usize;

        Config {
            path: "".into(),
            target_file_size: 1 << 28, // 256 MiB
            file_compaction_percent: 66,
            partition_function: crate::default_partition_function,
            max_object_size,
            small_file_cleanup_threshold: 64,
            min_compaction_files: 2,
            fsync_each_batch: false,
            zstd_compression_level: None,
        }
    }
}
impl Config {
    /// Checks that this configuration holds values the system can work with.
    ///
    /// # Errors
    ///
    /// Returns an `io::Error` of kind `Unsupported` when `target_file_size`
    /// is zero, or when `file_compaction_percent` is 100 or greater. The
    /// file size is checked first, so it wins when both are invalid.
    pub(crate) fn validate(&self) -> io::Result<()> {
        // Arms are ordered so the file-size rule is evaluated before the
        // percentage rule; the first violated rule supplies the message.
        let complaint = match (self.target_file_size, self.file_compaction_percent) {
            (0, _) => "Config's target_file_size must be non-zero",
            (_, percent) if percent >= 100 => {
                "Config's file_compaction_percent must be less than 100"
            }
            _ => return Ok(()),
        };

        Err(io::Error::new(io::ErrorKind::Unsupported, complaint))
    }
}