lsm_tree/config.rs
// Copyright (c) 2024-present, fjall-rs
// This source code is licensed under both the Apache 2.0 and MIT License
// (found in the LICENSE-* files in the repository)

use crate::{
    cache::Cache,
    descriptor_table::FileDescriptorTable,
    path::absolute_path,
    segment::meta::{CompressionType, TableType},
    BlobTree, Tree,
};
use std::{
    path::{Path, PathBuf},
    sync::Arc,
};

/// LSM-tree type
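///
/// A `TreeType` converts to and from its `u8` representation; a minimal
/// round-trip sketch (assuming `TreeType` is re-exported at the crate root):
///
/// ```ignore
/// // Sketch only: the `lsm_tree::TreeType` re-export is an assumption.
/// use lsm_tree::TreeType;
///
/// assert_eq!(u8::from(TreeType::Blob), 1);
/// assert_eq!(TreeType::try_from(1), Ok(TreeType::Blob));
/// assert!(TreeType::try_from(2).is_err());
/// ```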
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum TreeType {
    /// Standard LSM-tree, see [`Tree`]
    Standard,

    /// Key-value separated LSM-tree, see [`BlobTree`]
    Blob,
}

impl From<TreeType> for u8 {
    fn from(val: TreeType) -> Self {
        match val {
            TreeType::Standard => 0,
            TreeType::Blob => 1,
        }
    }
}

impl TryFrom<u8> for TreeType {
    type Error = ();

    fn try_from(value: u8) -> Result<Self, Self::Error> {
        match value {
            0 => Ok(Self::Standard),
            1 => Ok(Self::Blob),
            _ => Err(()),
        }
    }
}

const DEFAULT_FILE_FOLDER: &str = ".lsm.data";

#[derive(Clone)]
/// Tree configuration builder
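///
/// # Example
///
/// A minimal usage sketch, assuming `Config` and `CompressionType` are
/// re-exported at the crate root:
///
/// ```ignore
/// // Sketch only: the crate-root re-exports and the path are assumptions.
/// use lsm_tree::{CompressionType, Config};
///
/// let tree = Config::new("/tmp/my_lsm_tree")
///     .compression(CompressionType::None) // the default; no compression
///     .data_block_size(4_096)
///     .level_count(7)
///     .open()
///     .expect("failed to open tree");
/// ```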
pub struct Config {
    /// Folder path
    #[doc(hidden)]
    pub path: PathBuf,

    /// Tree type (unused)
    #[allow(unused)]
    pub tree_type: TreeType,

    /// What type of compression is used
    pub compression: CompressionType,

    /// What type of compression is used for blobs
    pub blob_compression: CompressionType,

    /// Table type (unused)
    #[allow(unused)]
    pub(crate) table_type: TableType,

    /// Block size of data blocks
    pub data_block_size: u32,

    /// Block size of index blocks
    pub index_block_size: u32,

    /// Number of levels of the LSM-tree (depth of tree)
    pub level_count: u8,

    /// Bits per key for levels that are not L0, L1, L2
    // NOTE: bloom_bits_per_key is not conditionally compiled,
    // because that would change the file format
    #[doc(hidden)]
    pub bloom_bits_per_key: i8,

    /// Block cache to use
    #[doc(hidden)]
    pub cache: Arc<Cache>,

    /// Blob file (value log segment) target size in bytes
    #[doc(hidden)]
    pub blob_file_target_size: u64,

    /// Key-value separation threshold in bytes
    #[doc(hidden)]
    pub blob_file_separation_threshold: u32,

    /// Descriptor table to use
    #[doc(hidden)]
    pub descriptor_table: Arc<FileDescriptorTable>,
}

impl Default for Config {
    fn default() -> Self {
        Self {
            path: absolute_path(Path::new(DEFAULT_FILE_FOLDER)),
            descriptor_table: Arc::new(FileDescriptorTable::new(128, 2)),

            cache: Arc::new(Cache::with_capacity_bytes(/* 16 MiB */ 16 * 1_024 * 1_024)),

            data_block_size: /* 4 KiB */ 4_096,
            index_block_size: /* 4 KiB */ 4_096,
            level_count: 7,
            tree_type: TreeType::Standard,
            table_type: TableType::Block,
            compression: CompressionType::None,
            blob_compression: CompressionType::None,
            bloom_bits_per_key: 10,

            blob_file_target_size: /* 64 MiB */ 64 * 1_024 * 1_024,
            blob_file_separation_threshold: /* 4 KiB */ 4 * 1_024,
        }
    }
}

impl Config {
    /// Initializes a new config
    pub fn new<P: AsRef<Path>>(path: P) -> Self {
        Self {
            path: absolute_path(path.as_ref()),
            ..Default::default()
        }
    }

    /// Sets the bits per key to use for bloom filters
    /// in levels that are not L0 or L1.
    ///
    /// Use -1 to disable bloom filters even in L0, L1, L2.
    ///
    /// Defaults to 10 bits.
    ///
    /// # Panics
    ///
    /// Panics if `bits` is less than -1.
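    ///
    /// A sketch of disabling bloom filters entirely (assuming `Config` is
    /// re-exported at the crate root):
    ///
    /// ```ignore
    /// // Sketch only: the re-export and path are assumptions.
    /// use lsm_tree::Config;
    ///
    /// // -1 disables bloom filters in every level
    /// let config = Config::new("/tmp/no_blooms").bloom_bits_per_key(-1);
    /// ```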
    #[must_use]
    pub fn bloom_bits_per_key(mut self, bits: i8) -> Self {
        assert!(bits >= -1, "invalid bits_per_key value");

        self.bloom_bits_per_key = bits;
        self
    }

    /// Sets the compression method.
    ///
    /// Using some compression is recommended.
    ///
    /// Default = None
    #[must_use]
    pub fn compression(mut self, compression: CompressionType) -> Self {
        self.compression = compression;
        self
    }

    /// Sets the compression method for blobs.
    ///
    /// Using some compression is recommended.
    ///
    /// Default = None
    #[must_use]
    pub fn blob_compression(mut self, compression: CompressionType) -> Self {
        self.blob_compression = compression;
        self
    }

    /// Sets the number of levels of the LSM-tree (depth of tree).
    ///
    /// Defaults to 7, like `LevelDB` and `RocksDB`.
    ///
    /// Cannot be changed once set.
    ///
    /// # Panics
    ///
    /// Panics if `n` is 0.
    #[must_use]
    pub fn level_count(mut self, n: u8) -> Self {
        assert!(n > 0);

        self.level_count = n;
        self
    }

    /// Sets the data block size.
    ///
    /// Defaults to 4 KiB (4096 bytes).
    ///
    /// For point-read-heavy workloads (get), a sensible value is somewhere
    /// between 4 and 8 KiB, depending on the average value size.
    ///
    /// For scan-heavy workloads (range, prefix), use 16 to 64 KiB,
    /// which also increases compression efficiency.
    ///
    /// # Panics
    ///
    /// Panics if the block size is smaller than 1 KiB or larger than 512 KiB.
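    ///
    /// A sketch of tuning for scan-heavy workloads (assuming `Config` is
    /// re-exported at the crate root):
    ///
    /// ```ignore
    /// // Sketch only: the re-export and path are assumptions.
    /// use lsm_tree::Config;
    ///
    /// // larger data blocks favour range scans and compression over point reads
    /// let config = Config::new("/tmp/scan_heavy").data_block_size(64 * 1_024);
    /// ```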
    #[must_use]
    pub fn data_block_size(mut self, block_size: u32) -> Self {
        assert!(block_size >= 1_024);
        assert!(block_size <= 512 * 1_024);

        self.data_block_size = block_size;

        self
    }

    /// Sets the index block size.
    ///
    /// Defaults to 4 KiB (4096 bytes).
    ///
    /// For point-read-heavy workloads (get), a sensible value is somewhere
    /// between 4 and 8 KiB, depending on the average value size.
    ///
    /// For scan-heavy workloads (range, prefix), use 16 to 64 KiB,
    /// which also increases compression efficiency.
    ///
    /// # Panics
    ///
    /// Panics if the block size is smaller than 1 KiB or larger than 512 KiB.
    #[must_use]
    pub fn index_block_size(mut self, block_size: u32) -> Self {
        assert!(block_size >= 1_024);
        assert!(block_size <= 512 * 1_024);

        self.index_block_size = block_size;

        self
    }

    /// Sets the global cache.
    ///
    /// You can create a global [`Cache`] and share it between multiple
    /// trees to cap global cache memory usage.
    ///
    /// Defaults to a cache with 16 MiB of capacity *per tree*.
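    ///
    /// A sketch of sharing one cache between two trees, assuming `Cache` and
    /// `Config` are re-exported at the crate root:
    ///
    /// ```ignore
    /// // Sketch only: the re-exports and paths are assumptions.
    /// use std::sync::Arc;
    ///
    /// use lsm_tree::{Cache, Config};
    ///
    /// // one 64 MiB cache caps cache memory across both trees
    /// let cache = Arc::new(Cache::with_capacity_bytes(64 * 1_024 * 1_024));
    ///
    /// let tree_a = Config::new("/tmp/tree_a")
    ///     .use_cache(cache.clone())
    ///     .open()
    ///     .expect("failed to open tree");
    ///
    /// let tree_b = Config::new("/tmp/tree_b")
    ///     .use_cache(cache)
    ///     .open()
    ///     .expect("failed to open tree");
    /// ```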
    #[must_use]
    pub fn use_cache(mut self, cache: Arc<Cache>) -> Self {
        self.cache = cache;
        self
    }

    /// Sets the target size of blob files.
    ///
    /// Smaller blob files allow more granular garbage collection,
    /// which allows for lower space amplification at a lower write I/O cost.
    ///
    /// Larger blob files decrease the number of files on disk and the
    /// maintenance overhead.
    ///
    /// Defaults to 64 MiB.
    ///
    /// This option has no effect unless the config is used to open a blob tree.
    #[must_use]
    pub fn blob_file_target_size(mut self, bytes: u64) -> Self {
        self.blob_file_target_size = bytes;
        self
    }

    /// Sets the key-value separation threshold in bytes.
    ///
    /// A smaller value reduces compaction overhead and thus write amplification,
    /// at the cost of lower read performance.
    ///
    /// Defaults to 4 KiB.
    ///
    /// This option has no effect unless the config is used to open a blob tree.
    #[must_use]
    pub fn blob_file_separation_threshold(mut self, bytes: u32) -> Self {
        self.blob_file_separation_threshold = bytes;
        self
    }

    #[must_use]
    #[doc(hidden)]
    pub fn descriptor_table(mut self, descriptor_table: Arc<FileDescriptorTable>) -> Self {
        self.descriptor_table = descriptor_table;
        self
    }

    /// Opens a tree using the config.
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
    pub fn open(self) -> crate::Result<Tree> {
        Tree::open(self)
    }

    /// Opens a blob tree using the config.
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
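    ///
    /// A sketch of opening a key-value separated tree, assuming `Config` is
    /// re-exported at the crate root:
    ///
    /// ```ignore
    /// // Sketch only: the re-export, path and sizes are assumptions.
    /// use lsm_tree::Config;
    ///
    /// let blob_tree = Config::new("/tmp/big_values")
    ///     // values larger than 8 KiB go into blob files
    ///     .blob_file_separation_threshold(8 * 1_024)
    ///     // roll over to a new blob file after ~128 MiB
    ///     .blob_file_target_size(128 * 1_024 * 1_024)
    ///     .open_as_blob_tree()
    ///     .expect("failed to open blob tree");
    /// ```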
    pub fn open_as_blob_tree(mut self) -> crate::Result<BlobTree> {
        self.tree_type = TreeType::Blob;
        BlobTree::open(self)
    }
}