lsm_tree/config.rs
// Copyright (c) 2024-present, fjall-rs
// This source code is licensed under both the Apache 2.0 and MIT License
// (found in the LICENSE-* files in the repository)

use crate::{
    descriptor_table::FileDescriptorTable,
    path::absolute_path,
    segment::meta::{CompressionType, TableType},
    BlobTree, BlockCache, Tree,
};
use std::{
    path::{Path, PathBuf},
    sync::Arc,
};
use value_log::BlobCache;

/// LSM-tree type
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum TreeType {
    /// Standard LSM-tree, see [`Tree`]
    Standard,

    /// Key-value separated LSM-tree, see [`BlobTree`]
    Blob,
}

impl From<TreeType> for u8 {
    fn from(val: TreeType) -> Self {
        match val {
            TreeType::Standard => 0,
            TreeType::Blob => 1,
        }
    }
}

impl TryFrom<u8> for TreeType {
    type Error = ();

    fn try_from(value: u8) -> Result<Self, Self::Error> {
        match value {
            0 => Ok(Self::Standard),
            1 => Ok(Self::Blob),
            _ => Err(()),
        }
    }
}

const DEFAULT_FILE_FOLDER: &str = ".lsm.data";

/// Tree configuration builder
#[derive(Clone)]
pub struct Config {
    /// Folder path
    #[doc(hidden)]
    pub path: PathBuf,

    /// Tree type (unused)
    #[allow(unused)]
    pub tree_type: TreeType,

    /// What type of compression is used
    pub compression: CompressionType,

    /// What type of compression is used for blobs
    pub blob_compression: CompressionType,

    /// Table type (unused)
    #[allow(unused)]
    pub(crate) table_type: TableType,

    /// Block size of data blocks
    pub data_block_size: u32,

    /// Block size of index blocks
    pub index_block_size: u32,

    /// Number of levels of the LSM-tree (depth of tree)
    pub level_count: u8,

    /// Bits per key for levels that are not L0, L1, L2
    // NOTE: bloom_bits_per_key is not conditionally compiled,
    // because that would change the file format
    #[doc(hidden)]
    pub bloom_bits_per_key: i8,

    /// Block cache to use
    #[doc(hidden)]
    pub block_cache: Arc<BlockCache>,

    /// Blob cache to use
    #[doc(hidden)]
    pub blob_cache: Arc<BlobCache>,

    /// Blob file (value log segment) target size in bytes
    #[doc(hidden)]
    pub blob_file_target_size: u64,

    /// Key-value separation threshold in bytes
    #[doc(hidden)]
    pub blob_file_separation_threshold: u32,

    /// Descriptor table to use
    #[doc(hidden)]
    pub descriptor_table: Arc<FileDescriptorTable>,
}

impl Default for Config {
    fn default() -> Self {
        Self {
            path: absolute_path(Path::new(DEFAULT_FILE_FOLDER)),
            descriptor_table: Arc::new(FileDescriptorTable::new(128, 2)),

            block_cache: Arc::new(BlockCache::with_capacity_bytes(/* 16 MiB */ 16 * 1_024 * 1_024)),
            data_block_size: /* 4 KiB */ 4_096,
            index_block_size: /* 4 KiB */ 4_096,
            level_count: 7,
            tree_type: TreeType::Standard,
            table_type: TableType::Block,
            compression: CompressionType::None,
            blob_compression: CompressionType::None,
            bloom_bits_per_key: 10,

            blob_cache: Arc::new(BlobCache::with_capacity_bytes(/* 16 MiB */ 16 * 1_024 * 1_024)),
            blob_file_target_size: /* 64 MiB */ 64 * 1_024 * 1_024,
            blob_file_separation_threshold: /* 4 KiB */ 4 * 1_024,
        }
    }
}

impl Config {
    /// Initializes a new config
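    ///
    /// # Examples
    ///
    /// A minimal sketch, assuming `Config` is re-exported at the crate root
    /// (the path is illustrative):
    ///
    /// ```no_run
    /// use lsm_tree::Config;
    ///
    /// let config = Config::new("/tmp/my_lsm_tree");
    /// ```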
    pub fn new<P: AsRef<Path>>(path: P) -> Self {
        Self {
            path: absolute_path(path.as_ref()),
            ..Default::default()
        }
    }

    /// Sets the bits per key to use for bloom filters
    /// in levels that are not L0 or L1.
    ///
    /// Use -1 to disable bloom filters even in L0, L1, L2.
    ///
    /// Defaults to 10 bits.
    ///
    /// # Panics
    ///
    /// Panics if `bits` is less than -1.
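    ///
    /// # Examples
    ///
    /// A sketch of both ends of the trade-off (assuming `Config` is exported
    /// at the crate root; paths are illustrative):
    ///
    /// ```no_run
    /// use lsm_tree::Config;
    ///
    /// // More bits per key lower the false positive rate, at the cost of memory
    /// let config = Config::new("/tmp/my_lsm_tree").bloom_bits_per_key(20);
    ///
    /// // -1 disables bloom filters entirely
    /// let config = Config::new("/tmp/my_lsm_tree").bloom_bits_per_key(-1);
    /// ```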
    #[must_use]
    pub fn bloom_bits_per_key(mut self, bits: i8) -> Self {
        assert!(bits >= -1, "invalid bits_per_key value");

        self.bloom_bits_per_key = bits;
        self
    }

    /// Sets the compression method.
    ///
    /// Using some compression is recommended.
    ///
    /// Default = None
    #[must_use]
    pub fn compression(mut self, compression: CompressionType) -> Self {
        self.compression = compression;
        self
    }

    /// Sets the compression method for blobs.
    ///
    /// Using some compression is recommended.
    ///
    /// Default = None
    ///
    /// This option has no effect when not used for opening a blob tree.
    #[must_use]
    pub fn blob_compression(mut self, compression: CompressionType) -> Self {
        self.blob_compression = compression;
        self
    }

    /// Sets the number of levels of the LSM-tree (depth of tree).
    ///
    /// Defaults to 7, like `LevelDB` and `RocksDB`.
    ///
    /// Cannot be changed once set.
    ///
    /// # Panics
    ///
    /// Panics if `n` is 0.
    #[must_use]
    pub fn level_count(mut self, n: u8) -> Self {
        assert!(n > 0);

        self.level_count = n;
        self
    }

    /// Sets the data block size.
    ///
    /// Defaults to 4 KiB (4096 bytes).
    ///
    /// For point read heavy workloads (get) a sensible default is
    /// somewhere between 4 - 8 KiB, depending on the average value size.
    ///
    /// For scan heavy workloads (range, prefix), use 16 - 64 KiB
    /// which also increases compression efficiency.
    ///
    /// # Panics
    ///
    /// Panics if the block size is smaller than 1 KiB or larger than 512 KiB.
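    ///
    /// # Examples
    ///
    /// A sketch of tuning for a scan-heavy workload (assuming `Config` is
    /// exported at the crate root; the path is illustrative):
    ///
    /// ```no_run
    /// use lsm_tree::Config;
    ///
    /// // Larger data blocks favour range scans and compression ratio
    /// let config = Config::new("/tmp/my_lsm_tree").data_block_size(32 * 1_024);
    /// ```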
    #[must_use]
    pub fn data_block_size(mut self, block_size: u32) -> Self {
        assert!(block_size >= 1_024);
        assert!(block_size <= 512 * 1_024);

        self.data_block_size = block_size;

        self
    }

    /// Sets the index block size.
    ///
    /// Defaults to 4 KiB (4096 bytes).
    ///
    /// For point read heavy workloads (get) a sensible default is
    /// somewhere between 4 - 8 KiB, depending on the average value size.
    ///
    /// For scan heavy workloads (range, prefix), use 16 - 64 KiB
    /// which also increases compression efficiency.
    ///
    /// # Panics
    ///
    /// Panics if the block size is smaller than 1 KiB or larger than 512 KiB.
    #[must_use]
    pub fn index_block_size(mut self, block_size: u32) -> Self {
        assert!(block_size >= 1_024);
        assert!(block_size <= 512 * 1_024);

        self.index_block_size = block_size;

        self
    }

    /// Sets the block cache.
    ///
    /// You can create a global [`BlockCache`] and share it between multiple
    /// trees to cap global cache memory usage.
    ///
    /// Defaults to a block cache with 16 MiB of capacity *per tree*.
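    ///
    /// # Examples
    ///
    /// A sketch of sharing one cache across trees (assuming `Config` and
    /// `BlockCache` are exported at the crate root; paths are illustrative):
    ///
    /// ```no_run
    /// use lsm_tree::{BlockCache, Config};
    /// use std::sync::Arc;
    ///
    /// // One 64 MiB cache, shared by both trees
    /// let block_cache = Arc::new(BlockCache::with_capacity_bytes(64 * 1_024 * 1_024));
    ///
    /// let config_a = Config::new("/tmp/tree_a").block_cache(block_cache.clone());
    /// let config_b = Config::new("/tmp/tree_b").block_cache(block_cache);
    /// ```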
    #[must_use]
    pub fn block_cache(mut self, block_cache: Arc<BlockCache>) -> Self {
        self.block_cache = block_cache;
        self
    }

    /// Sets the blob cache.
    ///
    /// You can create a global [`BlobCache`] and share it between multiple
    /// trees and their value logs to cap global cache memory usage.
    ///
    /// Defaults to a blob cache with 16 MiB of capacity *per tree*.
    ///
    /// This option has no effect when not used for opening a blob tree.
    #[must_use]
    pub fn blob_cache(mut self, blob_cache: Arc<BlobCache>) -> Self {
        self.blob_cache = blob_cache;
        self
    }

    /// Sets the target size of blob files.
    ///
    /// Smaller blob files allow more granular garbage collection,
    /// which allows lower space amplification for a lower write I/O cost.
    ///
    /// Larger blob files decrease the number of files on disk and the
    /// maintenance overhead.
    ///
    /// Defaults to 64 MiB.
    ///
    /// This option has no effect when not used for opening a blob tree.
    #[must_use]
    pub fn blob_file_target_size(mut self, bytes: u64) -> Self {
        self.blob_file_target_size = bytes;
        self
    }

    /// Sets the key-value separation threshold in bytes.
    ///
    /// A smaller value reduces compaction overhead and thus write amplification,
    /// at the cost of lower read performance.
    ///
    /// Defaults to 4 KiB.
    ///
    /// This option has no effect when not used for opening a blob tree.
    #[must_use]
    pub fn blob_file_separation_threshold(mut self, bytes: u32) -> Self {
        self.blob_file_separation_threshold = bytes;
        self
    }

    #[must_use]
    #[doc(hidden)]
    pub fn descriptor_table(mut self, descriptor_table: Arc<FileDescriptorTable>) -> Self {
        self.descriptor_table = descriptor_table;
        self
    }

    /// Opens a tree using the config.
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
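    ///
    /// # Examples
    ///
    /// A minimal sketch (assuming `Config`, `Tree` and `Result` are exported
    /// at the crate root; the path is illustrative):
    ///
    /// ```no_run
    /// use lsm_tree::{Config, Tree};
    ///
    /// # fn main() -> lsm_tree::Result<()> {
    /// let tree: Tree = Config::new("/tmp/my_lsm_tree")
    ///     .level_count(7)
    ///     .open()?;
    /// # Ok(())
    /// # }
    /// ```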
    pub fn open(self) -> crate::Result<Tree> {
        Tree::open(self)
    }

    /// Opens a blob tree using the config.
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
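    ///
    /// # Examples
    ///
    /// A sketch of a blob tree configuration (assuming `Config`, `BlobTree` and
    /// `Result` are exported at the crate root; the path and sizes are illustrative):
    ///
    /// ```no_run
    /// use lsm_tree::{BlobTree, Config};
    ///
    /// # fn main() -> lsm_tree::Result<()> {
    /// let tree: BlobTree = Config::new("/tmp/my_blob_tree")
    ///     .blob_file_target_size(/* 128 MiB */ 128 * 1_024 * 1_024)
    ///     .blob_file_separation_threshold(/* 1 KiB */ 1_024)
    ///     .open_as_blob_tree()?;
    /// # Ok(())
    /// # }
    /// ```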
    pub fn open_as_blob_tree(mut self) -> crate::Result<BlobTree> {
        self.tree_type = TreeType::Blob;
        BlobTree::open(self)
    }
}
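
#[cfg(test)]
mod tests {
    use super::*;

    // Illustrative round-trip check for the TreeType <-> u8 conversions
    // defined above.
    #[test]
    fn tree_type_u8_round_trip() {
        assert_eq!(0u8, u8::from(TreeType::Standard));
        assert_eq!(1u8, u8::from(TreeType::Blob));

        assert_eq!(Ok(TreeType::Standard), TreeType::try_from(0u8));
        assert_eq!(Ok(TreeType::Blob), TreeType::try_from(1u8));
        assert!(TreeType::try_from(2u8).is_err());
    }
}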