Skip to main content

lsm_tree/config/
mod.rs

// Copyright (c) 2024-present, fjall-rs
// This source code is licensed under both the Apache 2.0 and MIT License
// (found in the LICENSE-* files in the repository)
4
5mod block_size;
6mod compression;
7mod filter;
8mod hash_ratio;
9mod pinning;
10mod restart_interval;
11
12pub use block_size::BlockSizePolicy;
13pub use compression::CompressionPolicy;
14pub use filter::{BloomConstructionPolicy, FilterPolicy, FilterPolicyEntry};
15pub use hash_ratio::HashRatioPolicy;
16pub use pinning::PinningPolicy;
17pub use restart_interval::RestartIntervalPolicy;
18
19/// Partitioning policy for indexes and filters
20pub type PartitioningPolicy = PinningPolicy;
21
22use crate::{
23    compaction::filter::Factory, path::absolute_path, version::DEFAULT_LEVEL_COUNT, AnyTree,
24    BlobTree, Cache, CompressionType, DescriptorTable, SequenceNumberCounter, Tree,
25};
26use std::{
27    path::{Path, PathBuf},
28    sync::Arc,
29};
30
/// LSM-tree type
///
/// Can be converted into a single-byte tag (`u8::from`) and parsed back
/// from one (`TreeType::try_from`).
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum TreeType {
    /// Standard LSM-tree, see [`Tree`]
    Standard,

    /// Key-value separated LSM-tree, see [`BlobTree`]
    Blob,
}
40
41impl From<TreeType> for u8 {
42    fn from(val: TreeType) -> Self {
43        match val {
44            TreeType::Standard => 0,
45            TreeType::Blob => 1,
46        }
47    }
48}
49
50impl TryFrom<u8> for TreeType {
51    type Error = ();
52
53    fn try_from(value: u8) -> Result<Self, Self::Error> {
54        match value {
55            0 => Ok(Self::Standard),
56            1 => Ok(Self::Blob),
57            _ => Err(()),
58        }
59    }
60}
61
/// Folder name used as the tree path by `Config::default()`.
const DEFAULT_FILE_FOLDER: &str = ".lsm.data";
63
64/// Options for key-value separation
65#[derive(Clone, Debug, PartialEq)]
66pub struct KvSeparationOptions {
67    /// What type of compression is used for blobs
68    #[doc(hidden)]
69    pub compression: CompressionType,
70
71    /// Blob file target size in bytes
72    #[doc(hidden)]
73    pub file_target_size: u64,
74
75    /// Key-value separation threshold in bytes
76    #[doc(hidden)]
77    pub separation_threshold: u32,
78
79    #[doc(hidden)]
80    pub staleness_threshold: f32,
81
82    #[doc(hidden)]
83    pub age_cutoff: f32,
84}
85
86impl Default for KvSeparationOptions {
87    fn default() -> Self {
88        Self {
89            #[cfg(feature="lz4")]
90            compression:   CompressionType::Lz4,
91
92            #[cfg(not(feature="lz4"))]
93            compression: CompressionType::None,
94
95            file_target_size: /* 64 MiB */ 64 * 1_024 * 1_024,
96            separation_threshold: /* 1 KiB */ 1_024,
97
98            staleness_threshold: 0.25,
99            age_cutoff: 0.25,
100        }
101    }
102}
103
104impl KvSeparationOptions {
105    /// Sets the blob compression method.
106    #[must_use]
107    pub fn compression(mut self, compression: CompressionType) -> Self {
108        self.compression = compression;
109        self
110    }
111
112    /// Sets the target size of blob files.
113    ///
114    /// Smaller blob files allow more granular garbage collection
115    /// which allows lower space amp for lower write I/O cost.
116    ///
117    /// Larger blob files decrease the number of files on disk and maintenance
118    /// overhead.
119    ///
120    /// Defaults to 64 MiB.
121    #[must_use]
122    pub fn file_target_size(mut self, bytes: u64) -> Self {
123        self.file_target_size = bytes;
124        self
125    }
126
127    /// Sets the key-value separation threshold in bytes.
128    ///
129    /// Smaller value will reduce compaction overhead and thus write amplification,
130    /// at the cost of lower read performance.
131    ///
132    /// Defaults to 1 KiB.
133    #[must_use]
134    pub fn separation_threshold(mut self, bytes: u32) -> Self {
135        self.separation_threshold = bytes;
136        self
137    }
138
139    /// Sets the staleness threshold percentage.
140    ///
141    /// The staleness percentage determines how much a blob file needs to be fragmented to be
142    /// picked up by the garbage collection.
143    ///
144    /// Defaults to 33%.
145    #[must_use]
146    pub fn staleness_threshold(mut self, ratio: f32) -> Self {
147        self.staleness_threshold = ratio;
148        self
149    }
150
151    /// Sets the age cutoff threshold.
152    ///
153    /// Defaults to 20%.
154    #[must_use]
155    pub fn age_cutoff(mut self, ratio: f32) -> Self {
156        self.age_cutoff = ratio;
157        self
158    }
159}
160
161/// Tree configuration builder
162pub struct Config {
163    /// Folder path
164    #[doc(hidden)]
165    pub path: PathBuf,
166
167    /// Block cache to use
168    #[doc(hidden)]
169    pub cache: Arc<Cache>,
170
171    /// Descriptor table to use
172    #[doc(hidden)]
173    pub descriptor_table: Option<Arc<DescriptorTable>>,
174
175    /// Number of levels of the LSM tree (depth of tree)
176    ///
177    /// Once set, the level count is fixed (in the "manifest" file)
178    pub level_count: u8,
179
180    /// What type of compression is used for data blocks
181    pub data_block_compression_policy: CompressionPolicy,
182
183    /// What type of compression is used for index blocks
184    pub index_block_compression_policy: CompressionPolicy,
185
186    /// Restart interval inside data blocks
187    pub data_block_restart_interval_policy: RestartIntervalPolicy,
188
189    /// Restart interval inside index blocks
190    pub index_block_restart_interval_policy: RestartIntervalPolicy,
191
192    /// Block size of data blocks
193    pub data_block_size_policy: BlockSizePolicy,
194
195    /// Whether to pin index blocks
196    pub index_block_pinning_policy: PinningPolicy,
197
198    /// Whether to pin filter blocks
199    pub filter_block_pinning_policy: PinningPolicy,
200
201    /// Whether to pin top level index of partitioned index
202    pub top_level_index_block_pinning_policy: PinningPolicy,
203
204    /// Whether to pin top level index of partitioned filter
205    pub top_level_filter_block_pinning_policy: PinningPolicy,
206
207    /// Data block hash ratio
208    pub data_block_hash_ratio_policy: HashRatioPolicy,
209
210    /// Whether to partition index blocks
211    pub index_block_partitioning_policy: PartitioningPolicy,
212
213    /// Whether to partition filter blocks
214    pub filter_block_partitioning_policy: PartitioningPolicy,
215
216    /// Partition size when using partitioned indexes
217    pub index_block_partition_size_policy: BlockSizePolicy,
218
219    /// Partition size when using partitioned filters
220    pub filter_block_partition_size_policy: BlockSizePolicy,
221
222    /// If `true`, the last level will not build filters, reducing the filter size of a database
223    /// by ~90% typically
224    pub(crate) expect_point_read_hits: bool,
225
226    /// Filter construction policy
227    pub filter_policy: FilterPolicy,
228
229    /// Compaction filter factory
230    pub compaction_filter_factory: Option<Arc<dyn Factory>>,
231
232    #[doc(hidden)]
233    pub kv_separation_opts: Option<KvSeparationOptions>,
234
235    /// The global sequence number generator
236    ///
237    /// Should be shared between multple trees of a database
238    pub(crate) seqno: SequenceNumberCounter,
239
240    pub(crate) visible_seqno: SequenceNumberCounter,
241}
242
243// TODO: remove default?
244impl Default for Config {
245    fn default() -> Self {
246        Self {
247            path: absolute_path(Path::new(DEFAULT_FILE_FOLDER)),
248            descriptor_table: Some(Arc::new(DescriptorTable::new(256))),
249            seqno: SequenceNumberCounter::default(),
250            visible_seqno: SequenceNumberCounter::default(),
251
252            cache: Arc::new(Cache::with_capacity_bytes(
253                /* 16 MiB */ 16 * 1_024 * 1_024,
254            )),
255
256            data_block_restart_interval_policy: RestartIntervalPolicy::all(16),
257            index_block_restart_interval_policy: RestartIntervalPolicy::all(1),
258
259            level_count: DEFAULT_LEVEL_COUNT,
260
261            data_block_size_policy: BlockSizePolicy::all(4_096),
262
263            index_block_pinning_policy: PinningPolicy::new([true, true, false]),
264            filter_block_pinning_policy: PinningPolicy::new([true, false]),
265
266            top_level_index_block_pinning_policy: PinningPolicy::all(true), // TODO: implement
267            top_level_filter_block_pinning_policy: PinningPolicy::all(true), // TODO: implement
268
269            index_block_partitioning_policy: PinningPolicy::new([false, false, false, true]),
270            filter_block_partitioning_policy: PinningPolicy::new([false, false, false, true]),
271
272            index_block_partition_size_policy: BlockSizePolicy::all(4_096), // TODO: implement
273            filter_block_partition_size_policy: BlockSizePolicy::all(4_096), // TODO: implement
274
275            data_block_compression_policy: ({
276                #[cfg(feature = "lz4")]
277                let c = CompressionPolicy::new([CompressionType::None, CompressionType::Lz4]);
278
279                #[cfg(not(feature = "lz4"))]
280                let c = CompressionPolicy::new([CompressionType::None]);
281
282                c
283            }),
284            index_block_compression_policy: CompressionPolicy::all(CompressionType::None),
285
286            data_block_hash_ratio_policy: HashRatioPolicy::all(0.0),
287
288            filter_policy: FilterPolicy::all(FilterPolicyEntry::Bloom(
289                BloomConstructionPolicy::BitsPerKey(10.0),
290            )),
291
292            compaction_filter_factory: None,
293
294            expect_point_read_hits: false,
295
296            kv_separation_opts: None,
297        }
298    }
299}
300
301impl Config {
302    /// Initializes a new config
303    pub fn new<P: AsRef<Path>>(
304        path: P,
305        seqno: SequenceNumberCounter,
306        visible_seqno: SequenceNumberCounter,
307    ) -> Self {
308        Self {
309            path: absolute_path(path.as_ref()),
310            seqno,
311            visible_seqno,
312            ..Default::default()
313        }
314    }
315
316    /// Sets the global cache.
317    ///
318    /// You can create a global [`Cache`] and share it between multiple
319    /// trees to cap global cache memory usage.
320    ///
321    /// Defaults to a cache with 16 MiB of capacity *per tree*.
322    #[must_use]
323    pub fn use_cache(mut self, cache: Arc<Cache>) -> Self {
324        self.cache = cache;
325        self
326    }
327
328    /// Sets the file descriptor cache.
329    ///
330    /// Can be shared across trees.
331    #[must_use]
332    pub fn use_descriptor_table(mut self, descriptor_table: Option<Arc<DescriptorTable>>) -> Self {
333        self.descriptor_table = descriptor_table;
334        self
335    }
336
337    /// If `true`, the last level will not build filters, reducing the filter size of a database
338    /// by ~90% typically.
339    ///
340    /// **Enable this only if you know that point reads generally are expected to find a key-value pair.**
341    #[must_use]
342    pub fn expect_point_read_hits(mut self, b: bool) -> Self {
343        self.expect_point_read_hits = b;
344        self
345    }
346
347    /// Sets the partitioning policy for filter blocks.
348    #[must_use]
349    pub fn filter_block_partitioning_policy(mut self, policy: PinningPolicy) -> Self {
350        self.filter_block_partitioning_policy = policy;
351        self
352    }
353
354    /// Sets the partitioning policy for index blocks.
355    #[must_use]
356    pub fn index_block_partitioning_policy(mut self, policy: PinningPolicy) -> Self {
357        self.index_block_partitioning_policy = policy;
358        self
359    }
360
361    /// Sets the pinning policy for filter blocks.
362    #[must_use]
363    pub fn filter_block_pinning_policy(mut self, policy: PinningPolicy) -> Self {
364        self.filter_block_pinning_policy = policy;
365        self
366    }
367
368    /// Sets the pinning policy for index blocks.
369    #[must_use]
370    pub fn index_block_pinning_policy(mut self, policy: PinningPolicy) -> Self {
371        self.index_block_pinning_policy = policy;
372        self
373    }
374
375    /// Sets the restart interval inside data blocks.
376    ///
377    /// A higher restart interval saves space while increasing lookup times
378    /// inside data blocks.
379    ///
380    /// Default = 16
381    #[must_use]
382    pub fn data_block_restart_interval_policy(mut self, policy: RestartIntervalPolicy) -> Self {
383        self.data_block_restart_interval_policy = policy;
384        self
385    }
386
387    // TODO: not supported yet in index blocks
388    // /// Sets the restart interval inside index blocks.
389    // ///
390    // /// A higher restart interval saves space while increasing lookup times
391    // /// inside index blocks.
392    // ///
393    // /// Default = 1
394    // #[must_use]
395    // pub fn index_block_restart_interval_policy(mut self, policy: RestartIntervalPolicy) -> Self {
396    //     self.index_block_restart_interval_policy = policy;
397    //     self
398    // }
399
400    /// Sets the filter construction policy.
401    #[must_use]
402    pub fn filter_policy(mut self, policy: FilterPolicy) -> Self {
403        self.filter_policy = policy;
404        self
405    }
406
407    /// Sets the compression method for data blocks.
408    #[must_use]
409    pub fn data_block_compression_policy(mut self, policy: CompressionPolicy) -> Self {
410        self.data_block_compression_policy = policy;
411        self
412    }
413
414    /// Sets the compression method for index blocks.
415    #[must_use]
416    pub fn index_block_compression_policy(mut self, policy: CompressionPolicy) -> Self {
417        self.index_block_compression_policy = policy;
418        self
419    }
420
421    // TODO: level count is fixed to 7 right now
422    // /// Sets the number of levels of the LSM tree (depth of tree).
423    // ///
424    // /// Defaults to 7, like `LevelDB` and `RocksDB`.
425    // ///
426    // /// Cannot be changed once set.
427    // ///
428    // /// # Panics
429    // ///
430    // /// Panics if `n` is 0.
431    // #[must_use]
432    // pub fn level_count(mut self, n: u8) -> Self {
433    //     assert!(n > 0);
434
435    //     self.level_count = n;
436    //     self
437    // }
438
439    /// Sets the data block size policy.
440    #[must_use]
441    pub fn data_block_size_policy(mut self, policy: BlockSizePolicy) -> Self {
442        self.data_block_size_policy = policy;
443        self
444    }
445
446    /// Sets the hash ratio policy for data blocks.
447    ///
448    /// If greater than 0.0, a hash index is embedded into data blocks that can speed up reads
449    /// inside the data block.
450    #[must_use]
451    pub fn data_block_hash_ratio_policy(mut self, policy: HashRatioPolicy) -> Self {
452        self.data_block_hash_ratio_policy = policy;
453        self
454    }
455
456    /// Toggles key-value separation.
457    #[must_use]
458    pub fn with_kv_separation(mut self, opts: Option<KvSeparationOptions>) -> Self {
459        self.kv_separation_opts = opts;
460        self
461    }
462
463    /// Installs a custom compaction filter.
464    #[must_use]
465    pub fn with_compaction_filter_factory(mut self, factory: Option<Arc<dyn Factory>>) -> Self {
466        self.compaction_filter_factory = factory;
467        self
468    }
469
470    /// Opens a tree using the config.
471    ///
472    /// # Errors
473    ///
474    /// Will return `Err` if an IO error occurs.
475    pub fn open(self) -> crate::Result<AnyTree> {
476        Ok(if self.kv_separation_opts.is_some() {
477            AnyTree::Blob(BlobTree::open(self)?)
478        } else {
479            AnyTree::Standard(Tree::open(self)?)
480        })
481    }
482}