lsm_tree/config/
mod.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5mod block_size;
6mod compression;
7mod filter;
8mod hash_ratio;
9mod pinning;
10mod restart_interval;
11
12pub use block_size::BlockSizePolicy;
13pub use compression::CompressionPolicy;
14pub use filter::{BloomConstructionPolicy, FilterPolicy, FilterPolicyEntry};
15pub use hash_ratio::HashRatioPolicy;
16pub use pinning::PinningPolicy;
17pub use restart_interval::RestartIntervalPolicy;
18
19use crate::{path::absolute_path, BlobTree, Cache, CompressionType, DescriptorTable, Tree};
20use std::{
21    path::{Path, PathBuf},
22    sync::Arc,
23};
24
/// Kind of LSM-tree
///
/// Converts to and from a single byte: `Standard` is `0`, `Blob` is `1`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TreeType {
    /// Regular LSM-tree, see [`Tree`]
    Standard,

    /// LSM-tree with key-value separation, see [`BlobTree`]
    Blob,
}
34
35impl From<TreeType> for u8 {
36    fn from(val: TreeType) -> Self {
37        match val {
38            TreeType::Standard => 0,
39            TreeType::Blob => 1,
40        }
41    }
42}
43
44impl TryFrom<u8> for TreeType {
45    type Error = ();
46
47    fn try_from(value: u8) -> Result<Self, Self::Error> {
48        match value {
49            0 => Ok(Self::Standard),
50            1 => Ok(Self::Blob),
51            _ => Err(()),
52        }
53    }
54}
55
/// Folder name used for the tree path by `Config::default`.
const DEFAULT_FILE_FOLDER: &str = ".lsm.data";
57
58#[derive(Clone)]
59/// Tree configuration builder
60pub struct Config {
61    /// Folder path
62    #[doc(hidden)]
63    pub path: PathBuf,
64
65    /// Block cache to use
66    #[doc(hidden)]
67    pub cache: Arc<Cache>,
68
69    /// Descriptor table to use
70    #[doc(hidden)]
71    pub descriptor_table: Arc<DescriptorTable>,
72
73    /// Tree type (unused)
74    #[allow(unused)]
75    pub tree_type: TreeType,
76
77    /// Number of levels of the LSM tree (depth of tree)
78    ///
79    /// Once set, the level count is fixed (in the "manifest" file)
80    pub level_count: u8,
81
82    /// What type of compression is used for data blocks
83    pub data_block_compression_policy: CompressionPolicy,
84
85    /// What type of compression is used for index blocks
86    pub index_block_compression_policy: CompressionPolicy,
87
88    /// Restart interval inside data blocks
89    pub data_block_restart_interval_policy: RestartIntervalPolicy,
90
91    /// Restart interval inside index blocks
92    pub index_block_restart_interval_policy: RestartIntervalPolicy,
93
94    /// Block size of data blocks
95    pub data_block_size_policy: BlockSizePolicy,
96
97    /// Block size of index blocks
98    pub index_block_size_policy: BlockSizePolicy,
99
100    /// Whether to pin index blocks
101    pub index_block_pinning_policy: PinningPolicy,
102
103    /// Whether to pin filter blocks
104    pub filter_block_pinning_policy: PinningPolicy,
105
106    /// Data block hash ratio
107    pub data_block_hash_ratio_policy: HashRatioPolicy,
108
109    /// If `true`, the last level will not build filters, reducing the filter size of a database
110    /// by ~90% typically
111    pub(crate) expect_point_read_hits: bool,
112
113    /// Filter construction policy
114    pub filter_policy: FilterPolicy,
115
116    /// What type of compression is used for blobs
117    pub blob_compression: CompressionType,
118
119    /// Blob file (value log segment) target size in bytes
120    #[doc(hidden)]
121    pub blob_file_target_size: u64,
122
123    /// Key-value separation threshold in bytes
124    #[doc(hidden)]
125    pub blob_file_separation_threshold: u32,
126}
127
128impl Default for Config {
129    fn default() -> Self {
130        Self {
131            path: absolute_path(Path::new(DEFAULT_FILE_FOLDER)),
132            descriptor_table: Arc::new(DescriptorTable::new(256)),
133
134            cache: Arc::new(Cache::with_capacity_bytes(/* 16 MiB */ 16 * 1_024 * 1_024)),
135
136            data_block_restart_interval_policy: RestartIntervalPolicy::all(16),
137            index_block_restart_interval_policy: RestartIntervalPolicy::all(1),
138
139            level_count: 7,
140            tree_type: TreeType::Standard,
141
142            data_block_size_policy: BlockSizePolicy::default(),
143            index_block_size_policy: BlockSizePolicy::default(),
144
145            index_block_pinning_policy: PinningPolicy::new(&[true, true, false]),
146            filter_block_pinning_policy: PinningPolicy::new(&[true, false]),
147
148            data_block_compression_policy: CompressionPolicy::default(),
149            index_block_compression_policy:CompressionPolicy::all(CompressionType::None),
150
151            data_block_hash_ratio_policy: HashRatioPolicy::all(0.0),
152
153            blob_compression: CompressionType::None,
154
155            filter_policy: FilterPolicy::default(),
156
157            blob_file_target_size: /* 64 MiB */ 64 * 1_024 * 1_024,
158            blob_file_separation_threshold: /* 4 KiB */ 4 * 1_024,
159
160            expect_point_read_hits: false,
161        }
162    }
163}
164
165impl Config {
166    /// Initializes a new config
167    pub fn new<P: AsRef<Path>>(path: P) -> Self {
168        Self {
169            path: absolute_path(path.as_ref()),
170            ..Default::default()
171        }
172    }
173
174    /// Sets the global cache.
175    ///
176    /// You can create a global [`Cache`] and share it between multiple
177    /// trees to cap global cache memory usage.
178    ///
179    /// Defaults to a cache with 8 MiB of capacity *per tree*.
180    #[must_use]
181    pub fn use_cache(mut self, cache: Arc<Cache>) -> Self {
182        self.cache = cache;
183        self
184    }
185
186    #[must_use]
187    #[doc(hidden)]
188    pub fn use_descriptor_table(mut self, descriptor_table: Arc<DescriptorTable>) -> Self {
189        self.descriptor_table = descriptor_table;
190        self
191    }
192
193    /// If `true`, the last level will not build filters, reducing the filter size of a database
194    /// by ~90% typically.
195    ///
196    /// **Enable this only if you know that point reads generally are expected to find a key-value pair.**
197    #[must_use]
198    pub fn expect_point_read_hits(mut self, b: bool) -> Self {
199        self.expect_point_read_hits = b;
200        self
201    }
202
203    /// Sets the pinning policy for filter blocks.
204    #[must_use]
205    pub fn filter_block_pinning_policy(mut self, policy: PinningPolicy) -> Self {
206        self.filter_block_pinning_policy = policy;
207        self
208    }
209
210    /// Sets the pinning policy for index blocks.
211    #[must_use]
212    pub fn index_block_pinning_policy(mut self, policy: PinningPolicy) -> Self {
213        self.index_block_pinning_policy = policy;
214        self
215    }
216
217    /// Sets the restart interval inside data blocks.
218    ///
219    /// A higher restart interval saves space while increasing lookup times
220    /// inside data blocks.
221    ///
222    /// Default = 16
223    #[must_use]
224    pub fn data_block_restart_interval_policy(mut self, policy: RestartIntervalPolicy) -> Self {
225        self.data_block_restart_interval_policy = policy;
226        self
227    }
228
229    /// Sets the restart interval inside index blocks.
230    ///
231    /// A higher restart interval saves space while increasing lookup times
232    /// inside index blocks.
233    ///
234    /// Default = 1
235    #[must_use]
236    pub fn index_block_restart_interval_policy(mut self, policy: RestartIntervalPolicy) -> Self {
237        self.index_block_restart_interval_policy = policy;
238        self
239    }
240
241    /// Sets the filter construction policy.
242    #[must_use]
243    pub fn filter_policy(mut self, policy: FilterPolicy) -> Self {
244        self.filter_policy = policy;
245        self
246    }
247
248    /// Sets the compression method for data blocks.
249    #[must_use]
250    pub fn data_block_compression_policy(mut self, policy: CompressionPolicy) -> Self {
251        self.data_block_compression_policy = policy;
252        self
253    }
254
255    /// Sets the compression method for index blocks.
256    #[must_use]
257    pub fn index_block_compression_policy(mut self, policy: CompressionPolicy) -> Self {
258        self.index_block_compression_policy = policy;
259        self
260    }
261
262    /// Sets the blob compression method.
263    #[must_use]
264    pub fn blob_compression(mut self, compression: CompressionType) -> Self {
265        self.blob_compression = compression;
266        self
267    }
268
269    /// Sets the number of levels of the LSM tree (depth of tree).
270    ///
271    /// Defaults to 7, like `LevelDB` and `RocksDB`.
272    ///
273    /// Cannot be changed once set.
274    ///
275    /// # Panics
276    ///
277    /// Panics if `n` is 0.
278    #[must_use]
279    pub fn level_count(mut self, n: u8) -> Self {
280        assert!(n > 0);
281
282        self.level_count = n;
283        self
284    }
285
286    /// Sets the data block size policy.
287    #[must_use]
288    pub fn data_block_size_policy(mut self, policy: BlockSizePolicy) -> Self {
289        self.data_block_size_policy = policy;
290        self
291    }
292
293    /// Sets the index block size policy.
294    #[must_use]
295    pub fn index_block_size_policy(mut self, policy: BlockSizePolicy) -> Self {
296        self.index_block_size_policy = policy;
297        self
298    }
299
300    /// Sets the hash ratio policy for data blocks.
301    ///
302    /// If greater than 0.0, a hash index is embedded into data blocks that can speed up reads
303    /// inside the data block.
304    #[must_use]
305    pub fn data_block_hash_ratio_policy(mut self, policy: HashRatioPolicy) -> Self {
306        self.data_block_hash_ratio_policy = policy;
307        self
308    }
309
310    /// Sets the target size of blob files.
311    ///
312    /// Smaller blob files allow more granular garbage collection
313    /// which allows lower space amp for lower write I/O cost.
314    ///
315    /// Larger blob files decrease the number of files on disk and maintenance
316    /// overhead.
317    ///
318    /// Defaults to 64 MiB.
319    ///
320    /// This option has no effect when not used for opening a blob tree.
321    #[must_use]
322    pub fn blob_file_target_size(mut self, bytes: u64) -> Self {
323        self.blob_file_target_size = bytes;
324        self
325    }
326
327    /// Sets the key-value separation threshold in bytes.
328    ///
329    /// Smaller value will reduce compaction overhead and thus write amplification,
330    /// at the cost of lower read performance.
331    ///
332    /// Defaults to 4KiB.
333    ///
334    /// This option has no effect when not used for opening a blob tree.
335    #[must_use]
336    pub fn blob_file_separation_threshold(mut self, bytes: u32) -> Self {
337        self.blob_file_separation_threshold = bytes;
338        self
339    }
340
341    /// Opens a tree using the config.
342    ///
343    /// # Errors
344    ///
345    /// Will return `Err` if an IO error occurs.
346    pub fn open(self) -> crate::Result<Tree> {
347        Tree::open(self)
348    }
349
350    /// Opens a blob tree using the config.
351    ///
352    /// # Errors
353    ///
354    /// Will return `Err` if an IO error occurs.
355    pub fn open_as_blob_tree(mut self) -> crate::Result<BlobTree> {
356        self.tree_type = TreeType::Blob;
357        BlobTree::open(self)
358    }
359}