lsm_tree/config.rs

// Copyright (c) 2024-present, fjall-rs
// This source code is licensed under both the Apache 2.0 and MIT License
// (found in the LICENSE-* files in the repository)

use crate::{
    descriptor_table::FileDescriptorTable,
    path::absolute_path,
    segment::meta::{CompressionType, TableType},
    BlobTree, BlockCache, Tree,
};
use std::{
    path::{Path, PathBuf},
    sync::Arc,
};
use value_log::BlobCache;

/// LSM-tree type
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum TreeType {
    /// Standard LSM-tree, see [`Tree`]
    Standard,

    /// Key-value separated LSM-tree, see [`BlobTree`]
    Blob,
}

impl From<TreeType> for u8 {
    fn from(val: TreeType) -> Self {
        match val {
            TreeType::Standard => 0,
            TreeType::Blob => 1,
        }
    }
}

impl TryFrom<u8> for TreeType {
    type Error = ();

    fn try_from(value: u8) -> Result<Self, Self::Error> {
        match value {
            0 => Ok(Self::Standard),
            1 => Ok(Self::Blob),
            _ => Err(()),
        }
    }
}

const DEFAULT_FILE_FOLDER: &str = ".lsm.data";

#[derive(Clone)]
/// Tree configuration builder
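///
/// # Examples
///
/// A minimal usage sketch of the builder (the `folder` variable and the
/// crate-root `use` path are illustrative assumptions, not taken from this file):
///
/// ```ignore
/// use lsm_tree::{Config, Tree};
///
/// // Every setter returns `Self`, so options can be chained
/// // before the tree is opened.
/// let tree: Tree = Config::new(folder)
///     .level_count(7)
///     .data_block_size(4_096)
///     .open()?;
/// ```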
pub struct Config {
    /// Folder path
    #[doc(hidden)]
    pub path: PathBuf,

    /// Tree type (unused)
    #[allow(unused)]
    pub tree_type: TreeType,

    /// What type of compression is used
    pub compression: CompressionType,

    /// What type of compression is used for blobs
    pub blob_compression: CompressionType,

    /// Table type (unused)
    #[allow(unused)]
    pub(crate) table_type: TableType,

    /// Block size of data blocks
    pub data_block_size: u32,

    /// Block size of index blocks
    pub index_block_size: u32,

    /// Number of levels of the LSM-tree (depth of tree)
    pub level_count: u8,

    /// Bits per key for levels that are not L0, L1, L2
    // NOTE: bloom_bits_per_key is not conditionally compiled,
    // because that would change the file format
    #[doc(hidden)]
    pub bloom_bits_per_key: i8,

    /// Block cache to use
    #[doc(hidden)]
    pub block_cache: Arc<BlockCache>,

    /// Blob cache to use
    #[doc(hidden)]
    pub blob_cache: Arc<BlobCache>,

    /// Blob file (value log segment) target size in bytes
    #[doc(hidden)]
    pub blob_file_target_size: u64,

    /// Key-value separation threshold in bytes
    #[doc(hidden)]
    pub blob_file_separation_threshold: u32,

    /// Descriptor table to use
    #[doc(hidden)]
    pub descriptor_table: Arc<FileDescriptorTable>,
}

impl Default for Config {
    fn default() -> Self {
        Self {
            path: absolute_path(DEFAULT_FILE_FOLDER),
            descriptor_table: Arc::new(FileDescriptorTable::new(128, 2)),

            block_cache: Arc::new(BlockCache::with_capacity_bytes(/* 16 MiB */ 16 * 1_024 * 1_024)),
            data_block_size: /* 4 KiB */ 4_096,
            index_block_size: /* 4 KiB */ 4_096,
            level_count: 7,
            tree_type: TreeType::Standard,
            table_type: TableType::Block,
            compression: CompressionType::None,
            blob_compression: CompressionType::None,
            bloom_bits_per_key: 10,

            blob_cache: Arc::new(BlobCache::with_capacity_bytes(/* 16 MiB */ 16 * 1_024 * 1_024)),
            blob_file_target_size: /* 64 MiB */ 64 * 1_024 * 1_024,
            blob_file_separation_threshold: /* 4 KiB */ 4 * 1_024,
        }
    }
}

impl Config {
    /// Initializes a new config
    pub fn new<P: AsRef<Path>>(path: P) -> Self {
        Self {
            path: absolute_path(path),
            ..Default::default()
        }
    }

    /// Sets the bits per key to use for bloom filters
    /// in levels that are not L0, L1, L2.
    ///
    /// Use -1 to disable bloom filters even in L0, L1, L2.
    ///
    /// Defaults to 10 bits.
    ///
    /// # Panics
    ///
    /// Panics if `bits` is less than -1.
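    ///
    /// A short sketch (the `folder` variable and the `lsm_tree::Config` path
    /// are assumptions for illustration):
    ///
    /// ```ignore
    /// use lsm_tree::Config;
    ///
    /// // Spend twice the default bits per key for fewer false positives ...
    /// let config = Config::new(folder).bloom_bits_per_key(20);
    ///
    /// // ... or disable bloom filters entirely.
    /// let config = Config::new(folder).bloom_bits_per_key(-1);
    /// ```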
    #[must_use]
    pub fn bloom_bits_per_key(mut self, bits: i8) -> Self {
        assert!(bits >= -1, "invalid bits_per_key value");

        self.bloom_bits_per_key = bits;
        self
    }

    /// Sets the compression method.
    ///
    /// Using some compression is recommended.
    ///
    /// Default = None
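    ///
    /// A minimal sketch; `CompressionType::None` is the only variant shown in
    /// this file, and variants other than `None` may be gated behind crate
    /// features (`folder` is a placeholder):
    ///
    /// ```ignore
    /// use lsm_tree::{CompressionType, Config};
    ///
    /// // Explicitly keep data blocks uncompressed (the default).
    /// let config = Config::new(folder).compression(CompressionType::None);
    /// ```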
    #[must_use]
    pub fn compression(mut self, compression: CompressionType) -> Self {
        self.compression = compression;
        self
    }

    /// Sets the compression method for blobs.
    ///
    /// Using some compression is recommended.
    ///
    /// Default = None
    ///
    /// This option has no effect when not used for opening a blob tree.
    #[must_use]
    pub fn blob_compression(mut self, compression: CompressionType) -> Self {
        self.blob_compression = compression;
        self
    }

    /// Sets the number of levels of the LSM-tree (depth of tree).
    ///
    /// Defaults to 7, like `LevelDB` and `RocksDB`.
    ///
    /// Cannot be changed once set.
    ///
    /// # Panics
    ///
    /// Panics if `n` is 0.
    #[must_use]
    pub fn level_count(mut self, n: u8) -> Self {
        assert!(n > 0);

        self.level_count = n;
        self
    }

    /// Sets the data block size.
    ///
    /// Defaults to 4 KiB (4096 bytes).
    ///
    /// For point-read-heavy workloads (get), a sensible default is
    /// somewhere between 4 and 8 KiB, depending on the average value size.
    ///
    /// For scan-heavy workloads (range, prefix), use 16 to 64 KiB,
    /// which also increases compression efficiency.
    ///
    /// # Panics
    ///
    /// Panics if the block size is smaller than 1 KiB or larger than 512 KiB.
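    ///
    /// An illustrative sketch for a scan-heavy tree (not a tuned
    /// recommendation; `folder` is a placeholder):
    ///
    /// ```ignore
    /// use lsm_tree::Config;
    ///
    /// // 64 KiB data blocks favour range scans and compression ratio
    /// // over point-read latency.
    /// let config = Config::new(folder).data_block_size(64 * 1_024);
    /// ```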
    #[must_use]
    pub fn data_block_size(mut self, block_size: u32) -> Self {
        assert!(block_size >= 1_024);
        assert!(block_size <= 512 * 1_024);

        self.data_block_size = block_size;

        self
    }

    /// Sets the index block size.
    ///
    /// Defaults to 4 KiB (4096 bytes).
    ///
    /// For point-read-heavy workloads (get), a sensible default is
    /// somewhere between 4 and 8 KiB, depending on the average value size.
    ///
    /// For scan-heavy workloads (range, prefix), use 16 to 64 KiB,
    /// which also increases compression efficiency.
    ///
    /// # Panics
    ///
    /// Panics if the block size is smaller than 1 KiB or larger than 512 KiB.
    #[must_use]
    pub fn index_block_size(mut self, block_size: u32) -> Self {
        assert!(block_size >= 1_024);
        assert!(block_size <= 512 * 1_024);

        self.index_block_size = block_size;

        self
    }

    /// Sets the block cache.
    ///
    /// You can create a global [`BlockCache`] and share it between multiple
    /// trees to cap global cache memory usage.
    ///
    /// Defaults to a block cache with 16 MiB of capacity *per tree*.
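    ///
    /// A sketch of sharing one cache between two trees (the paths and the
    /// cache size are illustrative):
    ///
    /// ```ignore
    /// use std::sync::Arc;
    /// use lsm_tree::{BlockCache, Config};
    ///
    /// // One 64 MiB cache shared by both trees, instead of 16 MiB each.
    /// let cache = Arc::new(BlockCache::with_capacity_bytes(64 * 1_024 * 1_024));
    ///
    /// let tree_a = Config::new(path_a).block_cache(cache.clone()).open()?;
    /// let tree_b = Config::new(path_b).block_cache(cache.clone()).open()?;
    /// ```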
    #[must_use]
    pub fn block_cache(mut self, block_cache: Arc<BlockCache>) -> Self {
        self.block_cache = block_cache;
        self
    }

    /// Sets the blob cache.
    ///
    /// You can create a global [`BlobCache`] and share it between multiple
    /// trees and their value logs to cap global cache memory usage.
    ///
    /// Defaults to a blob cache with 16 MiB of capacity *per tree*.
    ///
    /// This option has no effect when not used for opening a blob tree.
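    ///
    /// Analogous to [`Config::block_cache`], a sketch of one shared blob cache
    /// (the path and cache size are illustrative):
    ///
    /// ```ignore
    /// use std::sync::Arc;
    /// use lsm_tree::Config;
    /// use value_log::BlobCache;
    ///
    /// let blob_cache = Arc::new(BlobCache::with_capacity_bytes(64 * 1_024 * 1_024));
    ///
    /// let tree = Config::new(folder)
    ///     .blob_cache(blob_cache.clone())
    ///     .open_as_blob_tree()?;
    /// ```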
    #[must_use]
    pub fn blob_cache(mut self, blob_cache: Arc<BlobCache>) -> Self {
        self.blob_cache = blob_cache;
        self
    }

    /// Sets the target size of blob files.
    ///
    /// Smaller blob files allow more granular garbage collection,
    /// which allows lower space amplification at a lower write I/O cost.
    ///
    /// Larger blob files decrease the number of files on disk and maintenance
    /// overhead.
    ///
    /// Defaults to 64 MiB.
    ///
    /// This option has no effect when not used for opening a blob tree.
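    ///
    /// A sketch (the 16 MiB figure is illustrative, not a recommendation):
    ///
    /// ```ignore
    /// use lsm_tree::Config;
    ///
    /// // Smaller blob files than the 64 MiB default, for more granular GC.
    /// let config = Config::new(folder).blob_file_target_size(16 * 1_024 * 1_024);
    /// ```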
    #[must_use]
    pub fn blob_file_target_size(mut self, bytes: u64) -> Self {
        self.blob_file_target_size = bytes;
        self
    }

    /// Sets the key-value separation threshold in bytes.
    ///
    /// A smaller threshold will reduce compaction overhead and thus write amplification,
    /// at the cost of lower read performance.
    ///
    /// Defaults to 4 KiB.
    ///
    /// This option has no effect when not used for opening a blob tree.
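    ///
    /// A sketch (the 1 KiB threshold is illustrative):
    ///
    /// ```ignore
    /// use lsm_tree::Config;
    ///
    /// // Separate values of roughly 1 KiB and larger into blob files
    /// // instead of storing them inline in the tree.
    /// let config = Config::new(folder).blob_file_separation_threshold(1_024);
    /// ```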
    #[must_use]
    pub fn blob_file_separation_threshold(mut self, bytes: u32) -> Self {
        self.blob_file_separation_threshold = bytes;
        self
    }

    #[must_use]
    #[doc(hidden)]
    pub fn descriptor_table(mut self, descriptor_table: Arc<FileDescriptorTable>) -> Self {
        self.descriptor_table = descriptor_table;
        self
    }

    /// Opens a tree using the config.
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
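    ///
    /// A minimal sketch (`folder` is a placeholder path):
    ///
    /// ```ignore
    /// use lsm_tree::{Config, Tree};
    ///
    /// // Propagate I/O errors to the caller with `?`.
    /// let tree: Tree = Config::new(folder).open()?;
    /// ```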
    pub fn open(self) -> crate::Result<Tree> {
        Tree::open(self)
    }

    /// Opens a blob tree using the config.
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
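    ///
    /// A minimal sketch (`folder` is a placeholder path):
    ///
    /// ```ignore
    /// use lsm_tree::{BlobTree, Config};
    ///
    /// // Large values are key-value separated into the blob tree's value log.
    /// let tree: BlobTree = Config::new(folder).open_as_blob_tree()?;
    /// ```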
    pub fn open_as_blob_tree(mut self) -> crate::Result<BlobTree> {
        self.tree_type = TreeType::Blob;
        BlobTree::open(self)
    }
}