// lsm_tree/config.rs

// Copyright (c) 2024-present, fjall-rs
// This source code is licensed under both the Apache 2.0 and MIT License
// (found in the LICENSE-* files in the repository)

use crate::{
    cache::Cache,
    descriptor_table::FileDescriptorTable,
    path::absolute_path,
    segment::meta::{CompressionType, TableType},
    BlobTree, Tree,
};
use std::{
    path::{Path, PathBuf},
    sync::Arc,
};

/// LSM-tree type
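///
/// The tree type converts to and from a single `u8`; a minimal round-trip
/// sketch (assuming `TreeType` is re-exported at the crate root):
///
/// ```ignore
/// use lsm_tree::TreeType;
///
/// assert_eq!(u8::from(TreeType::Blob), 1);
/// assert_eq!(TreeType::try_from(1), Ok(TreeType::Blob));
/// ```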
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum TreeType {
    /// Standard LSM-tree, see [`Tree`]
    Standard,

    /// Key-value separated LSM-tree, see [`BlobTree`]
    Blob,
}

impl From<TreeType> for u8 {
    fn from(val: TreeType) -> Self {
        match val {
            TreeType::Standard => 0,
            TreeType::Blob => 1,
        }
    }
}

impl TryFrom<u8> for TreeType {
    type Error = ();

    fn try_from(value: u8) -> Result<Self, Self::Error> {
        match value {
            0 => Ok(Self::Standard),
            1 => Ok(Self::Blob),
            _ => Err(()),
        }
    }
}

const DEFAULT_FILE_FOLDER: &str = ".lsm.data";

#[derive(Clone)]
/// Tree configuration builder
pub struct Config {
    /// Folder path
    #[doc(hidden)]
    pub path: PathBuf,

    /// Tree type (unused)
    #[allow(unused)]
    pub tree_type: TreeType,

    /// What type of compression is used
    pub compression: CompressionType,

    /// What type of compression is used for blobs
    pub blob_compression: CompressionType,

    /// Table type (unused)
    #[allow(unused)]
    pub(crate) table_type: TableType,

    /// Block size of data blocks
    pub data_block_size: u32,

    /// Block size of index blocks
    pub index_block_size: u32,

    /// Number of levels of the LSM tree (depth of tree)
    pub level_count: u8,

    /// Bits per key for levels that are not L0, L1, L2
    // NOTE: bloom_bits_per_key is not conditionally compiled,
    // because that would change the file format
    #[doc(hidden)]
    pub bloom_bits_per_key: i8,

    /// Block cache to use
    #[doc(hidden)]
    pub cache: Arc<Cache>,

    /// Blob file (value log segment) target size in bytes
    #[doc(hidden)]
    pub blob_file_target_size: u64,

    /// Key-value separation threshold in bytes
    #[doc(hidden)]
    pub blob_file_separation_threshold: u32,

    /// Descriptor table to use
    #[doc(hidden)]
    pub descriptor_table: Arc<FileDescriptorTable>,
}

impl Default for Config {
    fn default() -> Self {
        Self {
            path: absolute_path(Path::new(DEFAULT_FILE_FOLDER)),
            descriptor_table: Arc::new(FileDescriptorTable::new(128, 2)),

            cache: Arc::new(Cache::with_capacity_bytes(/* 16 MiB */ 16 * 1_024 * 1_024)),

            data_block_size: /* 4 KiB */ 4_096,
            index_block_size: /* 4 KiB */ 4_096,
            level_count: 7,
            tree_type: TreeType::Standard,
            table_type: TableType::Block,
            compression: CompressionType::None,
            blob_compression: CompressionType::None,
            bloom_bits_per_key: 10,

            blob_file_target_size: /* 64 MiB */ 64 * 1_024 * 1_024,
            blob_file_separation_threshold: /* 4 KiB */ 4 * 1_024,
        }
    }
}

impl Config {
    /// Initializes a new config
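    ///
    /// A minimal usage sketch (assuming `Config` is re-exported at the crate
    /// root; the folder path is hypothetical):
    ///
    /// ```ignore
    /// use lsm_tree::Config;
    ///
    /// // All other options keep their defaults
    /// let tree = Config::new("/tmp/my_lsm_tree").open()?;
    /// ```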
    pub fn new<P: AsRef<Path>>(path: P) -> Self {
        Self {
            path: absolute_path(path.as_ref()),
            ..Default::default()
        }
    }

    /// Sets the bits per key to use for bloom filters
    /// in levels that are not L0 or L1.
    ///
    /// Use -1 to disable bloom filters even in L0, L1, L2.
    ///
    /// Defaults to 10 bits.
    ///
    /// # Panics
    ///
    /// Panics if `bits` is less than -1.
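    ///
    /// A sketch of disabling bloom filters entirely (hypothetical folder path):
    ///
    /// ```ignore
    /// use lsm_tree::Config;
    ///
    /// let config = Config::new("/tmp/my_lsm_tree").bloom_bits_per_key(-1);
    /// ```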
    #[must_use]
    pub fn bloom_bits_per_key(mut self, bits: i8) -> Self {
        assert!(bits >= -1, "invalid bits_per_key value");

        self.bloom_bits_per_key = bits;
        self
    }

    /// Sets the compression method.
    ///
    /// Using some compression is recommended.
    ///
    /// Default = None
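    ///
    /// A sketch of enabling compression (assuming `CompressionType` is
    /// re-exported at the crate root and a compression feature such as `lz4`
    /// provides the `Lz4` variant; the folder path is hypothetical):
    ///
    /// ```ignore
    /// use lsm_tree::{CompressionType, Config};
    ///
    /// let config = Config::new("/tmp/my_lsm_tree").compression(CompressionType::Lz4);
    /// ```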
    #[must_use]
    pub fn compression(mut self, compression: CompressionType) -> Self {
        self.compression = compression;
        self
    }

    /// Sets the compression method for blobs.
    ///
    /// Using some compression is recommended.
    ///
    /// Default = None
    #[must_use]
    pub fn blob_compression(mut self, compression: CompressionType) -> Self {
        self.blob_compression = compression;
        self
    }

    /// Sets the number of levels of the LSM tree (depth of tree).
    ///
    /// Defaults to 7, like `LevelDB` and `RocksDB`.
    ///
    /// Cannot be changed once set.
    ///
    /// # Panics
    ///
    /// Panics if `n` is 0.
    #[must_use]
    pub fn level_count(mut self, n: u8) -> Self {
        assert!(n > 0);

        self.level_count = n;
        self
    }

    /// Sets the data block size.
    ///
    /// Defaults to 4 KiB (4096 bytes).
    ///
    /// For point read heavy workloads (get), a sensible default is
    /// somewhere between 4 and 8 KiB, depending on the average value size.
    ///
    /// For scan heavy workloads (range, prefix), use 16 to 64 KiB,
    /// which also increases compression efficiency.
    ///
    /// # Panics
    ///
    /// Panics if the block size is smaller than 1 KiB or larger than 512 KiB.
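    ///
    /// A sketch of tuning for a scan-heavy workload (hypothetical folder path):
    ///
    /// ```ignore
    /// use lsm_tree::Config;
    ///
    /// // 16 KiB data blocks trade some point-read latency for better scan
    /// // performance and compression efficiency
    /// let config = Config::new("/tmp/my_lsm_tree").data_block_size(16 * 1_024);
    /// ```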
    #[must_use]
    pub fn data_block_size(mut self, block_size: u32) -> Self {
        assert!(block_size >= 1_024);
        assert!(block_size <= 512 * 1_024);

        self.data_block_size = block_size;

        self
    }

    /// Sets the index block size.
    ///
    /// Defaults to 4 KiB (4096 bytes).
    ///
    /// For point read heavy workloads (get), a sensible default is
    /// somewhere between 4 and 8 KiB, depending on the average value size.
    ///
    /// For scan heavy workloads (range, prefix), use 16 to 64 KiB,
    /// which also increases compression efficiency.
    ///
    /// # Panics
    ///
    /// Panics if the block size is smaller than 1 KiB or larger than 512 KiB.
    #[must_use]
    pub fn index_block_size(mut self, block_size: u32) -> Self {
        assert!(block_size >= 1_024);
        assert!(block_size <= 512 * 1_024);

        self.index_block_size = block_size;

        self
    }

    /// Sets the global cache.
    ///
    /// You can create a global [`Cache`] and share it between multiple
    /// trees to cap global cache memory usage.
    ///
    /// Defaults to a cache with 16 MiB of capacity *per tree*.
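    ///
    /// A sketch of sharing one cache between two trees (assuming `Cache` is
    /// re-exported at the crate root; paths and sizes are hypothetical):
    ///
    /// ```ignore
    /// use lsm_tree::{Cache, Config};
    /// use std::sync::Arc;
    ///
    /// // 64 MiB shared between both trees
    /// let cache = Arc::new(Cache::with_capacity_bytes(64 * 1_024 * 1_024));
    ///
    /// let tree_a = Config::new("/tmp/tree_a").use_cache(cache.clone()).open()?;
    /// let tree_b = Config::new("/tmp/tree_b").use_cache(cache).open()?;
    /// ```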
    #[must_use]
    pub fn use_cache(mut self, cache: Arc<Cache>) -> Self {
        self.cache = cache;
        self
    }

    /// Sets the target size of blob files.
    ///
    /// Smaller blob files allow more granular garbage collection,
    /// which can achieve lower space amplification at a lower write I/O cost.
    ///
    /// Larger blob files decrease the number of files on disk and maintenance
    /// overhead.
    ///
    /// Defaults to 64 MiB.
    ///
    /// This option has no effect when not used for opening a blob tree.
    #[must_use]
    pub fn blob_file_target_size(mut self, bytes: u64) -> Self {
        self.blob_file_target_size = bytes;
        self
    }

    /// Sets the key-value separation threshold in bytes.
    ///
    /// A smaller value reduces compaction overhead and thus write amplification,
    /// at the cost of lower read performance.
    ///
    /// Defaults to 4 KiB.
    ///
    /// This option has no effect when not used for opening a blob tree.
    #[must_use]
    pub fn blob_file_separation_threshold(mut self, bytes: u32) -> Self {
        self.blob_file_separation_threshold = bytes;
        self
    }

    #[must_use]
    #[doc(hidden)]
    pub fn descriptor_table(mut self, descriptor_table: Arc<FileDescriptorTable>) -> Self {
        self.descriptor_table = descriptor_table;
        self
    }

    /// Opens a tree using the config.
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
    pub fn open(self) -> crate::Result<Tree> {
        Tree::open(self)
    }

    /// Opens a blob tree using the config.
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
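    ///
    /// A sketch of opening a key-value separated tree with custom blob settings
    /// (the path and sizes are hypothetical):
    ///
    /// ```ignore
    /// use lsm_tree::Config;
    ///
    /// let blob_tree = Config::new("/tmp/my_blob_tree")
    ///     .blob_file_target_size(/* 128 MiB */ 128 * 1_024 * 1_024)
    ///     .blob_file_separation_threshold(/* 8 KiB */ 8 * 1_024)
    ///     .open_as_blob_tree()?;
    /// ```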
    pub fn open_as_blob_tree(mut self) -> crate::Result<BlobTree> {
        self.tree_type = TreeType::Blob;
        BlobTree::open(self)
    }
}