lsm_tree/
abstract.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::{
6    compaction::CompactionStrategy, config::TreeType, tree::inner::MemtableId, AnyTree, BlobTree,
7    Config, KvPair, Memtable, Segment, SegmentId, SeqNo, Snapshot, Tree, UserKey, UserValue,
8};
9use enum_dispatch::enum_dispatch;
10use std::{
11    ops::RangeBounds,
12    sync::{Arc, RwLockWriteGuard},
13};
14
15pub type RangeItem = crate::Result<KvPair>;
16
17/// Generic Tree API
18#[allow(clippy::module_name_repetitions)]
19#[enum_dispatch]
20pub trait AbstractTree {
21    /// Performs major compaction, blocking the caller until it's done.
22    ///
23    /// # Errors
24    ///
25    /// Will return `Err` if an IO error occurs.
26    fn major_compact(&self, target_size: u64, seqno_threshold: SeqNo) -> crate::Result<()>;
27
28    /// Gets the memory usage of all bloom filters in the tree.
29    fn bloom_filter_size(&self) -> usize;
30
31    #[doc(hidden)]
32    fn verify(&self) -> crate::Result<usize>;
33
34    /// Synchronously flushes a memtable to a disk segment.
35    ///
36    /// This method will not make the segment immediately available,
37    /// use [`AbstractTree::register_segments`] for that.
38    ///
39    /// # Errors
40    ///
41    /// Will return `Err` if an IO error occurs.
42    fn flush_memtable(
43        &self,
44        segment_id: SegmentId,
45        memtable: &Arc<Memtable>,
46        seqno_threshold: SeqNo,
47    ) -> crate::Result<Option<Segment>>;
48
49    /// Atomically registers flushed disk segments into the tree, removing their associated sealed memtables.
50    ///
51    /// # Errors
52    ///
53    /// Will return `Err` if an IO error occurs.
54    fn register_segments(&self, segments: &[Segment]) -> crate::Result<()>;
55
56    /// Write-locks the active memtable for exclusive access
57    fn lock_active_memtable(&self) -> RwLockWriteGuard<'_, Arc<Memtable>>;
58
59    /// Clears the active memtable atomically.
60    fn clear_active_memtable(&self);
61
62    /// Sets the active memtable.
63    ///
64    /// May be used to restore the LSM-tree's in-memory state from a write-ahead log
65    /// after tree recovery.
66    fn set_active_memtable(&self, memtable: Memtable);
67
68    /// Returns the amount of sealed memtables.
69    fn sealed_memtable_count(&self) -> usize;
70
71    /// Adds a sealed memtables.
72    ///
73    /// May be used to restore the LSM-tree's in-memory state from some journals.
74    fn add_sealed_memtable(&self, id: MemtableId, memtable: Arc<Memtable>);
75
76    /// Performs compaction on the tree's levels, blocking the caller until it's done.
77    ///
78    /// # Errors
79    ///
80    /// Will return `Err` if an IO error occurs.
81    fn compact(
82        &self,
83        strategy: Arc<dyn CompactionStrategy>,
84        seqno_threshold: SeqNo,
85    ) -> crate::Result<()>;
86
87    /// Returns the next segment's ID.
88    fn get_next_segment_id(&self) -> SegmentId;
89
90    /// Returns the tree config.
91    fn tree_config(&self) -> &Config;
92
93    /// Returns the highest sequence number.
94    fn get_highest_seqno(&self) -> Option<SeqNo> {
95        let memtable_seqno = self.get_highest_memtable_seqno();
96        let segment_seqno = self.get_highest_persisted_seqno();
97        memtable_seqno.max(segment_seqno)
98    }
99
100    /// Returns the approximate size of the active memtable in bytes.
101    ///
102    /// May be used to flush the memtable if it grows too large.
103    fn active_memtable_size(&self) -> u32;
104
105    /// Returns the tree type.
106    fn tree_type(&self) -> TreeType;
107
108    /// Seals the active memtable, and returns a reference to it.
109    fn rotate_memtable(&self) -> Option<(MemtableId, Arc<Memtable>)>;
110
111    /// Returns the amount of disk segments currently in the tree.
112    fn segment_count(&self) -> usize;
113
114    /// Returns the amount of segments in levels[idx].
115    ///
116    /// Returns `None` if the level does not exist (if idx >= 7).
117    fn level_segment_count(&self, idx: usize) -> Option<usize>;
118
119    /// Returns the amount of disjoint runs in L0.
120    ///
121    /// Can be used to determine whether to write stall.
122    fn l0_run_count(&self) -> usize;
123
124    /// Returns the amount of blob files currently in the tree.
125    fn blob_file_count(&self) -> usize {
126        0
127    }
128
129    /// Approximates the amount of items in the tree.
130    fn approximate_len(&self) -> usize;
131
132    /// Returns the disk space usage.
133    fn disk_space(&self) -> u64;
134
135    /// Returns the highest sequence number of the active memtable.
136    fn get_highest_memtable_seqno(&self) -> Option<SeqNo>;
137
138    /// Returns the highest sequence number that is flushed to disk.
139    fn get_highest_persisted_seqno(&self) -> Option<SeqNo>;
140
141    /// Scans the entire tree, returning the amount of items.
142    ///
143    /// ###### Caution
144    ///
145    /// This operation scans the entire tree: O(n) complexity!
146    ///
147    /// Never, under any circumstances, use .`len()` == 0 to check
148    /// if the tree is empty, use [`Tree::is_empty`] instead.
149    ///
150    /// # Examples
151    ///
152    /// ```
153    /// # use lsm_tree::Error as TreeError;
154    /// use lsm_tree::{AbstractTree, Config, Tree};
155    ///
156    /// let folder = tempfile::tempdir()?;
157    /// let tree = Config::new(folder).open()?;
158    ///
159    /// assert_eq!(tree.len(None, None)?, 0);
160    /// tree.insert("1", "abc", 0);
161    /// tree.insert("3", "abc", 1);
162    /// tree.insert("5", "abc", 2);
163    /// assert_eq!(tree.len(None, None)?, 3);
164    /// #
165    /// # Ok::<(), TreeError>(())
166    /// ```
167    ///
168    /// # Errors
169    ///
170    /// Will return `Err` if an IO error occurs.
171    fn len(&self, seqno: Option<SeqNo>, index: Option<Arc<Memtable>>) -> crate::Result<usize> {
172        let mut count = 0;
173
174        for item in self.iter(seqno, index) {
175            let _ = item?;
176            count += 1;
177        }
178
179        Ok(count)
180    }
181
182    /// Returns `true` if the tree is empty.
183    ///
184    /// This operation has O(1) complexity.
185    ///
186    /// # Examples
187    ///
188    /// ```
189    /// # let folder = tempfile::tempdir()?;
190    /// use lsm_tree::{AbstractTree, Config, Tree};
191    ///
192    /// let tree = Config::new(folder).open()?;
193    /// assert!(tree.is_empty(None, None)?);
194    ///
195    /// tree.insert("a", "abc", 0);
196    /// assert!(!tree.is_empty(None, None)?);
197    /// #
198    /// # Ok::<(), lsm_tree::Error>(())
199    /// ```
200    ///
201    /// # Errors
202    ///
203    /// Will return `Err` if an IO error occurs.
204    fn is_empty(&self, seqno: Option<SeqNo>, index: Option<Arc<Memtable>>) -> crate::Result<bool> {
205        self.first_key_value(seqno, index).map(|x| x.is_none())
206    }
207
208    /// Returns the first key-value pair in the tree.
209    /// The key in this pair is the minimum key in the tree.
210    ///
211    /// # Examples
212    ///
213    /// ```
214    /// # use lsm_tree::Error as TreeError;
215    /// # use lsm_tree::{AbstractTree, Config, Tree};
216    /// #
217    /// # let folder = tempfile::tempdir()?;
218    /// let tree = Config::new(folder).open()?;
219    ///
220    /// tree.insert("1", "abc", 0);
221    /// tree.insert("3", "abc", 1);
222    /// tree.insert("5", "abc", 2);
223    ///
224    /// let (key, _) = tree.first_key_value(None, None)?.expect("item should exist");
225    /// assert_eq!(&*key, "1".as_bytes());
226    /// #
227    /// # Ok::<(), TreeError>(())
228    /// ```
229    ///
230    /// # Errors
231    ///
232    /// Will return `Err` if an IO error occurs.
233    fn first_key_value(
234        &self,
235        seqno: Option<SeqNo>,
236        index: Option<Arc<Memtable>>,
237    ) -> crate::Result<Option<KvPair>> {
238        self.iter(seqno, index).next().transpose()
239    }
240
241    /// Returns the last key-value pair in the tree.
242    /// The key in this pair is the maximum key in the tree.
243    ///
244    /// # Examples
245    ///
246    /// ```
247    /// # use lsm_tree::Error as TreeError;
248    /// # use lsm_tree::{AbstractTree, Config, Tree};
249    /// #
250    /// # let folder = tempfile::tempdir()?;
251    /// # let tree = Config::new(folder).open()?;
252    /// #
253    /// tree.insert("1", "abc", 0);
254    /// tree.insert("3", "abc", 1);
255    /// tree.insert("5", "abc", 2);
256    ///
257    /// let (key, _) = tree.last_key_value(None, None)?.expect("item should exist");
258    /// assert_eq!(&*key, "5".as_bytes());
259    /// #
260    /// # Ok::<(), TreeError>(())
261    /// ```
262    ///
263    /// # Errors
264    ///
265    /// Will return `Err` if an IO error occurs.
266    fn last_key_value(
267        &self,
268        seqno: Option<SeqNo>,
269        index: Option<Arc<Memtable>>,
270    ) -> crate::Result<Option<KvPair>> {
271        self.iter(seqno, index).next_back().transpose()
272    }
273
274    /// Returns an iterator that scans through the entire tree.
275    ///
276    /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
277    ///
278    /// # Examples
279    ///
280    /// ```
281    /// # let folder = tempfile::tempdir()?;
282    /// use lsm_tree::{AbstractTree, Config, Tree};
283    ///
284    /// let tree = Config::new(folder).open()?;
285    ///
286    /// tree.insert("a", "abc", 0);
287    /// tree.insert("f", "abc", 1);
288    /// tree.insert("g", "abc", 2);
289    /// assert_eq!(3, tree.iter(None, None).count());
290    /// #
291    /// # Ok::<(), lsm_tree::Error>(())
292    /// ```
293    fn iter(
294        &self,
295        seqno: Option<SeqNo>,
296        index: Option<Arc<Memtable>>,
297    ) -> Box<dyn DoubleEndedIterator<Item = crate::Result<KvPair>> + 'static> {
298        self.range::<&[u8], _>(.., seqno, index)
299    }
300
301    /// Returns an iterator that scans through the entire tree, returning keys only.
302    ///
303    /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
304    ///
305    /// # Examples
306    ///
307    /// ```
308    /// # let folder = tempfile::tempdir()?;
309    /// use lsm_tree::{AbstractTree, Config, Tree};
310    ///
311    /// let tree = Config::new(folder).open()?;
312    ///
313    /// tree.insert("a", "abc", 0);
314    /// tree.insert("f", "abc", 1);
315    /// tree.insert("g", "abc", 2);
316    /// assert_eq!(3, tree.keys(None, None).count());
317    /// #
318    /// # Ok::<(), lsm_tree::Error>(())
319    /// ```
320    fn keys(
321        &self,
322        seqno: Option<SeqNo>,
323        index: Option<Arc<Memtable>>,
324    ) -> Box<dyn DoubleEndedIterator<Item = crate::Result<UserKey>> + 'static>;
325
326    /// Returns an iterator that scans through the entire tree, returning values only.
327    ///
328    /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
329    ///
330    /// # Examples
331    ///
332    /// ```
333    /// # let folder = tempfile::tempdir()?;
334    /// use lsm_tree::{AbstractTree, Config, Tree};
335    ///
336    /// let tree = Config::new(folder).open()?;
337    ///
338    /// tree.insert("a", "abc", 0);
339    /// tree.insert("f", "abc", 1);
340    /// tree.insert("g", "abc", 2);
341    /// assert_eq!(3, tree.values(None, None).count());
342    /// #
343    /// # Ok::<(), lsm_tree::Error>(())
344    /// ```
345    fn values(
346        &self,
347        seqno: Option<SeqNo>,
348        index: Option<Arc<Memtable>>,
349    ) -> Box<dyn DoubleEndedIterator<Item = crate::Result<UserValue>> + 'static>;
350
351    /// Returns an iterator over a range of items.
352    ///
353    /// Avoid using full or unbounded ranges as they may scan a lot of items (unless limited).
354    ///
355    /// # Examples
356    ///
357    /// ```
358    /// # let folder = tempfile::tempdir()?;
359    /// use lsm_tree::{AbstractTree, Config, Tree};
360    ///
361    /// let tree = Config::new(folder).open()?;
362    ///
363    /// tree.insert("a", "abc", 0);
364    /// tree.insert("f", "abc", 1);
365    /// tree.insert("g", "abc", 2);
366    /// assert_eq!(2, tree.range("a"..="f", None, None).into_iter().count());
367    /// #
368    /// # Ok::<(), lsm_tree::Error>(())
369    /// ```
370    fn range<K: AsRef<[u8]>, R: RangeBounds<K>>(
371        &self,
372        range: R,
373        seqno: Option<SeqNo>,
374        index: Option<Arc<Memtable>>,
375    ) -> Box<dyn DoubleEndedIterator<Item = crate::Result<KvPair>> + 'static>;
376
377    /// Returns an iterator over a prefixed set of items.
378    ///
379    /// Avoid using an empty prefix as it may scan a lot of items (unless limited).
380    ///
381    /// # Examples
382    ///
383    /// ```
384    /// # let folder = tempfile::tempdir()?;
385    /// use lsm_tree::{AbstractTree, Config, Tree};
386    ///
387    /// let tree = Config::new(folder).open()?;
388    ///
389    /// tree.insert("a", "abc", 0);
390    /// tree.insert("ab", "abc", 1);
391    /// tree.insert("abc", "abc", 2);
392    /// assert_eq!(2, tree.prefix("ab", None, None).count());
393    /// #
394    /// # Ok::<(), lsm_tree::Error>(())
395    /// ```
396    fn prefix<K: AsRef<[u8]>>(
397        &self,
398        prefix: K,
399        seqno: Option<SeqNo>,
400        index: Option<Arc<Memtable>>,
401    ) -> Box<dyn DoubleEndedIterator<Item = crate::Result<KvPair>> + 'static>;
402
403    /// Returns the size of a value if it exists.
404    ///
405    /// # Examples
406    ///
407    /// ```
408    /// # let folder = tempfile::tempdir()?;
409    /// use lsm_tree::{AbstractTree, Config, Tree};
410    ///
411    /// let tree = Config::new(folder).open()?;
412    /// tree.insert("a", "my_value", 0);
413    ///
414    /// let size = tree.size_of("a", None)?.unwrap_or_default();
415    /// assert_eq!("my_value".len() as u32, size);
416    ///
417    /// let size = tree.size_of("b", None)?.unwrap_or_default();
418    /// assert_eq!(0, size);
419    /// #
420    /// # Ok::<(), lsm_tree::Error>(())
421    /// ```
422    ///
423    /// # Errors
424    ///
425    /// Will return `Err` if an IO error occurs.
426    fn size_of<K: AsRef<[u8]>>(&self, key: K, seqno: Option<SeqNo>) -> crate::Result<Option<u32>>;
427
428    /// Retrieves an item from the tree.
429    ///
430    /// # Examples
431    ///
432    /// ```
433    /// # let folder = tempfile::tempdir()?;
434    /// use lsm_tree::{AbstractTree, Config, Tree};
435    ///
436    /// let tree = Config::new(folder).open()?;
437    /// tree.insert("a", "my_value", 0);
438    ///
439    /// let item = tree.get("a", None)?;
440    /// assert_eq!(Some("my_value".as_bytes().into()), item);
441    /// #
442    /// # Ok::<(), lsm_tree::Error>(())
443    /// ```
444    ///
445    /// # Errors
446    ///
447    /// Will return `Err` if an IO error occurs.
448    fn get<K: AsRef<[u8]>>(&self, key: K, seqno: Option<SeqNo>)
449        -> crate::Result<Option<UserValue>>;
450
451    /// Opens a read-only point-in-time snapshot of the tree
452    ///
453    /// Dropping the snapshot will close the snapshot
454    ///
455    /// # Examples
456    ///
457    /// ```
458    /// # let folder = tempfile::tempdir()?;
459    /// use lsm_tree::{AbstractTree, Config, Tree};
460    ///
461    /// let tree = Config::new(folder).open()?;
462    ///
463    /// tree.insert("a", "abc", 0);
464    ///
465    /// let snapshot = tree.snapshot(1);
466    /// assert_eq!(snapshot.len()?, tree.len(None, None)?);
467    ///
468    /// tree.insert("b", "abc", 1);
469    ///
470    /// assert_eq!(2, tree.len(None, None)?);
471    /// assert_eq!(1, snapshot.len()?);
472    ///
473    /// assert!(snapshot.contains_key("a")?);
474    /// assert!(!snapshot.contains_key("b")?);
475    /// #
476    /// # Ok::<(), lsm_tree::Error>(())
477    /// ```
478    fn snapshot(&self, seqno: SeqNo) -> Snapshot;
479
480    /// Opens a snapshot of this partition with a given sequence number
481    #[must_use]
482    fn snapshot_at(&self, seqno: SeqNo) -> Snapshot {
483        self.snapshot(seqno)
484    }
485
486    /// Returns `true` if the tree contains the specified key.
487    ///
488    /// # Examples
489    ///
490    /// ```
491    /// # let folder = tempfile::tempdir()?;
492    /// # use lsm_tree::{AbstractTree, Config, Tree};
493    /// #
494    /// let tree = Config::new(folder).open()?;
495    /// assert!(!tree.contains_key("a", None)?);
496    ///
497    /// tree.insert("a", "abc", 0);
498    /// assert!(tree.contains_key("a", None)?);
499    /// #
500    /// # Ok::<(), lsm_tree::Error>(())
501    /// ```
502    ///
503    /// # Errors
504    ///
505    /// Will return `Err` if an IO error occurs.
506    fn contains_key<K: AsRef<[u8]>>(&self, key: K, seqno: Option<SeqNo>) -> crate::Result<bool> {
507        self.get(key, seqno).map(|x| x.is_some())
508    }
509
510    /// Inserts a key-value pair into the tree.
511    ///
512    /// If the key already exists, the item will be overwritten.
513    ///
514    /// Returns the added item's size and new size of the memtable.
515    ///
516    /// # Examples
517    ///
518    /// ```
519    /// # let folder = tempfile::tempdir()?;
520    /// use lsm_tree::{AbstractTree, Config, Tree};
521    ///
522    /// let tree = Config::new(folder).open()?;
523    /// tree.insert("a", "abc", 0);
524    /// #
525    /// # Ok::<(), lsm_tree::Error>(())
526    /// ```
527    ///
528    /// # Errors
529    ///
530    /// Will return `Err` if an IO error occurs.
531    fn insert<K: Into<UserKey>, V: Into<UserValue>>(
532        &self,
533        key: K,
534        value: V,
535        seqno: SeqNo,
536    ) -> (u32, u32);
537
538    /// Removes an item from the tree.
539    ///
540    /// Returns the added item's size and new size of the memtable.
541    ///
542    /// # Examples
543    ///
544    /// ```
545    /// # let folder = tempfile::tempdir()?;
546    /// # use lsm_tree::{AbstractTree, Config, Tree};
547    /// #
548    /// # let tree = Config::new(folder).open()?;
549    /// tree.insert("a", "abc", 0);
550    ///
551    /// let item = tree.get("a", None)?.expect("should have item");
552    /// assert_eq!("abc".as_bytes(), &*item);
553    ///
554    /// tree.remove("a", 1);
555    ///
556    /// let item = tree.get("a", None)?;
557    /// assert_eq!(None, item);
558    /// #
559    /// # Ok::<(), lsm_tree::Error>(())
560    /// ```
561    ///
562    /// # Errors
563    ///
564    /// Will return `Err` if an IO error occurs.
565    fn remove<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u32, u32);
566
567    /// Removes an item from the tree.
568    ///
569    /// The tombstone marker of this delete operation will vanish when it
570    /// collides with its corresponding insertion.
571    /// This may cause older versions of the value to be resurrected, so it should
572    /// only be used and preferred in scenarios where a key is only ever written once.
573    ///
574    /// Returns the added item's size and new size of the memtable.
575    ///
576    /// # Examples
577    ///
578    /// ```
579    /// # let folder = tempfile::tempdir()?;
580    /// # use lsm_tree::{AbstractTree, Config, Tree};
581    /// #
582    /// # let tree = Config::new(folder).open()?;
583    /// tree.insert("a", "abc", 0);
584    ///
585    /// let item = tree.get("a", None)?.expect("should have item");
586    /// assert_eq!("abc".as_bytes(), &*item);
587    ///
588    /// tree.remove_weak("a", 1);
589    ///
590    /// let item = tree.get("a", None)?;
591    /// assert_eq!(None, item);
592    /// #
593    /// # Ok::<(), lsm_tree::Error>(())
594    /// ```
595    ///
596    /// # Errors
597    ///
598    /// Will return `Err` if an IO error occurs.
599    fn remove_weak<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u32, u32);
600}