lsm_tree/
abstract.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::{
6    compaction::CompactionStrategy, config::TreeType, tree::inner::MemtableId, AnyTree, BlobTree,
7    Config, KvPair, Memtable, Segment, SegmentId, SeqNo, Snapshot, Tree, UserKey, UserValue,
8};
9use enum_dispatch::enum_dispatch;
10use std::{
11    ops::RangeBounds,
12    sync::{Arc, RwLockWriteGuard},
13};
14
15pub type RangeItem = crate::Result<KvPair>;
16
17/// Generic Tree API
18#[allow(clippy::module_name_repetitions)]
19#[enum_dispatch]
20pub trait AbstractTree {
21    /// Ingests a sorted stream of key-value pairs into the tree.
22    ///
23    /// Can only be called on a new fresh, empty tree.
24    ///
25    /// # Errors
26    ///
27    /// Will return `Err` if an IO error occurs.
28    ///
29    /// # Panics
30    ///
31    /// Panics if the tree is **not** initially empty.
32    ///
33    /// Will panic if the input iterator is not sorted in ascending order.
34    #[doc(hidden)]
35    fn ingest(&self, iter: impl Iterator<Item = (UserKey, UserValue)>) -> crate::Result<()>;
36
37    /// Performs major compaction, blocking the caller until it's done.
38    ///
39    /// # Errors
40    ///
41    /// Will return `Err` if an IO error occurs.
42    fn major_compact(&self, target_size: u64, seqno_threshold: SeqNo) -> crate::Result<()>;
43
44    /// Gets the memory usage of all bloom filters in the tree.
45    fn bloom_filter_size(&self) -> usize;
46
47    #[doc(hidden)]
48    fn verify(&self) -> crate::Result<usize>;
49
50    /// Synchronously flushes a memtable to a disk segment.
51    ///
52    /// This method will not make the segment immediately available,
53    /// use [`AbstractTree::register_segments`] for that.
54    ///
55    /// # Errors
56    ///
57    /// Will return `Err` if an IO error occurs.
58    fn flush_memtable(
59        &self,
60        segment_id: SegmentId,
61        memtable: &Arc<Memtable>,
62        seqno_threshold: SeqNo,
63    ) -> crate::Result<Option<Segment>>;
64
65    /// Atomically registers flushed disk segments into the tree, removing their associated sealed memtables.
66    ///
67    /// # Errors
68    ///
69    /// Will return `Err` if an IO error occurs.
70    fn register_segments(&self, segments: &[Segment]) -> crate::Result<()>;
71
72    /// Write-locks the active memtable for exclusive access
73    fn lock_active_memtable(&self) -> RwLockWriteGuard<'_, Arc<Memtable>>;
74
75    /// Clears the active memtable atomically.
76    fn clear_active_memtable(&self);
77
78    /// Sets the active memtable.
79    ///
80    /// May be used to restore the LSM-tree's in-memory state from a write-ahead log
81    /// after tree recovery.
82    fn set_active_memtable(&self, memtable: Memtable);
83
84    /// Returns the amount of sealed memtables.
85    fn sealed_memtable_count(&self) -> usize;
86
87    /// Adds a sealed memtables.
88    ///
89    /// May be used to restore the LSM-tree's in-memory state from some journals.
90    fn add_sealed_memtable(&self, id: MemtableId, memtable: Arc<Memtable>);
91
92    /// Performs compaction on the tree's levels, blocking the caller until it's done.
93    ///
94    /// # Errors
95    ///
96    /// Will return `Err` if an IO error occurs.
97    fn compact(
98        &self,
99        strategy: Arc<dyn CompactionStrategy>,
100        seqno_threshold: SeqNo,
101    ) -> crate::Result<()>;
102
103    /// Returns the next segment's ID.
104    fn get_next_segment_id(&self) -> SegmentId;
105
106    /// Returns the tree config.
107    fn tree_config(&self) -> &Config;
108
109    /// Returns the highest sequence number.
110    fn get_highest_seqno(&self) -> Option<SeqNo> {
111        let memtable_seqno = self.get_highest_memtable_seqno();
112        let segment_seqno = self.get_highest_persisted_seqno();
113        memtable_seqno.max(segment_seqno)
114    }
115
116    /// Returns the approximate size of the active memtable in bytes.
117    ///
118    /// May be used to flush the memtable if it grows too large.
119    fn active_memtable_size(&self) -> u32;
120
121    /// Returns the tree type.
122    fn tree_type(&self) -> TreeType;
123
124    /// Seals the active memtable, and returns a reference to it.
125    fn rotate_memtable(&self) -> Option<(MemtableId, Arc<Memtable>)>;
126
127    /// Returns the amount of disk segments currently in the tree.
128    fn segment_count(&self) -> usize;
129
130    /// Returns the amount of segments in levels[idx].
131    ///
132    /// Returns `None` if the level does not exist (if idx >= 7).
133    fn level_segment_count(&self, idx: usize) -> Option<usize>;
134
135    /// Returns the amount of disjoint runs in L0.
136    ///
137    /// Can be used to determine whether to write stall.
138    fn l0_run_count(&self) -> usize;
139
140    /// Returns the amount of blob files currently in the tree.
141    fn blob_file_count(&self) -> usize {
142        0
143    }
144
145    /// Approximates the amount of items in the tree.
146    fn approximate_len(&self) -> usize;
147
148    /// Returns the disk space usage.
149    fn disk_space(&self) -> u64;
150
151    /// Returns the highest sequence number of the active memtable.
152    fn get_highest_memtable_seqno(&self) -> Option<SeqNo>;
153
154    /// Returns the highest sequence number that is flushed to disk.
155    fn get_highest_persisted_seqno(&self) -> Option<SeqNo>;
156
157    /// Scans the entire tree, returning the amount of items.
158    ///
159    /// ###### Caution
160    ///
161    /// This operation scans the entire tree: O(n) complexity!
162    ///
163    /// Never, under any circumstances, use .`len()` == 0 to check
164    /// if the tree is empty, use [`Tree::is_empty`] instead.
165    ///
166    /// # Examples
167    ///
168    /// ```
169    /// # use lsm_tree::Error as TreeError;
170    /// use lsm_tree::{AbstractTree, Config, Tree};
171    ///
172    /// let folder = tempfile::tempdir()?;
173    /// let tree = Config::new(folder).open()?;
174    ///
175    /// assert_eq!(tree.len(None, None)?, 0);
176    /// tree.insert("1", "abc", 0);
177    /// tree.insert("3", "abc", 1);
178    /// tree.insert("5", "abc", 2);
179    /// assert_eq!(tree.len(None, None)?, 3);
180    /// #
181    /// # Ok::<(), TreeError>(())
182    /// ```
183    ///
184    /// # Errors
185    ///
186    /// Will return `Err` if an IO error occurs.
187    fn len(&self, seqno: Option<SeqNo>, index: Option<Arc<Memtable>>) -> crate::Result<usize> {
188        let mut count = 0;
189
190        for item in self.iter(seqno, index) {
191            let _ = item?;
192            count += 1;
193        }
194
195        Ok(count)
196    }
197
198    /// Returns `true` if the tree is empty.
199    ///
200    /// This operation has O(1) complexity.
201    ///
202    /// # Examples
203    ///
204    /// ```
205    /// # let folder = tempfile::tempdir()?;
206    /// use lsm_tree::{AbstractTree, Config, Tree};
207    ///
208    /// let tree = Config::new(folder).open()?;
209    /// assert!(tree.is_empty(None, None)?);
210    ///
211    /// tree.insert("a", "abc", 0);
212    /// assert!(!tree.is_empty(None, None)?);
213    /// #
214    /// # Ok::<(), lsm_tree::Error>(())
215    /// ```
216    ///
217    /// # Errors
218    ///
219    /// Will return `Err` if an IO error occurs.
220    fn is_empty(&self, seqno: Option<SeqNo>, index: Option<Arc<Memtable>>) -> crate::Result<bool> {
221        self.first_key_value(seqno, index).map(|x| x.is_none())
222    }
223
224    /// Returns the first key-value pair in the tree.
225    /// The key in this pair is the minimum key in the tree.
226    ///
227    /// # Examples
228    ///
229    /// ```
230    /// # use lsm_tree::Error as TreeError;
231    /// # use lsm_tree::{AbstractTree, Config, Tree};
232    /// #
233    /// # let folder = tempfile::tempdir()?;
234    /// let tree = Config::new(folder).open()?;
235    ///
236    /// tree.insert("1", "abc", 0);
237    /// tree.insert("3", "abc", 1);
238    /// tree.insert("5", "abc", 2);
239    ///
240    /// let (key, _) = tree.first_key_value(None, None)?.expect("item should exist");
241    /// assert_eq!(&*key, "1".as_bytes());
242    /// #
243    /// # Ok::<(), TreeError>(())
244    /// ```
245    ///
246    /// # Errors
247    ///
248    /// Will return `Err` if an IO error occurs.
249    fn first_key_value(
250        &self,
251        seqno: Option<SeqNo>,
252        index: Option<Arc<Memtable>>,
253    ) -> crate::Result<Option<KvPair>> {
254        self.iter(seqno, index).next().transpose()
255    }
256
257    /// Returns the last key-value pair in the tree.
258    /// The key in this pair is the maximum key in the tree.
259    ///
260    /// # Examples
261    ///
262    /// ```
263    /// # use lsm_tree::Error as TreeError;
264    /// # use lsm_tree::{AbstractTree, Config, Tree};
265    /// #
266    /// # let folder = tempfile::tempdir()?;
267    /// # let tree = Config::new(folder).open()?;
268    /// #
269    /// tree.insert("1", "abc", 0);
270    /// tree.insert("3", "abc", 1);
271    /// tree.insert("5", "abc", 2);
272    ///
273    /// let (key, _) = tree.last_key_value(None, None)?.expect("item should exist");
274    /// assert_eq!(&*key, "5".as_bytes());
275    /// #
276    /// # Ok::<(), TreeError>(())
277    /// ```
278    ///
279    /// # Errors
280    ///
281    /// Will return `Err` if an IO error occurs.
282    fn last_key_value(
283        &self,
284        seqno: Option<SeqNo>,
285        index: Option<Arc<Memtable>>,
286    ) -> crate::Result<Option<KvPair>> {
287        self.iter(seqno, index).next_back().transpose()
288    }
289
290    /// Returns an iterator that scans through the entire tree.
291    ///
292    /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
293    ///
294    /// # Examples
295    ///
296    /// ```
297    /// # let folder = tempfile::tempdir()?;
298    /// use lsm_tree::{AbstractTree, Config, Tree};
299    ///
300    /// let tree = Config::new(folder).open()?;
301    ///
302    /// tree.insert("a", "abc", 0);
303    /// tree.insert("f", "abc", 1);
304    /// tree.insert("g", "abc", 2);
305    /// assert_eq!(3, tree.iter(None, None).count());
306    /// #
307    /// # Ok::<(), lsm_tree::Error>(())
308    /// ```
309    fn iter(
310        &self,
311        seqno: Option<SeqNo>,
312        index: Option<Arc<Memtable>>,
313    ) -> Box<dyn DoubleEndedIterator<Item = crate::Result<KvPair>> + 'static> {
314        self.range::<&[u8], _>(.., seqno, index)
315    }
316
317    /// Returns an iterator that scans through the entire tree, returning keys only.
318    ///
319    /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
320    ///
321    /// # Examples
322    ///
323    /// ```
324    /// # let folder = tempfile::tempdir()?;
325    /// use lsm_tree::{AbstractTree, Config, Tree};
326    ///
327    /// let tree = Config::new(folder).open()?;
328    ///
329    /// tree.insert("a", "abc", 0);
330    /// tree.insert("f", "abc", 1);
331    /// tree.insert("g", "abc", 2);
332    /// assert_eq!(3, tree.keys(None, None).count());
333    /// #
334    /// # Ok::<(), lsm_tree::Error>(())
335    /// ```
336    fn keys(
337        &self,
338        seqno: Option<SeqNo>,
339        index: Option<Arc<Memtable>>,
340    ) -> Box<dyn DoubleEndedIterator<Item = crate::Result<UserKey>> + 'static>;
341
342    /// Returns an iterator that scans through the entire tree, returning values only.
343    ///
344    /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
345    ///
346    /// # Examples
347    ///
348    /// ```
349    /// # let folder = tempfile::tempdir()?;
350    /// use lsm_tree::{AbstractTree, Config, Tree};
351    ///
352    /// let tree = Config::new(folder).open()?;
353    ///
354    /// tree.insert("a", "abc", 0);
355    /// tree.insert("f", "abc", 1);
356    /// tree.insert("g", "abc", 2);
357    /// assert_eq!(3, tree.values(None, None).count());
358    /// #
359    /// # Ok::<(), lsm_tree::Error>(())
360    /// ```
361    fn values(
362        &self,
363        seqno: Option<SeqNo>,
364        index: Option<Arc<Memtable>>,
365    ) -> Box<dyn DoubleEndedIterator<Item = crate::Result<UserValue>> + 'static>;
366
367    /// Returns an iterator over a range of items.
368    ///
369    /// Avoid using full or unbounded ranges as they may scan a lot of items (unless limited).
370    ///
371    /// # Examples
372    ///
373    /// ```
374    /// # let folder = tempfile::tempdir()?;
375    /// use lsm_tree::{AbstractTree, Config, Tree};
376    ///
377    /// let tree = Config::new(folder).open()?;
378    ///
379    /// tree.insert("a", "abc", 0);
380    /// tree.insert("f", "abc", 1);
381    /// tree.insert("g", "abc", 2);
382    /// assert_eq!(2, tree.range("a"..="f", None, None).into_iter().count());
383    /// #
384    /// # Ok::<(), lsm_tree::Error>(())
385    /// ```
386    fn range<K: AsRef<[u8]>, R: RangeBounds<K>>(
387        &self,
388        range: R,
389        seqno: Option<SeqNo>,
390        index: Option<Arc<Memtable>>,
391    ) -> Box<dyn DoubleEndedIterator<Item = crate::Result<KvPair>> + 'static>;
392
393    /// Returns an iterator over a prefixed set of items.
394    ///
395    /// Avoid using an empty prefix as it may scan a lot of items (unless limited).
396    ///
397    /// # Examples
398    ///
399    /// ```
400    /// # let folder = tempfile::tempdir()?;
401    /// use lsm_tree::{AbstractTree, Config, Tree};
402    ///
403    /// let tree = Config::new(folder).open()?;
404    ///
405    /// tree.insert("a", "abc", 0);
406    /// tree.insert("ab", "abc", 1);
407    /// tree.insert("abc", "abc", 2);
408    /// assert_eq!(2, tree.prefix("ab", None, None).count());
409    /// #
410    /// # Ok::<(), lsm_tree::Error>(())
411    /// ```
412    fn prefix<K: AsRef<[u8]>>(
413        &self,
414        prefix: K,
415        seqno: Option<SeqNo>,
416        index: Option<Arc<Memtable>>,
417    ) -> Box<dyn DoubleEndedIterator<Item = crate::Result<KvPair>> + 'static>;
418
419    /// Returns the size of a value if it exists.
420    ///
421    /// # Examples
422    ///
423    /// ```
424    /// # let folder = tempfile::tempdir()?;
425    /// use lsm_tree::{AbstractTree, Config, Tree};
426    ///
427    /// let tree = Config::new(folder).open()?;
428    /// tree.insert("a", "my_value", 0);
429    ///
430    /// let size = tree.size_of("a", None)?.unwrap_or_default();
431    /// assert_eq!("my_value".len() as u32, size);
432    ///
433    /// let size = tree.size_of("b", None)?.unwrap_or_default();
434    /// assert_eq!(0, size);
435    /// #
436    /// # Ok::<(), lsm_tree::Error>(())
437    /// ```
438    ///
439    /// # Errors
440    ///
441    /// Will return `Err` if an IO error occurs.
442    fn size_of<K: AsRef<[u8]>>(&self, key: K, seqno: Option<SeqNo>) -> crate::Result<Option<u32>>;
443
444    /// Retrieves an item from the tree.
445    ///
446    /// # Examples
447    ///
448    /// ```
449    /// # let folder = tempfile::tempdir()?;
450    /// use lsm_tree::{AbstractTree, Config, Tree};
451    ///
452    /// let tree = Config::new(folder).open()?;
453    /// tree.insert("a", "my_value", 0);
454    ///
455    /// let item = tree.get("a", None)?;
456    /// assert_eq!(Some("my_value".as_bytes().into()), item);
457    /// #
458    /// # Ok::<(), lsm_tree::Error>(())
459    /// ```
460    ///
461    /// # Errors
462    ///
463    /// Will return `Err` if an IO error occurs.
464    fn get<K: AsRef<[u8]>>(&self, key: K, seqno: Option<SeqNo>)
465        -> crate::Result<Option<UserValue>>;
466
467    /// Opens a read-only point-in-time snapshot of the tree
468    ///
469    /// Dropping the snapshot will close the snapshot
470    ///
471    /// # Examples
472    ///
473    /// ```
474    /// # let folder = tempfile::tempdir()?;
475    /// use lsm_tree::{AbstractTree, Config, Tree};
476    ///
477    /// let tree = Config::new(folder).open()?;
478    ///
479    /// tree.insert("a", "abc", 0);
480    ///
481    /// let snapshot = tree.snapshot(1);
482    /// assert_eq!(snapshot.len()?, tree.len(None, None)?);
483    ///
484    /// tree.insert("b", "abc", 1);
485    ///
486    /// assert_eq!(2, tree.len(None, None)?);
487    /// assert_eq!(1, snapshot.len()?);
488    ///
489    /// assert!(snapshot.contains_key("a")?);
490    /// assert!(!snapshot.contains_key("b")?);
491    /// #
492    /// # Ok::<(), lsm_tree::Error>(())
493    /// ```
494    fn snapshot(&self, seqno: SeqNo) -> Snapshot;
495
496    /// Opens a snapshot of this partition with a given sequence number
497    fn snapshot_at(&self, seqno: SeqNo) -> Snapshot {
498        self.snapshot(seqno)
499    }
500
501    /// Returns `true` if the tree contains the specified key.
502    ///
503    /// # Examples
504    ///
505    /// ```
506    /// # let folder = tempfile::tempdir()?;
507    /// # use lsm_tree::{AbstractTree, Config, Tree};
508    /// #
509    /// let tree = Config::new(folder).open()?;
510    /// assert!(!tree.contains_key("a", None)?);
511    ///
512    /// tree.insert("a", "abc", 0);
513    /// assert!(tree.contains_key("a", None)?);
514    /// #
515    /// # Ok::<(), lsm_tree::Error>(())
516    /// ```
517    ///
518    /// # Errors
519    ///
520    /// Will return `Err` if an IO error occurs.
521    fn contains_key<K: AsRef<[u8]>>(&self, key: K, seqno: Option<SeqNo>) -> crate::Result<bool> {
522        self.get(key, seqno).map(|x| x.is_some())
523    }
524
525    /// Inserts a key-value pair into the tree.
526    ///
527    /// If the key already exists, the item will be overwritten.
528    ///
529    /// Returns the added item's size and new size of the memtable.
530    ///
531    /// # Examples
532    ///
533    /// ```
534    /// # let folder = tempfile::tempdir()?;
535    /// use lsm_tree::{AbstractTree, Config, Tree};
536    ///
537    /// let tree = Config::new(folder).open()?;
538    /// tree.insert("a", "abc", 0);
539    /// #
540    /// # Ok::<(), lsm_tree::Error>(())
541    /// ```
542    ///
543    /// # Errors
544    ///
545    /// Will return `Err` if an IO error occurs.
546    fn insert<K: Into<UserKey>, V: Into<UserValue>>(
547        &self,
548        key: K,
549        value: V,
550        seqno: SeqNo,
551    ) -> (u32, u32);
552
553    /// Removes an item from the tree.
554    ///
555    /// Returns the added item's size and new size of the memtable.
556    ///
557    /// # Examples
558    ///
559    /// ```
560    /// # let folder = tempfile::tempdir()?;
561    /// # use lsm_tree::{AbstractTree, Config, Tree};
562    /// #
563    /// # let tree = Config::new(folder).open()?;
564    /// tree.insert("a", "abc", 0);
565    ///
566    /// let item = tree.get("a", None)?.expect("should have item");
567    /// assert_eq!("abc".as_bytes(), &*item);
568    ///
569    /// tree.remove("a", 1);
570    ///
571    /// let item = tree.get("a", None)?;
572    /// assert_eq!(None, item);
573    /// #
574    /// # Ok::<(), lsm_tree::Error>(())
575    /// ```
576    ///
577    /// # Errors
578    ///
579    /// Will return `Err` if an IO error occurs.
580    fn remove<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u32, u32);
581
582    /// Removes an item from the tree.
583    ///
584    /// The tombstone marker of this delete operation will vanish when it
585    /// collides with its corresponding insertion.
586    /// This may cause older versions of the value to be resurrected, so it should
587    /// only be used and preferred in scenarios where a key is only ever written once.
588    ///
589    /// Returns the added item's size and new size of the memtable.
590    ///
591    /// # Examples
592    ///
593    /// ```
594    /// # let folder = tempfile::tempdir()?;
595    /// # use lsm_tree::{AbstractTree, Config, Tree};
596    /// #
597    /// # let tree = Config::new(folder).open()?;
598    /// tree.insert("a", "abc", 0);
599    ///
600    /// let item = tree.get("a", None)?.expect("should have item");
601    /// assert_eq!("abc".as_bytes(), &*item);
602    ///
603    /// tree.remove_weak("a", 1);
604    ///
605    /// let item = tree.get("a", None)?;
606    /// assert_eq!(None, item);
607    /// #
608    /// # Ok::<(), lsm_tree::Error>(())
609    /// ```
610    ///
611    /// # Errors
612    ///
613    /// Will return `Err` if an IO error occurs.
614    fn remove_weak<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u32, u32);
615}