lsm_tree/
abstract.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::{
6    compaction::CompactionStrategy, config::TreeType, iter_guard::IterGuardImpl, segment::Segment,
7    tree::inner::MemtableId, vlog::BlobFile, AnyTree, BlobTree, Config, Guard, KvPair, Memtable,
8    SegmentId, SeqNo, SequenceNumberCounter, Tree, UserKey, UserValue,
9};
10use enum_dispatch::enum_dispatch;
11use std::{
12    ops::RangeBounds,
13    sync::{Arc, RwLockWriteGuard},
14};
15
16pub type RangeItem = crate::Result<KvPair>;
17
18/// Generic Tree API
19#[allow(clippy::module_name_repetitions)]
20#[enum_dispatch]
21pub trait AbstractTree {
22    /// Returns an iterator that scans through the entire tree.
23    ///
24    /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
25    fn iter(
26        &self,
27        seqno: SeqNo,
28        index: Option<Arc<Memtable>>,
29    ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl<'_>> + '_> {
30        self.range::<&[u8], _>(.., seqno, index)
31    }
32
33    /// Returns an iterator over a prefixed set of items.
34    ///
35    /// Avoid using an empty prefix as it may scan a lot of items (unless limited).
36    fn prefix<K: AsRef<[u8]>>(
37        &self,
38        prefix: K,
39        seqno: SeqNo,
40        index: Option<Arc<Memtable>>,
41    ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl<'_>> + '_>;
42
43    /// Returns an iterator over a range of items.
44    ///
45    /// Avoid using full or unbounded ranges as they may scan a lot of items (unless limited).
46    fn range<K: AsRef<[u8]>, R: RangeBounds<K>>(
47        &self,
48        range: R,
49        seqno: SeqNo,
50        index: Option<Arc<Memtable>>,
51    ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl<'_>> + '_>;
52
53    /// Ingests a sorted stream of key-value pairs into the tree.
54    ///
55    /// Can only be called on a new fresh, empty tree.
56    ///
57    /// # Errors
58    ///
59    /// Will return `Err` if an IO error occurs.
60    ///
61    /// # Panics
62    ///
63    /// Panics if the tree is **not** initially empty.
64    ///
65    /// Will panic if the input iterator is not sorted in ascending order.
66    #[doc(hidden)]
67    fn ingest(
68        &self,
69        iter: impl Iterator<Item = (UserKey, UserValue)>,
70        seqno_generator: &SequenceNumberCounter,
71        visible_seqno: &SequenceNumberCounter,
72    ) -> crate::Result<()>;
73
74    /// Returns the approximate number of tombstones in the tree.
75    fn tombstone_count(&self) -> u64;
76
77    // TODO: clear() with Nuke compaction strategy (write lock) -> drop_range(..)
78
79    /// Drops segments that are fully contained in a given range.
80    ///
81    /// # Errors
82    ///
83    /// Will return `Err` if an IO error occurs.
84    fn drop_range(&self, key_range: crate::KeyRange) -> crate::Result<()>;
85
86    /// Performs major compaction, blocking the caller until it's done.
87    ///
88    /// # Errors
89    ///
90    /// Will return `Err` if an IO error occurs.
91    fn major_compact(&self, target_size: u64, seqno_threshold: SeqNo) -> crate::Result<()>;
92
93    /// Gets the space usage of all filters in the tree.
94    ///
95    /// May not correspond to the actual memory size because filter blocks may be paged out.
96    fn filter_size(&self) -> usize;
97
98    /// Gets the memory usage of all pinned filters in the tree.
99    fn pinned_filter_size(&self) -> usize;
100
101    /// Gets the memory usage of all pinned index blocks in the tree.
102    fn pinned_block_index_size(&self) -> usize;
103
104    /// Gets the length of the version free list.
105    fn version_free_list_len(&self) -> usize;
106
107    /// Returns the metrics structure.
108    #[cfg(feature = "metrics")]
109    fn metrics(&self) -> &Arc<crate::Metrics>;
110
111    // TODO:?
112    /* #[doc(hidden)]
113    fn verify(&self) -> crate::Result<usize>; */
114
115    /// Synchronously flushes a memtable to a disk segment.
116    ///
117    /// This method will not make the segment immediately available,
118    /// use [`AbstractTree::register_segments`] for that.
119    ///
120    /// # Errors
121    ///
122    /// Will return `Err` if an IO error occurs.
123    fn flush_memtable(
124        &self,
125        segment_id: SegmentId, // TODO: remove?
126        memtable: &Arc<Memtable>,
127        seqno_threshold: SeqNo,
128    ) -> crate::Result<Option<(Segment, Option<BlobFile>)>>;
129
130    /// Atomically registers flushed disk segments into the tree, removing their associated sealed memtables.
131    ///
132    /// # Errors
133    ///
134    /// Will return `Err` if an IO error occurs.
135    fn register_segments(
136        &self,
137        segments: &[Segment],
138        blob_files: Option<&[BlobFile]>,
139        seqno_threshold: SeqNo,
140    ) -> crate::Result<()>;
141
142    /// Write-locks the active memtable for exclusive access
143    fn lock_active_memtable(&self) -> RwLockWriteGuard<'_, Arc<Memtable>>;
144
145    /// Clears the active memtable atomically.
146    fn clear_active_memtable(&self);
147
148    /// Sets the active memtable.
149    ///
150    /// May be used to restore the LSM-tree's in-memory state from a write-ahead log
151    /// after tree recovery.
152    fn set_active_memtable(&self, memtable: Memtable);
153
154    /// Returns the number of sealed memtables.
155    fn sealed_memtable_count(&self) -> usize;
156
157    /// Adds a sealed memtables.
158    ///
159    /// May be used to restore the LSM-tree's in-memory state from some journals.
160    fn add_sealed_memtable(&self, id: MemtableId, memtable: Arc<Memtable>);
161
162    /// Performs compaction on the tree's levels, blocking the caller until it's done.
163    ///
164    /// # Errors
165    ///
166    /// Will return `Err` if an IO error occurs.
167    fn compact(
168        &self,
169        strategy: Arc<dyn CompactionStrategy>,
170        seqno_threshold: SeqNo,
171    ) -> crate::Result<()>;
172
173    /// Returns the next segment's ID.
174    fn get_next_segment_id(&self) -> SegmentId;
175
176    /// Returns the tree config.
177    fn tree_config(&self) -> &Config;
178
179    /// Returns the highest sequence number.
180    fn get_highest_seqno(&self) -> Option<SeqNo> {
181        let memtable_seqno = self.get_highest_memtable_seqno();
182        let segment_seqno = self.get_highest_persisted_seqno();
183        memtable_seqno.max(segment_seqno)
184    }
185
186    /// Returns the approximate size of the active memtable in bytes.
187    ///
188    /// May be used to flush the memtable if it grows too large.
189    fn active_memtable_size(&self) -> u64;
190
191    /// Returns the tree type.
192    fn tree_type(&self) -> TreeType;
193
194    /// Seals the active memtable, and returns a reference to it.
195    fn rotate_memtable(&self) -> Option<(MemtableId, Arc<Memtable>)>;
196
197    /// Returns the number of disk segments currently in the tree.
198    fn segment_count(&self) -> usize;
199
200    /// Returns the number of segments in `levels[idx]`.
201    ///
202    /// Returns `None` if the level does not exist (if idx >= 7).
203    fn level_segment_count(&self, idx: usize) -> Option<usize>;
204
205    /// Returns the number of disjoint runs in L0.
206    ///
207    /// Can be used to determine whether to write stall.
208    fn l0_run_count(&self) -> usize;
209
210    /// Returns the number of blob files currently in the tree.
211    fn blob_file_count(&self) -> usize {
212        0
213    }
214
215    /// Approximates the number of items in the tree.
216    fn approximate_len(&self) -> usize;
217
218    /// Returns the disk space usage.
219    fn disk_space(&self) -> u64;
220
221    /// Returns the highest sequence number of the active memtable.
222    fn get_highest_memtable_seqno(&self) -> Option<SeqNo>;
223
224    /// Returns the highest sequence number that is flushed to disk.
225    fn get_highest_persisted_seqno(&self) -> Option<SeqNo>;
226
227    /// Scans the entire tree, returning the number of items.
228    ///
229    /// ###### Caution
230    ///
231    /// This operation scans the entire tree: O(n) complexity!
232    ///
233    /// Never, under any circumstances, use .`len()` == 0 to check
234    /// if the tree is empty, use [`Tree::is_empty`] instead.
235    ///
236    /// # Examples
237    ///
238    /// ```
239    /// # use lsm_tree::Error as TreeError;
240    /// use lsm_tree::{AbstractTree, Config, Tree};
241    ///
242    /// let folder = tempfile::tempdir()?;
243    /// let tree = Config::new(folder).open()?;
244    ///
245    /// assert_eq!(tree.len(0, None)?, 0);
246    /// tree.insert("1", "abc", 0);
247    /// tree.insert("3", "abc", 1);
248    /// tree.insert("5", "abc", 2);
249    /// assert_eq!(tree.len(3, None)?, 3);
250    /// #
251    /// # Ok::<(), TreeError>(())
252    /// ```
253    ///
254    /// # Errors
255    ///
256    /// Will return `Err` if an IO error occurs.
257    fn len(&self, seqno: SeqNo, index: Option<Arc<Memtable>>) -> crate::Result<usize> {
258        let mut count = 0;
259
260        for item in self.iter(seqno, index) {
261            let _ = item.key()?;
262            count += 1;
263        }
264
265        Ok(count)
266    }
267
268    /// Returns `true` if the tree is empty.
269    ///
270    /// This operation has O(log N) complexity.
271    ///
272    /// # Examples
273    ///
274    /// ```
275    /// # let folder = tempfile::tempdir()?;
276    /// use lsm_tree::{AbstractTree, Config, Tree};
277    ///
278    /// let tree = Config::new(folder).open()?;
279    /// assert!(tree.is_empty(0, None)?);
280    ///
281    /// tree.insert("a", "abc", 0);
282    /// assert!(!tree.is_empty(1, None)?);
283    /// #
284    /// # Ok::<(), lsm_tree::Error>(())
285    /// ```
286    ///
287    /// # Errors
288    ///
289    /// Will return `Err` if an IO error occurs.
290    fn is_empty(&self, seqno: SeqNo, index: Option<Arc<Memtable>>) -> crate::Result<bool> {
291        self.first_key_value(seqno, index).map(|x| x.is_none())
292    }
293
294    /// Returns the first key-value pair in the tree.
295    /// The key in this pair is the minimum key in the tree.
296    ///
297    /// # Examples
298    ///
299    /// ```
300    /// # use lsm_tree::Error as TreeError;
301    /// # use lsm_tree::{AbstractTree, Config, Tree};
302    /// #
303    /// # let folder = tempfile::tempdir()?;
304    /// let tree = Config::new(folder).open()?;
305    ///
306    /// tree.insert("1", "abc", 0);
307    /// tree.insert("3", "abc", 1);
308    /// tree.insert("5", "abc", 2);
309    ///
310    /// let (key, _) = tree.first_key_value(3, None)?.expect("item should exist");
311    /// assert_eq!(&*key, "1".as_bytes());
312    /// #
313    /// # Ok::<(), TreeError>(())
314    /// ```
315    ///
316    /// # Errors
317    ///
318    /// Will return `Err` if an IO error occurs.
319    fn first_key_value(
320        &self,
321        seqno: SeqNo,
322        index: Option<Arc<Memtable>>,
323    ) -> crate::Result<Option<KvPair>> {
324        self.iter(seqno, index)
325            .next()
326            .map(Guard::into_inner)
327            .transpose()
328    }
329
330    /// Returns the last key-value pair in the tree.
331    /// The key in this pair is the maximum key in the tree.
332    ///
333    /// # Examples
334    ///
335    /// ```
336    /// # use lsm_tree::Error as TreeError;
337    /// # use lsm_tree::{AbstractTree, Config, Tree};
338    /// #
339    /// # let folder = tempfile::tempdir()?;
340    /// # let tree = Config::new(folder).open()?;
341    /// #
342    /// tree.insert("1", "abc", 0);
343    /// tree.insert("3", "abc", 1);
344    /// tree.insert("5", "abc", 2);
345    ///
346    /// let (key, _) = tree.last_key_value(3, None)?.expect("item should exist");
347    /// assert_eq!(&*key, "5".as_bytes());
348    /// #
349    /// # Ok::<(), TreeError>(())
350    /// ```
351    ///
352    /// # Errors
353    ///
354    /// Will return `Err` if an IO error occurs.
355    fn last_key_value(
356        &self,
357        seqno: SeqNo,
358        index: Option<Arc<Memtable>>,
359    ) -> crate::Result<Option<KvPair>> {
360        self.iter(seqno, index)
361            .next_back()
362            .map(Guard::into_inner)
363            .transpose()
364    }
365
366    /// Returns the size of a value if it exists.
367    ///
368    /// # Examples
369    ///
370    /// ```
371    /// # let folder = tempfile::tempdir()?;
372    /// use lsm_tree::{AbstractTree, Config, Tree};
373    ///
374    /// let tree = Config::new(folder).open()?;
375    /// tree.insert("a", "my_value", 0);
376    ///
377    /// let size = tree.size_of("a", 1)?.unwrap_or_default();
378    /// assert_eq!("my_value".len() as u32, size);
379    ///
380    /// let size = tree.size_of("b", 1)?.unwrap_or_default();
381    /// assert_eq!(0, size);
382    /// #
383    /// # Ok::<(), lsm_tree::Error>(())
384    /// ```
385    ///
386    /// # Errors
387    ///
388    /// Will return `Err` if an IO error occurs.
389    fn size_of<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<Option<u32>>;
390
391    /// Retrieves an item from the tree.
392    ///
393    /// # Examples
394    ///
395    /// ```
396    /// # let folder = tempfile::tempdir()?;
397    /// use lsm_tree::{AbstractTree, Config, Tree};
398    ///
399    /// let tree = Config::new(folder).open()?;
400    /// tree.insert("a", "my_value", 0);
401    ///
402    /// let item = tree.get("a", 1)?;
403    /// assert_eq!(Some("my_value".as_bytes().into()), item);
404    /// #
405    /// # Ok::<(), lsm_tree::Error>(())
406    /// ```
407    ///
408    /// # Errors
409    ///
410    /// Will return `Err` if an IO error occurs.
411    fn get<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<Option<UserValue>>;
412
413    /// Returns `true` if the tree contains the specified key.
414    ///
415    /// # Examples
416    ///
417    /// ```
418    /// # let folder = tempfile::tempdir()?;
419    /// # use lsm_tree::{AbstractTree, Config, Tree};
420    /// #
421    /// let tree = Config::new(folder).open()?;
422    /// assert!(!tree.contains_key("a", 0)?);
423    ///
424    /// tree.insert("a", "abc", 0);
425    /// assert!(tree.contains_key("a", 1)?);
426    /// #
427    /// # Ok::<(), lsm_tree::Error>(())
428    /// ```
429    ///
430    /// # Errors
431    ///
432    /// Will return `Err` if an IO error occurs.
433    fn contains_key<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<bool> {
434        self.get(key, seqno).map(|x| x.is_some())
435    }
436
437    /// Inserts a key-value pair into the tree.
438    ///
439    /// If the key already exists, the item will be overwritten.
440    ///
441    /// Returns the added item's size and new size of the memtable.
442    ///
443    /// # Examples
444    ///
445    /// ```
446    /// # let folder = tempfile::tempdir()?;
447    /// use lsm_tree::{AbstractTree, Config, Tree};
448    ///
449    /// let tree = Config::new(folder).open()?;
450    /// tree.insert("a", "abc", 0);
451    /// #
452    /// # Ok::<(), lsm_tree::Error>(())
453    /// ```
454    ///
455    /// # Errors
456    ///
457    /// Will return `Err` if an IO error occurs.
458    fn insert<K: Into<UserKey>, V: Into<UserValue>>(
459        &self,
460        key: K,
461        value: V,
462        seqno: SeqNo,
463    ) -> (u64, u64);
464
465    /// Removes an item from the tree.
466    ///
467    /// Returns the added item's size and new size of the memtable.
468    ///
469    /// # Examples
470    ///
471    /// ```
472    /// # let folder = tempfile::tempdir()?;
473    /// # use lsm_tree::{AbstractTree, Config, Tree};
474    /// #
475    /// # let tree = Config::new(folder).open()?;
476    /// tree.insert("a", "abc", 0);
477    ///
478    /// let item = tree.get("a", 1)?.expect("should have item");
479    /// assert_eq!("abc".as_bytes(), &*item);
480    ///
481    /// tree.remove("a", 1);
482    ///
483    /// let item = tree.get("a", 2)?;
484    /// assert_eq!(None, item);
485    /// #
486    /// # Ok::<(), lsm_tree::Error>(())
487    /// ```
488    ///
489    /// # Errors
490    ///
491    /// Will return `Err` if an IO error occurs.
492    fn remove<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u64, u64);
493
494    /// Removes an item from the tree.
495    ///
496    /// The tombstone marker of this delete operation will vanish when it
497    /// collides with its corresponding insertion.
498    /// This may cause older versions of the value to be resurrected, so it should
499    /// only be used and preferred in scenarios where a key is only ever written once.
500    ///
501    /// Returns the added item's size and new size of the memtable.
502    ///
503    /// # Examples
504    ///
505    /// ```
506    /// # let folder = tempfile::tempdir()?;
507    /// # use lsm_tree::{AbstractTree, Config, Tree};
508    /// #
509    /// # let tree = Config::new(folder).open()?;
510    /// tree.insert("a", "abc", 0);
511    ///
512    /// let item = tree.get("a", 1)?.expect("should have item");
513    /// assert_eq!("abc".as_bytes(), &*item);
514    ///
515    /// tree.remove_weak("a", 1);
516    ///
517    /// let item = tree.get("a", 2)?;
518    /// assert_eq!(None, item);
519    /// #
520    /// # Ok::<(), lsm_tree::Error>(())
521    /// ```
522    ///
523    /// # Errors
524    ///
525    /// Will return `Err` if an IO error occurs.
526    #[doc(hidden)]
527    fn remove_weak<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u64, u64);
528}