lsm_tree/
abstract.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::{
6    blob_tree::FragmentationMap, compaction::CompactionStrategy, config::TreeType,
7    iter_guard::IterGuardImpl, segment::Segment, tree::inner::MemtableId, version::Version,
8    vlog::BlobFile, AnyTree, BlobTree, Config, Guard, InternalValue, KvPair, Memtable, SegmentId,
9    SeqNo, SequenceNumberCounter, Tree, TreeId, UserKey, UserValue,
10};
11use enum_dispatch::enum_dispatch;
12use std::{ops::RangeBounds, sync::Arc};
13
14pub type RangeItem = crate::Result<KvPair>;
15
16/// Generic Tree API
17#[allow(clippy::module_name_repetitions)]
18#[enum_dispatch]
19pub trait AbstractTree {
20    #[doc(hidden)]
21    fn next_table_id(&self) -> SegmentId;
22
23    #[doc(hidden)]
24    fn id(&self) -> TreeId;
25
26    #[doc(hidden)]
27    fn get_internal_entry(&self, key: &[u8], seqno: SeqNo) -> crate::Result<Option<InternalValue>>;
28
29    #[doc(hidden)]
30    fn current_version(&self) -> Version;
31
32    /// Synchronously flushes the active memtable to a disk segment.
33    ///
34    /// The function may not return a result, if, during concurrent workloads, the memtable
35    /// ends up being empty before the flush is set up.
36    ///
37    /// The result will contain the [`Segment`].
38    ///
39    /// # Errors
40    ///
41    /// Will return `Err` if an IO error occurs.
42    #[doc(hidden)]
43    fn flush_active_memtable(&self, seqno_threshold: SeqNo) -> crate::Result<Option<Segment>>;
44
45    /// Returns an iterator that scans through the entire tree.
46    ///
47    /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
48    fn iter(
49        &self,
50        seqno: SeqNo,
51        index: Option<Arc<Memtable>>,
52    ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl<'_>> + '_> {
53        self.range::<&[u8], _>(.., seqno, index)
54    }
55
56    /// Returns an iterator over a prefixed set of items.
57    ///
58    /// Avoid using an empty prefix as it may scan a lot of items (unless limited).
59    fn prefix<K: AsRef<[u8]>>(
60        &self,
61        prefix: K,
62        seqno: SeqNo,
63        index: Option<Arc<Memtable>>,
64    ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl<'_>> + '_>;
65
66    /// Returns an iterator over a range of items.
67    ///
68    /// Avoid using full or unbounded ranges as they may scan a lot of items (unless limited).
69    fn range<K: AsRef<[u8]>, R: RangeBounds<K>>(
70        &self,
71        range: R,
72        seqno: SeqNo,
73        index: Option<Arc<Memtable>>,
74    ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl<'_>> + '_>;
75
76    /// Ingests a sorted stream of key-value pairs into the tree.
77    ///
78    /// Can only be called on a new fresh, empty tree.
79    ///
80    /// # Errors
81    ///
82    /// Will return `Err` if an IO error occurs.
83    ///
84    /// # Panics
85    ///
86    /// Panics if the tree is **not** initially empty.
87    ///
88    /// Will panic if the input iterator is not sorted in ascending order.
89    #[doc(hidden)]
90    fn ingest(
91        &self,
92        iter: impl Iterator<Item = (UserKey, UserValue)>,
93        seqno_generator: &SequenceNumberCounter,
94        visible_seqno: &SequenceNumberCounter,
95    ) -> crate::Result<()>;
96
97    /// Returns the approximate number of tombstones in the tree.
98    fn tombstone_count(&self) -> u64;
99
100    // TODO: clear() with Nuke compaction strategy (write lock) -> drop_range(..)
101
102    /// Drops segments that are fully contained in a given range.
103    ///
104    /// Accepts any `RangeBounds`, including unbounded or exclusive endpoints.
105    /// If the normalized lower bound is greater than the upper bound, the
106    /// method returns without performing any work.
107    ///
108    /// # Errors
109    ///
110    /// Will return `Err` only if an IO error occurs during compaction.
111    fn drop_range<K: AsRef<[u8]>, R: RangeBounds<K>>(&self, range: R) -> crate::Result<()>;
112
113    /// Performs major compaction, blocking the caller until it's done.
114    ///
115    /// # Errors
116    ///
117    /// Will return `Err` if an IO error occurs.
118    fn major_compact(&self, target_size: u64, seqno_threshold: SeqNo) -> crate::Result<()>;
119
120    /// Returns the disk space used by stale blobs.
121    fn stale_blob_bytes(&self) -> u64 {
122        0
123    }
124
125    /// Gets the space usage of all filters in the tree.
126    ///
127    /// May not correspond to the actual memory size because filter blocks may be paged out.
128    fn filter_size(&self) -> usize;
129
130    /// Gets the memory usage of all pinned filters in the tree.
131    fn pinned_filter_size(&self) -> usize;
132
133    /// Gets the memory usage of all pinned index blocks in the tree.
134    fn pinned_block_index_size(&self) -> usize;
135
136    /// Gets the length of the version free list.
137    fn version_free_list_len(&self) -> usize;
138
139    /// Returns the metrics structure.
140    #[cfg(feature = "metrics")]
141    fn metrics(&self) -> &Arc<crate::Metrics>;
142
143    /// Synchronously flushes a memtable to a disk segment.
144    ///
145    /// This method will not make the segment immediately available,
146    /// use [`AbstractTree::register_segments`] for that.
147    ///
148    /// # Errors
149    ///
150    /// Will return `Err` if an IO error occurs.
151    #[warn(clippy::type_complexity)]
152    fn flush_memtable(
153        &self,
154        segment_id: SegmentId, // TODO: remove?
155        memtable: &Arc<Memtable>,
156        seqno_threshold: SeqNo,
157    ) -> crate::Result<Option<(Segment, Option<BlobFile>)>>;
158
159    /// Atomically registers flushed disk segments into the tree, removing their associated sealed memtables.
160    ///
161    /// # Errors
162    ///
163    /// Will return `Err` if an IO error occurs.
164    fn register_segments(
165        &self,
166        segments: &[Segment],
167        blob_files: Option<&[BlobFile]>,
168        frag_map: Option<FragmentationMap>,
169        seqno_threshold: SeqNo,
170    ) -> crate::Result<()>;
171
172    /// Clears the active memtable atomically.
173    fn clear_active_memtable(&self);
174
175    /// Sets the active memtable.
176    ///
177    /// May be used to restore the LSM-tree's in-memory state from a write-ahead log
178    /// after tree recovery.
179    fn set_active_memtable(&self, memtable: Memtable);
180
181    /// Returns the number of sealed memtables.
182    fn sealed_memtable_count(&self) -> usize;
183
184    /// Adds a sealed memtables.
185    ///
186    /// May be used to restore the LSM-tree's in-memory state from some journals.
187    fn add_sealed_memtable(&self, id: MemtableId, memtable: Arc<Memtable>);
188
189    /// Performs compaction on the tree's levels, blocking the caller until it's done.
190    ///
191    /// # Errors
192    ///
193    /// Will return `Err` if an IO error occurs.
194    fn compact(
195        &self,
196        strategy: Arc<dyn CompactionStrategy>,
197        seqno_threshold: SeqNo,
198    ) -> crate::Result<()>;
199
200    /// Returns the next segment's ID.
201    fn get_next_segment_id(&self) -> SegmentId;
202
203    /// Returns the tree config.
204    fn tree_config(&self) -> &Config;
205
206    /// Returns the highest sequence number.
207    fn get_highest_seqno(&self) -> Option<SeqNo> {
208        let memtable_seqno = self.get_highest_memtable_seqno();
209        let segment_seqno = self.get_highest_persisted_seqno();
210        memtable_seqno.max(segment_seqno)
211    }
212
213    /// Returns the approximate size of the active memtable in bytes.
214    ///
215    /// May be used to flush the memtable if it grows too large.
216    fn active_memtable_size(&self) -> u64;
217
218    /// Returns the tree type.
219    fn tree_type(&self) -> TreeType;
220
221    /// Seals the active memtable, and returns a reference to it.
222    fn rotate_memtable(&self) -> Option<(MemtableId, Arc<Memtable>)>;
223
224    /// Returns the number of disk segments currently in the tree.
225    fn segment_count(&self) -> usize;
226
227    /// Returns the number of segments in `levels[idx]`.
228    ///
229    /// Returns `None` if the level does not exist (if idx >= 7).
230    fn level_segment_count(&self, idx: usize) -> Option<usize>;
231
232    /// Returns the number of disjoint runs in L0.
233    ///
234    /// Can be used to determine whether to write stall.
235    fn l0_run_count(&self) -> usize;
236
237    /// Returns the number of blob files currently in the tree.
238    fn blob_file_count(&self) -> usize {
239        0
240    }
241
242    /// Approximates the number of items in the tree.
243    fn approximate_len(&self) -> usize;
244
245    /// Returns the disk space usage.
246    fn disk_space(&self) -> u64;
247
248    /// Returns the highest sequence number of the active memtable.
249    fn get_highest_memtable_seqno(&self) -> Option<SeqNo>;
250
251    /// Returns the highest sequence number that is flushed to disk.
252    fn get_highest_persisted_seqno(&self) -> Option<SeqNo>;
253
254    /// Scans the entire tree, returning the number of items.
255    ///
256    /// ###### Caution
257    ///
258    /// This operation scans the entire tree: O(n) complexity!
259    ///
260    /// Never, under any circumstances, use .`len()` == 0 to check
261    /// if the tree is empty, use [`Tree::is_empty`] instead.
262    ///
263    /// # Examples
264    ///
265    /// ```
266    /// # use lsm_tree::Error as TreeError;
267    /// use lsm_tree::{AbstractTree, Config, Tree};
268    ///
269    /// let folder = tempfile::tempdir()?;
270    /// let tree = Config::new(folder).open()?;
271    ///
272    /// assert_eq!(tree.len(0, None)?, 0);
273    /// tree.insert("1", "abc", 0);
274    /// tree.insert("3", "abc", 1);
275    /// tree.insert("5", "abc", 2);
276    /// assert_eq!(tree.len(3, None)?, 3);
277    /// #
278    /// # Ok::<(), TreeError>(())
279    /// ```
280    ///
281    /// # Errors
282    ///
283    /// Will return `Err` if an IO error occurs.
284    fn len(&self, seqno: SeqNo, index: Option<Arc<Memtable>>) -> crate::Result<usize> {
285        let mut count = 0;
286
287        for item in self.iter(seqno, index) {
288            let _ = item.key()?;
289            count += 1;
290        }
291
292        Ok(count)
293    }
294
295    /// Returns `true` if the tree is empty.
296    ///
297    /// This operation has O(log N) complexity.
298    ///
299    /// # Examples
300    ///
301    /// ```
302    /// # let folder = tempfile::tempdir()?;
303    /// use lsm_tree::{AbstractTree, Config, Tree};
304    ///
305    /// let tree = Config::new(folder).open()?;
306    /// assert!(tree.is_empty(0, None)?);
307    ///
308    /// tree.insert("a", "abc", 0);
309    /// assert!(!tree.is_empty(1, None)?);
310    /// #
311    /// # Ok::<(), lsm_tree::Error>(())
312    /// ```
313    ///
314    /// # Errors
315    ///
316    /// Will return `Err` if an IO error occurs.
317    fn is_empty(&self, seqno: SeqNo, index: Option<Arc<Memtable>>) -> crate::Result<bool> {
318        self.first_key_value(seqno, index).map(|x| x.is_none())
319    }
320
321    /// Returns the first key-value pair in the tree.
322    /// The key in this pair is the minimum key in the tree.
323    ///
324    /// # Examples
325    ///
326    /// ```
327    /// # use lsm_tree::Error as TreeError;
328    /// # use lsm_tree::{AbstractTree, Config, Tree};
329    /// #
330    /// # let folder = tempfile::tempdir()?;
331    /// let tree = Config::new(folder).open()?;
332    ///
333    /// tree.insert("1", "abc", 0);
334    /// tree.insert("3", "abc", 1);
335    /// tree.insert("5", "abc", 2);
336    ///
337    /// let (key, _) = tree.first_key_value(3, None)?.expect("item should exist");
338    /// assert_eq!(&*key, "1".as_bytes());
339    /// #
340    /// # Ok::<(), TreeError>(())
341    /// ```
342    ///
343    /// # Errors
344    ///
345    /// Will return `Err` if an IO error occurs.
346    fn first_key_value(
347        &self,
348        seqno: SeqNo,
349        index: Option<Arc<Memtable>>,
350    ) -> crate::Result<Option<KvPair>> {
351        self.iter(seqno, index)
352            .next()
353            .map(Guard::into_inner)
354            .transpose()
355    }
356
357    /// Returns the last key-value pair in the tree.
358    /// The key in this pair is the maximum key in the tree.
359    ///
360    /// # Examples
361    ///
362    /// ```
363    /// # use lsm_tree::Error as TreeError;
364    /// # use lsm_tree::{AbstractTree, Config, Tree};
365    /// #
366    /// # let folder = tempfile::tempdir()?;
367    /// # let tree = Config::new(folder).open()?;
368    /// #
369    /// tree.insert("1", "abc", 0);
370    /// tree.insert("3", "abc", 1);
371    /// tree.insert("5", "abc", 2);
372    ///
373    /// let (key, _) = tree.last_key_value(3, None)?.expect("item should exist");
374    /// assert_eq!(&*key, "5".as_bytes());
375    /// #
376    /// # Ok::<(), TreeError>(())
377    /// ```
378    ///
379    /// # Errors
380    ///
381    /// Will return `Err` if an IO error occurs.
382    fn last_key_value(
383        &self,
384        seqno: SeqNo,
385        index: Option<Arc<Memtable>>,
386    ) -> crate::Result<Option<KvPair>> {
387        self.iter(seqno, index)
388            .next_back()
389            .map(Guard::into_inner)
390            .transpose()
391    }
392
393    /// Returns the size of a value if it exists.
394    ///
395    /// # Examples
396    ///
397    /// ```
398    /// # let folder = tempfile::tempdir()?;
399    /// use lsm_tree::{AbstractTree, Config, Tree};
400    ///
401    /// let tree = Config::new(folder).open()?;
402    /// tree.insert("a", "my_value", 0);
403    ///
404    /// let size = tree.size_of("a", 1)?.unwrap_or_default();
405    /// assert_eq!("my_value".len() as u32, size);
406    ///
407    /// let size = tree.size_of("b", 1)?.unwrap_or_default();
408    /// assert_eq!(0, size);
409    /// #
410    /// # Ok::<(), lsm_tree::Error>(())
411    /// ```
412    ///
413    /// # Errors
414    ///
415    /// Will return `Err` if an IO error occurs.
416    fn size_of<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<Option<u32>>;
417
418    /// Retrieves an item from the tree.
419    ///
420    /// # Examples
421    ///
422    /// ```
423    /// # let folder = tempfile::tempdir()?;
424    /// use lsm_tree::{AbstractTree, Config, Tree};
425    ///
426    /// let tree = Config::new(folder).open()?;
427    /// tree.insert("a", "my_value", 0);
428    ///
429    /// let item = tree.get("a", 1)?;
430    /// assert_eq!(Some("my_value".as_bytes().into()), item);
431    /// #
432    /// # Ok::<(), lsm_tree::Error>(())
433    /// ```
434    ///
435    /// # Errors
436    ///
437    /// Will return `Err` if an IO error occurs.
438    fn get<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<Option<UserValue>>;
439
440    /// Returns `true` if the tree contains the specified key.
441    ///
442    /// # Examples
443    ///
444    /// ```
445    /// # let folder = tempfile::tempdir()?;
446    /// # use lsm_tree::{AbstractTree, Config, Tree};
447    /// #
448    /// let tree = Config::new(folder).open()?;
449    /// assert!(!tree.contains_key("a", 0)?);
450    ///
451    /// tree.insert("a", "abc", 0);
452    /// assert!(tree.contains_key("a", 1)?);
453    /// #
454    /// # Ok::<(), lsm_tree::Error>(())
455    /// ```
456    ///
457    /// # Errors
458    ///
459    /// Will return `Err` if an IO error occurs.
460    fn contains_key<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<bool> {
461        self.get(key, seqno).map(|x| x.is_some())
462    }
463
464    /// Inserts a key-value pair into the tree.
465    ///
466    /// If the key already exists, the item will be overwritten.
467    ///
468    /// Returns the added item's size and new size of the memtable.
469    ///
470    /// # Examples
471    ///
472    /// ```
473    /// # let folder = tempfile::tempdir()?;
474    /// use lsm_tree::{AbstractTree, Config, Tree};
475    ///
476    /// let tree = Config::new(folder).open()?;
477    /// tree.insert("a", "abc", 0);
478    /// #
479    /// # Ok::<(), lsm_tree::Error>(())
480    /// ```
481    ///
482    /// # Errors
483    ///
484    /// Will return `Err` if an IO error occurs.
485    fn insert<K: Into<UserKey>, V: Into<UserValue>>(
486        &self,
487        key: K,
488        value: V,
489        seqno: SeqNo,
490    ) -> (u64, u64);
491
492    /// Removes an item from the tree.
493    ///
494    /// Returns the added item's size and new size of the memtable.
495    ///
496    /// # Examples
497    ///
498    /// ```
499    /// # let folder = tempfile::tempdir()?;
500    /// # use lsm_tree::{AbstractTree, Config, Tree};
501    /// #
502    /// # let tree = Config::new(folder).open()?;
503    /// tree.insert("a", "abc", 0);
504    ///
505    /// let item = tree.get("a", 1)?.expect("should have item");
506    /// assert_eq!("abc".as_bytes(), &*item);
507    ///
508    /// tree.remove("a", 1);
509    ///
510    /// let item = tree.get("a", 2)?;
511    /// assert_eq!(None, item);
512    /// #
513    /// # Ok::<(), lsm_tree::Error>(())
514    /// ```
515    ///
516    /// # Errors
517    ///
518    /// Will return `Err` if an IO error occurs.
519    fn remove<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u64, u64);
520
521    /// Removes an item from the tree.
522    ///
523    /// The tombstone marker of this delete operation will vanish when it
524    /// collides with its corresponding insertion.
525    /// This may cause older versions of the value to be resurrected, so it should
526    /// only be used and preferred in scenarios where a key is only ever written once.
527    ///
528    /// Returns the added item's size and new size of the memtable.
529    ///
530    /// # Examples
531    ///
532    /// ```
533    /// # let folder = tempfile::tempdir()?;
534    /// # use lsm_tree::{AbstractTree, Config, Tree};
535    /// #
536    /// # let tree = Config::new(folder).open()?;
537    /// tree.insert("a", "abc", 0);
538    ///
539    /// let item = tree.get("a", 1)?.expect("should have item");
540    /// assert_eq!("abc".as_bytes(), &*item);
541    ///
542    /// tree.remove_weak("a", 1);
543    ///
544    /// let item = tree.get("a", 2)?;
545    /// assert_eq!(None, item);
546    /// #
547    /// # Ok::<(), lsm_tree::Error>(())
548    /// ```
549    ///
550    /// # Errors
551    ///
552    /// Will return `Err` if an IO error occurs.
553    #[doc(hidden)]
554    fn remove_weak<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u64, u64);
555}