lsm_tree/
abstract.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::{
6    blob_tree::FragmentationMap, compaction::CompactionStrategy, config::TreeType,
7    iter_guard::IterGuardImpl, table::Table, tree::inner::MemtableId, version::Version,
8    vlog::BlobFile, AnyTree, BlobTree, Config, Guard, InternalValue, KvPair, Memtable, SeqNo,
9    SequenceNumberCounter, TableId, Tree, TreeId, UserKey, UserValue,
10};
11use enum_dispatch::enum_dispatch;
12use std::{ops::RangeBounds, sync::Arc};
13
14pub type RangeItem = crate::Result<KvPair>;
15
16/// Generic Tree API
17#[expect(clippy::module_name_repetitions)]
18#[enum_dispatch]
19pub trait AbstractTree {
20    #[doc(hidden)]
21    fn next_table_id(&self) -> TableId;
22
23    #[doc(hidden)]
24    fn id(&self) -> TreeId;
25
26    #[doc(hidden)]
27    fn get_internal_entry(&self, key: &[u8], seqno: SeqNo) -> crate::Result<Option<InternalValue>>;
28
29    #[doc(hidden)]
30    fn current_version(&self) -> Version;
31
32    /// Synchronously flushes the active memtable to a table.
33    ///
34    /// The function may not return a result, if, during concurrent workloads, the memtable
35    /// ends up being empty before the flush is set up.
36    ///
37    /// The result will contain the [`Table`].
38    ///
39    /// # Errors
40    ///
41    /// Will return `Err` if an IO error occurs.
42    #[doc(hidden)]
43    fn flush_active_memtable(&self, seqno_threshold: SeqNo) -> crate::Result<Option<Table>>;
44
45    /// Returns an iterator that scans through the entire tree.
46    ///
47    /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
48    fn iter(
49        &self,
50        seqno: SeqNo,
51        index: Option<Arc<Memtable>>,
52    ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl<'_>> + '_> {
53        self.range::<&[u8], _>(.., seqno, index)
54    }
55
56    /// Returns an iterator over a prefixed set of items.
57    ///
58    /// Avoid using an empty prefix as it may scan a lot of items (unless limited).
59    fn prefix<K: AsRef<[u8]>>(
60        &self,
61        prefix: K,
62        seqno: SeqNo,
63        index: Option<Arc<Memtable>>,
64    ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl<'_>> + '_>;
65
66    /// Returns an iterator over a range of items.
67    ///
68    /// Avoid using full or unbounded ranges as they may scan a lot of items (unless limited).
69    fn range<K: AsRef<[u8]>, R: RangeBounds<K>>(
70        &self,
71        range: R,
72        seqno: SeqNo,
73        index: Option<Arc<Memtable>>,
74    ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl<'_>> + '_>;
75
76    /// Ingests a sorted stream of key-value pairs into the tree.
77    ///
78    /// Can only be called on a new fresh, empty tree.
79    ///
80    /// # Errors
81    ///
82    /// Will return `Err` if an IO error occurs.
83    ///
84    /// # Panics
85    ///
86    /// Panics if the tree is **not** initially empty.
87    ///
88    /// Will panic if the input iterator is not sorted in ascending order.
89    #[doc(hidden)]
90    fn ingest(
91        &self,
92        iter: impl Iterator<Item = (UserKey, UserValue)>,
93        seqno_generator: &SequenceNumberCounter,
94        visible_seqno: &SequenceNumberCounter,
95    ) -> crate::Result<()>;
96
97    /// Returns the approximate number of tombstones in the tree.
98    fn tombstone_count(&self) -> u64;
99
100    /// Returns the approximate number of weak tombstones (single deletes) in the tree.
101    fn weak_tombstone_count(&self) -> u64;
102
103    /// Returns the approximate number of values reclaimable once weak tombstones can be GC'd.
104    fn weak_tombstone_reclaimable_count(&self) -> u64;
105
106    // TODO: clear() with Nuke compaction strategy (write lock) -> drop_range(..)
107
108    /// Drops tables that are fully contained in a given range.
109    ///
110    /// Accepts any `RangeBounds`, including unbounded or exclusive endpoints.
111    /// If the normalized lower bound is greater than the upper bound, the
112    /// method returns without performing any work.
113    ///
114    /// # Errors
115    ///
116    /// Will return `Err` only if an IO error occurs during compaction.
117    fn drop_range<K: AsRef<[u8]>, R: RangeBounds<K>>(&self, range: R) -> crate::Result<()>;
118
119    /// Performs major compaction, blocking the caller until it's done.
120    ///
121    /// # Errors
122    ///
123    /// Will return `Err` if an IO error occurs.
124    fn major_compact(&self, target_size: u64, seqno_threshold: SeqNo) -> crate::Result<()>;
125
126    /// Returns the disk space used by stale blobs.
127    fn stale_blob_bytes(&self) -> u64 {
128        0
129    }
130
131    /// Gets the space usage of all filters in the tree.
132    ///
133    /// May not correspond to the actual memory size because filter blocks may be paged out.
134    fn filter_size(&self) -> usize;
135
136    /// Gets the memory usage of all pinned filters in the tree.
137    fn pinned_filter_size(&self) -> usize;
138
139    /// Gets the memory usage of all pinned index blocks in the tree.
140    fn pinned_block_index_size(&self) -> usize;
141
142    /// Gets the length of the version free list.
143    fn version_free_list_len(&self) -> usize;
144
145    /// Returns the metrics structure.
146    #[cfg(feature = "metrics")]
147    fn metrics(&self) -> &Arc<crate::Metrics>;
148
149    /// Synchronously flushes a memtable to a table.
150    ///
151    /// This method will not make the table immediately available,
152    /// use [`AbstractTree::register_tables`] for that.
153    ///
154    /// # Errors
155    ///
156    /// Will return `Err` if an IO error occurs.
157    #[warn(clippy::type_complexity)]
158    fn flush_memtable(
159        &self,
160        table_id: TableId, // TODO: remove?
161        memtable: &Arc<Memtable>,
162        seqno_threshold: SeqNo,
163    ) -> crate::Result<Option<(Table, Option<BlobFile>)>>;
164
165    /// Atomically registers flushed tables into the tree, removing their associated sealed memtables.
166    ///
167    /// # Errors
168    ///
169    /// Will return `Err` if an IO error occurs.
170    fn register_tables(
171        &self,
172        tables: &[Table],
173        blob_files: Option<&[BlobFile]>,
174        frag_map: Option<FragmentationMap>,
175        seqno_threshold: SeqNo,
176    ) -> crate::Result<()>;
177
178    /// Clears the active memtable atomically.
179    fn clear_active_memtable(&self);
180
181    /// Sets the active memtable.
182    ///
183    /// May be used to restore the LSM-tree's in-memory state from a write-ahead log
184    /// after tree recovery.
185    fn set_active_memtable(&self, memtable: Memtable);
186
187    /// Returns the number of sealed memtables.
188    fn sealed_memtable_count(&self) -> usize;
189
190    /// Adds a sealed memtables.
191    ///
192    /// May be used to restore the LSM-tree's in-memory state from some journals.
193    fn add_sealed_memtable(&self, id: MemtableId, memtable: Arc<Memtable>);
194
195    /// Performs compaction on the tree's levels, blocking the caller until it's done.
196    ///
197    /// # Errors
198    ///
199    /// Will return `Err` if an IO error occurs.
200    fn compact(
201        &self,
202        strategy: Arc<dyn CompactionStrategy>,
203        seqno_threshold: SeqNo,
204    ) -> crate::Result<()>;
205
206    /// Returns the next table's ID.
207    fn get_next_table_id(&self) -> TableId;
208
209    /// Returns the tree config.
210    fn tree_config(&self) -> &Config;
211
212    /// Returns the highest sequence number.
213    fn get_highest_seqno(&self) -> Option<SeqNo> {
214        let memtable_seqno = self.get_highest_memtable_seqno();
215        let table_seqno = self.get_highest_persisted_seqno();
216        memtable_seqno.max(table_seqno)
217    }
218
219    /// Returns the approximate size of the active memtable in bytes.
220    ///
221    /// May be used to flush the memtable if it grows too large.
222    fn active_memtable_size(&self) -> u64;
223
224    /// Returns the tree type.
225    fn tree_type(&self) -> TreeType;
226
227    /// Seals the active memtable, and returns a reference to it.
228    fn rotate_memtable(&self) -> Option<(MemtableId, Arc<Memtable>)>;
229
230    /// Returns the number of tables currently in the tree.
231    fn table_count(&self) -> usize;
232
233    /// Returns the number of tables in `levels[idx]`.
234    ///
235    /// Returns `None` if the level does not exist (if idx >= 7).
236    fn level_table_count(&self, idx: usize) -> Option<usize>;
237
238    /// Returns the number of disjoint runs in L0.
239    ///
240    /// Can be used to determine whether to write stall.
241    fn l0_run_count(&self) -> usize;
242
243    /// Returns the number of blob files currently in the tree.
244    fn blob_file_count(&self) -> usize {
245        0
246    }
247
248    /// Approximates the number of items in the tree.
249    fn approximate_len(&self) -> usize;
250
251    /// Returns the disk space usage.
252    fn disk_space(&self) -> u64;
253
254    /// Returns the highest sequence number of the active memtable.
255    fn get_highest_memtable_seqno(&self) -> Option<SeqNo>;
256
257    /// Returns the highest sequence number that is flushed to disk.
258    fn get_highest_persisted_seqno(&self) -> Option<SeqNo>;
259
260    /// Scans the entire tree, returning the number of items.
261    ///
262    /// ###### Caution
263    ///
264    /// This operation scans the entire tree: O(n) complexity!
265    ///
266    /// Never, under any circumstances, use .`len()` == 0 to check
267    /// if the tree is empty, use [`Tree::is_empty`] instead.
268    ///
269    /// # Examples
270    ///
271    /// ```
272    /// # use lsm_tree::Error as TreeError;
273    /// use lsm_tree::{AbstractTree, Config, Tree};
274    ///
275    /// let folder = tempfile::tempdir()?;
276    /// let tree = Config::new(folder).open()?;
277    ///
278    /// assert_eq!(tree.len(0, None)?, 0);
279    /// tree.insert("1", "abc", 0);
280    /// tree.insert("3", "abc", 1);
281    /// tree.insert("5", "abc", 2);
282    /// assert_eq!(tree.len(3, None)?, 3);
283    /// #
284    /// # Ok::<(), TreeError>(())
285    /// ```
286    ///
287    /// # Errors
288    ///
289    /// Will return `Err` if an IO error occurs.
290    fn len(&self, seqno: SeqNo, index: Option<Arc<Memtable>>) -> crate::Result<usize> {
291        let mut count = 0;
292
293        for item in self.iter(seqno, index) {
294            let _ = item.key()?;
295            count += 1;
296        }
297
298        Ok(count)
299    }
300
301    /// Returns `true` if the tree is empty.
302    ///
303    /// This operation has O(log N) complexity.
304    ///
305    /// # Examples
306    ///
307    /// ```
308    /// # let folder = tempfile::tempdir()?;
309    /// use lsm_tree::{AbstractTree, Config, Tree};
310    ///
311    /// let tree = Config::new(folder).open()?;
312    /// assert!(tree.is_empty(0, None)?);
313    ///
314    /// tree.insert("a", "abc", 0);
315    /// assert!(!tree.is_empty(1, None)?);
316    /// #
317    /// # Ok::<(), lsm_tree::Error>(())
318    /// ```
319    ///
320    /// # Errors
321    ///
322    /// Will return `Err` if an IO error occurs.
323    fn is_empty(&self, seqno: SeqNo, index: Option<Arc<Memtable>>) -> crate::Result<bool> {
324        self.first_key_value(seqno, index).map(|x| x.is_none())
325    }
326
327    /// Returns the first key-value pair in the tree.
328    /// The key in this pair is the minimum key in the tree.
329    ///
330    /// # Examples
331    ///
332    /// ```
333    /// # use lsm_tree::Error as TreeError;
334    /// # use lsm_tree::{AbstractTree, Config, Tree};
335    /// #
336    /// # let folder = tempfile::tempdir()?;
337    /// let tree = Config::new(folder).open()?;
338    ///
339    /// tree.insert("1", "abc", 0);
340    /// tree.insert("3", "abc", 1);
341    /// tree.insert("5", "abc", 2);
342    ///
343    /// let (key, _) = tree.first_key_value(3, None)?.expect("item should exist");
344    /// assert_eq!(&*key, "1".as_bytes());
345    /// #
346    /// # Ok::<(), TreeError>(())
347    /// ```
348    ///
349    /// # Errors
350    ///
351    /// Will return `Err` if an IO error occurs.
352    fn first_key_value(
353        &self,
354        seqno: SeqNo,
355        index: Option<Arc<Memtable>>,
356    ) -> crate::Result<Option<KvPair>> {
357        self.iter(seqno, index)
358            .next()
359            .map(Guard::into_inner)
360            .transpose()
361    }
362
363    /// Returns the last key-value pair in the tree.
364    /// The key in this pair is the maximum key in the tree.
365    ///
366    /// # Examples
367    ///
368    /// ```
369    /// # use lsm_tree::Error as TreeError;
370    /// # use lsm_tree::{AbstractTree, Config, Tree};
371    /// #
372    /// # let folder = tempfile::tempdir()?;
373    /// # let tree = Config::new(folder).open()?;
374    /// #
375    /// tree.insert("1", "abc", 0);
376    /// tree.insert("3", "abc", 1);
377    /// tree.insert("5", "abc", 2);
378    ///
379    /// let (key, _) = tree.last_key_value(3, None)?.expect("item should exist");
380    /// assert_eq!(&*key, "5".as_bytes());
381    /// #
382    /// # Ok::<(), TreeError>(())
383    /// ```
384    ///
385    /// # Errors
386    ///
387    /// Will return `Err` if an IO error occurs.
388    fn last_key_value(
389        &self,
390        seqno: SeqNo,
391        index: Option<Arc<Memtable>>,
392    ) -> crate::Result<Option<KvPair>> {
393        self.iter(seqno, index)
394            .next_back()
395            .map(Guard::into_inner)
396            .transpose()
397    }
398
399    /// Returns the size of a value if it exists.
400    ///
401    /// # Examples
402    ///
403    /// ```
404    /// # let folder = tempfile::tempdir()?;
405    /// use lsm_tree::{AbstractTree, Config, Tree};
406    ///
407    /// let tree = Config::new(folder).open()?;
408    /// tree.insert("a", "my_value", 0);
409    ///
410    /// let size = tree.size_of("a", 1)?.unwrap_or_default();
411    /// assert_eq!("my_value".len() as u32, size);
412    ///
413    /// let size = tree.size_of("b", 1)?.unwrap_or_default();
414    /// assert_eq!(0, size);
415    /// #
416    /// # Ok::<(), lsm_tree::Error>(())
417    /// ```
418    ///
419    /// # Errors
420    ///
421    /// Will return `Err` if an IO error occurs.
422    fn size_of<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<Option<u32>>;
423
424    /// Retrieves an item from the tree.
425    ///
426    /// # Examples
427    ///
428    /// ```
429    /// # let folder = tempfile::tempdir()?;
430    /// use lsm_tree::{AbstractTree, Config, Tree};
431    ///
432    /// let tree = Config::new(folder).open()?;
433    /// tree.insert("a", "my_value", 0);
434    ///
435    /// let item = tree.get("a", 1)?;
436    /// assert_eq!(Some("my_value".as_bytes().into()), item);
437    /// #
438    /// # Ok::<(), lsm_tree::Error>(())
439    /// ```
440    ///
441    /// # Errors
442    ///
443    /// Will return `Err` if an IO error occurs.
444    fn get<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<Option<UserValue>>;
445
446    /// Returns `true` if the tree contains the specified key.
447    ///
448    /// # Examples
449    ///
450    /// ```
451    /// # let folder = tempfile::tempdir()?;
452    /// # use lsm_tree::{AbstractTree, Config, Tree};
453    /// #
454    /// let tree = Config::new(folder).open()?;
455    /// assert!(!tree.contains_key("a", 0)?);
456    ///
457    /// tree.insert("a", "abc", 0);
458    /// assert!(tree.contains_key("a", 1)?);
459    /// #
460    /// # Ok::<(), lsm_tree::Error>(())
461    /// ```
462    ///
463    /// # Errors
464    ///
465    /// Will return `Err` if an IO error occurs.
466    fn contains_key<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<bool> {
467        self.get(key, seqno).map(|x| x.is_some())
468    }
469
470    /// Inserts a key-value pair into the tree.
471    ///
472    /// If the key already exists, the item will be overwritten.
473    ///
474    /// Returns the added item's size and new size of the memtable.
475    ///
476    /// # Examples
477    ///
478    /// ```
479    /// # let folder = tempfile::tempdir()?;
480    /// use lsm_tree::{AbstractTree, Config, Tree};
481    ///
482    /// let tree = Config::new(folder).open()?;
483    /// tree.insert("a", "abc", 0);
484    /// #
485    /// # Ok::<(), lsm_tree::Error>(())
486    /// ```
487    ///
488    /// # Errors
489    ///
490    /// Will return `Err` if an IO error occurs.
491    fn insert<K: Into<UserKey>, V: Into<UserValue>>(
492        &self,
493        key: K,
494        value: V,
495        seqno: SeqNo,
496    ) -> (u64, u64);
497
498    /// Removes an item from the tree.
499    ///
500    /// Returns the added item's size and new size of the memtable.
501    ///
502    /// # Examples
503    ///
504    /// ```
505    /// # let folder = tempfile::tempdir()?;
506    /// # use lsm_tree::{AbstractTree, Config, Tree};
507    /// #
508    /// # let tree = Config::new(folder).open()?;
509    /// tree.insert("a", "abc", 0);
510    ///
511    /// let item = tree.get("a", 1)?.expect("should have item");
512    /// assert_eq!("abc".as_bytes(), &*item);
513    ///
514    /// tree.remove("a", 1);
515    ///
516    /// let item = tree.get("a", 2)?;
517    /// assert_eq!(None, item);
518    /// #
519    /// # Ok::<(), lsm_tree::Error>(())
520    /// ```
521    ///
522    /// # Errors
523    ///
524    /// Will return `Err` if an IO error occurs.
525    fn remove<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u64, u64);
526
527    /// Removes an item from the tree.
528    ///
529    /// The tombstone marker of this delete operation will vanish when it
530    /// collides with its corresponding insertion.
531    /// This may cause older versions of the value to be resurrected, so it should
532    /// only be used and preferred in scenarios where a key is only ever written once.
533    ///
534    /// Returns the added item's size and new size of the memtable.
535    ///
536    /// # Examples
537    ///
538    /// ```
539    /// # let folder = tempfile::tempdir()?;
540    /// # use lsm_tree::{AbstractTree, Config, Tree};
541    /// #
542    /// # let tree = Config::new(folder).open()?;
543    /// tree.insert("a", "abc", 0);
544    ///
545    /// let item = tree.get("a", 1)?.expect("should have item");
546    /// assert_eq!("abc".as_bytes(), &*item);
547    ///
548    /// tree.remove_weak("a", 1);
549    ///
550    /// let item = tree.get("a", 2)?;
551    /// assert_eq!(None, item);
552    /// #
553    /// # Ok::<(), lsm_tree::Error>(())
554    /// ```
555    ///
556    /// # Errors
557    ///
558    /// Will return `Err` if an IO error occurs.
559    #[doc(hidden)]
560    fn remove_weak<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u64, u64);
561}