lsm_tree/
abstract.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::{
6    blob_tree::FragmentationMap, compaction::CompactionStrategy, config::TreeType,
7    iter_guard::IterGuardImpl, table::Table, tree::inner::MemtableId, version::Version,
8    vlog::BlobFile, AnyTree, BlobTree, Config, Guard, InternalValue, KvPair, Memtable, SeqNo,
9    SequenceNumberCounter, TableId, Tree, TreeId, UserKey, UserValue,
10};
11use enum_dispatch::enum_dispatch;
12use std::{ops::RangeBounds, sync::Arc};
13
14pub type RangeItem = crate::Result<KvPair>;
15
16/// Generic Tree API
17#[enum_dispatch]
18pub trait AbstractTree {
19    #[doc(hidden)]
20    fn next_table_id(&self) -> TableId;
21
22    #[doc(hidden)]
23    fn id(&self) -> TreeId;
24
25    #[doc(hidden)]
26    fn get_internal_entry(&self, key: &[u8], seqno: SeqNo) -> crate::Result<Option<InternalValue>>;
27
28    #[doc(hidden)]
29    fn current_version(&self) -> Version;
30
31    /// Synchronously flushes the active memtable to a table.
32    ///
33    /// The function may not return a result, if, during concurrent workloads, the memtable
34    /// ends up being empty before the flush is set up.
35    ///
36    /// The result will contain the [`Table`].
37    ///
38    /// # Errors
39    ///
40    /// Will return `Err` if an IO error occurs.
41    #[doc(hidden)]
42    fn flush_active_memtable(&self, seqno_threshold: SeqNo) -> crate::Result<Option<Table>>;
43
44    /// Returns an iterator that scans through the entire tree.
45    ///
46    /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
47    fn iter(
48        &self,
49        seqno: SeqNo,
50        index: Option<Arc<Memtable>>,
51    ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl> + Send + 'static> {
52        self.range::<&[u8], _>(.., seqno, index)
53    }
54
55    /// Returns an iterator over a prefixed set of items.
56    ///
57    /// Avoid using an empty prefix as it may scan a lot of items (unless limited).
58    fn prefix<K: AsRef<[u8]>>(
59        &self,
60        prefix: K,
61        seqno: SeqNo,
62        index: Option<Arc<Memtable>>,
63    ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl> + Send + 'static>;
64
65    /// Returns an iterator over a range of items.
66    ///
67    /// Avoid using full or unbounded ranges as they may scan a lot of items (unless limited).
68    fn range<K: AsRef<[u8]>, R: RangeBounds<K>>(
69        &self,
70        range: R,
71        seqno: SeqNo,
72        index: Option<Arc<Memtable>>,
73    ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl> + Send + 'static>;
74
75    /// Ingests a sorted stream of key-value pairs into the tree.
76    ///
77    /// Can only be called on a new fresh, empty tree.
78    ///
79    /// # Errors
80    ///
81    /// Will return `Err` if an IO error occurs.
82    ///
83    /// # Panics
84    ///
85    /// Panics if the tree is **not** initially empty.
86    ///
87    /// Will panic if the input iterator is not sorted in ascending order.
88    #[doc(hidden)]
89    fn ingest(
90        &self,
91        iter: impl Iterator<Item = (UserKey, UserValue)>,
92        seqno_generator: &SequenceNumberCounter,
93        visible_seqno: &SequenceNumberCounter,
94    ) -> crate::Result<()>;
95
96    /// Returns the approximate number of tombstones in the tree.
97    fn tombstone_count(&self) -> u64;
98
99    /// Returns the approximate number of weak tombstones (single deletes) in the tree.
100    fn weak_tombstone_count(&self) -> u64;
101
102    /// Returns the approximate number of values reclaimable once weak tombstones can be GC'd.
103    fn weak_tombstone_reclaimable_count(&self) -> u64;
104
105    // TODO: clear() with Nuke compaction strategy (write lock) -> drop_range(..)
106
107    /// Drops tables that are fully contained in a given range.
108    ///
109    /// Accepts any `RangeBounds`, including unbounded or exclusive endpoints.
110    /// If the normalized lower bound is greater than the upper bound, the
111    /// method returns without performing any work.
112    ///
113    /// # Errors
114    ///
115    /// Will return `Err` only if an IO error occurs during compaction.
116    fn drop_range<K: AsRef<[u8]>, R: RangeBounds<K>>(&self, range: R) -> crate::Result<()>;
117
118    /// Performs major compaction, blocking the caller until it's done.
119    ///
120    /// # Errors
121    ///
122    /// Will return `Err` if an IO error occurs.
123    fn major_compact(&self, target_size: u64, seqno_threshold: SeqNo) -> crate::Result<()>;
124
125    /// Returns the disk space used by stale blobs.
126    fn stale_blob_bytes(&self) -> u64 {
127        0
128    }
129
130    /// Gets the space usage of all filters in the tree.
131    ///
132    /// May not correspond to the actual memory size because filter blocks may be paged out.
133    fn filter_size(&self) -> usize;
134
135    /// Gets the memory usage of all pinned filters in the tree.
136    fn pinned_filter_size(&self) -> usize;
137
138    /// Gets the memory usage of all pinned index blocks in the tree.
139    fn pinned_block_index_size(&self) -> usize;
140
141    /// Gets the length of the version free list.
142    fn version_free_list_len(&self) -> usize;
143
144    /// Returns the metrics structure.
145    #[cfg(feature = "metrics")]
146    fn metrics(&self) -> &Arc<crate::Metrics>;
147
148    /// Synchronously flushes a memtable to a table.
149    ///
150    /// This method will not make the table immediately available,
151    /// use [`AbstractTree::register_tables`] for that.
152    ///
153    /// # Errors
154    ///
155    /// Will return `Err` if an IO error occurs.
156    #[warn(clippy::type_complexity)]
157    fn flush_memtable(
158        &self,
159        table_id: TableId, // TODO: remove?
160        memtable: &Arc<Memtable>,
161        seqno_threshold: SeqNo,
162    ) -> crate::Result<Option<(Table, Option<BlobFile>)>>;
163
164    /// Atomically registers flushed tables into the tree, removing their associated sealed memtables.
165    ///
166    /// # Errors
167    ///
168    /// Will return `Err` if an IO error occurs.
169    fn register_tables(
170        &self,
171        tables: &[Table],
172        blob_files: Option<&[BlobFile]>,
173        frag_map: Option<FragmentationMap>,
174    ) -> crate::Result<()>;
175
176    /// Clears the active memtable atomically.
177    fn clear_active_memtable(&self);
178
179    /// Sets the active memtable.
180    ///
181    /// May be used to restore the LSM-tree's in-memory state from a write-ahead log
182    /// after tree recovery.
183    fn set_active_memtable(&self, memtable: Memtable);
184
185    /// Returns the number of sealed memtables.
186    fn sealed_memtable_count(&self) -> usize;
187
188    /// Adds a sealed memtables.
189    ///
190    /// May be used to restore the LSM-tree's in-memory state from some journals.
191    fn add_sealed_memtable(&self, id: MemtableId, memtable: Arc<Memtable>);
192
193    /// Performs compaction on the tree's levels, blocking the caller until it's done.
194    ///
195    /// # Errors
196    ///
197    /// Will return `Err` if an IO error occurs.
198    fn compact(
199        &self,
200        strategy: Arc<dyn CompactionStrategy>,
201        seqno_threshold: SeqNo,
202    ) -> crate::Result<()>;
203
204    /// Returns the next table's ID.
205    fn get_next_table_id(&self) -> TableId;
206
207    /// Returns the tree config.
208    fn tree_config(&self) -> &Config;
209
210    /// Returns the highest sequence number.
211    fn get_highest_seqno(&self) -> Option<SeqNo> {
212        let memtable_seqno = self.get_highest_memtable_seqno();
213        let table_seqno = self.get_highest_persisted_seqno();
214        memtable_seqno.max(table_seqno)
215    }
216
217    /// Returns the approximate size of the active memtable in bytes.
218    ///
219    /// May be used to flush the memtable if it grows too large.
220    fn active_memtable_size(&self) -> u64;
221
222    /// Returns the tree type.
223    fn tree_type(&self) -> TreeType;
224
225    /// Seals the active memtable, and returns a reference to it.
226    fn rotate_memtable(&self) -> Option<(MemtableId, Arc<Memtable>)>;
227
228    /// Returns the number of tables currently in the tree.
229    fn table_count(&self) -> usize;
230
231    /// Returns the number of tables in `levels[idx]`.
232    ///
233    /// Returns `None` if the level does not exist (if idx >= 7).
234    fn level_table_count(&self, idx: usize) -> Option<usize>;
235
236    /// Returns the number of disjoint runs in L0.
237    ///
238    /// Can be used to determine whether to write stall.
239    fn l0_run_count(&self) -> usize;
240
241    /// Returns the number of blob files currently in the tree.
242    fn blob_file_count(&self) -> usize {
243        0
244    }
245
246    /// Approximates the number of items in the tree.
247    fn approximate_len(&self) -> usize;
248
249    /// Returns the disk space usage.
250    fn disk_space(&self) -> u64;
251
252    /// Returns the highest sequence number of the active memtable.
253    fn get_highest_memtable_seqno(&self) -> Option<SeqNo>;
254
255    /// Returns the highest sequence number that is flushed to disk.
256    fn get_highest_persisted_seqno(&self) -> Option<SeqNo>;
257
258    /// Scans the entire tree, returning the number of items.
259    ///
260    /// ###### Caution
261    ///
262    /// This operation scans the entire tree: O(n) complexity!
263    ///
264    /// Never, under any circumstances, use .`len()` == 0 to check
265    /// if the tree is empty, use [`Tree::is_empty`] instead.
266    ///
267    /// # Examples
268    ///
269    /// ```
270    /// # use lsm_tree::Error as TreeError;
271    /// use lsm_tree::{AbstractTree, Config, Tree};
272    ///
273    /// let folder = tempfile::tempdir()?;
274    /// let tree = Config::new(folder, Default::default()).open()?;
275    ///
276    /// assert_eq!(tree.len(0, None)?, 0);
277    /// tree.insert("1", "abc", 0);
278    /// tree.insert("3", "abc", 1);
279    /// tree.insert("5", "abc", 2);
280    /// assert_eq!(tree.len(3, None)?, 3);
281    /// #
282    /// # Ok::<(), TreeError>(())
283    /// ```
284    ///
285    /// # Errors
286    ///
287    /// Will return `Err` if an IO error occurs.
288    fn len(&self, seqno: SeqNo, index: Option<Arc<Memtable>>) -> crate::Result<usize> {
289        let mut count = 0;
290
291        for item in self.iter(seqno, index) {
292            let _ = item.key()?;
293            count += 1;
294        }
295
296        Ok(count)
297    }
298
299    /// Returns `true` if the tree is empty.
300    ///
301    /// This operation has O(log N) complexity.
302    ///
303    /// # Examples
304    ///
305    /// ```
306    /// # let folder = tempfile::tempdir()?;
307    /// use lsm_tree::{AbstractTree, Config, Tree};
308    ///
309    /// let tree = Config::new(folder, Default::default()).open()?;
310    /// assert!(tree.is_empty(0, None)?);
311    ///
312    /// tree.insert("a", "abc", 0);
313    /// assert!(!tree.is_empty(1, None)?);
314    /// #
315    /// # Ok::<(), lsm_tree::Error>(())
316    /// ```
317    ///
318    /// # Errors
319    ///
320    /// Will return `Err` if an IO error occurs.
321    fn is_empty(&self, seqno: SeqNo, index: Option<Arc<Memtable>>) -> crate::Result<bool> {
322        self.first_key_value(seqno, index).map(|x| x.is_none())
323    }
324
325    /// Returns the first key-value pair in the tree.
326    /// The key in this pair is the minimum key in the tree.
327    ///
328    /// # Examples
329    ///
330    /// ```
331    /// # use lsm_tree::Error as TreeError;
332    /// # use lsm_tree::{AbstractTree, Config, Tree};
333    /// #
334    /// # let folder = tempfile::tempdir()?;
335    /// let tree = Config::new(folder, Default::default()).open()?;
336    ///
337    /// tree.insert("1", "abc", 0);
338    /// tree.insert("3", "abc", 1);
339    /// tree.insert("5", "abc", 2);
340    ///
341    /// let (key, _) = tree.first_key_value(3, None)?.expect("item should exist");
342    /// assert_eq!(&*key, "1".as_bytes());
343    /// #
344    /// # Ok::<(), TreeError>(())
345    /// ```
346    ///
347    /// # Errors
348    ///
349    /// Will return `Err` if an IO error occurs.
350    fn first_key_value(
351        &self,
352        seqno: SeqNo,
353        index: Option<Arc<Memtable>>,
354    ) -> crate::Result<Option<KvPair>> {
355        self.iter(seqno, index)
356            .next()
357            .map(Guard::into_inner)
358            .transpose()
359    }
360
361    /// Returns the last key-value pair in the tree.
362    /// The key in this pair is the maximum key in the tree.
363    ///
364    /// # Examples
365    ///
366    /// ```
367    /// # use lsm_tree::Error as TreeError;
368    /// # use lsm_tree::{AbstractTree, Config, Tree};
369    /// #
370    /// # let folder = tempfile::tempdir()?;
371    /// # let tree = Config::new(folder, Default::default()).open()?;
372    /// #
373    /// tree.insert("1", "abc", 0);
374    /// tree.insert("3", "abc", 1);
375    /// tree.insert("5", "abc", 2);
376    ///
377    /// let (key, _) = tree.last_key_value(3, None)?.expect("item should exist");
378    /// assert_eq!(&*key, "5".as_bytes());
379    /// #
380    /// # Ok::<(), TreeError>(())
381    /// ```
382    ///
383    /// # Errors
384    ///
385    /// Will return `Err` if an IO error occurs.
386    fn last_key_value(
387        &self,
388        seqno: SeqNo,
389        index: Option<Arc<Memtable>>,
390    ) -> crate::Result<Option<KvPair>> {
391        self.iter(seqno, index)
392            .next_back()
393            .map(Guard::into_inner)
394            .transpose()
395    }
396
397    /// Returns the size of a value if it exists.
398    ///
399    /// # Examples
400    ///
401    /// ```
402    /// # let folder = tempfile::tempdir()?;
403    /// use lsm_tree::{AbstractTree, Config, Tree};
404    ///
405    /// let tree = Config::new(folder, Default::default()).open()?;
406    /// tree.insert("a", "my_value", 0);
407    ///
408    /// let size = tree.size_of("a", 1)?.unwrap_or_default();
409    /// assert_eq!("my_value".len() as u32, size);
410    ///
411    /// let size = tree.size_of("b", 1)?.unwrap_or_default();
412    /// assert_eq!(0, size);
413    /// #
414    /// # Ok::<(), lsm_tree::Error>(())
415    /// ```
416    ///
417    /// # Errors
418    ///
419    /// Will return `Err` if an IO error occurs.
420    fn size_of<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<Option<u32>>;
421
422    /// Retrieves an item from the tree.
423    ///
424    /// # Examples
425    ///
426    /// ```
427    /// # let folder = tempfile::tempdir()?;
428    /// use lsm_tree::{AbstractTree, Config, Tree};
429    ///
430    /// let tree = Config::new(folder, Default::default()).open()?;
431    /// tree.insert("a", "my_value", 0);
432    ///
433    /// let item = tree.get("a", 1)?;
434    /// assert_eq!(Some("my_value".as_bytes().into()), item);
435    /// #
436    /// # Ok::<(), lsm_tree::Error>(())
437    /// ```
438    ///
439    /// # Errors
440    ///
441    /// Will return `Err` if an IO error occurs.
442    fn get<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<Option<UserValue>>;
443
444    /// Returns `true` if the tree contains the specified key.
445    ///
446    /// # Examples
447    ///
448    /// ```
449    /// # let folder = tempfile::tempdir()?;
450    /// # use lsm_tree::{AbstractTree, Config, Tree};
451    /// #
452    /// let tree = Config::new(folder, Default::default()).open()?;
453    /// assert!(!tree.contains_key("a", 0)?);
454    ///
455    /// tree.insert("a", "abc", 0);
456    /// assert!(tree.contains_key("a", 1)?);
457    /// #
458    /// # Ok::<(), lsm_tree::Error>(())
459    /// ```
460    ///
461    /// # Errors
462    ///
463    /// Will return `Err` if an IO error occurs.
464    fn contains_key<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<bool> {
465        self.get(key, seqno).map(|x| x.is_some())
466    }
467
468    /// Inserts a key-value pair into the tree.
469    ///
470    /// If the key already exists, the item will be overwritten.
471    ///
472    /// Returns the added item's size and new size of the memtable.
473    ///
474    /// # Examples
475    ///
476    /// ```
477    /// # let folder = tempfile::tempdir()?;
478    /// use lsm_tree::{AbstractTree, Config, Tree};
479    ///
480    /// let tree = Config::new(folder, Default::default()).open()?;
481    /// tree.insert("a", "abc", 0);
482    /// #
483    /// # Ok::<(), lsm_tree::Error>(())
484    /// ```
485    ///
486    /// # Errors
487    ///
488    /// Will return `Err` if an IO error occurs.
489    fn insert<K: Into<UserKey>, V: Into<UserValue>>(
490        &self,
491        key: K,
492        value: V,
493        seqno: SeqNo,
494    ) -> (u64, u64);
495
496    /// Removes an item from the tree.
497    ///
498    /// Returns the added item's size and new size of the memtable.
499    ///
500    /// # Examples
501    ///
502    /// ```
503    /// # let folder = tempfile::tempdir()?;
504    /// # use lsm_tree::{AbstractTree, Config, Tree};
505    /// #
506    /// # let tree = Config::new(folder, Default::default()).open()?;
507    /// tree.insert("a", "abc", 0);
508    ///
509    /// let item = tree.get("a", 1)?.expect("should have item");
510    /// assert_eq!("abc".as_bytes(), &*item);
511    ///
512    /// tree.remove("a", 1);
513    ///
514    /// let item = tree.get("a", 2)?;
515    /// assert_eq!(None, item);
516    /// #
517    /// # Ok::<(), lsm_tree::Error>(())
518    /// ```
519    ///
520    /// # Errors
521    ///
522    /// Will return `Err` if an IO error occurs.
523    fn remove<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u64, u64);
524
525    /// Removes an item from the tree.
526    ///
527    /// The tombstone marker of this delete operation will vanish when it
528    /// collides with its corresponding insertion.
529    /// This may cause older versions of the value to be resurrected, so it should
530    /// only be used and preferred in scenarios where a key is only ever written once.
531    ///
532    /// Returns the added item's size and new size of the memtable.
533    ///
534    /// # Examples
535    ///
536    /// ```
537    /// # let folder = tempfile::tempdir()?;
538    /// # use lsm_tree::{AbstractTree, Config, Tree};
539    /// #
540    /// # let tree = Config::new(folder, Default::default()).open()?;
541    /// tree.insert("a", "abc", 0);
542    ///
543    /// let item = tree.get("a", 1)?.expect("should have item");
544    /// assert_eq!("abc".as_bytes(), &*item);
545    ///
546    /// tree.remove_weak("a", 1);
547    ///
548    /// let item = tree.get("a", 2)?;
549    /// assert_eq!(None, item);
550    /// #
551    /// # Ok::<(), lsm_tree::Error>(())
552    /// ```
553    ///
554    /// # Errors
555    ///
556    /// Will return `Err` if an IO error occurs.
557    #[doc(hidden)]
558    fn remove_weak<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u64, u64);
559}