lsm_tree/abstract.rs
1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::{
6 blob_tree::FragmentationMap, compaction::CompactionStrategy, config::TreeType,
7 iter_guard::IterGuardImpl, segment::Segment, tree::inner::MemtableId, version::Version,
8 vlog::BlobFile, AnyTree, BlobTree, Config, Guard, InternalValue, KvPair, Memtable, SegmentId,
9 SeqNo, SequenceNumberCounter, Tree, TreeId, UserKey, UserValue,
10};
11use enum_dispatch::enum_dispatch;
12use std::{ops::RangeBounds, sync::Arc};
13
14pub type RangeItem = crate::Result<KvPair>;
15
16/// Generic Tree API
17#[allow(clippy::module_name_repetitions)]
18#[enum_dispatch]
19pub trait AbstractTree {
20 #[doc(hidden)]
21 fn next_table_id(&self) -> SegmentId;
22
23 #[doc(hidden)]
24 fn id(&self) -> TreeId;
25
26 #[doc(hidden)]
27 fn get_internal_entry(&self, key: &[u8], seqno: SeqNo) -> crate::Result<Option<InternalValue>>;
28
29 #[doc(hidden)]
30 fn current_version(&self) -> Version;
31
32 /// Synchronously flushes the active memtable to a disk segment.
33 ///
34 /// The function may not return a result, if, during concurrent workloads, the memtable
35 /// ends up being empty before the flush is set up.
36 ///
37 /// The result will contain the [`Segment`].
38 ///
39 /// # Errors
40 ///
41 /// Will return `Err` if an IO error occurs.
42 #[doc(hidden)]
43 fn flush_active_memtable(&self, seqno_threshold: SeqNo) -> crate::Result<Option<Segment>>;
44
45 /// Returns an iterator that scans through the entire tree.
46 ///
47 /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
48 fn iter(
49 &self,
50 seqno: SeqNo,
51 index: Option<Arc<Memtable>>,
52 ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl<'_>> + '_> {
53 self.range::<&[u8], _>(.., seqno, index)
54 }
55
56 /// Returns an iterator over a prefixed set of items.
57 ///
58 /// Avoid using an empty prefix as it may scan a lot of items (unless limited).
59 fn prefix<K: AsRef<[u8]>>(
60 &self,
61 prefix: K,
62 seqno: SeqNo,
63 index: Option<Arc<Memtable>>,
64 ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl<'_>> + '_>;
65
66 /// Returns an iterator over a range of items.
67 ///
68 /// Avoid using full or unbounded ranges as they may scan a lot of items (unless limited).
69 fn range<K: AsRef<[u8]>, R: RangeBounds<K>>(
70 &self,
71 range: R,
72 seqno: SeqNo,
73 index: Option<Arc<Memtable>>,
74 ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl<'_>> + '_>;
75
76 /// Ingests a sorted stream of key-value pairs into the tree.
77 ///
78 /// Can only be called on a new fresh, empty tree.
79 ///
80 /// # Errors
81 ///
82 /// Will return `Err` if an IO error occurs.
83 ///
84 /// # Panics
85 ///
86 /// Panics if the tree is **not** initially empty.
87 ///
88 /// Will panic if the input iterator is not sorted in ascending order.
89 #[doc(hidden)]
90 fn ingest(
91 &self,
92 iter: impl Iterator<Item = (UserKey, UserValue)>,
93 seqno_generator: &SequenceNumberCounter,
94 visible_seqno: &SequenceNumberCounter,
95 ) -> crate::Result<()>;
96
97 /// Returns the approximate number of tombstones in the tree.
98 fn tombstone_count(&self) -> u64;
99
100 // TODO: clear() with Nuke compaction strategy (write lock) -> drop_range(..)
101
102 /// Drops segments that are fully contained in a given range.
103 ///
104 /// Accepts any `RangeBounds`, including unbounded or exclusive endpoints.
105 /// If the normalized lower bound is greater than the upper bound, the
106 /// method returns without performing any work.
107 ///
108 /// # Errors
109 ///
110 /// Will return `Err` only if an IO error occurs during compaction.
111 fn drop_range<K: AsRef<[u8]>, R: RangeBounds<K>>(&self, range: R) -> crate::Result<()>;
112
113 /// Performs major compaction, blocking the caller until it's done.
114 ///
115 /// # Errors
116 ///
117 /// Will return `Err` if an IO error occurs.
118 fn major_compact(&self, target_size: u64, seqno_threshold: SeqNo) -> crate::Result<()>;
119
120 /// Returns the disk space used by stale blobs.
121 fn stale_blob_bytes(&self) -> u64 {
122 0
123 }
124
125 /// Gets the space usage of all filters in the tree.
126 ///
127 /// May not correspond to the actual memory size because filter blocks may be paged out.
128 fn filter_size(&self) -> usize;
129
130 /// Gets the memory usage of all pinned filters in the tree.
131 fn pinned_filter_size(&self) -> usize;
132
133 /// Gets the memory usage of all pinned index blocks in the tree.
134 fn pinned_block_index_size(&self) -> usize;
135
136 /// Gets the length of the version free list.
137 fn version_free_list_len(&self) -> usize;
138
139 /// Returns the metrics structure.
140 #[cfg(feature = "metrics")]
141 fn metrics(&self) -> &Arc<crate::Metrics>;
142
143 /// Synchronously flushes a memtable to a disk segment.
144 ///
145 /// This method will not make the segment immediately available,
146 /// use [`AbstractTree::register_segments`] for that.
147 ///
148 /// # Errors
149 ///
150 /// Will return `Err` if an IO error occurs.
151 #[warn(clippy::type_complexity)]
152 fn flush_memtable(
153 &self,
154 segment_id: SegmentId, // TODO: remove?
155 memtable: &Arc<Memtable>,
156 seqno_threshold: SeqNo,
157 ) -> crate::Result<Option<(Segment, Option<BlobFile>)>>;
158
159 /// Atomically registers flushed disk segments into the tree, removing their associated sealed memtables.
160 ///
161 /// # Errors
162 ///
163 /// Will return `Err` if an IO error occurs.
164 fn register_segments(
165 &self,
166 segments: &[Segment],
167 blob_files: Option<&[BlobFile]>,
168 frag_map: Option<FragmentationMap>,
169 seqno_threshold: SeqNo,
170 ) -> crate::Result<()>;
171
172 /// Clears the active memtable atomically.
173 fn clear_active_memtable(&self);
174
175 /// Sets the active memtable.
176 ///
177 /// May be used to restore the LSM-tree's in-memory state from a write-ahead log
178 /// after tree recovery.
179 fn set_active_memtable(&self, memtable: Memtable);
180
181 /// Returns the number of sealed memtables.
182 fn sealed_memtable_count(&self) -> usize;
183
184 /// Adds a sealed memtables.
185 ///
186 /// May be used to restore the LSM-tree's in-memory state from some journals.
187 fn add_sealed_memtable(&self, id: MemtableId, memtable: Arc<Memtable>);
188
189 /// Performs compaction on the tree's levels, blocking the caller until it's done.
190 ///
191 /// # Errors
192 ///
193 /// Will return `Err` if an IO error occurs.
194 fn compact(
195 &self,
196 strategy: Arc<dyn CompactionStrategy>,
197 seqno_threshold: SeqNo,
198 ) -> crate::Result<()>;
199
200 /// Returns the next segment's ID.
201 fn get_next_segment_id(&self) -> SegmentId;
202
203 /// Returns the tree config.
204 fn tree_config(&self) -> &Config;
205
206 /// Returns the highest sequence number.
207 fn get_highest_seqno(&self) -> Option<SeqNo> {
208 let memtable_seqno = self.get_highest_memtable_seqno();
209 let segment_seqno = self.get_highest_persisted_seqno();
210 memtable_seqno.max(segment_seqno)
211 }
212
213 /// Returns the approximate size of the active memtable in bytes.
214 ///
215 /// May be used to flush the memtable if it grows too large.
216 fn active_memtable_size(&self) -> u64;
217
218 /// Returns the tree type.
219 fn tree_type(&self) -> TreeType;
220
221 /// Seals the active memtable, and returns a reference to it.
222 fn rotate_memtable(&self) -> Option<(MemtableId, Arc<Memtable>)>;
223
224 /// Returns the number of disk segments currently in the tree.
225 fn segment_count(&self) -> usize;
226
227 /// Returns the number of segments in `levels[idx]`.
228 ///
229 /// Returns `None` if the level does not exist (if idx >= 7).
230 fn level_segment_count(&self, idx: usize) -> Option<usize>;
231
232 /// Returns the number of disjoint runs in L0.
233 ///
234 /// Can be used to determine whether to write stall.
235 fn l0_run_count(&self) -> usize;
236
237 /// Returns the number of blob files currently in the tree.
238 fn blob_file_count(&self) -> usize {
239 0
240 }
241
242 /// Approximates the number of items in the tree.
243 fn approximate_len(&self) -> usize;
244
245 /// Returns the disk space usage.
246 fn disk_space(&self) -> u64;
247
248 /// Returns the highest sequence number of the active memtable.
249 fn get_highest_memtable_seqno(&self) -> Option<SeqNo>;
250
251 /// Returns the highest sequence number that is flushed to disk.
252 fn get_highest_persisted_seqno(&self) -> Option<SeqNo>;
253
254 /// Scans the entire tree, returning the number of items.
255 ///
256 /// ###### Caution
257 ///
258 /// This operation scans the entire tree: O(n) complexity!
259 ///
260 /// Never, under any circumstances, use .`len()` == 0 to check
261 /// if the tree is empty, use [`Tree::is_empty`] instead.
262 ///
263 /// # Examples
264 ///
265 /// ```
266 /// # use lsm_tree::Error as TreeError;
267 /// use lsm_tree::{AbstractTree, Config, Tree};
268 ///
269 /// let folder = tempfile::tempdir()?;
270 /// let tree = Config::new(folder).open()?;
271 ///
272 /// assert_eq!(tree.len(0, None)?, 0);
273 /// tree.insert("1", "abc", 0);
274 /// tree.insert("3", "abc", 1);
275 /// tree.insert("5", "abc", 2);
276 /// assert_eq!(tree.len(3, None)?, 3);
277 /// #
278 /// # Ok::<(), TreeError>(())
279 /// ```
280 ///
281 /// # Errors
282 ///
283 /// Will return `Err` if an IO error occurs.
284 fn len(&self, seqno: SeqNo, index: Option<Arc<Memtable>>) -> crate::Result<usize> {
285 let mut count = 0;
286
287 for item in self.iter(seqno, index) {
288 let _ = item.key()?;
289 count += 1;
290 }
291
292 Ok(count)
293 }
294
295 /// Returns `true` if the tree is empty.
296 ///
297 /// This operation has O(log N) complexity.
298 ///
299 /// # Examples
300 ///
301 /// ```
302 /// # let folder = tempfile::tempdir()?;
303 /// use lsm_tree::{AbstractTree, Config, Tree};
304 ///
305 /// let tree = Config::new(folder).open()?;
306 /// assert!(tree.is_empty(0, None)?);
307 ///
308 /// tree.insert("a", "abc", 0);
309 /// assert!(!tree.is_empty(1, None)?);
310 /// #
311 /// # Ok::<(), lsm_tree::Error>(())
312 /// ```
313 ///
314 /// # Errors
315 ///
316 /// Will return `Err` if an IO error occurs.
317 fn is_empty(&self, seqno: SeqNo, index: Option<Arc<Memtable>>) -> crate::Result<bool> {
318 self.first_key_value(seqno, index).map(|x| x.is_none())
319 }
320
321 /// Returns the first key-value pair in the tree.
322 /// The key in this pair is the minimum key in the tree.
323 ///
324 /// # Examples
325 ///
326 /// ```
327 /// # use lsm_tree::Error as TreeError;
328 /// # use lsm_tree::{AbstractTree, Config, Tree};
329 /// #
330 /// # let folder = tempfile::tempdir()?;
331 /// let tree = Config::new(folder).open()?;
332 ///
333 /// tree.insert("1", "abc", 0);
334 /// tree.insert("3", "abc", 1);
335 /// tree.insert("5", "abc", 2);
336 ///
337 /// let (key, _) = tree.first_key_value(3, None)?.expect("item should exist");
338 /// assert_eq!(&*key, "1".as_bytes());
339 /// #
340 /// # Ok::<(), TreeError>(())
341 /// ```
342 ///
343 /// # Errors
344 ///
345 /// Will return `Err` if an IO error occurs.
346 fn first_key_value(
347 &self,
348 seqno: SeqNo,
349 index: Option<Arc<Memtable>>,
350 ) -> crate::Result<Option<KvPair>> {
351 self.iter(seqno, index)
352 .next()
353 .map(Guard::into_inner)
354 .transpose()
355 }
356
357 /// Returns the last key-value pair in the tree.
358 /// The key in this pair is the maximum key in the tree.
359 ///
360 /// # Examples
361 ///
362 /// ```
363 /// # use lsm_tree::Error as TreeError;
364 /// # use lsm_tree::{AbstractTree, Config, Tree};
365 /// #
366 /// # let folder = tempfile::tempdir()?;
367 /// # let tree = Config::new(folder).open()?;
368 /// #
369 /// tree.insert("1", "abc", 0);
370 /// tree.insert("3", "abc", 1);
371 /// tree.insert("5", "abc", 2);
372 ///
373 /// let (key, _) = tree.last_key_value(3, None)?.expect("item should exist");
374 /// assert_eq!(&*key, "5".as_bytes());
375 /// #
376 /// # Ok::<(), TreeError>(())
377 /// ```
378 ///
379 /// # Errors
380 ///
381 /// Will return `Err` if an IO error occurs.
382 fn last_key_value(
383 &self,
384 seqno: SeqNo,
385 index: Option<Arc<Memtable>>,
386 ) -> crate::Result<Option<KvPair>> {
387 self.iter(seqno, index)
388 .next_back()
389 .map(Guard::into_inner)
390 .transpose()
391 }
392
393 /// Returns the size of a value if it exists.
394 ///
395 /// # Examples
396 ///
397 /// ```
398 /// # let folder = tempfile::tempdir()?;
399 /// use lsm_tree::{AbstractTree, Config, Tree};
400 ///
401 /// let tree = Config::new(folder).open()?;
402 /// tree.insert("a", "my_value", 0);
403 ///
404 /// let size = tree.size_of("a", 1)?.unwrap_or_default();
405 /// assert_eq!("my_value".len() as u32, size);
406 ///
407 /// let size = tree.size_of("b", 1)?.unwrap_or_default();
408 /// assert_eq!(0, size);
409 /// #
410 /// # Ok::<(), lsm_tree::Error>(())
411 /// ```
412 ///
413 /// # Errors
414 ///
415 /// Will return `Err` if an IO error occurs.
416 fn size_of<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<Option<u32>>;
417
418 /// Retrieves an item from the tree.
419 ///
420 /// # Examples
421 ///
422 /// ```
423 /// # let folder = tempfile::tempdir()?;
424 /// use lsm_tree::{AbstractTree, Config, Tree};
425 ///
426 /// let tree = Config::new(folder).open()?;
427 /// tree.insert("a", "my_value", 0);
428 ///
429 /// let item = tree.get("a", 1)?;
430 /// assert_eq!(Some("my_value".as_bytes().into()), item);
431 /// #
432 /// # Ok::<(), lsm_tree::Error>(())
433 /// ```
434 ///
435 /// # Errors
436 ///
437 /// Will return `Err` if an IO error occurs.
438 fn get<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<Option<UserValue>>;
439
440 /// Returns `true` if the tree contains the specified key.
441 ///
442 /// # Examples
443 ///
444 /// ```
445 /// # let folder = tempfile::tempdir()?;
446 /// # use lsm_tree::{AbstractTree, Config, Tree};
447 /// #
448 /// let tree = Config::new(folder).open()?;
449 /// assert!(!tree.contains_key("a", 0)?);
450 ///
451 /// tree.insert("a", "abc", 0);
452 /// assert!(tree.contains_key("a", 1)?);
453 /// #
454 /// # Ok::<(), lsm_tree::Error>(())
455 /// ```
456 ///
457 /// # Errors
458 ///
459 /// Will return `Err` if an IO error occurs.
460 fn contains_key<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<bool> {
461 self.get(key, seqno).map(|x| x.is_some())
462 }
463
464 /// Inserts a key-value pair into the tree.
465 ///
466 /// If the key already exists, the item will be overwritten.
467 ///
468 /// Returns the added item's size and new size of the memtable.
469 ///
470 /// # Examples
471 ///
472 /// ```
473 /// # let folder = tempfile::tempdir()?;
474 /// use lsm_tree::{AbstractTree, Config, Tree};
475 ///
476 /// let tree = Config::new(folder).open()?;
477 /// tree.insert("a", "abc", 0);
478 /// #
479 /// # Ok::<(), lsm_tree::Error>(())
480 /// ```
481 ///
482 /// # Errors
483 ///
484 /// Will return `Err` if an IO error occurs.
485 fn insert<K: Into<UserKey>, V: Into<UserValue>>(
486 &self,
487 key: K,
488 value: V,
489 seqno: SeqNo,
490 ) -> (u64, u64);
491
492 /// Removes an item from the tree.
493 ///
494 /// Returns the added item's size and new size of the memtable.
495 ///
496 /// # Examples
497 ///
498 /// ```
499 /// # let folder = tempfile::tempdir()?;
500 /// # use lsm_tree::{AbstractTree, Config, Tree};
501 /// #
502 /// # let tree = Config::new(folder).open()?;
503 /// tree.insert("a", "abc", 0);
504 ///
505 /// let item = tree.get("a", 1)?.expect("should have item");
506 /// assert_eq!("abc".as_bytes(), &*item);
507 ///
508 /// tree.remove("a", 1);
509 ///
510 /// let item = tree.get("a", 2)?;
511 /// assert_eq!(None, item);
512 /// #
513 /// # Ok::<(), lsm_tree::Error>(())
514 /// ```
515 ///
516 /// # Errors
517 ///
518 /// Will return `Err` if an IO error occurs.
519 fn remove<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u64, u64);
520
521 /// Removes an item from the tree.
522 ///
523 /// The tombstone marker of this delete operation will vanish when it
524 /// collides with its corresponding insertion.
525 /// This may cause older versions of the value to be resurrected, so it should
526 /// only be used and preferred in scenarios where a key is only ever written once.
527 ///
528 /// Returns the added item's size and new size of the memtable.
529 ///
530 /// # Examples
531 ///
532 /// ```
533 /// # let folder = tempfile::tempdir()?;
534 /// # use lsm_tree::{AbstractTree, Config, Tree};
535 /// #
536 /// # let tree = Config::new(folder).open()?;
537 /// tree.insert("a", "abc", 0);
538 ///
539 /// let item = tree.get("a", 1)?.expect("should have item");
540 /// assert_eq!("abc".as_bytes(), &*item);
541 ///
542 /// tree.remove_weak("a", 1);
543 ///
544 /// let item = tree.get("a", 2)?;
545 /// assert_eq!(None, item);
546 /// #
547 /// # Ok::<(), lsm_tree::Error>(())
548 /// ```
549 ///
550 /// # Errors
551 ///
552 /// Will return `Err` if an IO error occurs.
553 #[doc(hidden)]
554 fn remove_weak<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u64, u64);
555}