lsm_tree/abstract.rs
1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::{
6 blob_tree::FragmentationMap, compaction::CompactionStrategy, config::TreeType,
7 iter_guard::IterGuardImpl, table::Table, tree::inner::MemtableId, version::Version,
8 vlog::BlobFile, AnyTree, BlobTree, Config, Guard, InternalValue, KvPair, Memtable, SeqNo,
9 SequenceNumberCounter, TableId, Tree, TreeId, UserKey, UserValue,
10};
11use enum_dispatch::enum_dispatch;
12use std::{ops::RangeBounds, sync::Arc};
13
14pub type RangeItem = crate::Result<KvPair>;
15
16/// Generic Tree API
17#[enum_dispatch]
18pub trait AbstractTree {
19 #[doc(hidden)]
20 fn next_table_id(&self) -> TableId;
21
22 #[doc(hidden)]
23 fn id(&self) -> TreeId;
24
25 #[doc(hidden)]
26 fn get_internal_entry(&self, key: &[u8], seqno: SeqNo) -> crate::Result<Option<InternalValue>>;
27
28 #[doc(hidden)]
29 fn current_version(&self) -> Version;
30
31 /// Synchronously flushes the active memtable to a table.
32 ///
33 /// The function may not return a result, if, during concurrent workloads, the memtable
34 /// ends up being empty before the flush is set up.
35 ///
36 /// The result will contain the [`Table`].
37 ///
38 /// # Errors
39 ///
40 /// Will return `Err` if an IO error occurs.
41 #[doc(hidden)]
42 fn flush_active_memtable(&self, seqno_threshold: SeqNo) -> crate::Result<Option<Table>>;
43
44 /// Returns an iterator that scans through the entire tree.
45 ///
46 /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
47 fn iter(
48 &self,
49 seqno: SeqNo,
50 index: Option<Arc<Memtable>>,
51 ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl> + Send + 'static> {
52 self.range::<&[u8], _>(.., seqno, index)
53 }
54
55 /// Returns an iterator over a prefixed set of items.
56 ///
57 /// Avoid using an empty prefix as it may scan a lot of items (unless limited).
58 fn prefix<K: AsRef<[u8]>>(
59 &self,
60 prefix: K,
61 seqno: SeqNo,
62 index: Option<Arc<Memtable>>,
63 ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl> + Send + 'static>;
64
65 /// Returns an iterator over a range of items.
66 ///
67 /// Avoid using full or unbounded ranges as they may scan a lot of items (unless limited).
68 fn range<K: AsRef<[u8]>, R: RangeBounds<K>>(
69 &self,
70 range: R,
71 seqno: SeqNo,
72 index: Option<Arc<Memtable>>,
73 ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl> + Send + 'static>;
74
75 /// Ingests a sorted stream of key-value pairs into the tree.
76 ///
77 /// Can only be called on a new fresh, empty tree.
78 ///
79 /// # Errors
80 ///
81 /// Will return `Err` if an IO error occurs.
82 ///
83 /// # Panics
84 ///
85 /// Panics if the tree is **not** initially empty.
86 ///
87 /// Will panic if the input iterator is not sorted in ascending order.
88 #[doc(hidden)]
89 fn ingest(
90 &self,
91 iter: impl Iterator<Item = (UserKey, UserValue)>,
92 seqno_generator: &SequenceNumberCounter,
93 visible_seqno: &SequenceNumberCounter,
94 ) -> crate::Result<()>;
95
96 /// Returns the approximate number of tombstones in the tree.
97 fn tombstone_count(&self) -> u64;
98
99 /// Returns the approximate number of weak tombstones (single deletes) in the tree.
100 fn weak_tombstone_count(&self) -> u64;
101
102 /// Returns the approximate number of values reclaimable once weak tombstones can be GC'd.
103 fn weak_tombstone_reclaimable_count(&self) -> u64;
104
105 // TODO: clear() with Nuke compaction strategy (write lock) -> drop_range(..)
106
107 /// Drops tables that are fully contained in a given range.
108 ///
109 /// Accepts any `RangeBounds`, including unbounded or exclusive endpoints.
110 /// If the normalized lower bound is greater than the upper bound, the
111 /// method returns without performing any work.
112 ///
113 /// # Errors
114 ///
115 /// Will return `Err` only if an IO error occurs during compaction.
116 fn drop_range<K: AsRef<[u8]>, R: RangeBounds<K>>(&self, range: R) -> crate::Result<()>;
117
118 /// Performs major compaction, blocking the caller until it's done.
119 ///
120 /// # Errors
121 ///
122 /// Will return `Err` if an IO error occurs.
123 fn major_compact(&self, target_size: u64, seqno_threshold: SeqNo) -> crate::Result<()>;
124
125 /// Returns the disk space used by stale blobs.
126 fn stale_blob_bytes(&self) -> u64 {
127 0
128 }
129
130 /// Gets the space usage of all filters in the tree.
131 ///
132 /// May not correspond to the actual memory size because filter blocks may be paged out.
133 fn filter_size(&self) -> usize;
134
135 /// Gets the memory usage of all pinned filters in the tree.
136 fn pinned_filter_size(&self) -> usize;
137
138 /// Gets the memory usage of all pinned index blocks in the tree.
139 fn pinned_block_index_size(&self) -> usize;
140
141 /// Gets the length of the version free list.
142 fn version_free_list_len(&self) -> usize;
143
144 /// Returns the metrics structure.
145 #[cfg(feature = "metrics")]
146 fn metrics(&self) -> &Arc<crate::Metrics>;
147
148 /// Synchronously flushes a memtable to a table.
149 ///
150 /// This method will not make the table immediately available,
151 /// use [`AbstractTree::register_tables`] for that.
152 ///
153 /// # Errors
154 ///
155 /// Will return `Err` if an IO error occurs.
156 #[warn(clippy::type_complexity)]
157 fn flush_memtable(
158 &self,
159 table_id: TableId, // TODO: remove?
160 memtable: &Arc<Memtable>,
161 seqno_threshold: SeqNo,
162 ) -> crate::Result<Option<(Table, Option<BlobFile>)>>;
163
164 /// Atomically registers flushed tables into the tree, removing their associated sealed memtables.
165 ///
166 /// # Errors
167 ///
168 /// Will return `Err` if an IO error occurs.
169 fn register_tables(
170 &self,
171 tables: &[Table],
172 blob_files: Option<&[BlobFile]>,
173 frag_map: Option<FragmentationMap>,
174 ) -> crate::Result<()>;
175
176 /// Clears the active memtable atomically.
177 fn clear_active_memtable(&self);
178
179 /// Sets the active memtable.
180 ///
181 /// May be used to restore the LSM-tree's in-memory state from a write-ahead log
182 /// after tree recovery.
183 fn set_active_memtable(&self, memtable: Memtable);
184
185 /// Returns the number of sealed memtables.
186 fn sealed_memtable_count(&self) -> usize;
187
188 /// Adds a sealed memtables.
189 ///
190 /// May be used to restore the LSM-tree's in-memory state from some journals.
191 fn add_sealed_memtable(&self, id: MemtableId, memtable: Arc<Memtable>);
192
193 /// Performs compaction on the tree's levels, blocking the caller until it's done.
194 ///
195 /// # Errors
196 ///
197 /// Will return `Err` if an IO error occurs.
198 fn compact(
199 &self,
200 strategy: Arc<dyn CompactionStrategy>,
201 seqno_threshold: SeqNo,
202 ) -> crate::Result<()>;
203
204 /// Returns the next table's ID.
205 fn get_next_table_id(&self) -> TableId;
206
207 /// Returns the tree config.
208 fn tree_config(&self) -> &Config;
209
210 /// Returns the highest sequence number.
211 fn get_highest_seqno(&self) -> Option<SeqNo> {
212 let memtable_seqno = self.get_highest_memtable_seqno();
213 let table_seqno = self.get_highest_persisted_seqno();
214 memtable_seqno.max(table_seqno)
215 }
216
217 /// Returns the approximate size of the active memtable in bytes.
218 ///
219 /// May be used to flush the memtable if it grows too large.
220 fn active_memtable_size(&self) -> u64;
221
222 /// Returns the tree type.
223 fn tree_type(&self) -> TreeType;
224
225 /// Seals the active memtable, and returns a reference to it.
226 fn rotate_memtable(&self) -> Option<(MemtableId, Arc<Memtable>)>;
227
228 /// Returns the number of tables currently in the tree.
229 fn table_count(&self) -> usize;
230
231 /// Returns the number of tables in `levels[idx]`.
232 ///
233 /// Returns `None` if the level does not exist (if idx >= 7).
234 fn level_table_count(&self, idx: usize) -> Option<usize>;
235
236 /// Returns the number of disjoint runs in L0.
237 ///
238 /// Can be used to determine whether to write stall.
239 fn l0_run_count(&self) -> usize;
240
241 /// Returns the number of blob files currently in the tree.
242 fn blob_file_count(&self) -> usize {
243 0
244 }
245
246 /// Approximates the number of items in the tree.
247 fn approximate_len(&self) -> usize;
248
249 /// Returns the disk space usage.
250 fn disk_space(&self) -> u64;
251
252 /// Returns the highest sequence number of the active memtable.
253 fn get_highest_memtable_seqno(&self) -> Option<SeqNo>;
254
255 /// Returns the highest sequence number that is flushed to disk.
256 fn get_highest_persisted_seqno(&self) -> Option<SeqNo>;
257
258 /// Scans the entire tree, returning the number of items.
259 ///
260 /// ###### Caution
261 ///
262 /// This operation scans the entire tree: O(n) complexity!
263 ///
264 /// Never, under any circumstances, use .`len()` == 0 to check
265 /// if the tree is empty, use [`Tree::is_empty`] instead.
266 ///
267 /// # Examples
268 ///
269 /// ```
270 /// # use lsm_tree::Error as TreeError;
271 /// use lsm_tree::{AbstractTree, Config, Tree};
272 ///
273 /// let folder = tempfile::tempdir()?;
274 /// let tree = Config::new(folder, Default::default()).open()?;
275 ///
276 /// assert_eq!(tree.len(0, None)?, 0);
277 /// tree.insert("1", "abc", 0);
278 /// tree.insert("3", "abc", 1);
279 /// tree.insert("5", "abc", 2);
280 /// assert_eq!(tree.len(3, None)?, 3);
281 /// #
282 /// # Ok::<(), TreeError>(())
283 /// ```
284 ///
285 /// # Errors
286 ///
287 /// Will return `Err` if an IO error occurs.
288 fn len(&self, seqno: SeqNo, index: Option<Arc<Memtable>>) -> crate::Result<usize> {
289 let mut count = 0;
290
291 for item in self.iter(seqno, index) {
292 let _ = item.key()?;
293 count += 1;
294 }
295
296 Ok(count)
297 }
298
299 /// Returns `true` if the tree is empty.
300 ///
301 /// This operation has O(log N) complexity.
302 ///
303 /// # Examples
304 ///
305 /// ```
306 /// # let folder = tempfile::tempdir()?;
307 /// use lsm_tree::{AbstractTree, Config, Tree};
308 ///
309 /// let tree = Config::new(folder, Default::default()).open()?;
310 /// assert!(tree.is_empty(0, None)?);
311 ///
312 /// tree.insert("a", "abc", 0);
313 /// assert!(!tree.is_empty(1, None)?);
314 /// #
315 /// # Ok::<(), lsm_tree::Error>(())
316 /// ```
317 ///
318 /// # Errors
319 ///
320 /// Will return `Err` if an IO error occurs.
321 fn is_empty(&self, seqno: SeqNo, index: Option<Arc<Memtable>>) -> crate::Result<bool> {
322 self.first_key_value(seqno, index).map(|x| x.is_none())
323 }
324
325 /// Returns the first key-value pair in the tree.
326 /// The key in this pair is the minimum key in the tree.
327 ///
328 /// # Examples
329 ///
330 /// ```
331 /// # use lsm_tree::Error as TreeError;
332 /// # use lsm_tree::{AbstractTree, Config, Tree};
333 /// #
334 /// # let folder = tempfile::tempdir()?;
335 /// let tree = Config::new(folder, Default::default()).open()?;
336 ///
337 /// tree.insert("1", "abc", 0);
338 /// tree.insert("3", "abc", 1);
339 /// tree.insert("5", "abc", 2);
340 ///
341 /// let (key, _) = tree.first_key_value(3, None)?.expect("item should exist");
342 /// assert_eq!(&*key, "1".as_bytes());
343 /// #
344 /// # Ok::<(), TreeError>(())
345 /// ```
346 ///
347 /// # Errors
348 ///
349 /// Will return `Err` if an IO error occurs.
350 fn first_key_value(
351 &self,
352 seqno: SeqNo,
353 index: Option<Arc<Memtable>>,
354 ) -> crate::Result<Option<KvPair>> {
355 self.iter(seqno, index)
356 .next()
357 .map(Guard::into_inner)
358 .transpose()
359 }
360
361 /// Returns the last key-value pair in the tree.
362 /// The key in this pair is the maximum key in the tree.
363 ///
364 /// # Examples
365 ///
366 /// ```
367 /// # use lsm_tree::Error as TreeError;
368 /// # use lsm_tree::{AbstractTree, Config, Tree};
369 /// #
370 /// # let folder = tempfile::tempdir()?;
371 /// # let tree = Config::new(folder, Default::default()).open()?;
372 /// #
373 /// tree.insert("1", "abc", 0);
374 /// tree.insert("3", "abc", 1);
375 /// tree.insert("5", "abc", 2);
376 ///
377 /// let (key, _) = tree.last_key_value(3, None)?.expect("item should exist");
378 /// assert_eq!(&*key, "5".as_bytes());
379 /// #
380 /// # Ok::<(), TreeError>(())
381 /// ```
382 ///
383 /// # Errors
384 ///
385 /// Will return `Err` if an IO error occurs.
386 fn last_key_value(
387 &self,
388 seqno: SeqNo,
389 index: Option<Arc<Memtable>>,
390 ) -> crate::Result<Option<KvPair>> {
391 self.iter(seqno, index)
392 .next_back()
393 .map(Guard::into_inner)
394 .transpose()
395 }
396
397 /// Returns the size of a value if it exists.
398 ///
399 /// # Examples
400 ///
401 /// ```
402 /// # let folder = tempfile::tempdir()?;
403 /// use lsm_tree::{AbstractTree, Config, Tree};
404 ///
405 /// let tree = Config::new(folder, Default::default()).open()?;
406 /// tree.insert("a", "my_value", 0);
407 ///
408 /// let size = tree.size_of("a", 1)?.unwrap_or_default();
409 /// assert_eq!("my_value".len() as u32, size);
410 ///
411 /// let size = tree.size_of("b", 1)?.unwrap_or_default();
412 /// assert_eq!(0, size);
413 /// #
414 /// # Ok::<(), lsm_tree::Error>(())
415 /// ```
416 ///
417 /// # Errors
418 ///
419 /// Will return `Err` if an IO error occurs.
420 fn size_of<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<Option<u32>>;
421
422 /// Retrieves an item from the tree.
423 ///
424 /// # Examples
425 ///
426 /// ```
427 /// # let folder = tempfile::tempdir()?;
428 /// use lsm_tree::{AbstractTree, Config, Tree};
429 ///
430 /// let tree = Config::new(folder, Default::default()).open()?;
431 /// tree.insert("a", "my_value", 0);
432 ///
433 /// let item = tree.get("a", 1)?;
434 /// assert_eq!(Some("my_value".as_bytes().into()), item);
435 /// #
436 /// # Ok::<(), lsm_tree::Error>(())
437 /// ```
438 ///
439 /// # Errors
440 ///
441 /// Will return `Err` if an IO error occurs.
442 fn get<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<Option<UserValue>>;
443
444 /// Returns `true` if the tree contains the specified key.
445 ///
446 /// # Examples
447 ///
448 /// ```
449 /// # let folder = tempfile::tempdir()?;
450 /// # use lsm_tree::{AbstractTree, Config, Tree};
451 /// #
452 /// let tree = Config::new(folder, Default::default()).open()?;
453 /// assert!(!tree.contains_key("a", 0)?);
454 ///
455 /// tree.insert("a", "abc", 0);
456 /// assert!(tree.contains_key("a", 1)?);
457 /// #
458 /// # Ok::<(), lsm_tree::Error>(())
459 /// ```
460 ///
461 /// # Errors
462 ///
463 /// Will return `Err` if an IO error occurs.
464 fn contains_key<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<bool> {
465 self.get(key, seqno).map(|x| x.is_some())
466 }
467
468 /// Inserts a key-value pair into the tree.
469 ///
470 /// If the key already exists, the item will be overwritten.
471 ///
472 /// Returns the added item's size and new size of the memtable.
473 ///
474 /// # Examples
475 ///
476 /// ```
477 /// # let folder = tempfile::tempdir()?;
478 /// use lsm_tree::{AbstractTree, Config, Tree};
479 ///
480 /// let tree = Config::new(folder, Default::default()).open()?;
481 /// tree.insert("a", "abc", 0);
482 /// #
483 /// # Ok::<(), lsm_tree::Error>(())
484 /// ```
485 ///
486 /// # Errors
487 ///
488 /// Will return `Err` if an IO error occurs.
489 fn insert<K: Into<UserKey>, V: Into<UserValue>>(
490 &self,
491 key: K,
492 value: V,
493 seqno: SeqNo,
494 ) -> (u64, u64);
495
496 /// Removes an item from the tree.
497 ///
498 /// Returns the added item's size and new size of the memtable.
499 ///
500 /// # Examples
501 ///
502 /// ```
503 /// # let folder = tempfile::tempdir()?;
504 /// # use lsm_tree::{AbstractTree, Config, Tree};
505 /// #
506 /// # let tree = Config::new(folder, Default::default()).open()?;
507 /// tree.insert("a", "abc", 0);
508 ///
509 /// let item = tree.get("a", 1)?.expect("should have item");
510 /// assert_eq!("abc".as_bytes(), &*item);
511 ///
512 /// tree.remove("a", 1);
513 ///
514 /// let item = tree.get("a", 2)?;
515 /// assert_eq!(None, item);
516 /// #
517 /// # Ok::<(), lsm_tree::Error>(())
518 /// ```
519 ///
520 /// # Errors
521 ///
522 /// Will return `Err` if an IO error occurs.
523 fn remove<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u64, u64);
524
525 /// Removes an item from the tree.
526 ///
527 /// The tombstone marker of this delete operation will vanish when it
528 /// collides with its corresponding insertion.
529 /// This may cause older versions of the value to be resurrected, so it should
530 /// only be used and preferred in scenarios where a key is only ever written once.
531 ///
532 /// Returns the added item's size and new size of the memtable.
533 ///
534 /// # Examples
535 ///
536 /// ```
537 /// # let folder = tempfile::tempdir()?;
538 /// # use lsm_tree::{AbstractTree, Config, Tree};
539 /// #
540 /// # let tree = Config::new(folder, Default::default()).open()?;
541 /// tree.insert("a", "abc", 0);
542 ///
543 /// let item = tree.get("a", 1)?.expect("should have item");
544 /// assert_eq!("abc".as_bytes(), &*item);
545 ///
546 /// tree.remove_weak("a", 1);
547 ///
548 /// let item = tree.get("a", 2)?;
549 /// assert_eq!(None, item);
550 /// #
551 /// # Ok::<(), lsm_tree::Error>(())
552 /// ```
553 ///
554 /// # Errors
555 ///
556 /// Will return `Err` if an IO error occurs.
557 #[doc(hidden)]
558 fn remove_weak<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u64, u64);
559}