lsm_tree/abstract.rs
1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::{
6 compaction::CompactionStrategy, config::TreeType, tree::inner::MemtableId, AnyTree, BlobTree,
7 Config, KvPair, Memtable, Segment, SegmentId, SeqNo, Snapshot, Tree, UserKey, UserValue,
8};
9use enum_dispatch::enum_dispatch;
10use std::{
11 ops::RangeBounds,
12 sync::{Arc, RwLockWriteGuard},
13};
14
15pub type RangeItem = crate::Result<KvPair>;
16
17/// Generic Tree API
18#[allow(clippy::module_name_repetitions)]
19#[enum_dispatch]
20pub trait AbstractTree {
21 /// Ingests a sorted stream of key-value pairs into the tree.
22 ///
23 /// Can only be called on a new fresh, empty tree.
24 ///
25 /// # Errors
26 ///
27 /// Will return `Err` if an IO error occurs.
28 ///
29 /// # Panics
30 ///
31 /// Panics if the tree is **not** initially empty.
32 ///
33 /// Will panic if the input iterator is not sorted in ascending order.
34 #[doc(hidden)]
35 fn ingest(&self, iter: impl Iterator<Item = (UserKey, UserValue)>) -> crate::Result<()>;
36
37 /// Performs major compaction, blocking the caller until it's done.
38 ///
39 /// # Errors
40 ///
41 /// Will return `Err` if an IO error occurs.
42 fn major_compact(&self, target_size: u64, seqno_threshold: SeqNo) -> crate::Result<()>;
43
44 /// Gets the memory usage of all bloom filters in the tree.
45 fn bloom_filter_size(&self) -> usize;
46
47 #[doc(hidden)]
48 fn verify(&self) -> crate::Result<usize>;
49
50 /// Synchronously flushes a memtable to a disk segment.
51 ///
52 /// This method will not make the segment immediately available,
53 /// use [`AbstractTree::register_segments`] for that.
54 ///
55 /// # Errors
56 ///
57 /// Will return `Err` if an IO error occurs.
58 fn flush_memtable(
59 &self,
60 segment_id: SegmentId,
61 memtable: &Arc<Memtable>,
62 seqno_threshold: SeqNo,
63 ) -> crate::Result<Option<Segment>>;
64
65 /// Atomically registers flushed disk segments into the tree, removing their associated sealed memtables.
66 ///
67 /// # Errors
68 ///
69 /// Will return `Err` if an IO error occurs.
70 fn register_segments(&self, segments: &[Segment]) -> crate::Result<()>;
71
72 /// Write-locks the active memtable for exclusive access
73 fn lock_active_memtable(&self) -> RwLockWriteGuard<'_, Arc<Memtable>>;
74
75 /// Clears the active memtable atomically.
76 fn clear_active_memtable(&self);
77
78 /// Sets the active memtable.
79 ///
80 /// May be used to restore the LSM-tree's in-memory state from a write-ahead log
81 /// after tree recovery.
82 fn set_active_memtable(&self, memtable: Memtable);
83
84 /// Returns the amount of sealed memtables.
85 fn sealed_memtable_count(&self) -> usize;
86
87 /// Adds a sealed memtables.
88 ///
89 /// May be used to restore the LSM-tree's in-memory state from some journals.
90 fn add_sealed_memtable(&self, id: MemtableId, memtable: Arc<Memtable>);
91
92 /// Performs compaction on the tree's levels, blocking the caller until it's done.
93 ///
94 /// # Errors
95 ///
96 /// Will return `Err` if an IO error occurs.
97 fn compact(
98 &self,
99 strategy: Arc<dyn CompactionStrategy>,
100 seqno_threshold: SeqNo,
101 ) -> crate::Result<()>;
102
103 /// Returns the next segment's ID.
104 fn get_next_segment_id(&self) -> SegmentId;
105
106 /// Returns the tree config.
107 fn tree_config(&self) -> &Config;
108
109 /// Returns the highest sequence number.
110 fn get_highest_seqno(&self) -> Option<SeqNo> {
111 let memtable_seqno = self.get_highest_memtable_seqno();
112 let segment_seqno = self.get_highest_persisted_seqno();
113 memtable_seqno.max(segment_seqno)
114 }
115
116 /// Returns the approximate size of the active memtable in bytes.
117 ///
118 /// May be used to flush the memtable if it grows too large.
119 fn active_memtable_size(&self) -> u32;
120
121 /// Returns the tree type.
122 fn tree_type(&self) -> TreeType;
123
124 /// Seals the active memtable, and returns a reference to it.
125 fn rotate_memtable(&self) -> Option<(MemtableId, Arc<Memtable>)>;
126
127 /// Returns the amount of disk segments currently in the tree.
128 fn segment_count(&self) -> usize;
129
130 /// Returns the amount of segments in levels[idx].
131 ///
132 /// Returns `None` if the level does not exist (if idx >= 7).
133 fn level_segment_count(&self, idx: usize) -> Option<usize>;
134
135 /// Returns the amount of disjoint runs in L0.
136 ///
137 /// Can be used to determine whether to write stall.
138 fn l0_run_count(&self) -> usize;
139
140 /// Returns the amount of blob files currently in the tree.
141 fn blob_file_count(&self) -> usize {
142 0
143 }
144
145 /// Approximates the amount of items in the tree.
146 fn approximate_len(&self) -> usize;
147
148 /// Returns the disk space usage.
149 fn disk_space(&self) -> u64;
150
151 /// Returns the highest sequence number of the active memtable.
152 fn get_highest_memtable_seqno(&self) -> Option<SeqNo>;
153
154 /// Returns the highest sequence number that is flushed to disk.
155 fn get_highest_persisted_seqno(&self) -> Option<SeqNo>;
156
157 /// Scans the entire tree, returning the amount of items.
158 ///
159 /// ###### Caution
160 ///
161 /// This operation scans the entire tree: O(n) complexity!
162 ///
163 /// Never, under any circumstances, use .`len()` == 0 to check
164 /// if the tree is empty, use [`Tree::is_empty`] instead.
165 ///
166 /// # Examples
167 ///
168 /// ```
169 /// # use lsm_tree::Error as TreeError;
170 /// use lsm_tree::{AbstractTree, Config, Tree};
171 ///
172 /// let folder = tempfile::tempdir()?;
173 /// let tree = Config::new(folder).open()?;
174 ///
175 /// assert_eq!(tree.len(None, None)?, 0);
176 /// tree.insert("1", "abc", 0);
177 /// tree.insert("3", "abc", 1);
178 /// tree.insert("5", "abc", 2);
179 /// assert_eq!(tree.len(None, None)?, 3);
180 /// #
181 /// # Ok::<(), TreeError>(())
182 /// ```
183 ///
184 /// # Errors
185 ///
186 /// Will return `Err` if an IO error occurs.
187 fn len(&self, seqno: Option<SeqNo>, index: Option<Arc<Memtable>>) -> crate::Result<usize> {
188 let mut count = 0;
189
190 for item in self.iter(seqno, index) {
191 let _ = item?;
192 count += 1;
193 }
194
195 Ok(count)
196 }
197
198 /// Returns `true` if the tree is empty.
199 ///
200 /// This operation has O(1) complexity.
201 ///
202 /// # Examples
203 ///
204 /// ```
205 /// # let folder = tempfile::tempdir()?;
206 /// use lsm_tree::{AbstractTree, Config, Tree};
207 ///
208 /// let tree = Config::new(folder).open()?;
209 /// assert!(tree.is_empty(None, None)?);
210 ///
211 /// tree.insert("a", "abc", 0);
212 /// assert!(!tree.is_empty(None, None)?);
213 /// #
214 /// # Ok::<(), lsm_tree::Error>(())
215 /// ```
216 ///
217 /// # Errors
218 ///
219 /// Will return `Err` if an IO error occurs.
220 fn is_empty(&self, seqno: Option<SeqNo>, index: Option<Arc<Memtable>>) -> crate::Result<bool> {
221 self.first_key_value(seqno, index).map(|x| x.is_none())
222 }
223
224 /// Returns the first key-value pair in the tree.
225 /// The key in this pair is the minimum key in the tree.
226 ///
227 /// # Examples
228 ///
229 /// ```
230 /// # use lsm_tree::Error as TreeError;
231 /// # use lsm_tree::{AbstractTree, Config, Tree};
232 /// #
233 /// # let folder = tempfile::tempdir()?;
234 /// let tree = Config::new(folder).open()?;
235 ///
236 /// tree.insert("1", "abc", 0);
237 /// tree.insert("3", "abc", 1);
238 /// tree.insert("5", "abc", 2);
239 ///
240 /// let (key, _) = tree.first_key_value(None, None)?.expect("item should exist");
241 /// assert_eq!(&*key, "1".as_bytes());
242 /// #
243 /// # Ok::<(), TreeError>(())
244 /// ```
245 ///
246 /// # Errors
247 ///
248 /// Will return `Err` if an IO error occurs.
249 fn first_key_value(
250 &self,
251 seqno: Option<SeqNo>,
252 index: Option<Arc<Memtable>>,
253 ) -> crate::Result<Option<KvPair>> {
254 self.iter(seqno, index).next().transpose()
255 }
256
257 /// Returns the last key-value pair in the tree.
258 /// The key in this pair is the maximum key in the tree.
259 ///
260 /// # Examples
261 ///
262 /// ```
263 /// # use lsm_tree::Error as TreeError;
264 /// # use lsm_tree::{AbstractTree, Config, Tree};
265 /// #
266 /// # let folder = tempfile::tempdir()?;
267 /// # let tree = Config::new(folder).open()?;
268 /// #
269 /// tree.insert("1", "abc", 0);
270 /// tree.insert("3", "abc", 1);
271 /// tree.insert("5", "abc", 2);
272 ///
273 /// let (key, _) = tree.last_key_value(None, None)?.expect("item should exist");
274 /// assert_eq!(&*key, "5".as_bytes());
275 /// #
276 /// # Ok::<(), TreeError>(())
277 /// ```
278 ///
279 /// # Errors
280 ///
281 /// Will return `Err` if an IO error occurs.
282 fn last_key_value(
283 &self,
284 seqno: Option<SeqNo>,
285 index: Option<Arc<Memtable>>,
286 ) -> crate::Result<Option<KvPair>> {
287 self.iter(seqno, index).next_back().transpose()
288 }
289
290 /// Returns an iterator that scans through the entire tree.
291 ///
292 /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
293 ///
294 /// # Examples
295 ///
296 /// ```
297 /// # let folder = tempfile::tempdir()?;
298 /// use lsm_tree::{AbstractTree, Config, Tree};
299 ///
300 /// let tree = Config::new(folder).open()?;
301 ///
302 /// tree.insert("a", "abc", 0);
303 /// tree.insert("f", "abc", 1);
304 /// tree.insert("g", "abc", 2);
305 /// assert_eq!(3, tree.iter(None, None).count());
306 /// #
307 /// # Ok::<(), lsm_tree::Error>(())
308 /// ```
309 fn iter(
310 &self,
311 seqno: Option<SeqNo>,
312 index: Option<Arc<Memtable>>,
313 ) -> Box<dyn DoubleEndedIterator<Item = crate::Result<KvPair>> + 'static> {
314 self.range::<&[u8], _>(.., seqno, index)
315 }
316
317 /// Returns an iterator that scans through the entire tree, returning keys only.
318 ///
319 /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
320 ///
321 /// # Examples
322 ///
323 /// ```
324 /// # let folder = tempfile::tempdir()?;
325 /// use lsm_tree::{AbstractTree, Config, Tree};
326 ///
327 /// let tree = Config::new(folder).open()?;
328 ///
329 /// tree.insert("a", "abc", 0);
330 /// tree.insert("f", "abc", 1);
331 /// tree.insert("g", "abc", 2);
332 /// assert_eq!(3, tree.keys(None, None).count());
333 /// #
334 /// # Ok::<(), lsm_tree::Error>(())
335 /// ```
336 fn keys(
337 &self,
338 seqno: Option<SeqNo>,
339 index: Option<Arc<Memtable>>,
340 ) -> Box<dyn DoubleEndedIterator<Item = crate::Result<UserKey>> + 'static>;
341
342 /// Returns an iterator that scans through the entire tree, returning values only.
343 ///
344 /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
345 ///
346 /// # Examples
347 ///
348 /// ```
349 /// # let folder = tempfile::tempdir()?;
350 /// use lsm_tree::{AbstractTree, Config, Tree};
351 ///
352 /// let tree = Config::new(folder).open()?;
353 ///
354 /// tree.insert("a", "abc", 0);
355 /// tree.insert("f", "abc", 1);
356 /// tree.insert("g", "abc", 2);
357 /// assert_eq!(3, tree.values(None, None).count());
358 /// #
359 /// # Ok::<(), lsm_tree::Error>(())
360 /// ```
361 fn values(
362 &self,
363 seqno: Option<SeqNo>,
364 index: Option<Arc<Memtable>>,
365 ) -> Box<dyn DoubleEndedIterator<Item = crate::Result<UserValue>> + 'static>;
366
367 /// Returns an iterator over a range of items.
368 ///
369 /// Avoid using full or unbounded ranges as they may scan a lot of items (unless limited).
370 ///
371 /// # Examples
372 ///
373 /// ```
374 /// # let folder = tempfile::tempdir()?;
375 /// use lsm_tree::{AbstractTree, Config, Tree};
376 ///
377 /// let tree = Config::new(folder).open()?;
378 ///
379 /// tree.insert("a", "abc", 0);
380 /// tree.insert("f", "abc", 1);
381 /// tree.insert("g", "abc", 2);
382 /// assert_eq!(2, tree.range("a"..="f", None, None).into_iter().count());
383 /// #
384 /// # Ok::<(), lsm_tree::Error>(())
385 /// ```
386 fn range<K: AsRef<[u8]>, R: RangeBounds<K>>(
387 &self,
388 range: R,
389 seqno: Option<SeqNo>,
390 index: Option<Arc<Memtable>>,
391 ) -> Box<dyn DoubleEndedIterator<Item = crate::Result<KvPair>> + 'static>;
392
393 /// Returns an iterator over a prefixed set of items.
394 ///
395 /// Avoid using an empty prefix as it may scan a lot of items (unless limited).
396 ///
397 /// # Examples
398 ///
399 /// ```
400 /// # let folder = tempfile::tempdir()?;
401 /// use lsm_tree::{AbstractTree, Config, Tree};
402 ///
403 /// let tree = Config::new(folder).open()?;
404 ///
405 /// tree.insert("a", "abc", 0);
406 /// tree.insert("ab", "abc", 1);
407 /// tree.insert("abc", "abc", 2);
408 /// assert_eq!(2, tree.prefix("ab", None, None).count());
409 /// #
410 /// # Ok::<(), lsm_tree::Error>(())
411 /// ```
412 fn prefix<K: AsRef<[u8]>>(
413 &self,
414 prefix: K,
415 seqno: Option<SeqNo>,
416 index: Option<Arc<Memtable>>,
417 ) -> Box<dyn DoubleEndedIterator<Item = crate::Result<KvPair>> + 'static>;
418
419 /// Returns the size of a value if it exists.
420 ///
421 /// # Examples
422 ///
423 /// ```
424 /// # let folder = tempfile::tempdir()?;
425 /// use lsm_tree::{AbstractTree, Config, Tree};
426 ///
427 /// let tree = Config::new(folder).open()?;
428 /// tree.insert("a", "my_value", 0);
429 ///
430 /// let size = tree.size_of("a", None)?.unwrap_or_default();
431 /// assert_eq!("my_value".len() as u32, size);
432 ///
433 /// let size = tree.size_of("b", None)?.unwrap_or_default();
434 /// assert_eq!(0, size);
435 /// #
436 /// # Ok::<(), lsm_tree::Error>(())
437 /// ```
438 ///
439 /// # Errors
440 ///
441 /// Will return `Err` if an IO error occurs.
442 fn size_of<K: AsRef<[u8]>>(&self, key: K, seqno: Option<SeqNo>) -> crate::Result<Option<u32>>;
443
444 /// Retrieves an item from the tree.
445 ///
446 /// # Examples
447 ///
448 /// ```
449 /// # let folder = tempfile::tempdir()?;
450 /// use lsm_tree::{AbstractTree, Config, Tree};
451 ///
452 /// let tree = Config::new(folder).open()?;
453 /// tree.insert("a", "my_value", 0);
454 ///
455 /// let item = tree.get("a", None)?;
456 /// assert_eq!(Some("my_value".as_bytes().into()), item);
457 /// #
458 /// # Ok::<(), lsm_tree::Error>(())
459 /// ```
460 ///
461 /// # Errors
462 ///
463 /// Will return `Err` if an IO error occurs.
464 fn get<K: AsRef<[u8]>>(&self, key: K, seqno: Option<SeqNo>)
465 -> crate::Result<Option<UserValue>>;
466
467 /// Opens a read-only point-in-time snapshot of the tree
468 ///
469 /// Dropping the snapshot will close the snapshot
470 ///
471 /// # Examples
472 ///
473 /// ```
474 /// # let folder = tempfile::tempdir()?;
475 /// use lsm_tree::{AbstractTree, Config, Tree};
476 ///
477 /// let tree = Config::new(folder).open()?;
478 ///
479 /// tree.insert("a", "abc", 0);
480 ///
481 /// let snapshot = tree.snapshot(1);
482 /// assert_eq!(snapshot.len()?, tree.len(None, None)?);
483 ///
484 /// tree.insert("b", "abc", 1);
485 ///
486 /// assert_eq!(2, tree.len(None, None)?);
487 /// assert_eq!(1, snapshot.len()?);
488 ///
489 /// assert!(snapshot.contains_key("a")?);
490 /// assert!(!snapshot.contains_key("b")?);
491 /// #
492 /// # Ok::<(), lsm_tree::Error>(())
493 /// ```
494 fn snapshot(&self, seqno: SeqNo) -> Snapshot;
495
496 /// Opens a snapshot of this partition with a given sequence number
497 fn snapshot_at(&self, seqno: SeqNo) -> Snapshot {
498 self.snapshot(seqno)
499 }
500
501 /// Returns `true` if the tree contains the specified key.
502 ///
503 /// # Examples
504 ///
505 /// ```
506 /// # let folder = tempfile::tempdir()?;
507 /// # use lsm_tree::{AbstractTree, Config, Tree};
508 /// #
509 /// let tree = Config::new(folder).open()?;
510 /// assert!(!tree.contains_key("a", None)?);
511 ///
512 /// tree.insert("a", "abc", 0);
513 /// assert!(tree.contains_key("a", None)?);
514 /// #
515 /// # Ok::<(), lsm_tree::Error>(())
516 /// ```
517 ///
518 /// # Errors
519 ///
520 /// Will return `Err` if an IO error occurs.
521 fn contains_key<K: AsRef<[u8]>>(&self, key: K, seqno: Option<SeqNo>) -> crate::Result<bool> {
522 self.get(key, seqno).map(|x| x.is_some())
523 }
524
525 /// Inserts a key-value pair into the tree.
526 ///
527 /// If the key already exists, the item will be overwritten.
528 ///
529 /// Returns the added item's size and new size of the memtable.
530 ///
531 /// # Examples
532 ///
533 /// ```
534 /// # let folder = tempfile::tempdir()?;
535 /// use lsm_tree::{AbstractTree, Config, Tree};
536 ///
537 /// let tree = Config::new(folder).open()?;
538 /// tree.insert("a", "abc", 0);
539 /// #
540 /// # Ok::<(), lsm_tree::Error>(())
541 /// ```
542 ///
543 /// # Errors
544 ///
545 /// Will return `Err` if an IO error occurs.
546 fn insert<K: Into<UserKey>, V: Into<UserValue>>(
547 &self,
548 key: K,
549 value: V,
550 seqno: SeqNo,
551 ) -> (u32, u32);
552
553 /// Removes an item from the tree.
554 ///
555 /// Returns the added item's size and new size of the memtable.
556 ///
557 /// # Examples
558 ///
559 /// ```
560 /// # let folder = tempfile::tempdir()?;
561 /// # use lsm_tree::{AbstractTree, Config, Tree};
562 /// #
563 /// # let tree = Config::new(folder).open()?;
564 /// tree.insert("a", "abc", 0);
565 ///
566 /// let item = tree.get("a", None)?.expect("should have item");
567 /// assert_eq!("abc".as_bytes(), &*item);
568 ///
569 /// tree.remove("a", 1);
570 ///
571 /// let item = tree.get("a", None)?;
572 /// assert_eq!(None, item);
573 /// #
574 /// # Ok::<(), lsm_tree::Error>(())
575 /// ```
576 ///
577 /// # Errors
578 ///
579 /// Will return `Err` if an IO error occurs.
580 fn remove<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u32, u32);
581
582 /// Removes an item from the tree.
583 ///
584 /// The tombstone marker of this delete operation will vanish when it
585 /// collides with its corresponding insertion.
586 /// This may cause older versions of the value to be resurrected, so it should
587 /// only be used and preferred in scenarios where a key is only ever written once.
588 ///
589 /// Returns the added item's size and new size of the memtable.
590 ///
591 /// # Examples
592 ///
593 /// ```
594 /// # let folder = tempfile::tempdir()?;
595 /// # use lsm_tree::{AbstractTree, Config, Tree};
596 /// #
597 /// # let tree = Config::new(folder).open()?;
598 /// tree.insert("a", "abc", 0);
599 ///
600 /// let item = tree.get("a", None)?.expect("should have item");
601 /// assert_eq!("abc".as_bytes(), &*item);
602 ///
603 /// tree.remove_weak("a", 1);
604 ///
605 /// let item = tree.get("a", None)?;
606 /// assert_eq!(None, item);
607 /// #
608 /// # Ok::<(), lsm_tree::Error>(())
609 /// ```
610 ///
611 /// # Errors
612 ///
613 /// Will return `Err` if an IO error occurs.
614 fn remove_weak<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u32, u32);
615}