lsm_tree/abstract.rs
1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::{
6 compaction::CompactionStrategy, config::TreeType, iter_guard::IterGuardImpl, segment::Segment,
7 tree::inner::MemtableId, vlog::BlobFile, AnyTree, BlobTree, Config, Guard, KvPair, Memtable,
8 SegmentId, SeqNo, SequenceNumberCounter, Tree, UserKey, UserValue,
9};
10use enum_dispatch::enum_dispatch;
11use std::{
12 ops::RangeBounds,
13 sync::{Arc, RwLockWriteGuard},
14};
15
16pub type RangeItem = crate::Result<KvPair>;
17
18/// Generic Tree API
19#[allow(clippy::module_name_repetitions)]
20#[enum_dispatch]
21pub trait AbstractTree {
22 /// Returns an iterator that scans through the entire tree.
23 ///
24 /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
25 fn iter(
26 &self,
27 seqno: SeqNo,
28 index: Option<Arc<Memtable>>,
29 ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl<'_>> + '_> {
30 self.range::<&[u8], _>(.., seqno, index)
31 }
32
33 /// Returns an iterator over a prefixed set of items.
34 ///
35 /// Avoid using an empty prefix as it may scan a lot of items (unless limited).
36 fn prefix<K: AsRef<[u8]>>(
37 &self,
38 prefix: K,
39 seqno: SeqNo,
40 index: Option<Arc<Memtable>>,
41 ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl<'_>> + '_>;
42
43 /// Returns an iterator over a range of items.
44 ///
45 /// Avoid using full or unbounded ranges as they may scan a lot of items (unless limited).
46 fn range<K: AsRef<[u8]>, R: RangeBounds<K>>(
47 &self,
48 range: R,
49 seqno: SeqNo,
50 index: Option<Arc<Memtable>>,
51 ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl<'_>> + '_>;
52
53 /// Ingests a sorted stream of key-value pairs into the tree.
54 ///
55 /// Can only be called on a new fresh, empty tree.
56 ///
57 /// # Errors
58 ///
59 /// Will return `Err` if an IO error occurs.
60 ///
61 /// # Panics
62 ///
63 /// Panics if the tree is **not** initially empty.
64 ///
65 /// Will panic if the input iterator is not sorted in ascending order.
66 #[doc(hidden)]
67 fn ingest(
68 &self,
69 iter: impl Iterator<Item = (UserKey, UserValue)>,
70 seqno_generator: &SequenceNumberCounter,
71 visible_seqno: &SequenceNumberCounter,
72 ) -> crate::Result<()>;
73
74 /// Returns the approximate number of tombstones in the tree.
75 fn tombstone_count(&self) -> u64;
76
77 // TODO: clear() with Nuke compaction strategy (write lock) -> drop_range(..)
78
79 /// Drops segments that are fully contained in a given range.
80 ///
81 /// # Errors
82 ///
83 /// Will return `Err` if an IO error occurs.
84 fn drop_range(&self, key_range: crate::KeyRange) -> crate::Result<()>;
85
86 /// Performs major compaction, blocking the caller until it's done.
87 ///
88 /// # Errors
89 ///
90 /// Will return `Err` if an IO error occurs.
91 fn major_compact(&self, target_size: u64, seqno_threshold: SeqNo) -> crate::Result<()>;
92
93 /// Gets the space usage of all filters in the tree.
94 ///
95 /// May not correspond to the actual memory size because filter blocks may be paged out.
96 fn filter_size(&self) -> usize;
97
98 /// Gets the memory usage of all pinned filters in the tree.
99 fn pinned_filter_size(&self) -> usize;
100
101 /// Gets the memory usage of all pinned index blocks in the tree.
102 fn pinned_block_index_size(&self) -> usize;
103
104 /// Gets the length of the version free list.
105 fn version_free_list_len(&self) -> usize;
106
107 /// Returns the metrics structure.
108 #[cfg(feature = "metrics")]
109 fn metrics(&self) -> &Arc<crate::Metrics>;
110
111 // TODO:?
112 /* #[doc(hidden)]
113 fn verify(&self) -> crate::Result<usize>; */
114
115 /// Synchronously flushes a memtable to a disk segment.
116 ///
117 /// This method will not make the segment immediately available,
118 /// use [`AbstractTree::register_segments`] for that.
119 ///
120 /// # Errors
121 ///
122 /// Will return `Err` if an IO error occurs.
123 fn flush_memtable(
124 &self,
125 segment_id: SegmentId, // TODO: remove?
126 memtable: &Arc<Memtable>,
127 seqno_threshold: SeqNo,
128 ) -> crate::Result<Option<(Segment, Option<BlobFile>)>>;
129
130 /// Atomically registers flushed disk segments into the tree, removing their associated sealed memtables.
131 ///
132 /// # Errors
133 ///
134 /// Will return `Err` if an IO error occurs.
135 fn register_segments(
136 &self,
137 segments: &[Segment],
138 blob_files: Option<&[BlobFile]>,
139 seqno_threshold: SeqNo,
140 ) -> crate::Result<()>;
141
142 /// Write-locks the active memtable for exclusive access
143 fn lock_active_memtable(&self) -> RwLockWriteGuard<'_, Arc<Memtable>>;
144
145 /// Clears the active memtable atomically.
146 fn clear_active_memtable(&self);
147
148 /// Sets the active memtable.
149 ///
150 /// May be used to restore the LSM-tree's in-memory state from a write-ahead log
151 /// after tree recovery.
152 fn set_active_memtable(&self, memtable: Memtable);
153
154 /// Returns the number of sealed memtables.
155 fn sealed_memtable_count(&self) -> usize;
156
157 /// Adds a sealed memtables.
158 ///
159 /// May be used to restore the LSM-tree's in-memory state from some journals.
160 fn add_sealed_memtable(&self, id: MemtableId, memtable: Arc<Memtable>);
161
162 /// Performs compaction on the tree's levels, blocking the caller until it's done.
163 ///
164 /// # Errors
165 ///
166 /// Will return `Err` if an IO error occurs.
167 fn compact(
168 &self,
169 strategy: Arc<dyn CompactionStrategy>,
170 seqno_threshold: SeqNo,
171 ) -> crate::Result<()>;
172
173 /// Returns the next segment's ID.
174 fn get_next_segment_id(&self) -> SegmentId;
175
176 /// Returns the tree config.
177 fn tree_config(&self) -> &Config;
178
179 /// Returns the highest sequence number.
180 fn get_highest_seqno(&self) -> Option<SeqNo> {
181 let memtable_seqno = self.get_highest_memtable_seqno();
182 let segment_seqno = self.get_highest_persisted_seqno();
183 memtable_seqno.max(segment_seqno)
184 }
185
186 /// Returns the approximate size of the active memtable in bytes.
187 ///
188 /// May be used to flush the memtable if it grows too large.
189 fn active_memtable_size(&self) -> u64;
190
191 /// Returns the tree type.
192 fn tree_type(&self) -> TreeType;
193
194 /// Seals the active memtable, and returns a reference to it.
195 fn rotate_memtable(&self) -> Option<(MemtableId, Arc<Memtable>)>;
196
197 /// Returns the number of disk segments currently in the tree.
198 fn segment_count(&self) -> usize;
199
200 /// Returns the number of segments in `levels[idx]`.
201 ///
202 /// Returns `None` if the level does not exist (if idx >= 7).
203 fn level_segment_count(&self, idx: usize) -> Option<usize>;
204
205 /// Returns the number of disjoint runs in L0.
206 ///
207 /// Can be used to determine whether to write stall.
208 fn l0_run_count(&self) -> usize;
209
210 /// Returns the number of blob files currently in the tree.
211 fn blob_file_count(&self) -> usize {
212 0
213 }
214
215 /// Approximates the number of items in the tree.
216 fn approximate_len(&self) -> usize;
217
218 /// Returns the disk space usage.
219 fn disk_space(&self) -> u64;
220
221 /// Returns the highest sequence number of the active memtable.
222 fn get_highest_memtable_seqno(&self) -> Option<SeqNo>;
223
224 /// Returns the highest sequence number that is flushed to disk.
225 fn get_highest_persisted_seqno(&self) -> Option<SeqNo>;
226
227 /// Scans the entire tree, returning the number of items.
228 ///
229 /// ###### Caution
230 ///
231 /// This operation scans the entire tree: O(n) complexity!
232 ///
233 /// Never, under any circumstances, use .`len()` == 0 to check
234 /// if the tree is empty, use [`Tree::is_empty`] instead.
235 ///
236 /// # Examples
237 ///
238 /// ```
239 /// # use lsm_tree::Error as TreeError;
240 /// use lsm_tree::{AbstractTree, Config, Tree};
241 ///
242 /// let folder = tempfile::tempdir()?;
243 /// let tree = Config::new(folder).open()?;
244 ///
245 /// assert_eq!(tree.len(0, None)?, 0);
246 /// tree.insert("1", "abc", 0);
247 /// tree.insert("3", "abc", 1);
248 /// tree.insert("5", "abc", 2);
249 /// assert_eq!(tree.len(3, None)?, 3);
250 /// #
251 /// # Ok::<(), TreeError>(())
252 /// ```
253 ///
254 /// # Errors
255 ///
256 /// Will return `Err` if an IO error occurs.
257 fn len(&self, seqno: SeqNo, index: Option<Arc<Memtable>>) -> crate::Result<usize> {
258 let mut count = 0;
259
260 for item in self.iter(seqno, index) {
261 let _ = item.key()?;
262 count += 1;
263 }
264
265 Ok(count)
266 }
267
268 /// Returns `true` if the tree is empty.
269 ///
270 /// This operation has O(log N) complexity.
271 ///
272 /// # Examples
273 ///
274 /// ```
275 /// # let folder = tempfile::tempdir()?;
276 /// use lsm_tree::{AbstractTree, Config, Tree};
277 ///
278 /// let tree = Config::new(folder).open()?;
279 /// assert!(tree.is_empty(0, None)?);
280 ///
281 /// tree.insert("a", "abc", 0);
282 /// assert!(!tree.is_empty(1, None)?);
283 /// #
284 /// # Ok::<(), lsm_tree::Error>(())
285 /// ```
286 ///
287 /// # Errors
288 ///
289 /// Will return `Err` if an IO error occurs.
290 fn is_empty(&self, seqno: SeqNo, index: Option<Arc<Memtable>>) -> crate::Result<bool> {
291 self.first_key_value(seqno, index).map(|x| x.is_none())
292 }
293
294 /// Returns the first key-value pair in the tree.
295 /// The key in this pair is the minimum key in the tree.
296 ///
297 /// # Examples
298 ///
299 /// ```
300 /// # use lsm_tree::Error as TreeError;
301 /// # use lsm_tree::{AbstractTree, Config, Tree};
302 /// #
303 /// # let folder = tempfile::tempdir()?;
304 /// let tree = Config::new(folder).open()?;
305 ///
306 /// tree.insert("1", "abc", 0);
307 /// tree.insert("3", "abc", 1);
308 /// tree.insert("5", "abc", 2);
309 ///
310 /// let (key, _) = tree.first_key_value(3, None)?.expect("item should exist");
311 /// assert_eq!(&*key, "1".as_bytes());
312 /// #
313 /// # Ok::<(), TreeError>(())
314 /// ```
315 ///
316 /// # Errors
317 ///
318 /// Will return `Err` if an IO error occurs.
319 fn first_key_value(
320 &self,
321 seqno: SeqNo,
322 index: Option<Arc<Memtable>>,
323 ) -> crate::Result<Option<KvPair>> {
324 self.iter(seqno, index)
325 .next()
326 .map(Guard::into_inner)
327 .transpose()
328 }
329
330 /// Returns the last key-value pair in the tree.
331 /// The key in this pair is the maximum key in the tree.
332 ///
333 /// # Examples
334 ///
335 /// ```
336 /// # use lsm_tree::Error as TreeError;
337 /// # use lsm_tree::{AbstractTree, Config, Tree};
338 /// #
339 /// # let folder = tempfile::tempdir()?;
340 /// # let tree = Config::new(folder).open()?;
341 /// #
342 /// tree.insert("1", "abc", 0);
343 /// tree.insert("3", "abc", 1);
344 /// tree.insert("5", "abc", 2);
345 ///
346 /// let (key, _) = tree.last_key_value(3, None)?.expect("item should exist");
347 /// assert_eq!(&*key, "5".as_bytes());
348 /// #
349 /// # Ok::<(), TreeError>(())
350 /// ```
351 ///
352 /// # Errors
353 ///
354 /// Will return `Err` if an IO error occurs.
355 fn last_key_value(
356 &self,
357 seqno: SeqNo,
358 index: Option<Arc<Memtable>>,
359 ) -> crate::Result<Option<KvPair>> {
360 self.iter(seqno, index)
361 .next_back()
362 .map(Guard::into_inner)
363 .transpose()
364 }
365
366 /// Returns the size of a value if it exists.
367 ///
368 /// # Examples
369 ///
370 /// ```
371 /// # let folder = tempfile::tempdir()?;
372 /// use lsm_tree::{AbstractTree, Config, Tree};
373 ///
374 /// let tree = Config::new(folder).open()?;
375 /// tree.insert("a", "my_value", 0);
376 ///
377 /// let size = tree.size_of("a", 1)?.unwrap_or_default();
378 /// assert_eq!("my_value".len() as u32, size);
379 ///
380 /// let size = tree.size_of("b", 1)?.unwrap_or_default();
381 /// assert_eq!(0, size);
382 /// #
383 /// # Ok::<(), lsm_tree::Error>(())
384 /// ```
385 ///
386 /// # Errors
387 ///
388 /// Will return `Err` if an IO error occurs.
389 fn size_of<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<Option<u32>>;
390
391 /// Retrieves an item from the tree.
392 ///
393 /// # Examples
394 ///
395 /// ```
396 /// # let folder = tempfile::tempdir()?;
397 /// use lsm_tree::{AbstractTree, Config, Tree};
398 ///
399 /// let tree = Config::new(folder).open()?;
400 /// tree.insert("a", "my_value", 0);
401 ///
402 /// let item = tree.get("a", 1)?;
403 /// assert_eq!(Some("my_value".as_bytes().into()), item);
404 /// #
405 /// # Ok::<(), lsm_tree::Error>(())
406 /// ```
407 ///
408 /// # Errors
409 ///
410 /// Will return `Err` if an IO error occurs.
411 fn get<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<Option<UserValue>>;
412
413 /// Returns `true` if the tree contains the specified key.
414 ///
415 /// # Examples
416 ///
417 /// ```
418 /// # let folder = tempfile::tempdir()?;
419 /// # use lsm_tree::{AbstractTree, Config, Tree};
420 /// #
421 /// let tree = Config::new(folder).open()?;
422 /// assert!(!tree.contains_key("a", 0)?);
423 ///
424 /// tree.insert("a", "abc", 0);
425 /// assert!(tree.contains_key("a", 1)?);
426 /// #
427 /// # Ok::<(), lsm_tree::Error>(())
428 /// ```
429 ///
430 /// # Errors
431 ///
432 /// Will return `Err` if an IO error occurs.
433 fn contains_key<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<bool> {
434 self.get(key, seqno).map(|x| x.is_some())
435 }
436
437 /// Inserts a key-value pair into the tree.
438 ///
439 /// If the key already exists, the item will be overwritten.
440 ///
441 /// Returns the added item's size and new size of the memtable.
442 ///
443 /// # Examples
444 ///
445 /// ```
446 /// # let folder = tempfile::tempdir()?;
447 /// use lsm_tree::{AbstractTree, Config, Tree};
448 ///
449 /// let tree = Config::new(folder).open()?;
450 /// tree.insert("a", "abc", 0);
451 /// #
452 /// # Ok::<(), lsm_tree::Error>(())
453 /// ```
454 ///
455 /// # Errors
456 ///
457 /// Will return `Err` if an IO error occurs.
458 fn insert<K: Into<UserKey>, V: Into<UserValue>>(
459 &self,
460 key: K,
461 value: V,
462 seqno: SeqNo,
463 ) -> (u64, u64);
464
465 /// Removes an item from the tree.
466 ///
467 /// Returns the added item's size and new size of the memtable.
468 ///
469 /// # Examples
470 ///
471 /// ```
472 /// # let folder = tempfile::tempdir()?;
473 /// # use lsm_tree::{AbstractTree, Config, Tree};
474 /// #
475 /// # let tree = Config::new(folder).open()?;
476 /// tree.insert("a", "abc", 0);
477 ///
478 /// let item = tree.get("a", 1)?.expect("should have item");
479 /// assert_eq!("abc".as_bytes(), &*item);
480 ///
481 /// tree.remove("a", 1);
482 ///
483 /// let item = tree.get("a", 2)?;
484 /// assert_eq!(None, item);
485 /// #
486 /// # Ok::<(), lsm_tree::Error>(())
487 /// ```
488 ///
489 /// # Errors
490 ///
491 /// Will return `Err` if an IO error occurs.
492 fn remove<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u64, u64);
493
494 /// Removes an item from the tree.
495 ///
496 /// The tombstone marker of this delete operation will vanish when it
497 /// collides with its corresponding insertion.
498 /// This may cause older versions of the value to be resurrected, so it should
499 /// only be used and preferred in scenarios where a key is only ever written once.
500 ///
501 /// Returns the added item's size and new size of the memtable.
502 ///
503 /// # Examples
504 ///
505 /// ```
506 /// # let folder = tempfile::tempdir()?;
507 /// # use lsm_tree::{AbstractTree, Config, Tree};
508 /// #
509 /// # let tree = Config::new(folder).open()?;
510 /// tree.insert("a", "abc", 0);
511 ///
512 /// let item = tree.get("a", 1)?.expect("should have item");
513 /// assert_eq!("abc".as_bytes(), &*item);
514 ///
515 /// tree.remove_weak("a", 1);
516 ///
517 /// let item = tree.get("a", 2)?;
518 /// assert_eq!(None, item);
519 /// #
520 /// # Ok::<(), lsm_tree::Error>(())
521 /// ```
522 ///
523 /// # Errors
524 ///
525 /// Will return `Err` if an IO error occurs.
526 #[doc(hidden)]
527 fn remove_weak<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u64, u64);
528}