// lsm_tree/abstract_tree.rs
1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5use crate::{
6    iter_guard::IterGuardImpl, table::Table, version::Version, vlog::BlobFile, AnyTree, BlobTree,
7    Config, Guard, InternalValue, KvPair, Memtable, SeqNo, TableId, Tree, UserKey, UserValue,
8};
9use std::{
10    ops::RangeBounds,
11    sync::{Arc, MutexGuard, RwLockWriteGuard},
12};
13
/// Item type yielded by range/prefix iterators: a key-value pair or an IO error.
pub type RangeItem = crate::Result<KvPair>;

// Output of a memtable flush: the written tables, plus blob files if the
// implementation separates large values into blob storage.
type FlushToTablesResult = (Vec<Table>, Option<Vec<BlobFile>>);
17
// Sealed on purpose: this trait is still public as a consumer-side bound
// (`&impl AbstractTree`), but external implementations are no longer part of
// the supported extension surface. Internal flush/version hooks keep evolving
// with crate-owned tree types and must not create downstream semver traps.
//
// `sealed` stays `pub` only so sibling modules in this crate can write
// `crate::abstract_tree::sealed::Sealed` in their impls. The parent module
// `abstract_tree` is not publicly exported from the crate root, so downstream
// crates still cannot name or implement this trait.
pub mod sealed {
    /// Marker trait that prevents downstream crates from implementing `AbstractTree`.
    pub trait Sealed {}
}
30
31/// Generic Tree API
32#[enum_dispatch::enum_dispatch]
33pub trait AbstractTree: sealed::Sealed {
    /// Debug method for tracing the MVCC history of a key.
    #[doc(hidden)]
    fn print_trace(&self, key: &[u8]) -> crate::Result<()>;

    /// Returns the number of cached table file descriptors.
    fn table_file_cache_size(&self) -> usize;
40
41    // TODO: remove
42    #[doc(hidden)]
43    fn version_memtable_size_sum(&self) -> u64 {
44        self.get_version_history_lock().memtable_size_sum()
45    }
46
    /// Returns the next table ID.
    #[doc(hidden)]
    fn next_table_id(&self) -> TableId;

    /// Returns this tree's ID.
    #[doc(hidden)]
    fn id(&self) -> crate::TreeId;

    /// Like [`AbstractTree::get`], but returns the actual internal entry, not just the user value.
    ///
    /// Used in tests.
    #[doc(hidden)]
    fn get_internal_entry(&self, key: &[u8], seqno: SeqNo) -> crate::Result<Option<InternalValue>>;

    /// Returns the current version.
    #[doc(hidden)]
    fn current_version(&self) -> Version;

    /// Acquires a write lock over the tree's version history.
    #[doc(hidden)]
    fn get_version_history_lock(&self) -> RwLockWriteGuard<'_, crate::version::SuperVersions>;
64
65    /// Seals the active memtable and flushes to table(s).
66    ///
67    /// If there are already other sealed memtables lined up, those will be flushed as well.
68    ///
69    /// Only used in tests.
70    #[doc(hidden)]
71    fn flush_active_memtable(&self, eviction_seqno: SeqNo) -> crate::Result<()> {
72        let lock = self.get_flush_lock();
73        self.rotate_memtable();
74        self.flush(&lock, eviction_seqno)?;
75        Ok(())
76    }
77
78    /// Synchronously flushes pending sealed memtables to tables.
79    ///
80    /// Returns the sum of flushed memtable sizes that were flushed.
81    ///
82    /// The function may not return a result, if nothing was flushed.
83    ///
84    /// # Errors
85    ///
86    /// Will return `Err` if an IO error occurs.
87    fn flush(
88        &self,
89        _lock: &MutexGuard<'_, ()>,
90        seqno_threshold: SeqNo,
91    ) -> crate::Result<Option<u64>> {
92        use crate::{
93            compaction::stream::CompactionStream, merge::Merger, range_tombstone::RangeTombstone,
94        };
95
96        let version_history = self.get_version_history_lock();
97        let latest = version_history.latest_version();
98
99        if latest.sealed_memtables.len() == 0 {
100            return Ok(None);
101        }
102
103        let sealed_ids = latest
104            .sealed_memtables
105            .iter()
106            .map(|mt| mt.id)
107            .collect::<Vec<_>>();
108
109        let flushed_size = latest.sealed_memtables.iter().map(|mt| mt.size()).sum();
110
111        // Collect range tombstones from sealed memtables
112        let mut range_tombstones: Vec<RangeTombstone> = Vec::new();
113        for mt in latest.sealed_memtables.iter() {
114            range_tombstones.extend(mt.range_tombstones_sorted());
115        }
116        range_tombstones.sort();
117        range_tombstones.dedup();
118
119        let merger = Merger::new(
120            latest
121                .sealed_memtables
122                .iter()
123                .map(|mt| mt.iter().map(Ok))
124                .collect::<Vec<_>>(),
125            self.tree_config().comparator.clone(),
126        );
127        // RT suppression is not needed here: flush writes both entries and RTs
128        // to the output tables. Suppression happens at read time, not write time.
129        let stream = CompactionStream::new(merger, seqno_threshold)
130            .with_merge_operator(self.tree_config().merge_operator.clone());
131
132        drop(version_history);
133
134        // Clone needed: flush_to_tables_with_rt consumes the Vec, but on the
135        // RT-only path (no KV data, tables.is_empty()) we re-insert RTs into the
136        // active memtable. Flush is infrequent and RT count is small.
137        if let Some((tables, blob_files)) =
138            self.flush_to_tables_with_rt(stream, range_tombstones.clone())?
139        {
140            // If no tables were produced (RT-only memtable), re-insert RTs
141            // into active memtable so they aren't lost
142            if tables.is_empty() && !range_tombstones.is_empty() {
143                let active = self.active_memtable();
144                for rt in &range_tombstones {
145                    let _ =
146                        active.insert_range_tombstone(rt.start.clone(), rt.end.clone(), rt.seqno);
147                }
148            }
149
150            self.register_tables(
151                &tables,
152                blob_files.as_deref(),
153                None,
154                &sealed_ids,
155                seqno_threshold,
156            )?;
157        }
158
159        Ok(Some(flushed_size))
160    }
161
    /// Returns an iterator that scans through the entire tree.
    ///
    /// Avoid using this function, or limit it as otherwise it may scan a lot of items.
    fn iter(
        &self,
        seqno: SeqNo,
        index: Option<(Arc<Memtable>, SeqNo)>,
    ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl> + Send + 'static> {
        // A full scan is just a range query over the unbounded range.
        self.range::<&[u8], _>(.., seqno, index)
    }
172
    /// Returns an iterator over a prefixed set of items.
    ///
    /// Avoid using an empty prefix as it may scan a lot of items (unless limited).
    fn prefix<K: AsRef<[u8]>>(
        &self,
        prefix: K,
        seqno: SeqNo,
        index: Option<(Arc<Memtable>, SeqNo)>,
    ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl> + Send + 'static>;

    /// Returns an iterator over a range of items.
    ///
    /// Avoid using full or unbounded ranges as they may scan a lot of items (unless limited).
    fn range<K: AsRef<[u8]>, R: RangeBounds<K>>(
        &self,
        range: R,
        seqno: SeqNo,
        index: Option<(Arc<Memtable>, SeqNo)>,
    ) -> Box<dyn DoubleEndedIterator<Item = IterGuardImpl> + Send + 'static>;
192
    /// Returns the approximate number of tombstones in the tree.
    fn tombstone_count(&self) -> u64;

    /// Returns the approximate number of weak tombstones (single deletes) in the tree.
    fn weak_tombstone_count(&self) -> u64;

    /// Returns the approximate number of values reclaimable once weak tombstones can be GC'd.
    fn weak_tombstone_reclaimable_count(&self) -> u64;

    /// Drops tables that are fully contained in a given range.
    ///
    /// Accepts any `RangeBounds`, including unbounded or exclusive endpoints.
    /// If the normalized lower bound is greater than the upper bound, the
    /// method returns without performing any work.
    ///
    /// # Errors
    ///
    /// Will return `Err` only if an IO error occurs.
    fn drop_range<K: AsRef<[u8]>, R: RangeBounds<K>>(&self, range: R) -> crate::Result<()>;

    /// Drops all tables and clears all memtables atomically.
    ///
    /// # Errors
    ///
    /// Will return `Err` only if an IO error occurs.
    fn clear(&self) -> crate::Result<()>;

    /// Performs major compaction, blocking the caller until it's done.
    ///
    /// Returns a [`crate::compaction::CompactionResult`] describing what action was taken.
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
    fn major_compact(
        &self,
        target_size: u64,
        seqno_threshold: SeqNo,
    ) -> crate::Result<crate::compaction::CompactionResult>;
232
    /// Returns the disk space used by stale blobs.
    ///
    /// The default implementation returns 0; tree types backed by blob files
    /// are expected to override this.
    fn stale_blob_bytes(&self) -> u64 {
        0
    }
237
    /// Gets the space usage of all filters in the tree.
    ///
    /// May not correspond to the actual memory size because filter blocks may be paged out.
    fn filter_size(&self) -> u64;

    /// Gets the memory usage of all pinned filters in the tree.
    fn pinned_filter_size(&self) -> usize;

    /// Gets the memory usage of all pinned index blocks in the tree.
    fn pinned_block_index_size(&self) -> usize;

    /// Gets the length of the version free list.
    fn version_free_list_len(&self) -> usize;

    /// Returns the metrics structure.
    #[cfg(feature = "metrics")]
    fn metrics(&self) -> &Arc<crate::Metrics>;

    /// Acquires the flush lock which is required to call [`AbstractTree::flush`].
    fn get_flush_lock(&self) -> MutexGuard<'_, ()>;
258
259    /// Synchronously flushes a memtable to a table.
260    ///
261    /// This method will not make the table immediately available,
262    /// use [`AbstractTree::register_tables`] for that.
263    ///
264    /// # Errors
265    ///
266    /// Will return `Err` if an IO error occurs.
267    fn flush_to_tables(
268        &self,
269        stream: impl Iterator<Item = crate::Result<InternalValue>>,
270    ) -> crate::Result<Option<FlushToTablesResult>> {
271        self.flush_to_tables_with_rt(stream, Vec::new())
272    }
273
    /// Like [`AbstractTree::flush_to_tables`], but also writes range tombstones.
    ///
    /// This is an internal extension hook on the crate's sealed tree types and
    /// is hidden from generated documentation.
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
    #[doc(hidden)]
    fn flush_to_tables_with_rt(
        &self,
        stream: impl Iterator<Item = crate::Result<InternalValue>>,
        range_tombstones: Vec<crate::range_tombstone::RangeTombstone>,
    ) -> crate::Result<Option<FlushToTablesResult>>;
288
    /// Atomically registers flushed tables into the tree, removing their associated sealed memtables.
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
    fn register_tables(
        &self,
        tables: &[Table],
        blob_files: Option<&[BlobFile]>,
        frag_map: Option<crate::blob_tree::FragmentationMap>,
        sealed_memtables_to_delete: &[crate::tree::inner::MemtableId],
        gc_watermark: SeqNo,
    ) -> crate::Result<()>;

    /// Clears the active memtable atomically.
    fn clear_active_memtable(&self);

    /// Returns the number of sealed memtables.
    fn sealed_memtable_count(&self) -> usize;

    /// Performs compaction on the tree's levels, blocking the caller until it's done.
    ///
    /// Returns a [`crate::compaction::CompactionResult`] describing what action was taken.
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
    fn compact(
        &self,
        strategy: Arc<dyn crate::compaction::CompactionStrategy>,
        seqno_threshold: SeqNo,
    ) -> crate::Result<crate::compaction::CompactionResult>;

    /// Returns the next table's ID.
    fn get_next_table_id(&self) -> TableId;

    /// Returns the tree config.
    fn tree_config(&self) -> &Config;
327
328    /// Returns the highest sequence number.
329    fn get_highest_seqno(&self) -> Option<SeqNo> {
330        let memtable_seqno = self.get_highest_memtable_seqno();
331        let table_seqno = self.get_highest_persisted_seqno();
332        memtable_seqno.max(table_seqno)
333    }
334
    /// Returns the active memtable.
    fn active_memtable(&self) -> Arc<Memtable>;

    /// Returns the tree type.
    fn tree_type(&self) -> crate::TreeType;

    /// Seals the active memtable.
    ///
    /// Returns the sealed memtable, if any.
    fn rotate_memtable(&self) -> Option<Arc<Memtable>>;

    /// Returns the number of tables currently in the tree.
    fn table_count(&self) -> usize;

    /// Returns the number of tables in `levels[idx]`.
    ///
    /// Returns `None` if the level does not exist (if idx >= 7).
    fn level_table_count(&self, idx: usize) -> Option<usize>;

    /// Returns the number of disjoint runs in L0.
    ///
    /// Can be used to determine whether to write stall.
    fn l0_run_count(&self) -> usize;

    /// Returns the number of blob files currently in the tree.
    fn blob_file_count(&self) -> usize;

    /// Approximates the number of items in the tree.
    fn approximate_len(&self) -> usize;

    /// Returns the disk space usage.
    fn disk_space(&self) -> u64;

    /// Returns the highest sequence number of the active memtable.
    fn get_highest_memtable_seqno(&self) -> Option<SeqNo>;

    /// Returns the highest sequence number that is flushed to disk.
    fn get_highest_persisted_seqno(&self) -> Option<SeqNo>;
371
372    /// Scans the entire tree, returning the number of items.
373    ///
374    /// ###### Caution
375    ///
376    /// This operation scans the entire tree: O(n) complexity!
377    ///
378    /// Never, under any circumstances, use .`len()` == 0 to check
379    /// if the tree is empty, use [`Tree::is_empty`] instead.
380    ///
381    /// # Examples
382    ///
383    /// ```
384    /// # use lsm_tree::Error as TreeError;
385    /// use lsm_tree::{AbstractTree, Config, Tree};
386    ///
387    /// let folder = tempfile::tempdir()?;
388    /// let tree = Config::new(folder, Default::default(), Default::default()).open()?;
389    ///
390    /// assert_eq!(tree.len(0, None)?, 0);
391    /// tree.insert("1", "abc", 0);
392    /// tree.insert("3", "abc", 1);
393    /// tree.insert("5", "abc", 2);
394    /// assert_eq!(tree.len(3, None)?, 3);
395    /// #
396    /// # Ok::<(), TreeError>(())
397    /// ```
398    ///
399    /// # Errors
400    ///
401    /// Will return `Err` if an IO error occurs.
402    fn len(&self, seqno: SeqNo, index: Option<(Arc<Memtable>, SeqNo)>) -> crate::Result<usize> {
403        let mut count = 0;
404
405        for item in self.iter(seqno, index) {
406            let _ = item.key()?;
407            count += 1;
408        }
409
410        Ok(count)
411    }
412
413    /// Returns `true` if the tree is empty.
414    ///
415    /// This operation has O(log N) complexity.
416    ///
417    /// # Examples
418    ///
419    /// ```
420    /// # let folder = tempfile::tempdir()?;
421    /// use lsm_tree::{AbstractTree, Config, Tree};
422    ///
423    /// let tree = Config::new(folder, Default::default(), Default::default()).open()?;
424    /// assert!(tree.is_empty(0, None)?);
425    ///
426    /// tree.insert("a", "abc", 0);
427    /// assert!(!tree.is_empty(1, None)?);
428    /// #
429    /// # Ok::<(), lsm_tree::Error>(())
430    /// ```
431    ///
432    /// # Errors
433    ///
434    /// Will return `Err` if an IO error occurs.
435    fn is_empty(&self, seqno: SeqNo, index: Option<(Arc<Memtable>, SeqNo)>) -> crate::Result<bool> {
436        Ok(self
437            .first_key_value(seqno, index)
438            .map(crate::Guard::key)
439            .transpose()?
440            .is_none())
441    }
442
443    /// Returns the first key-value pair in the tree.
444    /// The key in this pair is the minimum key in the tree.
445    ///
446    /// # Examples
447    ///
448    /// ```
449    /// # use lsm_tree::Error as TreeError;
450    /// # use lsm_tree::{AbstractTree, Config, Tree, Guard};
451    /// #
452    /// # let folder = tempfile::tempdir()?;
453    /// let tree = Config::new(folder, Default::default(), Default::default()).open()?;
454    ///
455    /// tree.insert("1", "abc", 0);
456    /// tree.insert("3", "abc", 1);
457    /// tree.insert("5", "abc", 2);
458    ///
459    /// let key = tree.first_key_value(3, None).expect("item should exist").key()?;
460    /// assert_eq!(&*key, "1".as_bytes());
461    /// #
462    /// # Ok::<(), TreeError>(())
463    /// ```
464    ///
465    /// # Errors
466    ///
467    /// Will return `Err` if an IO error occurs.
468    fn first_key_value(
469        &self,
470        seqno: SeqNo,
471        index: Option<(Arc<Memtable>, SeqNo)>,
472    ) -> Option<IterGuardImpl> {
473        self.iter(seqno, index).next()
474    }
475
476    /// Returns the last key-value pair in the tree.
477    /// The key in this pair is the maximum key in the tree.
478    ///
479    /// # Examples
480    ///
481    /// ```
482    /// # use lsm_tree::Error as TreeError;
483    /// # use lsm_tree::{AbstractTree, Config, Tree, Guard};
484    /// #
485    /// # let folder = tempfile::tempdir()?;
486    /// # let tree = Config::new(folder, Default::default(), Default::default()).open()?;
487    /// #
488    /// tree.insert("1", "abc", 0);
489    /// tree.insert("3", "abc", 1);
490    /// tree.insert("5", "abc", 2);
491    ///
492    /// let key = tree.last_key_value(3, None).expect("item should exist").key()?;
493    /// assert_eq!(&*key, "5".as_bytes());
494    /// #
495    /// # Ok::<(), TreeError>(())
496    /// ```
497    ///
498    /// # Errors
499    ///
500    /// Will return `Err` if an IO error occurs.
501    fn last_key_value(
502        &self,
503        seqno: SeqNo,
504        index: Option<(Arc<Memtable>, SeqNo)>,
505    ) -> Option<IterGuardImpl> {
506        self.iter(seqno, index).next_back()
507    }
508
    /// Returns the size of a value if it exists.
    ///
    /// # Examples
    ///
    /// ```
    /// # let folder = tempfile::tempdir()?;
    /// use lsm_tree::{AbstractTree, Config, Tree};
    ///
    /// let tree = Config::new(folder, Default::default(), Default::default()).open()?;
    /// tree.insert("a", "my_value", 0);
    ///
    /// let size = tree.size_of("a", 1)?.unwrap_or_default();
    /// assert_eq!("my_value".len() as u32, size);
    ///
    /// let size = tree.size_of("b", 1)?.unwrap_or_default();
    /// assert_eq!(0, size);
    /// #
    /// # Ok::<(), lsm_tree::Error>(())
    /// ```
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
    fn size_of<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<Option<u32>>;

    /// Retrieves an item from the tree.
    ///
    /// # Examples
    ///
    /// ```
    /// # let folder = tempfile::tempdir()?;
    /// use lsm_tree::{AbstractTree, Config, Tree};
    ///
    /// let tree = Config::new(folder, Default::default(), Default::default()).open()?;
    /// tree.insert("a", "my_value", 0);
    ///
    /// let item = tree.get("a", 1)?;
    /// assert_eq!(Some("my_value".as_bytes().into()), item);
    /// #
    /// # Ok::<(), lsm_tree::Error>(())
    /// ```
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
    fn get<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<Option<UserValue>>;
555
556    /// Returns `true` if the tree contains the specified key.
557    ///
558    /// # Examples
559    ///
560    /// ```
561    /// # let folder = tempfile::tempdir()?;
562    /// # use lsm_tree::{AbstractTree, Config, Tree};
563    /// #
564    /// let tree = Config::new(folder, Default::default(), Default::default()).open()?;
565    /// assert!(!tree.contains_key("a", 0)?);
566    ///
567    /// tree.insert("a", "abc", 0);
568    /// assert!(tree.contains_key("a", 1)?);
569    /// #
570    /// # Ok::<(), lsm_tree::Error>(())
571    /// ```
572    ///
573    /// # Errors
574    ///
575    /// Will return `Err` if an IO error occurs.
576    fn contains_key<K: AsRef<[u8]>>(&self, key: K, seqno: SeqNo) -> crate::Result<bool> {
577        self.get(key, seqno).map(|x| x.is_some())
578    }
579
580    /// Returns `true` if the tree contains any key with the given prefix.
581    ///
582    /// This is a convenience method that checks whether the corresponding
583    /// prefix iterator yields at least one item, while surfacing any IO
584    /// errors via the `Result` return type. Implementations may override
585    /// this method to provide a more efficient prefix-existence check.
586    ///
587    /// # Examples
588    ///
589    /// ```
590    /// # let folder = tempfile::tempdir()?;
591    /// use lsm_tree::{AbstractTree, Config, Tree};
592    ///
593    /// let tree = Config::new(folder, Default::default(), Default::default()).open()?;
594    /// assert!(!tree.contains_prefix("abc", 0, None)?);
595    ///
596    /// tree.insert("abc:1", "value", 0);
597    /// assert!(tree.contains_prefix("abc", 1, None)?);
598    /// assert!(!tree.contains_prefix("xyz", 1, None)?);
599    /// #
600    /// # Ok::<(), lsm_tree::Error>(())
601    /// ```
602    ///
603    /// # Errors
604    ///
605    /// Will return `Err` if an IO error occurs.
606    fn contains_prefix<K: AsRef<[u8]>>(
607        &self,
608        prefix: K,
609        seqno: SeqNo,
610        index: Option<(Arc<Memtable>, SeqNo)>,
611    ) -> crate::Result<bool> {
612        match self.prefix(prefix, seqno, index).next() {
613            Some(guard) => guard.key().map(|_| true),
614            None => Ok(false),
615        }
616    }
617
618    /// Reads multiple keys from the tree.
619    ///
620    /// Implementations may choose to perform all lookups against a single
621    /// version snapshot and acquire the version lock only once, which can be
622    /// more efficient than calling [`AbstractTree::get`] in a loop. The
623    /// default trait implementation, however, is a convenience wrapper that
624    /// simply calls [`AbstractTree::get`] for each key and therefore does not
625    /// guarantee a single-snapshot or single-lock acquisition. Optimized
626    /// implementations (such as [`Tree`] and [`BlobTree`]) provide the
627    /// single-snapshot/one-lock behavior.
628    ///
629    /// # Examples
630    ///
631    /// ```
632    /// # let folder = tempfile::tempdir()?;
633    /// use lsm_tree::{AbstractTree, Config, Tree};
634    ///
635    /// let tree = Config::new(folder, Default::default(), Default::default()).open()?;
636    /// tree.insert("a", "value_a", 0);
637    /// tree.insert("b", "value_b", 1);
638    ///
639    /// let results = tree.multi_get(["a", "b", "c"], 2)?;
640    /// assert_eq!(results[0], Some("value_a".as_bytes().into()));
641    /// assert_eq!(results[1], Some("value_b".as_bytes().into()));
642    /// assert_eq!(results[2], None);
643    /// #
644    /// # Ok::<(), lsm_tree::Error>(())
645    /// ```
646    ///
647    /// # Errors
648    ///
649    /// Will return `Err` if an IO error occurs.
650    fn multi_get<K: AsRef<[u8]>>(
651        &self,
652        keys: impl IntoIterator<Item = K>,
653        seqno: SeqNo,
654    ) -> crate::Result<Vec<Option<UserValue>>> {
655        keys.into_iter().map(|key| self.get(key, seqno)).collect()
656    }
657
    /// Inserts a key-value pair into the tree.
    ///
    /// If the key already exists, the item will be overwritten.
    ///
    /// Returns the added item's size and new size of the memtable.
    ///
    /// # Examples
    ///
    /// ```
    /// # let folder = tempfile::tempdir()?;
    /// use lsm_tree::{AbstractTree, Config, Tree};
    ///
    /// let tree = Config::new(folder, Default::default(), Default::default()).open()?;
    /// tree.insert("a", "abc", 0);
    /// #
    /// # Ok::<(), lsm_tree::Error>(())
    /// ```
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
    fn insert<K: Into<UserKey>, V: Into<UserValue>>(
        &self,
        key: K,
        value: V,
        seqno: SeqNo,
    ) -> (u64, u64);

    /// Removes an item from the tree.
    ///
    /// Returns the added item's size and new size of the memtable.
    ///
    /// # Examples
    ///
    /// ```
    /// # let folder = tempfile::tempdir()?;
    /// # use lsm_tree::{AbstractTree, Config, Tree};
    /// #
    /// # let tree = Config::new(folder, Default::default(), Default::default()).open()?;
    /// tree.insert("a", "abc", 0);
    ///
    /// let item = tree.get("a", 1)?.expect("should have item");
    /// assert_eq!("abc".as_bytes(), &*item);
    ///
    /// tree.remove("a", 1);
    ///
    /// let item = tree.get("a", 2)?;
    /// assert_eq!(None, item);
    /// #
    /// # Ok::<(), lsm_tree::Error>(())
    /// ```
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
    fn remove<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u64, u64);

    /// Writes a merge operand for a key.
    ///
    /// The operand is stored as a partial update that will be combined with
    /// other operands and/or a base value via the configured [`crate::MergeOperator`]
    /// during reads and compaction.
    ///
    /// Returns the added item's size and new size of the memtable.
    ///
    /// # Examples
    ///
    /// ```
    /// # let folder = tempfile::tempdir()?;
    /// # use lsm_tree::{AbstractTree, Config, MergeOperator, UserValue};
    /// # use std::sync::Arc;
    /// # struct SumMerge;
    /// # impl MergeOperator for SumMerge {
    /// #     fn merge(&self, _key: &[u8], base: Option<&[u8]>, operands: &[&[u8]]) -> lsm_tree::Result<UserValue> {
    /// #         let mut sum: i64 = base.map_or(0, |b| i64::from_le_bytes(b.try_into().unwrap()));
    /// #         for op in operands { sum += i64::from_le_bytes((*op).try_into().unwrap()); }
    /// #         Ok(sum.to_le_bytes().to_vec().into())
    /// #     }
    /// # }
    /// # let tree = Config::new(folder, Default::default(), Default::default())
    /// #     .with_merge_operator(Some(Arc::new(SumMerge)))
    /// #     .open()?;
    /// tree.merge("counter", 1_i64.to_le_bytes(), 0);
    /// # Ok::<(), lsm_tree::Error>(())
    /// ```
    fn merge<K: Into<UserKey>, V: Into<UserValue>>(
        &self,
        key: K,
        operand: V,
        seqno: SeqNo,
    ) -> (u64, u64);

    /// Removes an item from the tree using a weak (single-delete) tombstone.
    ///
    /// The tombstone marker of this delete operation will vanish when it
    /// collides with its corresponding insertion.
    /// This may cause older versions of the value to be resurrected, so it should
    /// only be used and preferred in scenarios where a key is only ever written once.
    ///
    /// Returns the added item's size and new size of the memtable.
    ///
    /// # Examples
    ///
    /// ```
    /// # let folder = tempfile::tempdir()?;
    /// # use lsm_tree::{AbstractTree, Config, Tree};
    /// #
    /// # let tree = Config::new(folder, Default::default(), Default::default()).open()?;
    /// tree.insert("a", "abc", 0);
    ///
    /// let item = tree.get("a", 1)?.expect("should have item");
    /// assert_eq!("abc".as_bytes(), &*item);
    ///
    /// tree.remove_weak("a", 1);
    ///
    /// let item = tree.get("a", 2)?;
    /// assert_eq!(None, item);
    /// #
    /// # Ok::<(), lsm_tree::Error>(())
    /// ```
    ///
    /// # Errors
    ///
    /// Will return `Err` if an IO error occurs.
    #[doc(hidden)]
    fn remove_weak<K: Into<UserKey>>(&self, key: K, seqno: SeqNo) -> (u64, u64);

    /// Deletes all keys in the range `[start, end)` by inserting a range tombstone.
    ///
    /// This is much more efficient than deleting keys individually when
    /// removing a contiguous range of keys.
    ///
    /// Returns the approximate size added to the memtable.
    /// Returns 0 if `start >= end` (invalid interval is silently ignored).
    ///
    /// This is a required method on the crate's sealed tree types.
    fn remove_range<K: Into<UserKey>>(&self, start: K, end: K, seqno: SeqNo) -> u64;
795
796    /// Deletes all keys with the given prefix by inserting a range tombstone.
797    ///
798    /// This is sugar over [`AbstractTree::remove_range`] using prefix bounds.
799    ///
800    /// Returns the approximate size added to the memtable.
801    /// Returns 0 for empty prefixes or all-`0xFF` prefixes (cannot form valid half-open range).
802    fn remove_prefix<K: AsRef<[u8]>>(&self, prefix: K, seqno: SeqNo) -> u64 {
803        use crate::range::prefix_to_range;
804        use std::ops::Bound;
805
806        let (lo, hi) = prefix_to_range(prefix.as_ref());
807
808        let Bound::Included(start) = lo else { return 0 };
809
810        // Bound::Unbounded means the prefix is all 0xFF — no representable
811        // exclusive upper bound exists, so we cannot form a valid range tombstone.
812        let Bound::Excluded(end) = hi else { return 0 };
813
814        self.remove_range(start, end, seqno)
815    }
816}