lsm_tree/lib.rs
1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5//! A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs).
6//!
7//! ##### NOTE
8//!
9//! > This crate only provides a primitive LSM-tree, not a full storage engine.
10//! > You probably want to use <https://crates.io/crates/fjall> instead.
11//! > For example, it does not ship with a write-ahead log, so writes are not
12//! > persisted until manually flushing the memtable.
13//!
14//! ##### About
15//!
16//! This crate exports a `Tree` that supports a subset of the `BTreeMap` API.
17//!
18//! LSM-trees are an alternative to B-trees to persist a sorted list of items (e.g. a database table)
19//! on disk and perform fast lookup queries.
20//! Instead of updating a disk-based data structure in-place,
21//! deltas (inserts and deletes) are added into an in-memory write buffer (`Memtable`).
22//! Data is then flushed to disk segments, as the write buffer reaches some threshold.
23//!
24//! Amassing many segments on disk will degrade read performance and waste disk space usage, so segments
25//! can be periodically merged into larger segments in a process called `Compaction`.
26//! Different compaction strategies have different advantages and drawbacks, and should be chosen based
27//! on the workload characteristics.
28//!
29//! Because maintaining an efficient structure is deferred to the compaction process, writing to an LSMT
30//! is very fast (O(1) complexity).
31//!
32//! Keys are limited to 65536 bytes, values are limited to 2^32 bytes. As is normal with any kind of storage
33//! engine, larger keys and values have a bigger performance impact.
34//!
35//! # Example usage
36//!
37//! ```
38//! use lsm_tree::{AbstractTree, Config, Tree};
39//! #
40//! # let folder = tempfile::tempdir()?;
41//!
42//! // A tree is a single physical keyspace/index/...
43//! // and supports a BTreeMap-like API
44//! let tree = Config::new(folder).open()?;
45//!
46//! // Note compared to the BTreeMap API, operations return a Result<T>
47//! // So you can handle I/O errors if they occur
48//! tree.insert("my_key", "my_value", /* sequence number */ 0);
49//!
50//! let item = tree.get("my_key", None)?;
51//! assert_eq!(Some("my_value".as_bytes().into()), item);
52//!
53//! // Search by prefix
54//! for item in tree.prefix("prefix", None, None) {
55//! // ...
56//! }
57//!
58//! // Search by range
59//! for item in tree.range("a"..="z", None, None) {
60//! // ...
61//! }
62//!
63//! // Iterators implement DoubleEndedIterator, so you can search backwards, too!
64//! for item in tree.prefix("prefix", None, None).rev() {
65//! // ...
66//! }
67//!
68//! // Flush to secondary storage, clearing the memtable
69//! // and persisting all in-memory data.
70//! // Note, this flushes synchronously, which may not be desired
71//! tree.flush_active_memtable(0)?;
72//! assert_eq!(Some("my_value".as_bytes().into()), item);
73//!
74//! // When some disk segments have amassed, use compaction
75//! // to reduce the amount of disk segments
76//!
77//! // Choose compaction strategy based on workload
78//! use lsm_tree::compaction::Leveled;
79//! # use std::sync::Arc;
80//!
81//! let strategy = Leveled::default();
82//!
83//! let version_gc_threshold = 0;
84//! tree.compact(Arc::new(strategy), version_gc_threshold)?;
85//!
86//! assert_eq!(Some("my_value".as_bytes().into()), item);
87//! #
88//! # Ok::<(), lsm_tree::Error>(())
89//! ```
90
91#![doc(html_logo_url = "https://raw.githubusercontent.com/fjall-rs/lsm-tree/main/logo.png")]
92#![doc(html_favicon_url = "https://raw.githubusercontent.com/fjall-rs/lsm-tree/main/logo.png")]
93#![deny(unsafe_code)]
94#![deny(clippy::all, missing_docs, clippy::cargo)]
95#![deny(clippy::unwrap_used)]
96#![deny(clippy::indexing_slicing)]
97#![warn(clippy::pedantic, clippy::nursery)]
98#![warn(clippy::expect_used)]
99#![allow(clippy::missing_const_for_fn)]
100#![warn(clippy::multiple_crate_versions)]
101#![allow(clippy::option_if_let_else)]
102#![warn(clippy::needless_lifetimes)]
103#![warn(clippy::cloned_ref_to_slice_refs)]
104#![warn(clippy::non_canonical_partial_ord_impl)]
105
106pub(crate) type HashMap<K, V> = std::collections::HashMap<K, V, xxhash_rust::xxh3::Xxh3Builder>;
107pub(crate) type HashSet<K> = std::collections::HashSet<K, xxhash_rust::xxh3::Xxh3Builder>;
108
109#[allow(unused)]
110macro_rules! set {
111 ($($x:expr),+ $(,)?) => {
112 [$($x),+].into_iter().collect::<HashSet<_>>()
113 }
114}
115
116macro_rules! fail_iter {
117 ($e:expr) => {
118 match $e {
119 Ok(v) => v,
120 Err(e) => return Some(Err(e)),
121 }
122 };
123}
124
125mod any_tree;
126
127mod r#abstract;
128
129#[doc(hidden)]
130pub mod binary_search;
131
132#[doc(hidden)]
133pub mod blob_tree;
134
135mod cache;
136
137#[doc(hidden)]
138pub mod bloom;
139
140pub mod compaction;
141mod config;
142
143#[doc(hidden)]
144pub mod descriptor_table;
145
146mod error;
147// mod export;
148
149#[doc(hidden)]
150pub mod file;
151
152mod key;
153
154#[doc(hidden)]
155pub mod level_manifest;
156
157mod level_reader;
158mod level_scanner;
159
160mod manifest;
161mod memtable;
162
163#[doc(hidden)]
164pub mod merge;
165
166mod multi_reader;
167
168#[doc(hidden)]
169pub mod mvcc_stream;
170
171mod path;
172
173#[doc(hidden)]
174pub mod range;
175
176#[doc(hidden)]
177pub mod segment;
178
179mod seqno;
180mod snapshot;
181mod windows;
182
183#[doc(hidden)]
184pub mod stop_signal;
185
186mod time;
187mod tree;
188mod value;
189mod version;
190
191/// KV-tuple, typically returned by an iterator
192pub type KvPair = (UserKey, UserValue);
193
194#[doc(hidden)]
195pub use value_log::KeyRange;
196
197#[doc(hidden)]
198pub mod coding {
199 pub use value_log::coding::{Decode, DecodeError, Encode, EncodeError};
200}
201
202#[doc(hidden)]
203pub use {
204 merge::BoxedIterator,
205 segment::{block::checksum::Checksum, id::GlobalSegmentId, meta::SegmentId},
206 tree::inner::TreeId,
207 value::InternalValue,
208};
209
210pub use {
211 cache::Cache,
212 coding::{DecodeError, EncodeError},
213 config::{Config, TreeType},
214 error::{Error, Result},
215 memtable::Memtable,
216 r#abstract::AbstractTree,
217 segment::{meta::CompressionType, Segment},
218 seqno::SequenceNumberCounter,
219 snapshot::Snapshot,
220 tree::Tree,
221 value::{SeqNo, UserKey, UserValue, ValueType},
222 version::Version,
223};
224
225pub use any_tree::AnyTree;
226
227pub use blob_tree::BlobTree;
228
229pub use value_log::{BlobCache, Slice};
230
231/// Blob garbage collection utilities
232pub mod gc {
233 pub use value_log::{
234 GcReport as Report, GcStrategy as Strategy, SpaceAmpStrategy, StaleThresholdStrategy,
235 };
236}