lsm_tree/lib.rs
1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5//! A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs).
6//!
7//! ##### NOTE
8//!
9//! > This crate only provides a primitive LSM-tree, not a full storage engine.
10//! > You probably want to use <https://crates.io/crates/fjall> instead.
11//! > For example, it does not ship with a write-ahead log, so writes are not
12//! > persisted until manually flushing the memtable.
13//!
14//! ##### About
15//!
16//! This crate exports a `Tree` that supports a subset of the `BTreeMap` API.
17//!
18//! LSM-trees are an alternative to B-trees to persist a sorted list of items (e.g. a database table)
19//! on disk and perform fast lookup queries.
20//! Instead of updating a disk-based data structure in-place,
21//! deltas (inserts and deletes) are added into an in-memory write buffer (`Memtable`).
22//! Data is then flushed to disk segments when the write buffer reaches some threshold.
23//!
24//! Amassing many segments on disk will degrade read performance and waste disk space, so segments
25//! can be periodically merged into larger segments in a process called `Compaction`.
26//! Different compaction strategies have different advantages and drawbacks, and should be chosen based
27//! on the workload characteristics.
28//!
29//! Because maintaining an efficient structure is deferred to the compaction process, writing to an LSMT
30//! is very fast (_O(1)_ complexity).
31//!
32//! Keys are limited to 65536 bytes, values are limited to 2^32 bytes. As is normal with any kind of storage
33//! engine, larger keys and values have a bigger performance impact.
34//!
35//! # Example usage
36//!
37//! ```
38//! use lsm_tree::{AbstractTree, Config, Tree};
39//! #
40//! # let folder = tempfile::tempdir()?;
41//!
42//! // A tree is a single physical keyspace/index/...
43//! // and supports a BTreeMap-like API
44//! let tree = Config::new(folder).open()?;
45//!
46//! // Note compared to the BTreeMap API, operations return a Result<T>
47//! // So you can handle I/O errors if they occur
48//! tree.insert("my_key", "my_value", /* sequence number */ 0);
49//!
50//! let item = tree.get("my_key", 1)?;
51//! assert_eq!(Some("my_value".as_bytes().into()), item);
52//!
53//! // Search by prefix
54//! for item in tree.prefix("prefix", 1, None) {
55//! // ...
56//! }
57//!
58//! // Search by range
59//! for item in tree.range("a"..="z", 1, None) {
60//! // ...
61//! }
62//!
63//! // Iterators implement DoubleEndedIterator, so you can search backwards, too!
64//! for item in tree.prefix("user1", 1, None).rev() {
65//! // ...
66//! }
67//!
68//! // Flush to secondary storage, clearing the memtable
69//! // and persisting all in-memory data.
70//! // Note, this flushes synchronously, which may not be desired
71//! tree.flush_active_memtable(0)?;
72//!
73//! // When some disk segments have amassed, use compaction
74//! // to reduce the number of disk segments
75//!
76//! // Choose compaction strategy based on workload
77//! use lsm_tree::compaction::Leveled;
78//! # use std::sync::Arc;
79//!
80//! let strategy = Leveled::default();
81//!
82//! let version_gc_threshold = 0;
83//! tree.compact(Arc::new(strategy), version_gc_threshold)?;
84//! #
85//! # Ok::<(), lsm_tree::Error>(())
86//! ```
87
88#![doc(html_logo_url = "https://raw.githubusercontent.com/fjall-rs/lsm-tree/main/logo.png")]
89#![doc(html_favicon_url = "https://raw.githubusercontent.com/fjall-rs/lsm-tree/main/logo.png")]
90#![deny(clippy::all, missing_docs, clippy::cargo)]
91#![deny(clippy::unwrap_used)]
92#![deny(clippy::indexing_slicing)]
93#![warn(clippy::pedantic, clippy::nursery)]
94#![warn(clippy::expect_used)]
95#![allow(clippy::missing_const_for_fn)]
96#![warn(clippy::multiple_crate_versions)]
97#![allow(clippy::option_if_let_else)]
98#![warn(clippy::redundant_feature_names)]
99// the bytes feature uses unsafe to improve from_reader performance; so we need to relax this lint
100// #![cfg_attr(feature = "bytes", deny(unsafe_code))]
101// #![cfg_attr(not(feature = "bytes"), forbid(unsafe_code))]
102
103pub(crate) type HashMap<K, V> = std::collections::HashMap<K, V, rustc_hash::FxBuildHasher>;
104pub(crate) type HashSet<K> = std::collections::HashSet<K, rustc_hash::FxBuildHasher>;
105
106#[allow(unused)]
107macro_rules! set {
108 ($($x:expr),+ $(,)?) => {
109 [$($x),+].into_iter().collect::<HashSet<_>>()
110 }
111}
112
113macro_rules! fail_iter {
114 ($e:expr) => {
115 match $e {
116 Ok(v) => v,
117 Err(e) => return Some(Err(e.into())),
118 }
119 };
120}
121
122mod any_tree;
123
124mod r#abstract;
125
126#[doc(hidden)]
127pub mod binary_search;
128
129#[doc(hidden)]
130pub mod blob_tree;
131
132#[doc(hidden)]
133mod cache;
134
135#[doc(hidden)]
136pub mod coding;
137
138pub mod compaction;
139mod compression;
140
141/// Configuration
142pub mod config;
143
144mod double_ended_peekable;
145
146mod error;
147
148pub(crate) mod fallible_clipping_iter;
149
150#[doc(hidden)]
151pub mod file;
152
153mod hash;
154
155mod iter_guard;
156
157mod key;
158mod key_range;
159
160#[doc(hidden)]
161pub mod level_manifest;
162
163mod run_reader;
164mod run_scanner;
165
166mod manifest;
167mod memtable;
168
169#[doc(hidden)]
170mod descriptor_table;
171
172#[doc(hidden)]
173pub mod merge;
174
175#[cfg(feature = "metrics")]
176pub(crate) mod metrics;
177
178mod multi_reader;
179
180#[doc(hidden)]
181pub mod mvcc_stream;
182
183mod path;
184
185#[doc(hidden)]
186pub mod range;
187
188mod seqno;
189mod slice;
190mod slice_windows;
191
192#[doc(hidden)]
193pub mod stop_signal;
194
195mod format_version;
196mod time;
197mod tree;
198mod value;
199mod version;
200mod vlog;
201
202#[doc(hidden)]
203pub mod segment;
204
205/// KV-tuple, typically returned by an iterator
206pub type KvPair = (UserKey, UserValue);
207
208#[doc(hidden)]
209pub use key_range::KeyRange;
210
211#[doc(hidden)]
212pub use {
213 merge::BoxedIterator,
214 segment::{block::Checksum, GlobalSegmentId, Segment, SegmentId},
215 tree::ingest::Ingestion,
216 tree::inner::TreeId,
217 value::InternalValue,
218};
219
220pub use {
221 cache::Cache,
222 coding::{DecodeError, EncodeError},
223 compression::CompressionType,
224 config::{Config, TreeType},
225 descriptor_table::DescriptorTable,
226 error::{Error, Result},
227 format_version::FormatVersion,
228 iter_guard::IterGuard as Guard,
229 memtable::Memtable,
230 r#abstract::AbstractTree,
231 seqno::SequenceNumberCounter,
232 tree::Tree,
233 value::{SeqNo, ValueType},
234 vlog::BlobFile,
235};
236
237#[cfg(feature = "metrics")]
238pub use metrics::Metrics;
239
240pub use any_tree::AnyTree;
241
242pub use blob_tree::BlobTree;
243
244pub use slice::Slice;
245
246/// User defined key
247pub type UserKey = Slice;
248
249/// User defined data (byte array)
250pub type UserValue = Slice;
251
252/// Blob garbage collection utilities
253pub mod gc {
254 pub use super::vlog::{
255 GcReport as Report, GcStrategy as Strategy, SpaceAmpStrategy, StaleThresholdStrategy,
256 };
257}
258
259// TODO: investigate perf
260macro_rules! unwrap {
261 ($x:expr) => {{
262 #[cfg(not(feature = "use_unsafe"))]
263 {
264 $x.expect("should read")
265 }
266
267 #[cfg(feature = "use_unsafe")]
268 {
269 unsafe { $x.unwrap_unchecked() }
270 }
271 }};
272}
273
274pub(crate) use unwrap;