lsm_tree/lib.rs
1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5//! A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs).
6//!
7//! ##### NOTE
8//!
9//! > This crate only provides a primitive LSM-tree, not a full storage engine.
10//! > You probably want to use <https://crates.io/crates/fjall> instead.
11//! > For example, it does not ship with a write-ahead log, so writes are not
12//! > persisted until manually flushing the memtable.
13//!
14//! ##### About
15//!
16//! This crate exports a `Tree` that supports a subset of the `BTreeMap` API.
17//!
18//! LSM-trees are an alternative to B-trees to persist a sorted list of items (e.g. a database table)
19//! on disk and perform fast lookup queries.
20//! Instead of updating a disk-based data structure in-place,
21//! deltas (inserts and deletes) are added into an in-memory write buffer (`Memtable`).
22//! Data is then flushed to disk segments, as the write buffer reaches some threshold.
23//!
24//! Amassing many segments on disk will degrade read performance and waste disk space usage, so segments
25//! can be periodically merged into larger segments in a process called `Compaction`.
26//! Different compaction strategies have different advantages and drawbacks, and should be chosen based
27//! on the workload characteristics.
28//!
29//! Because maintaining an efficient structure is deferred to the compaction process, writing to an LSMT
30//! is very fast (O(1) complexity).
31//!
32//! Keys are limited to 65536 bytes, values are limited to 2^32 bytes. As is normal with any kind of storage
33//! engine, larger keys and values have a bigger performance impact.
34//!
35//! # Example usage
36//!
37//! ```
38//! use lsm_tree::{AbstractTree, Config, Tree};
39//! #
40//! # let folder = tempfile::tempdir()?;
41//!
42//! // A tree is a single physical keyspace/index/...
43//! // and supports a BTreeMap-like API
44//! let tree = Config::new(folder).open()?;
45//!
46//! // Note compared to the BTreeMap API, operations return a Result<T>
47//! // So you can handle I/O errors if they occur
48//! tree.insert("my_key", "my_value", /* sequence number */ 0);
49//!
50//! let item = tree.get("my_key", None)?;
51//! assert_eq!(Some("my_value".as_bytes().into()), item);
52//!
53//! // Search by prefix
54//! for item in tree.prefix("prefix", None, None) {
55//! // ...
56//! }
57//!
58//! // Search by range
59//! for item in tree.range("a"..="z", None, None) {
60//! // ...
61//! }
62//!
63//! // Iterators implement DoubleEndedIterator, so you can search backwards, too!
64//! for item in tree.prefix("prefix", None, None).rev() {
65//! // ...
66//! }
67//!
68//! // Flush to secondary storage, clearing the memtable
69//! // and persisting all in-memory data.
70//! // Note, this flushes synchronously, which may not be desired
71//! tree.flush_active_memtable(0)?;
72//! assert_eq!(Some("my_value".as_bytes().into()), item);
73//!
74//! // When some disk segments have amassed, use compaction
75//! // to reduce the amount of disk segments
76//!
77//! // Choose compaction strategy based on workload
78//! use lsm_tree::compaction::Leveled;
79//! # use std::sync::Arc;
80//!
81//! let strategy = Leveled::default();
82//!
83//! let version_gc_threshold = 0;
84//! tree.compact(Arc::new(strategy), version_gc_threshold)?;
85//!
86//! assert_eq!(Some("my_value".as_bytes().into()), item);
87//! #
88//! # Ok::<(), lsm_tree::Error>(())
89//! ```
90
91#![doc(html_logo_url = "https://raw.githubusercontent.com/fjall-rs/lsm-tree/main/logo.png")]
92#![doc(html_favicon_url = "https://raw.githubusercontent.com/fjall-rs/lsm-tree/main/logo.png")]
93#![deny(unsafe_code)]
94#![deny(clippy::all, missing_docs, clippy::cargo)]
95#![deny(clippy::unwrap_used)]
96#![deny(clippy::indexing_slicing)]
97#![warn(clippy::pedantic, clippy::nursery)]
98#![warn(clippy::expect_used)]
99#![allow(clippy::missing_const_for_fn)]
100#![warn(clippy::multiple_crate_versions)]
101#![allow(clippy::option_if_let_else)]
102#![warn(clippy::needless_lifetimes)]
103
104pub(crate) type HashMap<K, V> = std::collections::HashMap<K, V, xxhash_rust::xxh3::Xxh3Builder>;
105pub(crate) type HashSet<K> = std::collections::HashSet<K, xxhash_rust::xxh3::Xxh3Builder>;
106
107#[allow(unused)]
108macro_rules! set {
109 ($($x:expr),+ $(,)?) => {
110 [$($x),+].into_iter().collect::<HashSet<_>>()
111 }
112}
113
114macro_rules! fail_iter {
115 ($e:expr) => {
116 match $e {
117 Ok(v) => v,
118 Err(e) => return Some(Err(e)),
119 }
120 };
121}
122
123mod any_tree;
124
125mod r#abstract;
126
127#[doc(hidden)]
128pub mod binary_search;
129
130#[doc(hidden)]
131pub mod blob_tree;
132
133mod cache;
134
135#[doc(hidden)]
136pub mod bloom;
137
138pub mod compaction;
139mod config;
140
141#[doc(hidden)]
142pub mod descriptor_table;
143
144mod error;
145// mod export;
146
147#[doc(hidden)]
148pub mod file;
149
150mod key;
151
152#[doc(hidden)]
153pub mod level_manifest;
154
155mod level_reader;
156mod level_scanner;
157
158mod manifest;
159mod memtable;
160
161#[doc(hidden)]
162pub mod merge;
163
164mod multi_reader;
165
166#[doc(hidden)]
167pub mod mvcc_stream;
168
169mod path;
170
171#[doc(hidden)]
172pub mod range;
173
174#[doc(hidden)]
175pub mod segment;
176
177mod seqno;
178mod snapshot;
179mod windows;
180
181#[doc(hidden)]
182pub mod stop_signal;
183
184mod time;
185mod tree;
186mod value;
187mod version;
188
189/// KV-tuple, typically returned by an iterator
190pub type KvPair = (UserKey, UserValue);
191
192#[doc(hidden)]
193pub use value_log::KeyRange;
194
195#[doc(hidden)]
196pub mod coding {
197 pub use value_log::coding::{Decode, DecodeError, Encode, EncodeError};
198}
199
200#[doc(hidden)]
201pub use {
202 merge::BoxedIterator,
203 segment::{block::checksum::Checksum, id::GlobalSegmentId, meta::SegmentId},
204 tree::inner::TreeId,
205 value::InternalValue,
206};
207
208pub use {
209 cache::Cache,
210 coding::{DecodeError, EncodeError},
211 config::{Config, TreeType},
212 error::{Error, Result},
213 memtable::Memtable,
214 r#abstract::AbstractTree,
215 segment::{meta::CompressionType, Segment},
216 seqno::SequenceNumberCounter,
217 snapshot::Snapshot,
218 tree::Tree,
219 value::{SeqNo, UserKey, UserValue, ValueType},
220 version::Version,
221};
222
223pub use any_tree::AnyTree;
224
225pub use blob_tree::BlobTree;
226
227pub use value_log::{BlobCache, Slice};
228
229/// Blob garbage collection utilities
230pub mod gc {
231 pub use value_log::{
232 GcReport as Report, GcStrategy as Strategy, SpaceAmpStrategy, StaleThresholdStrategy,
233 };
234}