Skip to main content

lsm_tree/
lib.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5//! A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs).
6//!
7//! ##### NOTE
8//!
9//! > This crate only provides a primitive LSM-tree, not a full storage engine.
10//! > You probably want to use <https://crates.io/crates/fjall> instead.
11//! > For example, it does not ship with a write-ahead log, so writes are not
12//! > persisted until manually flushing the memtable.
13//!
14//! ##### About
15//!
16//! This crate exports a `Tree` that supports a subset of the `BTreeMap` API.
17//!
18//! LSM-trees are an alternative to B-trees to persist a sorted list of items (e.g. a database table)
19//! on disk and perform fast lookup queries.
20//! Instead of updating a disk-based data structure in-place,
21//! deltas (inserts and deletes) are added into an in-memory write buffer (`Memtable`).
22//! Data is then flushed to disk-resident table files when the write buffer reaches some threshold.
23//!
24//! Amassing many tables on disk will degrade read performance and waste disk space, so tables
25//! can be periodically merged into larger tables in a process called `Compaction`.
26//! Different compaction strategies have different advantages and drawbacks, and should be chosen based
27//! on the workload characteristics.
28//!
29//! Because maintaining an efficient structure is deferred to the compaction process, writing to an LSMT
30//! is very fast (_O(1)_ complexity).
31//!
32//! Keys are limited to 65536 bytes, values are limited to 2^32 bytes. As is normal with any kind of storage
33//! engine, larger keys and values have a bigger performance impact.
34
35#![doc(
36    html_logo_url = "https://raw.githubusercontent.com/structured-world/coordinode-lsm-tree/main/logo.png"
37)]
38#![doc(
39    html_favicon_url = "https://raw.githubusercontent.com/structured-world/coordinode-lsm-tree/main/logo.png"
40)]
41#![deny(clippy::all, missing_docs, clippy::cargo)]
42#![deny(clippy::unwrap_used)]
43#![deny(clippy::indexing_slicing)]
44#![warn(clippy::pedantic, clippy::nursery)]
45#![warn(clippy::expect_used)]
46#![allow(clippy::missing_const_for_fn)]
47#![warn(clippy::multiple_crate_versions)]
48#![allow(clippy::option_if_let_else)]
49#![warn(clippy::redundant_feature_names)]
50#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
51
52#[doc(hidden)]
53pub type HashMap<K, V> = std::collections::HashMap<K, V, rustc_hash::FxBuildHasher>;
54
55pub(crate) type HashSet<K> = std::collections::HashSet<K, rustc_hash::FxBuildHasher>;
56
57macro_rules! fail_iter {
58    ($e:expr) => {
59        match $e {
60            Ok(v) => v,
61            Err(e) => return Some(Err(e.into())),
62        }
63    };
64}
65
66macro_rules! unwrap {
67    ($x:expr) => {{ $x.expect("should read") }};
68}
69
70mod any_tree;
71
72mod abstract_tree;
73
74#[doc(hidden)]
75pub mod blob_tree;
76
77mod comparator;
78
79#[doc(hidden)]
80mod cache;
81
82#[doc(hidden)]
83pub mod checksum;
84
85#[doc(hidden)]
86pub mod coding;
87
88pub mod compaction;
89#[doc(hidden)]
90pub mod compression;
91
92/// Block-level encryption at rest.
93pub mod encryption;
94
95/// Configuration
96pub mod config;
97
98#[doc(hidden)]
99pub mod descriptor_table;
100
101#[doc(hidden)]
102pub mod file_accessor;
103
104mod double_ended_peekable;
105mod error;
106
107#[doc(hidden)]
108pub mod file;
109
110/// Pluggable filesystem abstraction for I/O backends.
111pub mod fs;
112
113mod hash;
114mod heap;
115mod ingestion;
116mod iter_guard;
117mod key;
118mod key_range;
119mod manifest;
120mod memtable;
121mod merge_operator;
122mod run_reader;
123mod run_scanner;
124
125#[doc(hidden)]
126pub mod merge;
127
128#[cfg(feature = "metrics")]
129pub(crate) mod metrics;
130
131// mod multi_reader;
132
133#[doc(hidden)]
134pub mod mvcc_stream;
135
136mod path;
137mod pinnable_slice;
138mod prefix;
139
140#[doc(hidden)]
141pub mod range;
142
143pub(crate) mod active_tombstone_set;
144pub(crate) mod range_tombstone;
145pub(crate) mod range_tombstone_filter;
146
147#[doc(hidden)]
148pub mod table;
149
150mod seqno;
151mod slice;
152mod slice_windows;
153
154#[doc(hidden)]
155pub mod stop_signal;
156
157mod format_version;
158mod time;
159mod tree;
160
161/// Utility functions
162pub mod util;
163
164mod value;
165mod value_type;
166mod write_batch;
167
168/// Integrity verification for SST and blob files.
169pub mod verify;
170
171mod version;
172mod vlog;
173
174/// User defined key (byte array)
175pub type UserKey = Slice;
176
177/// User defined data (byte array)
178pub type UserValue = Slice;
179
180/// KV-tuple (key + value)
181pub type KvPair = (UserKey, UserValue);
182
183// The `#[doc(hidden)]` block below re-exports crate internals that are reachable
184// at the crate root for benchmarks and integration tests, but are NOT part of the
185// public API contract — they carry no semver guarantee and may be renamed, moved,
186// or removed without a major version bump. External callers that import these
187// hidden items do so at their own risk. `cargo doc` excludes them from generated
188// rustdoc output; only intra-crate test/bench code is expected to use them.
189#[doc(hidden)]
190pub use {
191    blob_tree::{Guard as BlobGuard, handle::BlobIndirection},
192    checksum::Checksum,
193    iter_guard::IterGuardImpl,
194    key_range::KeyRange,
195    merge::BoxedIterator,
196    slice::Builder,
197    // Re-exported for `benches/lsp.rs` only — see hidden-block contract above.
198    table::util::longest_shared_prefix_length,
199    table::{GlobalTableId, Table, TableId},
200    tree::Guard as StandardGuard,
201    tree::inner::TreeId,
202    value::InternalValue,
203};
204
205pub use encryption::EncryptionProvider;
206
207#[cfg(feature = "encryption")]
208pub use encryption::Aes256GcmProvider;
209
210pub use pinnable_slice::PinnableSlice;
211pub use write_batch::WriteBatch;
212
213pub use {
214    abstract_tree::AbstractTree,
215    any_tree::AnyTree,
216    blob_tree::BlobTree,
217    cache::Cache,
218    comparator::{DefaultUserComparator, SharedComparator, UserComparator},
219    compression::CompressionType,
220    config::{Config, KvSeparationOptions, TreeType},
221    descriptor_table::DescriptorTable,
222    error::{Error, Result},
223    format_version::FormatVersion,
224    ingestion::AnyIngestion,
225    iter_guard::IterGuard as Guard,
226    memtable::{Memtable, MemtableId},
227    merge_operator::MergeOperator,
228    prefix::PrefixExtractor,
229    seqno::{
230        MAX_SEQNO, SequenceNumberCounter, SequenceNumberGenerator, SharedSequenceNumberGenerator,
231    },
232    slice::Slice,
233    tree::Tree,
234    value::SeqNo,
235    value_type::ValueType,
236    vlog::BlobFile,
237};
238
239#[cfg(zstd_any)]
240pub use compression::ZstdDictionary;
241
242#[cfg(feature = "metrics")]
243pub use metrics::Metrics;
244
245#[doc(hidden)]
246#[must_use]
247#[allow(missing_docs, clippy::missing_errors_doc, clippy::unwrap_used)]
248pub fn get_tmp_folder() -> tempfile::TempDir {
249    if let Ok(p) = std::env::var("LSMT_TMP_FOLDER") {
250        tempfile::tempdir_in(p)
251    } else {
252        tempfile::tempdir()
253    }
254    .unwrap()
255}