Skip to main content

lsm_tree/
lib.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5//! A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs).
6//!
7//! ##### NOTE
8//!
9//! > This crate only provides a primitive LSM-tree, not a full storage engine.
10//! > You probably want to use <https://crates.io/crates/fjall> instead.
11//! > For example, it does not ship with a write-ahead log, so writes are not
12//! > persisted until manually flushing the memtable.
13//!
14//! ##### About
15//!
16//! This crate exports a `Tree` that supports a subset of the `BTreeMap` API.
17//!
18//! LSM-trees are an alternative to B-trees to persist a sorted list of items (e.g. a database table)
19//! on disk and perform fast lookup queries.
20//! Instead of updating a disk-based data structure in-place,
21//! deltas (inserts and deletes) are added into an in-memory write buffer (`Memtable`).
22//! Data is then flushed to disk-resident table files when the write buffer reaches some threshold.
23//!
24//! Amassing many tables on disk will degrade read performance and waste disk space, so tables
25//! can be periodically merged into larger tables in a process called `Compaction`.
26//! Different compaction strategies have different advantages and drawbacks, and should be chosen based
27//! on the workload characteristics.
28//!
29//! Because maintaining an efficient structure is deferred to the compaction process, writing to an LSMT
30//! is very fast (_O(1)_ complexity).
31//!
32//! Keys are limited to 65536 bytes, values are limited to 2^32 bytes. As is normal with any kind of storage
33//! engine, larger keys and values have a bigger performance impact.
34
35#![doc(html_logo_url = "https://raw.githubusercontent.com/fjall-rs/lsm-tree/main/logo.png")]
36#![doc(html_favicon_url = "https://raw.githubusercontent.com/fjall-rs/lsm-tree/main/logo.png")]
37#![deny(clippy::all, missing_docs, clippy::cargo)]
38#![deny(clippy::unwrap_used)]
39#![deny(clippy::indexing_slicing)]
40#![warn(clippy::pedantic, clippy::nursery)]
41#![warn(clippy::expect_used)]
42#![allow(clippy::missing_const_for_fn)]
43#![warn(clippy::multiple_crate_versions)]
44#![allow(clippy::option_if_let_else)]
45#![warn(clippy::redundant_feature_names)]
46#![cfg_attr(coverage_nightly, feature(coverage_attribute))]
47
48#[doc(hidden)]
49pub type HashMap<K, V> = std::collections::HashMap<K, V, rustc_hash::FxBuildHasher>;
50
51pub(crate) type HashSet<K> = std::collections::HashSet<K, rustc_hash::FxBuildHasher>;
52
53macro_rules! fail_iter {
54    ($e:expr) => {
55        match $e {
56            Ok(v) => v,
57            Err(e) => return Some(Err(e.into())),
58        }
59    };
60}
61
62macro_rules! unwrap {
63    ($x:expr) => {{
64        $x.expect("should read")
65    }};
66}
67
68pub(crate) use unwrap;
69
70mod any_tree;
71
72mod r#abstract;
73
74#[doc(hidden)]
75pub mod blob_tree;
76
77#[doc(hidden)]
78mod cache;
79
80#[doc(hidden)]
81pub mod checksum;
82
83#[doc(hidden)]
84pub mod coding;
85
86pub mod compaction;
87mod compression;
88
89/// Configuration
90pub mod config;
91
92#[doc(hidden)]
93pub mod descriptor_table;
94
95#[doc(hidden)]
96pub mod file_accessor;
97
98mod double_ended_peekable;
99mod error;
100
101#[doc(hidden)]
102pub mod file;
103
104mod hash;
105mod ingestion;
106mod iter_guard;
107mod key;
108mod key_range;
109mod manifest;
110mod memtable;
111mod run_reader;
112mod run_scanner;
113
114#[doc(hidden)]
115pub mod merge;
116
117#[cfg(feature = "metrics")]
118pub(crate) mod metrics;
119
120// mod multi_reader;
121
122#[doc(hidden)]
123pub mod mvcc_stream;
124
125mod path;
126
127#[doc(hidden)]
128pub mod range;
129
130#[doc(hidden)]
131pub mod table;
132
133mod seqno;
134mod slice;
135mod slice_windows;
136
137#[doc(hidden)]
138pub mod stop_signal;
139
140mod format_version;
141mod time;
142mod tree;
143
144/// Utility functions
145pub mod util;
146
147mod value;
148mod value_type;
149mod version;
150mod vlog;
151
152/// User defined key (byte array)
153pub type UserKey = Slice;
154
155/// User defined data (byte array)
156pub type UserValue = Slice;
157
158/// KV-tuple (key + value)
159pub type KvPair = (UserKey, UserValue);
160
161#[doc(hidden)]
162pub use {
163    blob_tree::{handle::BlobIndirection, Guard as BlobGuard},
164    checksum::Checksum,
165    iter_guard::IterGuardImpl,
166    key_range::KeyRange,
167    merge::BoxedIterator,
168    slice::Builder,
169    table::{GlobalTableId, Table, TableId},
170    tree::inner::TreeId,
171    tree::Guard as StandardGuard,
172    value::InternalValue,
173};
174
175pub use {
176    any_tree::AnyTree,
177    blob_tree::BlobTree,
178    cache::Cache,
179    compression::CompressionType,
180    config::{Config, KvSeparationOptions, TreeType},
181    descriptor_table::DescriptorTable,
182    error::{Error, Result},
183    format_version::FormatVersion,
184    ingestion::AnyIngestion,
185    iter_guard::IterGuard as Guard,
186    memtable::{Memtable, MemtableId},
187    r#abstract::AbstractTree,
188    seqno::SequenceNumberCounter,
189    slice::Slice,
190    tree::Tree,
191    value::SeqNo,
192    value_type::ValueType,
193    vlog::BlobFile,
194};
195
196#[cfg(feature = "metrics")]
197pub use metrics::Metrics;
198
199#[doc(hidden)]
200#[must_use]
201#[allow(missing_docs, clippy::missing_errors_doc, clippy::unwrap_used)]
202pub fn get_tmp_folder() -> tempfile::TempDir {
203    if let Ok(p) = std::env::var("LSMT_TMP_FOLDER") {
204        tempfile::tempdir_in(p)
205    } else {
206        tempfile::tempdir()
207    }
208    .unwrap()
209}