lsm_tree/
lib.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5//! A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs).
6//!
7//! ##### NOTE
8//!
9//! > This crate only provides a primitive LSM-tree, not a full storage engine.
10//! > You probably want to use <https://crates.io/crates/fjall> instead.
11//! > For example, it does not ship with a write-ahead log, so writes are not
12//! > persisted until manually flushing the memtable.
13//!
14//! ##### About
15//!
16//! This crate exports a `Tree` that supports a subset of the `BTreeMap` API.
17//!
18//! LSM-trees are an alternative to B-trees to persist a sorted list of items (e.g. a database table)
19//! on disk and perform fast lookup queries.
20//! Instead of updating a disk-based data structure in-place,
21//! deltas (inserts and deletes) are added into an in-memory write buffer (`Memtable`).
22//! Data is then flushed to disk-resident table files when the write buffer reaches some threshold.
23//!
24//! Amassing many tables on disk will degrade read performance and waste disk space, so tables
25//! can be periodically merged into larger tables in a process called `Compaction`.
26//! Different compaction strategies have different advantages and drawbacks, and should be chosen based
27//! on the workload characteristics.
28//!
29//! Because maintaining an efficient structure is deferred to the compaction process, writing to an LSMT
30//! is very fast (_O(1)_ complexity).
31//!
32//! Keys are limited to 65536 bytes, values are limited to 2^32 bytes. As is normal with any kind of storage
33//! engine, larger keys and values have a bigger performance impact.
34
35#![doc(html_logo_url = "https://raw.githubusercontent.com/fjall-rs/lsm-tree/main/logo.png")]
36#![doc(html_favicon_url = "https://raw.githubusercontent.com/fjall-rs/lsm-tree/main/logo.png")]
37#![deny(clippy::all, missing_docs, clippy::cargo)]
38#![deny(clippy::unwrap_used)]
39#![deny(clippy::indexing_slicing)]
40#![warn(clippy::pedantic, clippy::nursery)]
41#![warn(clippy::expect_used)]
42#![allow(clippy::missing_const_for_fn)]
43#![warn(clippy::multiple_crate_versions)]
44#![allow(clippy::option_if_let_else)]
45#![warn(clippy::redundant_feature_names)]
46// the bytes feature uses unsafe to improve from_reader performance; so we need to relax this lint
47// #![cfg_attr(feature = "bytes", deny(unsafe_code))]
48// #![cfg_attr(not(feature = "bytes"), forbid(unsafe_code))]
49
50#[doc(hidden)]
51pub type HashMap<K, V> = std::collections::HashMap<K, V, rustc_hash::FxBuildHasher>;
52
53pub(crate) type HashSet<K> = std::collections::HashSet<K, rustc_hash::FxBuildHasher>;
54
55macro_rules! fail_iter {
56    ($e:expr) => {
57        match $e {
58            Ok(v) => v,
59            Err(e) => return Some(Err(e.into())),
60        }
61    };
62}
63
64macro_rules! unwrap {
65    ($x:expr) => {{
66        $x.expect("should read")
67    }};
68}
69
70pub(crate) use unwrap;
71
72mod any_tree;
73
74mod r#abstract;
75
76#[doc(hidden)]
77pub mod blob_tree;
78
79#[doc(hidden)]
80mod cache;
81
82#[doc(hidden)]
83pub mod checksum;
84
85#[doc(hidden)]
86pub mod coding;
87
88pub mod compaction;
89mod compression;
90
91/// Configuration
92pub mod config;
93
94#[doc(hidden)]
95pub mod descriptor_table;
96
97mod double_ended_peekable;
98mod error;
99
100#[doc(hidden)]
101pub mod file;
102
103mod hash;
104mod ingestion;
105mod iter_guard;
106mod key;
107mod key_range;
108mod manifest;
109mod memtable;
110mod run_reader;
111mod run_scanner;
112
113#[doc(hidden)]
114pub mod merge;
115
116#[cfg(feature = "metrics")]
117pub(crate) mod metrics;
118
119// mod multi_reader;
120
121#[doc(hidden)]
122pub mod mvcc_stream;
123
124mod path;
125
126#[doc(hidden)]
127pub mod range;
128
129#[doc(hidden)]
130pub mod table;
131
132mod seqno;
133mod slice;
134mod slice_windows;
135
136#[doc(hidden)]
137pub mod stop_signal;
138
139mod format_version;
140mod time;
141mod tree;
142
143/// Utility functions
144pub mod util;
145
146mod value;
147mod value_type;
148mod version;
149mod vlog;
150
151/// User defined key (byte array)
152pub type UserKey = Slice;
153
154/// User defined data (byte array)
155pub type UserValue = Slice;
156
157/// KV-tuple (key + value)
158pub type KvPair = (UserKey, UserValue);
159
160#[doc(hidden)]
161pub use {
162    blob_tree::{handle::BlobIndirection, Guard as BlobGuard},
163    checksum::Checksum,
164    iter_guard::IterGuardImpl,
165    key_range::KeyRange,
166    merge::BoxedIterator,
167    slice::Builder,
168    table::{GlobalTableId, Table, TableId},
169    tree::inner::TreeId,
170    tree::Guard as StandardGuard,
171    value::InternalValue,
172};
173
174pub use {
175    any_tree::AnyTree,
176    blob_tree::BlobTree,
177    cache::Cache,
178    compression::CompressionType,
179    config::{Config, KvSeparationOptions, TreeType},
180    descriptor_table::DescriptorTable,
181    error::{Error, Result},
182    format_version::FormatVersion,
183    ingestion::AnyIngestion,
184    iter_guard::IterGuard as Guard,
185    memtable::Memtable,
186    r#abstract::AbstractTree,
187    seqno::SequenceNumberCounter,
188    slice::Slice,
189    tree::Tree,
190    value::SeqNo,
191    value_type::ValueType,
192    vlog::BlobFile,
193};
194
195#[cfg(feature = "metrics")]
196pub use metrics::Metrics;
197
198#[doc(hidden)]
199#[must_use]
200#[allow(missing_docs, clippy::missing_errors_doc, clippy::unwrap_used)]
201pub fn get_tmp_folder() -> tempfile::TempDir {
202    if let Ok(p) = std::env::var("LSMT_TMP_FOLDER") {
203        tempfile::tempdir_in(p)
204    } else {
205        tempfile::tempdir()
206    }
207    .unwrap()
208}