lsm_tree/
lib.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
// Copyright (c) 2024-present, fjall-rs
// This source code is licensed under both the Apache 2.0 and MIT License
// (found in the LICENSE-* files in the repository)

//! A K.I.S.S. implementation of log-structured merge trees (LSM-trees/LSMTs).
//!
//! ##### NOTE
//!
//! > This crate only provides a primitive LSM-tree, not a full storage engine.
//! > You probably want to use <https://crates.io/crates/fjall> instead.
//! > For example, it does not ship with a write-ahead log, so writes are not
//! > persisted until manually flushing the memtable.
//!
//! ##### About
//!
//! This crate exports a `Tree` that supports a subset of the `BTreeMap` API.
//!
//! LSM-trees are an alternative to B-trees to persist a sorted list of items (e.g. a database table)
//! on disk and perform fast lookup queries.
//! Instead of updating a disk-based data structure in-place,
//! deltas (inserts and deletes) are added into an in-memory write buffer (`Memtable`).
//! Data is then flushed to disk segments, as the write buffer reaches some threshold.
//!
//! Amassing many segments on disk will degrade read performance and waste disk space usage, so segments
//! can be periodically merged into larger segments in a process called `Compaction`.
//! Different compaction strategies have different advantages and drawbacks, and should be chosen based
//! on the workload characteristics.
//!
//! Because maintaining an efficient structure is deferred to the compaction process, writing to an LSMT
//! is very fast (O(1) complexity).
//!
//! Keys are limited to 65536 bytes, values are limited to 2^32 bytes. As is normal with any kind of storage
//! engine, larger keys and values have a bigger performance impact.
//!
//! # Example usage
//!
//! ```
//! use lsm_tree::{AbstractTree, Config, Tree};
//! #
//! # let folder = tempfile::tempdir()?;
//!
//! // A tree is a single physical keyspace/index/...
//! // and supports a BTreeMap-like API
//! let tree = Config::new(folder).open()?;
//!
//! // Note compared to the BTreeMap API, operations return a Result<T>
//! // So you can handle I/O errors if they occur
//! tree.insert("my_key", "my_value", /* sequence number */ 0);
//!
//! let item = tree.get("my_key")?;
//! assert_eq!(Some("my_value".as_bytes().into()), item);
//!
//! // Search by prefix
//! for item in tree.prefix("prefix") {
//!   // ...
//! }
//!
//! // Search by range
//! for item in tree.range("a"..="z") {
//!   // ...
//! }
//!
//! // Iterators implement DoubleEndedIterator, so you can search backwards, too!
//! for item in tree.prefix("prefix").rev() {
//!   // ...
//! }
//!
//! // Flush to secondary storage, clearing the memtable
//! // and persisting all in-memory data.
//! // Note, this flushes synchronously, which may not be desired
//! tree.flush_active_memtable(0)?;
//! assert_eq!(Some("my_value".as_bytes().into()), item);
//!
//! // When some disk segments have amassed, use compaction
//! // to reduce the amount of disk segments
//!
//! // Choose compaction strategy based on workload
//! use lsm_tree::compaction::Leveled;
//! # use std::sync::Arc;
//!
//! let strategy = Leveled::default();
//!
//! let version_gc_threshold = 0;
//! tree.compact(Arc::new(strategy), version_gc_threshold)?;
//!
//! assert_eq!(Some("my_value".as_bytes().into()), item);
//! #
//! # Ok::<(), lsm_tree::Error>(())
//! ```

#![doc(html_logo_url = "https://raw.githubusercontent.com/fjall-rs/lsm-tree/main/logo.png")]
#![doc(html_favicon_url = "https://raw.githubusercontent.com/fjall-rs/lsm-tree/main/logo.png")]
#![forbid(unsafe_code)]
#![deny(clippy::all, missing_docs, clippy::cargo)]
#![deny(clippy::unwrap_used)]
#![deny(clippy::indexing_slicing)]
#![warn(clippy::pedantic, clippy::nursery)]
#![warn(clippy::expect_used)]
#![allow(clippy::missing_const_for_fn)]
#![warn(clippy::multiple_crate_versions)]
#![allow(clippy::option_if_let_else)]

pub(crate) type HashMap<K, V> = std::collections::HashMap<K, V, xxhash_rust::xxh3::Xxh3Builder>;
pub(crate) type HashSet<K> = std::collections::HashSet<K, xxhash_rust::xxh3::Xxh3Builder>;

macro_rules! fail_iter {
    ($e:expr) => {
        match $e {
            Ok(v) => v,
            Err(e) => return Some(Err(e)),
        }
    };
}

mod any_tree;

mod r#abstract;

#[doc(hidden)]
pub mod blob_tree;

mod block_cache;

#[doc(hidden)]
#[cfg(feature = "bloom")]
pub mod bloom;

#[doc(hidden)]
pub mod coding;
pub mod compaction;
mod config;

#[doc(hidden)]
pub mod descriptor_table;

mod either;
mod error;
// mod export;

#[doc(hidden)]
pub mod file;

mod key;
mod key_range;

#[doc(hidden)]
pub mod level_manifest;

mod manifest;
mod memtable;

#[doc(hidden)]
pub mod merge;

mod mvcc_stream;
mod path;

#[doc(hidden)]
pub mod range;

#[doc(hidden)]
pub mod segment;

mod seqno;
mod snapshot;

#[doc(hidden)]
pub mod stop_signal;

mod time;
mod tree;
mod value;
mod version;

/// KV-tuple, typically returned by an iterator
pub type KvPair = (UserKey, UserValue);

#[doc(hidden)]
pub use {
    merge::BoxedIterator,
    segment::{block::checksum::Checksum, id::GlobalSegmentId, meta::SegmentId},
    tree::inner::TreeId,
    value::InternalValue,
};

pub use {
    block_cache::BlockCache,
    coding::{DecodeError, EncodeError},
    config::{Config, TreeType},
    error::{Error, Result},
    memtable::Memtable,
    r#abstract::AbstractTree,
    segment::{meta::CompressionType, Segment},
    seqno::SequenceNumberCounter,
    snapshot::Snapshot,
    tree::Tree,
    value::{SeqNo, UserKey, UserValue, ValueType},
    version::Version,
};

pub use any_tree::AnyTree;

pub use blob_tree::BlobTree;

pub use value_log::{
    BlobCache, GcReport, GcStrategy, Slice, SpaceAmpStrategy, StaleThresholdStrategy,
};