sled/
lib.rs

1//! `sled` is a high-performance embedded database with
2//! an API that is similar to a `BTreeMap<[u8], [u8]>`,
3//! but with several additional capabilities for
4//! assisting creators of stateful systems.
5//!
6//! It is fully thread-safe, and all operations are
7//! atomic. Multiple `Tree`s with isolated keyspaces
8//! are supported with the
9//! [`Db::open_tree`](struct.Db.html#method.open_tree) method.
10//!
11//! ACID transactions involving reads and writes to
12//! multiple items are supported with the
13//! [`Tree::transaction`](struct.Tree.html#method.transaction)
14//! method. Transactions may also operate over
15//! multiple `Tree`s (see
16//! [`Tree::transaction`](struct.Tree.html#method.transaction)
17//! docs for more info).
18//!
19//! Users may also subscribe to updates on individual
20//! `Tree`s by using the
21//! [`Tree::watch_prefix`](struct.Tree.html#method.watch_prefix)
22//! method, which returns a blocking `Iterator` over
23//! updates to keys that begin with the provided
24//! prefix. You may supply an empty prefix to subscribe
25//! to everything.
26//!
27//! [Merge operators](https://github.com/spacejam/sled/wiki/merge-operators)
28//! (aka read-modify-write operators) are supported. A
29//! merge operator is a function that specifies
30//! how new data can be merged into an existing value
31//! without requiring both a read and a write.
32//! Using the
33//! [`Tree::merge`](struct.Tree.html#method.merge)
34//! method, you may "push" data to a `Tree` value
35//! and have the provided merge operator combine
36//! it with the existing value, if there was one.
37//! They are set on a per-`Tree` basis, and essentially
38//! allow any sort of data structure to be built
39//! using merges as an atomic high-level operation.
40//!
41//! `sled` is built by experienced database engineers
42//! who think users should spend less time tuning and
43//! working against high-friction APIs. Expect
44//! significant ergonomic and performance improvements
45//! over time. Most surprises are bugs, so please
46//! [let us know](mailto:t@jujit.su?subject=sled%20sucks!!!) if something
47//! is high friction.
48//!
49//! # Examples
50//!
51//! ```
52//! # let _ = std::fs::remove_dir_all("my_db");
53//! let db: sled::Db = sled::open("my_db").unwrap();
54//!
55//! // insert and get
56//! db.insert(b"yo!", b"v1");
57//! assert_eq!(&db.get(b"yo!").unwrap().unwrap(), b"v1");
58//!
59//! // Atomic compare-and-swap.
60//! db.compare_and_swap(
61//!     b"yo!",      // key
62//!     Some(b"v1"), // old value, None for not present
63//!     Some(b"v2"), // new value, None for delete
64//! )
65//! .unwrap();
66//!
67//! // Iterates over key-value pairs, starting at the given key.
68//! let scan_key: &[u8] = b"a non-present key before yo!";
69//! let mut iter = db.range(scan_key..);
70//! assert_eq!(&iter.next().unwrap().unwrap().0, b"yo!");
71//! assert_eq!(iter.next(), None);
72//!
73//! db.remove(b"yo!");
74//! assert_eq!(db.get(b"yo!"), Ok(None));
75//!
76//! let other_tree: sled::Tree = db.open_tree(b"cool db facts").unwrap();
77//! other_tree.insert(
78//!     b"k1",
79//!     &b"a Db acts like a Tree due to implementing Deref<Target = Tree>"[..]
80//! ).unwrap();
81//! # let _ = std::fs::remove_dir_all("my_db");
82//! ```
83#![doc(
84    html_logo_url = "https://raw.githubusercontent.com/spacejam/sled/master/art/tree_face_anti-transphobia.png"
85)]
86#![deny(
87    missing_docs,
88    future_incompatible,
89    nonstandard_style,
90    rust_2018_idioms,
91    missing_copy_implementations,
92    trivial_casts,
93    trivial_numeric_casts,
94    unsafe_code,
95    unused_qualifications
96)]
97#![deny(
98    // over time, consider enabling the commented-out lints below
99    clippy::cast_lossless,
100    clippy::cast_possible_truncation,
101    clippy::cast_possible_wrap,
102    clippy::cast_precision_loss,
103    clippy::cast_sign_loss,
104    clippy::checked_conversions,
105    clippy::decimal_literal_representation,
106    clippy::doc_markdown,
107    // clippy::else_if_without_else,
108    clippy::empty_enum,
109    clippy::explicit_into_iter_loop,
110    clippy::explicit_iter_loop,
111    clippy::expl_impl_clone_on_copy,
112    clippy::fallible_impl_from,
113    clippy::filter_map,
114    clippy::filter_map_next,
115    clippy::find_map,
116    clippy::float_arithmetic,
117    clippy::get_unwrap,
118    clippy::if_not_else,
119    // clippy::indexing_slicing,
120    clippy::inline_always,
121    //clippy::integer_arithmetic,
122    clippy::invalid_upcast_comparisons,
123    clippy::items_after_statements,
124    clippy::map_entry,
125    clippy::map_flatten,
126    clippy::match_same_arms,
127    clippy::maybe_infinite_iter,
128    clippy::mem_forget,
129    // clippy::missing_const_for_fn,
130    // clippy::missing_docs_in_private_items,
131    clippy::module_name_repetitions,
132    clippy::multiple_inherent_impl,
133    clippy::mut_mut,
134    clippy::needless_borrow,
135    clippy::needless_continue,
136    clippy::needless_pass_by_value,
137    clippy::non_ascii_literal,
138    clippy::path_buf_push_overwrite,
139    clippy::print_stdout,
140    clippy::pub_enum_variant_names,
141    clippy::redundant_closure_for_method_calls,
142    clippy::shadow_reuse,
143    clippy::shadow_same,
144    clippy::shadow_unrelated,
145    clippy::single_match_else,
146    clippy::string_add,
147    clippy::string_add_assign,
148    clippy::type_repetition_in_bounds,
149    clippy::unicode_not_nfc,
150    // clippy::unimplemented,
151    clippy::unseparated_literal_suffix,
152    clippy::used_underscore_binding,
153    clippy::wildcard_dependencies,
154    // clippy::wildcard_enum_match_arm,
155    clippy::wrong_pub_self_convention,
156)]
157#![warn(clippy::multiple_crate_versions)]
158#![allow(clippy::mem_replace_with_default)] // Not using std::mem::take() due to MSRV of 1.37 (intro'd in 1.40)
159#![allow(clippy::match_like_matches_macro)] // Not using std::matches! due to MSRV of 1.37 (intro'd in 1.42)
160
161macro_rules! io_fail {
162    ($config:expr, $e:expr) => {
163        #[cfg(feature = "failpoints")]
164        {
165            debug_delay();
166            if fail::is_active($e) {
167                $config.set_global_error(Error::FailPoint);
168                return Err(Error::FailPoint).into();
169            }
170        }
171    };
172}
173
174macro_rules! testing_assert {
175    ($($e:expr),*) => {
176        #[cfg(feature = "lock_free_delays")]
177        assert!($($e),*)
178    };
179}
180
181mod arc;
182mod atomic_shim;
183mod batch;
184mod binary_search;
185mod concurrency_control;
186mod config;
187mod context;
188mod db;
189mod dll;
190mod fastcmp;
191mod fastlock;
192mod histogram;
193mod iter;
194mod ivec;
195mod lazy;
196mod lru;
197mod meta;
198mod metrics;
199mod node;
200mod oneshot;
201mod pagecache;
202mod prefix;
203mod result;
204mod serialization;
205mod stack;
206mod subscriber;
207mod sys_limits;
208pub mod transaction;
209mod tree;
210
211/// Functionality for conditionally triggering failpoints under test.
212#[cfg(feature = "failpoints")]
213pub mod fail;
214
215#[cfg(feature = "docs")]
216pub mod doc;
217
218#[cfg(any(
219    miri,
220    not(any(
221        windows,
222        target_os = "linux",
223        target_os = "macos",
224        target_os = "dragonfly",
225        target_os = "freebsd",
226        target_os = "openbsd",
227        target_os = "netbsd",
228    ))
229))]
230mod threadpool {
231    use super::{OneShot, Result};
232
233    /// Just execute a task without involving threads.
234    pub fn spawn<F, R>(work: F) -> Result<OneShot<R>>
235    where
236        F: FnOnce() -> R + Send + 'static,
237        R: Send + 'static,
238    {
239        let (promise_filler, promise) = OneShot::pair();
240        promise_filler.fill((work)());
241        Ok(promise)
242    }
243}
244
245#[cfg(all(
246    not(miri),
247    any(
248        windows,
249        target_os = "linux",
250        target_os = "macos",
251        target_os = "dragonfly",
252        target_os = "freebsd",
253        target_os = "openbsd",
254        target_os = "netbsd",
255    )
256))]
257mod threadpool;
258
259#[cfg(all(
260    not(miri),
261    any(
262        windows,
263        target_os = "linux",
264        target_os = "macos",
265        target_os = "dragonfly",
266        target_os = "freebsd",
267        target_os = "openbsd",
268        target_os = "netbsd",
269    )
270))]
271mod flusher;
272
273#[cfg(feature = "event_log")]
274/// The event log helps debug concurrency issues.
275pub mod event_log;
276
277#[cfg(feature = "measure_allocs")]
278mod measure_allocs;
279
280#[cfg(feature = "measure_allocs")]
281#[global_allocator]
282static ALLOCATOR: measure_allocs::TrackingAllocator =
283    measure_allocs::TrackingAllocator;
284
285const DEFAULT_TREE_ID: &[u8] = b"__sled__default";
286
287/// hidden re-export of items for testing purposes
288#[doc(hidden)]
289pub use {
290    self::{
291        config::RunningConfig,
292        lazy::Lazy,
293        pagecache::{
294            constants::{
295                MAX_MSG_HEADER_LEN, MAX_SPACE_AMPLIFICATION,
296                MINIMUM_ITEMS_PER_SEGMENT, SEG_HEADER_LEN,
297            },
298            BatchManifest, DiskPtr, Log, LogKind, LogOffset, LogRead, Lsn,
299            PageCache, PageId,
300        },
301        serialization::Serialize,
302    },
303    crossbeam_epoch::{
304        pin as crossbeam_pin, Atomic, Guard as CrossbeamGuard, Owned, Shared,
305    },
306};
307
308pub use self::{
309    batch::Batch,
310    config::{Config, Mode},
311    db::{open, Db},
312    iter::Iter,
313    ivec::IVec,
314    result::{Error, Result},
315    subscriber::{Event, Subscriber},
316    transaction::Transactional,
317    tree::{CompareAndSwapError, Tree},
318};
319
320use {
321    self::{
322        arc::Arc,
323        atomic_shim::{AtomicI64 as AtomicLsn, AtomicU64},
324        binary_search::binary_search_lub,
325        concurrency_control::Protector,
326        context::Context,
327        fastcmp::fastcmp,
328        histogram::Histogram,
329        lru::Lru,
330        meta::Meta,
331        metrics::{clock, Measure, M},
332        node::{Data, Node},
333        oneshot::{OneShot, OneShotFiller},
334        result::CasResult,
335        subscriber::Subscribers,
336        tree::TreeInner,
337    },
338    crossbeam_utils::{Backoff, CachePadded},
339    log::{debug, error, trace, warn},
340    pagecache::RecoveryGuard,
341    parking_lot::{Condvar, Mutex, RwLock},
342    std::{
343        collections::BTreeMap,
344        convert::TryFrom,
345        fmt::{self, Debug},
346        io::{Read, Write},
347        sync::atomic::{
348            AtomicUsize,
349            Ordering::{Acquire, Release, SeqCst},
350        },
351    },
352};
353
354#[doc(hidden)]
355pub fn pin() -> Guard {
356    Guard { inner: crossbeam_pin(), readset: vec![], writeset: vec![] }
357}
358
359#[doc(hidden)]
360pub struct Guard {
361    inner: CrossbeamGuard,
362    readset: Vec<PageId>,
363    writeset: Vec<PageId>,
364}
365
366impl std::ops::Deref for Guard {
367    type Target = CrossbeamGuard;
368
369    fn deref(&self) -> &CrossbeamGuard {
370        &self.inner
371    }
372}
373
374#[derive(Debug)]
375struct Conflict;
376
377type Conflictable<T> = std::result::Result<T, Conflict>;
378
379fn crc32(buf: &[u8]) -> u32 {
380    let mut hasher = crc32fast::Hasher::new();
381    hasher.update(buf);
382    hasher.finalize()
383}
384
385fn calculate_message_crc32(header: &[u8], body: &[u8]) -> u32 {
386    let mut hasher = crc32fast::Hasher::new();
387    hasher.update(body);
388    hasher.update(&header[4..]);
389    let crc32 = hasher.finalize();
390    crc32 ^ 0xFFFF_FFFF
391}
392
393#[cfg(any(test, feature = "lock_free_delays"))]
394mod debug_delay;
395
396#[cfg(any(test, feature = "lock_free_delays"))]
397use debug_delay::debug_delay;
398
399/// This function is useful for inducing random jitter into our atomic
400/// operations, shaking out more possible interleavings quickly. It gets
401/// fully eliminated by the compiler in non-test code.
402#[cfg(not(any(test, feature = "lock_free_delays")))]
403const fn debug_delay() {}
404
405/// Link denotes a tree node or its modification fragment such as
406/// key addition or removal.
407#[derive(Clone, Debug, PartialEq)]
408pub(crate) enum Link {
409    /// A new value is set for a given key
410    Set(IVec, IVec),
411    /// The associated value is removed for a given key
412    Del(IVec),
413    /// A child of this Index node is marked as mergable
414    ParentMergeIntention(PageId),
415    /// The merging child has been completely merged into its left sibling
416    ParentMergeConfirm,
417    /// A Node is marked for being merged into its left sibling
418    ChildMergeCap,
419}
420
421/// A fast map that is not resistant to collision attacks. Works
422/// on 8 bytes at a time.
423pub(crate) type FastMap8<K, V> = std::collections::HashMap<
424    K,
425    V,
426    std::hash::BuildHasherDefault<fxhash::FxHasher64>,
427>;
428
429/// A fast set that is not resistant to collision attacks. Works
430/// on 8 bytes at a time.
431pub(crate) type FastSet8<V> = std::collections::HashSet<
432    V,
433    std::hash::BuildHasherDefault<fxhash::FxHasher64>,
434>;
435
436/// A function that may be configured on a particular shared `Tree`
437/// that will be applied as a kind of read-modify-write operator
438/// to any values that are written using the `Tree::merge` method.
439///
440/// The first argument is the key. The second argument is the
441/// optional existing value that was in place before the
442/// merged value being applied. The Third argument is the
443/// data being merged into the item.
444///
445/// You may return `None` to delete the value completely.
446///
447/// Merge operators are shared by all instances of a particular
448/// `Tree`. Different merge operators may be set on different
449/// `Tree`s.
450///
451/// # Examples
452///
453/// ```
454/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
455/// use sled::{Config, IVec};
456///
457/// fn concatenate_merge(
458///   _key: &[u8],               // the key being merged
459///   old_value: Option<&[u8]>,  // the previous value, if one existed
460///   merged_bytes: &[u8]        // the new bytes being merged in
461/// ) -> Option<Vec<u8>> {       // set the new value, return None to delete
462///   let mut ret = old_value
463///     .map(|ov| ov.to_vec())
464///     .unwrap_or_else(|| vec![]);
465///
466///   ret.extend_from_slice(merged_bytes);
467///
468///   Some(ret)
469/// }
470///
471/// let config = Config::new()
472///   .temporary(true);
473///
474/// let tree = config.open()?;
475/// tree.set_merge_operator(concatenate_merge);
476///
477/// let k = b"k1";
478///
479/// tree.insert(k, vec![0]);
480/// tree.merge(k, vec![1]);
481/// tree.merge(k, vec![2]);
482/// assert_eq!(tree.get(k), Ok(Some(IVec::from(vec![0, 1, 2]))));
483///
484/// // Replace previously merged data. The merge function will not be called.
485/// tree.insert(k, vec![3]);
486/// assert_eq!(tree.get(k), Ok(Some(IVec::from(vec![3]))));
487///
488/// // Merges on non-present values will cause the merge function to be called
489/// // with `old_value == None`. If the merge function returns something (which it
490/// // does, in this case) a new value will be inserted.
491/// tree.remove(k);
492/// tree.merge(k, vec![4]);
493/// assert_eq!(tree.get(k), Ok(Some(IVec::from(vec![4]))));
494/// # Ok(()) }
495/// ```
496pub trait MergeOperator:
497    Fn(&[u8], Option<&[u8]>, &[u8]) -> Option<Vec<u8>>
498{
499}
500impl<F> MergeOperator for F where
501    F: Fn(&[u8], Option<&[u8]>, &[u8]) -> Option<Vec<u8>>
502{
503}
504
505mod compile_time_assertions {
506    use crate::*;
507
508    #[allow(unreachable_code)]
509    fn _assert_public_types_send_sync() {
510        _assert_send::<Subscriber>(unreachable!());
511
512        _assert_send_sync::<Iter>(unreachable!());
513        _assert_send_sync::<Tree>(unreachable!());
514        _assert_send_sync::<Db>(unreachable!());
515        _assert_send_sync::<Batch>(unreachable!());
516        _assert_send_sync::<IVec>(unreachable!());
517        _assert_send_sync::<Config>(unreachable!());
518        _assert_send_sync::<CompareAndSwapError>(unreachable!());
519        _assert_send_sync::<Error>(unreachable!());
520        _assert_send_sync::<Event>(unreachable!());
521        _assert_send_sync::<Mode>(unreachable!());
522    }
523
524    fn _assert_send<S: Send>(_: &S) {}
525
526    fn _assert_send_sync<S: Send + Sync>(_: &S) {}
527}