value_log/
lib.rs

1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5//! Generic value log implementation for key-value separated storage.
6//!
7//! > This crate is intended as a building block for key-value separated storage.
8//! > You probably want to use <https://github.com/fjall-rs/fjall> instead.
9//!
10//! The value log's contents are split into segments, each segment holds a sorted
11//! list of key-value pairs:
12//!
13//! [k0, v0][k1, v1][k2, v2][k3, v3][k4, v4]
14//!
15//! The value log does not have an index - to efficiently retrieve an item, a
16//! [`ValueHandle`] needs to be retrieved from an [`IndexReader`]. Using the
17//! value handle then allows loading the value from the value log.
18//!
19//! Recently retrieved ("hot") items may be cached by an in-memory value cache to avoid
20//! repeated disk accesses.
21//!
22//! As data changes, old values will unnecessarily occupy disk space. As space amplification
23//! increases, stale data needs to be discarded by rewriting old segments (garbage collection).
24//! This process can happen on-line.
25//!
26//! Even though segments are internally sorted, which may help with range scans, data may not be stored
27//! contiguously, which hurts read performance of ranges. Point reads also require an extra level of
28//! indirection, as the value handle needs to be retrieved from the index. However, this index is generally
29//! small, so ideally it can be cached efficiently. And because compaction needs to rewrite less data, more
30//! disk I/O is freed to fulfill write and read requests.
31//!
32//! In summary, a value log accepts higher read & space amplification in exchange for
33//! lower write amplification when storing large blobs.
34//!
35//! Use a value log when:
36//! - you are storing large values (HTML pages, big JSON, small images, archiving, ...)
37//! - your data is rarely deleted or updated, or you do not have strict disk space requirements
38//! - your access pattern is point read heavy
39//!
40//! # Example usage
41//!
42//! ```
43//! # use value_log::{IndexReader, IndexWriter, MockIndex, MockIndexWriter};
44//! use value_log::{Config, ValueHandle, ValueLog};
45//!
46//! # fn main() -> value_log::Result<()> {
47//! # let folder = tempfile::tempdir()?;
48//! # let index = MockIndex::default();
49//! # let path = folder.path();
50//! #
51//! # #[derive(Clone, Default)]
52//! # struct MyCompressor;
53//! #
54//! # impl value_log::Compressor for MyCompressor {
55//! #    fn compress(&self, bytes: &[u8]) -> value_log::Result<Vec<u8>> {
56//! #        Ok(bytes.into())
57//! #    }
58//! #
59//! #    fn decompress(&self, bytes: &[u8]) -> value_log::Result<Vec<u8>> {
60//! #        Ok(bytes.into())
61//! #    }
62//! # }
63//! // Open or recover value log from disk
64//! let value_log = ValueLog::open(path, Config::<MyCompressor>::default())?;
65//!
66//! // Write some data
67//! # let mut index_writer = MockIndexWriter(index.clone());
68//! let mut writer = value_log.get_writer()?;
69//!
70//! for key in ["a", "b", "c", "d", "e"] {
71//!     let value = key.repeat(10_000);
72//!     let value = value.as_bytes();
73//!
74//!     let key = key.as_bytes();
75//!
76//!     let vhandle = writer.get_next_value_handle();
77//!     index_writer.insert_indirect(key, vhandle, value.len() as u32)?;
78//!
79//!     writer.write(key, value)?;
80//! }
81//!
82//! // Finish writing
83//! value_log.register_writer(writer)?;
84//!
85//! // Get some stats
86//! assert_eq!(1.0, value_log.space_amp());
87//! #
88//! # Ok(())
89//! # }
90//! ```
91
92#![doc(html_logo_url = "https://raw.githubusercontent.com/fjall-rs/value-log/main/logo.png")]
93#![doc(html_favicon_url = "https://raw.githubusercontent.com/fjall-rs/value-log/main/logo.png")]
94#![deny(clippy::all, missing_docs, clippy::cargo)]
95#![deny(clippy::unwrap_used)]
96#![deny(clippy::indexing_slicing)]
97#![warn(clippy::pedantic, clippy::nursery)]
98#![warn(clippy::expect_used)]
99#![allow(clippy::missing_const_for_fn)]
100#![warn(clippy::multiple_crate_versions)]
101// the `bytes` feature uses unsafe to improve from_reader performance, so with it the lint is `deny` (locally overridable via `#[allow]`) instead of `forbid`
102#![cfg_attr(feature = "bytes", deny(unsafe_code))]
103#![cfg_attr(not(feature = "bytes"), forbid(unsafe_code))]
104
105mod blob_cache;
106mod coding;
107mod compression;
108mod config;
109mod error;
110mod gc;
111mod handle;
112mod id;
113mod index;
114mod key_range;
115mod manifest;
116mod mock;
117mod path;
118mod slice;
119
120#[doc(hidden)]
121pub mod scanner;
122
123mod segment;
124mod value;
125mod value_log;
126mod version;
127
/// Crate-internal alias for the standard library `HashMap` that uses
/// `xxhash_rust`'s `Xxh3Builder` as its hasher instead of the default one.
128pub(crate) type HashMap<K, V> = std::collections::HashMap<K, V, xxhash_rust::xxh3::Xxh3Builder>;
129
130pub use {
131    blob_cache::BlobCache,
132    compression::Compressor,
133    config::Config,
134    error::{Error, Result},
135    gc::report::GcReport,
136    gc::{GcStrategy, SpaceAmpStrategy, StaleThresholdStrategy},
137    handle::ValueHandle,
138    index::{Reader as IndexReader, Writer as IndexWriter},
139    segment::multi_writer::MultiWriter as SegmentWriter,
140    slice::Slice,
141    value::{UserKey, UserValue},
142    value_log::ValueLog,
143    version::Version,
144};
145
146#[doc(hidden)]
147pub use segment::{reader::Reader as SegmentReader, Segment};
148
149#[doc(hidden)]
150pub use mock::{MockIndex, MockIndexWriter};
151
152#[doc(hidden)]
153pub use key_range::KeyRange;