value_log/lib.rs
1// Copyright (c) 2024-present, fjall-rs
2// This source code is licensed under both the Apache 2.0 and MIT License
3// (found in the LICENSE-* files in the repository)
4
5//! Generic value log implementation for key-value separated storage.
6//!
7//! > This crate is intended as a building block for key-value separated storage.
8//! > You probably want to use <https://github.com/fjall-rs/fjall> instead.
9//!
10//! The value log's contents are split into segments; each segment holds a sorted
11//! list of key-value pairs:
12//!
13//! `[k0, v0][k1, v1][k2, v2][k3, v3][k4, v4]`
14//!
15//! The value log does not have an index - to efficiently retrieve an item, a
16//! [`ValueHandle`] needs to be retrieved from an [`IndexReader`]. Using the
17//! value handle then allows loading the value from the value log.
18//!
19//! Recently retrieved ("hot") items may be cached by an in-memory value cache to avoid
20//! repeated disk accesses.
21//!
22//! As data changes, old values will unnecessarily occupy disk space. As space amplification
23//! increases, stale data needs to be discarded by rewriting old segments (garbage collection).
24//! This process can happen on-line.
25//!
26//! Even though segments are internally sorted, which may help with range scans, data may not be stored
27//! contiguously, which hurts read performance of ranges. Point reads also require an extra level of
28//! indirection, as the value handle needs to be retrieved from the index. However, this index is generally
29//! small, so ideally it can be cached efficiently. And because compaction needs to rewrite less data, more
30//! disk I/O is freed to fulfill write and read requests.
31//!
32//! In summary, a value log trades read & space amplification for superior write
33//! amplification when storing large blobs.
34//!
35//! Use a value log when:
36//! - you are storing large values (HTML pages, big JSON, small images, archiving, ...)
37//! - your data is rarely deleted or updated, or you do not have strict disk space requirements
38//! - your access pattern is point read heavy
39
40#![doc(html_logo_url = "https://raw.githubusercontent.com/fjall-rs/value-log/main/logo.png")]
41#![doc(html_favicon_url = "https://raw.githubusercontent.com/fjall-rs/value-log/main/logo.png")]
42#![deny(clippy::all, missing_docs, clippy::cargo)]
43#![deny(clippy::unwrap_used)]
44#![deny(clippy::indexing_slicing)]
45#![warn(clippy::pedantic, clippy::nursery)]
46#![warn(clippy::expect_used)]
47#![allow(clippy::missing_const_for_fn)]
48#![warn(clippy::multiple_crate_versions)]
49// the `bytes` feature uses unsafe to improve from_reader performance, so with it enabled `unsafe_code` is only denied (which a local `#[allow(unsafe_code)]` can override) instead of forbidden
50#![cfg_attr(feature = "bytes", deny(unsafe_code))]
51#![cfg_attr(not(feature = "bytes"), forbid(unsafe_code))]
52
53mod blob_cache;
54
55#[doc(hidden)]
56pub mod coding;
57
58mod compression;
59mod config;
60mod error;
61mod gc;
62mod handle;
63mod id;
64mod index;
65mod key_range;
66mod manifest;
67mod path;
68mod slice;
69
70#[doc(hidden)]
71pub mod scanner;
72
73mod segment;
74mod value;
75mod value_log;
76mod version;
77
/// Crate-internal hash map alias: the standard library `HashMap` with
/// `xxhash_rust::xxh3::Xxh3Builder` supplied as its hasher parameter.
78pub(crate) type HashMap<K, V> = std::collections::HashMap<K, V, xxhash_rust::xxh3::Xxh3Builder>;
79
80pub use {
81 blob_cache::BlobCache,
82 compression::Compressor,
83 config::Config,
84 error::{Error, Result},
85 gc::report::GcReport,
86 gc::{GcStrategy, SpaceAmpStrategy, StaleThresholdStrategy},
87 handle::ValueHandle,
88 index::{Reader as IndexReader, Writer as IndexWriter},
89 segment::multi_writer::MultiWriter as SegmentWriter,
90 slice::Slice,
91 value::{UserKey, UserValue},
92 value_log::{ValueLog, ValueLogId},
93 version::Version,
94};
95
96#[doc(hidden)]
97pub use segment::{reader::Reader as SegmentReader, Segment};
98
99#[doc(hidden)]
100pub use key_range::KeyRange;