value_log/
lib.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
// Copyright (c) 2024-present, fjall-rs
// This source code is licensed under both the Apache 2.0 and MIT License
// (found in the LICENSE-* files in the repository)

//! Generic value log implementation for key-value separated storage.
//!
//! > This crate is intended as a building block for key-value separated storage.
//! > You probably want to use <https://github.com/fjall-rs/fjall> instead.
//!
//! The value log's contents are split into segments, each segment holds a sorted
//! list of key-value pairs:
//!
//! [k0, v0][k1, v1][k2, v2][k3, v3][k4, v4]
//!
//! The value log does not have an index - to efficiently retrieve an item, a
//! [`ValueHandle`] needs to be retrieved from an [`IndexReader`]. Using the
//! value handle then allows loading the value from the value log.
//!
//! Recently retrieved ("hot") items may be cached by an in-memory value cache to avoid
//! repeated disk accesses.
//!
//! As data changes, old values will unnecessarily occupy disk space. As space amplification
//! increases, stale data needs to be discarded by rewriting old segments (garbage collection).
//! This process can happen on-line.
//!
//! Even though segments are internally sorted, which may help with range scans, data may not be stored
//! contiguously, which hurts read performance of ranges. Point reads also require an extra level of
//! indirection, as the value handle needs to be retrieved from the index. However, this index is generally
//! small, so ideally it can be cached efficiently. And because compaction needs to rewrite less data, more
//! disk I/O is freed to fulfill write and read requests.
//!
//! In summary, a value log trades read & space amplification for superior write
//! amplification when storing large blobs.
//!
//! Use a value log, when:
//! - you are storing large values (HTML pages, big JSON, small images, archiving, ...)
//! - your data is rarely deleted or updated, or you do not have strict disk space requirements
//! - your access pattern is point read heavy
//!
//! # Example usage
//!
//! ```
//! # use value_log::{IndexReader, IndexWriter, MockIndex, MockIndexWriter};
//! use value_log::{Config, ValueHandle, ValueLog};
//!
//! # fn main() -> value_log::Result<()> {
//! # let folder = tempfile::tempdir()?;
//! # let index = MockIndex::default();
//! # let path = folder.path();
//! #
//! # #[derive(Clone, Default)]
//! # struct MyCompressor;
//! #
//! # impl value_log::Compressor for MyCompressor {
//! #    fn compress(&self, bytes: &[u8]) -> value_log::Result<Vec<u8>> {
//! #        Ok(bytes.into())
//! #    }
//! #
//! #    fn decompress(&self, bytes: &[u8]) -> value_log::Result<Vec<u8>> {
//! #        Ok(bytes.into())
//! #    }
//! # }
//! // Open or recover value log from disk
//! let value_log = ValueLog::open(path, Config::<MyCompressor>::default())?;
//!
//! // Write some data
//! # let mut index_writer = MockIndexWriter(index.clone());
//! let mut writer = value_log.get_writer()?;
//!
//! for key in ["a", "b", "c", "d", "e"] {
//!     let value = key.repeat(10_000);
//!     let value = value.as_bytes();
//!
//!     let key = key.as_bytes();
//!
//!     let vhandle = writer.get_next_value_handle();
//!     index_writer.insert_indirect(key, vhandle, value.len() as u32)?;
//!
//!     writer.write(key, value)?;
//! }
//!
//! // Finish writing
//! value_log.register_writer(writer)?;
//!
//! // Get some stats
//! assert_eq!(1.0, value_log.space_amp());
//! #
//! # Ok(())
//! # }
//! ```

#![doc(html_logo_url = "https://raw.githubusercontent.com/fjall-rs/value-log/main/logo.png")]
#![doc(html_favicon_url = "https://raw.githubusercontent.com/fjall-rs/value-log/main/logo.png")]
#![forbid(unsafe_code)]
#![deny(clippy::all, missing_docs, clippy::cargo)]
#![deny(clippy::unwrap_used)]
#![deny(clippy::indexing_slicing)]
#![warn(clippy::pedantic, clippy::nursery)]
#![warn(clippy::expect_used)]
#![allow(clippy::missing_const_for_fn)]
#![warn(clippy::multiple_crate_versions)]

mod blob_cache;
mod coding;
mod compression;
mod config;
mod error;
mod gc;
mod handle;
mod id;
mod index;
mod key_range;
mod manifest;
mod mock;
mod path;
mod slice;

#[doc(hidden)]
pub mod scanner;

mod segment;
mod value;
mod value_log;
mod version;

pub(crate) type HashMap<K, V> = std::collections::HashMap<K, V, xxhash_rust::xxh3::Xxh3Builder>;

pub use {
    blob_cache::BlobCache,
    compression::Compressor,
    config::Config,
    error::{Error, Result},
    gc::report::GcReport,
    gc::{GcStrategy, SpaceAmpStrategy, StaleThresholdStrategy},
    handle::ValueHandle,
    index::{Reader as IndexReader, Writer as IndexWriter},
    segment::multi_writer::MultiWriter as SegmentWriter,
    slice::Slice,
    value::{UserKey, UserValue},
    value_log::ValueLog,
    version::Version,
};

#[doc(hidden)]
pub use segment::{reader::Reader as SegmentReader, Segment};

#[doc(hidden)]
pub use mock::{MockIndex, MockIndexWriter};

#[doc(hidden)]
pub use key_range::KeyRange;