Skip to main content

firewood_storage/
lib.rs

1// Copyright (C) 2023, Ava Labs, Inc. All rights reserved.
2// See the file LICENSE.md for licensing terms.
3
4#![warn(missing_debug_implementations, rust_2018_idioms, missing_docs)]
5#![deny(unsafe_code)]
6#![cfg_attr(
7    not(target_pointer_width = "64"),
8    forbid(
9        clippy::cast_possible_truncation,
10        reason = "non-64 bit target likely to cause issues during u64 to usize conversions"
11    )
12)]
13
14//! # storage implements the storage of a [Node] on top of a `LinearStore`
15//!
16//! Nodes are stored at a [`LinearAddress`] within a [`ReadableStorage`].
17//!
18//! The [`NodeStore`] maintains a free list and the [`LinearAddress`] of a root node.
19//!
20//! A [`NodeStore`] is backed by a [`ReadableStorage`] which is persisted storage.
21
22use std::fmt::{Display, Formatter, LowerHex, Result};
23use std::ops::Range;
24
25mod checker;
26mod hashednode;
27mod hashedshunt;
28mod hashers;
29mod hashtype;
30mod iter;
31mod linear;
32mod node;
33mod nodestore;
34mod path;
35mod root_store;
36#[cfg(any(test, feature = "test_utils"))]
37mod test_utils;
38mod tries;
39mod u4;
40
41/// Logger module for handling logging functionality
42pub mod logger;
43
44#[macro_use]
45/// Macros module for defining macros used in the storage module
46pub mod macros;
47
48/// Metrics registry for storage layer metrics
49pub mod registry;
50// re-export these so callers don't need to know where they are
51pub use checker::{CheckOpt, CheckerReport, DBStats, FreeListsStats, TrieStats};
52pub use hashednode::{Hashable, Preimage, ValueDigest, hash_node, hash_preimage};
53pub use hashedshunt::HashableShunt;
54pub use hashtype::{HashType, IntoHashType, InvalidTrieHashLength, TrieHash};
55pub use linear::{FileIoError, ReadableStorage, WritableStorage};
56pub use node::path::{NibblesIterator, Path};
57pub use node::{BranchNode, Child, Children, ChildrenSlots, LeafNode, Node, PathIterItem};
58pub use nodestore::{
59    AreaIndex, Committed, HashedNodeReader, ImmutableProposal, LinearAddress, Mutable, MutableKind,
60    NodeHashAlgorithm, NodeHashAlgorithmTryFromIntError, NodeReader, NodeStore, NodeStoreHeader,
61    Parentable, Propose, Recon, Reconstructed, ReconstructionSource, RootReader, TrieReader,
62};
63pub use path::{
64    ComponentIter, IntoSplitPath, JoinedPath, PackedBytes, PackedPathComponents, PackedPathRef,
65    PartialPath, PathBuf, PathCommonPrefix, PathComponent, PathComponentSliceExt, PathGuard,
66    SplitPath, TriePath, TriePathAsPackedBytes, TriePathFromPackedBytes, TriePathFromUnpackedBytes,
67};
68pub use tries::{
69    DuplicateKeyError, HashedKeyValueTrieRoot, HashedTrieNode, IterAscending, IterDescending,
70    KeyValueTrieRoot, TrieEdgeIter, TrieEdgeState, TrieNode, TrieValueIter,
71};
72pub use u4::{TryFromIntError, U4};
73
74pub use linear::filebacked::FileBacked;
75pub use linear::memory::MemStore;
76pub use node::persist::MaybePersistedNode;
77pub use root_store::RootStore;
78#[cfg(any(test, feature = "test_utils"))]
79pub use test_utils::SeededRng;
80#[cfg(any(test, feature = "test_utils"))]
81pub use test_utils::TestRecorder;
82
83/// A shared node, which is just a triophe Arc of a node
84pub type SharedNode = triomphe::Arc<Node>;
85
86/// A wrapper around `SharedNode` for use in memory-sized caches.
87/// This newtype allows implementing foreign traits while maintaining
88/// compatibility with the rest of the codebase that uses `SharedNode` directly.
89#[derive(Debug, Clone, PartialEq, Eq)]
90pub(crate) struct CachedNode(pub(crate) SharedNode);
91
92impl From<SharedNode> for CachedNode {
93    fn from(node: SharedNode) -> Self {
94        CachedNode(node)
95    }
96}
97
98impl From<CachedNode> for SharedNode {
99    fn from(cached: CachedNode) -> Self {
100        cached.0
101    }
102}
103
104impl std::ops::Deref for CachedNode {
105    type Target = Node;
106    fn deref(&self) -> &Self::Target {
107        &self.0
108    }
109}
110
111impl lru_mem::HeapSize for CachedNode {
112    fn heap_size(&self) -> usize {
113        // Arc overhead plus the inner Node's heap size
114        // Note: We count the full Node size since this is the primary reference
115        self.0.heap_size()
116    }
117}
118
119impl CachedNode {
120    /// Insert this node into the cache, ignoring any errors.
121    ///
122    /// The only error that can occur is `EntryTooLarge`, which happens when a single
123    /// node's size exceeds the cache's maximum capacity. This is extremely unlikely
124    /// (would require a node with hundreds of megabytes of data), and if it occurs,
125    /// the node simply won't be cached. Since the cache is secondary storage and the
126    /// node is already persisted to disk, the database remains fully functional - just
127    /// with cache misses for oversized nodes.
128    ///
129    /// Updates cache memory metrics after insertion.
130    pub(crate) fn insert_into_cache(
131        self,
132        cache: &mut lru_mem::LruCache<LinearAddress, Self>,
133        addr: LinearAddress,
134    ) {
135        let _ = cache.insert(addr, self);
136        Self::update_cache_metrics(cache);
137    }
138
139    /// Update cache memory usage metrics.
140    ///
141    /// This is called after cache operations to keep metrics in sync.
142    /// The `current_size()` and `max_size()` methods are simple field accesses,
143    /// so this is very cheap to call frequently.
144    #[expect(
145        clippy::cast_precision_loss,
146        reason = "Precision loss is acceptable for metrics; only affects values > 2^52 bytes"
147    )]
148    pub(crate) fn update_cache_metrics(cache: &lru_mem::LruCache<LinearAddress, Self>) {
149        use firewood_metrics::firewood_set;
150        firewood_set!(registry::CACHE_MEMORY_USED, cache.current_size() as f64);
151        firewood_set!(registry::CACHE_MEMORY_LIMIT, cache.max_size() as f64);
152    }
153}
154
155/// The strategy for caching nodes that are read
156/// from the storage layer. Generally, we only want to
157/// cache write operations, but for some read-heavy workloads
158/// you can enable caching of branch reads or all reads.
159#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
160#[non_exhaustive]
161pub enum CacheReadStrategy {
162    /// Only cache writes (no reads will be cached)
163    WritesOnly,
164
165    /// Cache branch reads (reads that are not leaf nodes)
166    BranchReads,
167
168    /// Cache all reads (leaves and branches)
169    All,
170}
171
172impl Display for CacheReadStrategy {
173    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
174        write!(f, "{self:?}")
175    }
176}
177
178/// This enum encapsulates what points to the stored area.
179#[derive(Debug, Clone, PartialEq, Eq, Copy)]
180pub enum StoredAreaParent {
181    /// The stored area is a trie node
182    TrieNode(TrieNodeParent),
183    /// The stored area is a free list
184    FreeList(FreeListParent),
185}
186
187/// This enum encapsulates what points to the stored area allocated for a trie node.
188#[derive(Debug, Clone, PartialEq, Eq, Copy)]
189pub enum TrieNodeParent {
190    /// The stored area is the root of the trie, so the header points to it
191    Root,
192    /// The stored area is not the root of the trie, so a parent trie node points to it
193    Parent(LinearAddress, PathComponent),
194}
195
196/// This enum encapsulates what points to the stored area allocated for a free list.
197#[derive(Debug, Clone, PartialEq, Eq, Copy)]
198pub enum FreeListParent {
199    /// The stored area is the head of the free list, so the header points to it
200    FreeListHead(AreaIndex),
201    /// The stored area is not the head of the free list, so a previous free area points to it
202    PrevFreeArea {
203        /// The size index of the area, helps with debugging and fixing
204        area_size_idx: AreaIndex,
205        /// The address of the previous free area
206        parent_addr: LinearAddress,
207    },
208}
209
210impl LowerHex for StoredAreaParent {
211    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
212        match self {
213            StoredAreaParent::TrieNode(trie_parent) => LowerHex::fmt(trie_parent, f),
214            StoredAreaParent::FreeList(free_list_parent) => LowerHex::fmt(free_list_parent, f),
215        }
216    }
217}
218
219impl LowerHex for TrieNodeParent {
220    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
221        match self {
222            TrieNodeParent::Root => f.write_str("Root"),
223            TrieNodeParent::Parent(addr, index) => {
224                f.write_str("TrieNode@")?;
225                LowerHex::fmt(addr, f)?;
226                f.write_fmt(format_args!("[{index}]"))
227            }
228        }
229    }
230}
231
232impl LowerHex for FreeListParent {
233    fn fmt(&self, f: &mut Formatter<'_>) -> Result {
234        match self {
235            FreeListParent::FreeListHead(index) => f.write_fmt(format_args!("FreeLists[{index}]")),
236            FreeListParent::PrevFreeArea {
237                area_size_idx,
238                parent_addr,
239            } => {
240                f.write_fmt(format_args!("FreeArea[{area_size_idx}]@"))?;
241                LowerHex::fmt(parent_addr, f)
242            }
243        }
244    }
245}
246
247use derive_where::derive_where;
248
249/// Errors returned by the checker
250#[derive(thiserror::Error, Debug)]
251#[derive_where(PartialEq, Eq)]
252#[non_exhaustive]
253pub enum CheckerError {
254    /// The file size is not valid
255    #[error("Invalid DB size ({db_size}): {description}")]
256    InvalidDBSize {
257        /// The size of the db
258        db_size: u64,
259        /// The description of the error
260        description: String,
261    },
262
263    /// Hash mismatch for a node
264    #[error(
265        "Hash mismatch for node {path:?} at address {address}: parent stored {parent_stored_hash}, computed {computed_hash}"
266    )]
267    HashMismatch {
268        /// The path of the node
269        path: Path,
270        /// The address of the node
271        address: LinearAddress,
272        /// The parent of the node
273        parent: TrieNodeParent,
274        /// The hash value stored in the parent node
275        parent_stored_hash: HashType,
276        /// The hash value computed for the node
277        computed_hash: HashType,
278    },
279
280    /// The address is out of bounds
281    #[error(
282        "stored area at {start:#x} with size {size} (parent: {parent:#x}) is out of bounds ({bounds:#x?})"
283    )]
284    AreaOutOfBounds {
285        /// Start of the `StoredArea`
286        start: LinearAddress,
287        /// Size of the `StoredArea`
288        size: u64,
289        /// Valid range of addresses
290        bounds: Range<LinearAddress>,
291        /// The parent of the `StoredArea`
292        parent: StoredAreaParent,
293    },
294
295    /// Stored areas intersect
296    #[error(
297        "stored area at {start:#x} with size {size} (parent: {parent:#x}) intersects with other stored areas: {intersection:#x?}"
298    )]
299    AreaIntersects {
300        /// Start of the `StoredArea`
301        start: LinearAddress,
302        /// Size of the `StoredArea`
303        size: u64,
304        /// The intersection
305        intersection: Vec<Range<LinearAddress>>,
306        /// The parent of the `StoredArea`
307        parent: StoredAreaParent,
308    },
309
310    /// Node is larger than the area it is stored in
311    #[error(
312        "stored area at {area_start:#x} with size {area_size} (parent: {parent:#x}) stores a node of size {node_bytes}"
313    )]
314    NodeLargerThanArea {
315        /// Address of the area
316        area_start: LinearAddress,
317        /// Size of the area
318        area_size: u64,
319        /// Size of the node
320        node_bytes: u64,
321        /// The parent of the area
322        parent: TrieNodeParent,
323    },
324
325    /// Freelist area size does not match
326    #[error(
327        "Free area {address:#x} of size {size} (parent: {parent:#x}) is found in free list {actual_free_list} but it should be in freelist {expected_free_list}"
328    )]
329    FreelistAreaSizeMismatch {
330        /// Address of the free area
331        address: LinearAddress,
332        /// Actual size of the free area
333        size: u64,
334        /// Free list on which the area is stored
335        actual_free_list: AreaIndex,
336        /// Expected size of the area
337        expected_free_list: AreaIndex,
338        /// The parent of the free area
339        parent: FreeListParent,
340    },
341
342    /// The start address of a stored area is not a multiple of 16
343    #[error(
344        "The start address of a stored area (parent: {parent:#x}) is not a multiple of {}: {address:#x}",
345        nodestore::primitives::AreaIndex::MIN_AREA_SIZE
346    )]
347    AreaMisaligned {
348        /// The start address of the stored area
349        address: LinearAddress,
350        /// The parent of the `StoredArea`
351        parent: StoredAreaParent,
352    },
353
354    /// Found leaked areas
355    #[error("Found leaked areas: {0}")]
356    #[derive_where(skip_inner)]
357    AreaLeaks(checker::LinearAddressRangeSet),
358
359    /// The root is not persisted
360    #[error("The checker can only check persisted nodestores")]
361    UnpersistedRoot,
362
363    #[error(
364        "The node {key:#x} at {address:#x} (parent: {parent:#x}) has a value but its path is not 32 or 64 bytes long"
365    )]
366    /// A value is found corresponding to an invalid key.
367    /// With ethhash, keys must be 32 or 64 bytes long.
368    /// Without ethhash, keys cannot contain half-bytes (i.e., odd number of nibbles).
369    InvalidKey {
370        /// The key found, or equivalently the path of the node that stores the value
371        key: Path,
372        /// Address of the node
373        address: LinearAddress,
374        /// Parent of the node
375        parent: TrieNodeParent,
376    },
377
378    /// IO error
379    #[error("IO error reading pointer stored at {parent:#x}: {error}")]
380    #[derive_where(skip_inner)]
381    IO {
382        /// The error
383        error: FileIoError,
384        /// parent of the area
385        parent: StoredAreaParent,
386    },
387}
388
389impl CheckerError {
390    const fn parent(&self) -> Option<StoredAreaParent> {
391        match self {
392            CheckerError::InvalidDBSize { .. }
393            | CheckerError::AreaLeaks(_)
394            | CheckerError::UnpersistedRoot => None,
395            CheckerError::AreaOutOfBounds { parent, .. }
396            | CheckerError::AreaIntersects { parent, .. }
397            | CheckerError::AreaMisaligned { parent, .. }
398            | CheckerError::IO { parent, .. } => Some(*parent),
399            CheckerError::HashMismatch { parent, .. }
400            | CheckerError::NodeLargerThanArea { parent, .. }
401            | CheckerError::InvalidKey { parent, .. } => Some(StoredAreaParent::TrieNode(*parent)),
402            CheckerError::FreelistAreaSizeMismatch { parent, .. } => {
403                Some(StoredAreaParent::FreeList(*parent))
404            }
405        }
406    }
407}
408
409impl From<CheckerError> for Vec<CheckerError> {
410    fn from(error: CheckerError) -> Self {
411        vec![error]
412    }
413}