fvm_ipld_hamt/
lib.rs

1// Copyright 2021-2023 Protocol Labs
2// Copyright 2019-2022 ChainSafe Systems
3// SPDX-License-Identifier: Apache-2.0, MIT
4
5//! HAMT crate for use as rust IPLD data structure
6//!
7//! [Data structure reference](https://github.com/ipld/specs/blob/51fab05b4fe4930d3d851d50cc1e5f1a02092deb/data-structures/hashmap.md)
8//!
9//! Implementation based off the work @dignifiedquire started [here](https://github.com/dignifiedquire/rust-hamt-ipld). This implementation matched the rust HashMap interface very closely, but came at the cost of saving excess values to the database and requiring unsafe code to update the cache from the underlying store as well as discarding any errors that came in any operations. The function signatures that exist are based on this, but refactored to match the spec more closely and match the necessary implementation.
10//!
11//! The Hamt is a data structure that mimmics a HashMap which has the features of being sharded, persisted, and indexable by a Cid. The Hamt supports a variable bit width to adjust the amount of possible pointers that can exist at each height of the tree. Hamt can be modified at any point, but the underlying values are only persisted to the store when the [flush](struct.Hamt.html#method.flush) is called.
12
13mod bitfield;
14mod error;
15mod hamt;
16mod hash;
17mod hash_algorithm;
18mod hash_bits;
19mod iter;
20mod node;
21mod pointer;
22
23pub use forest_hash_utils::{BytesKey, Hash};
24use serde::{Deserialize, Serialize};
25
26pub use self::error::Error;
27pub use self::hamt::{Hamt, Hamtv0};
28pub use self::hash_algorithm::*;
29pub use self::iter::{Iter, Iterv0};
30
31/// Default bit width for indexing a hash at each depth level
32#[deprecated]
33const DEFAULT_BIT_WIDTH: u32 = 8;
34
35/// Configuration options for a HAMT instance.
36#[derive(Debug, Clone)]
37pub struct Config {
38    /// The `bit_width` drives how wide and high the tree is going to be.
39    /// Each node in the tree will have `2^bit_width` number of slots for child nodes,
40    /// and consume `bit_width` number of bits from the hashed keys at each level.
41    pub bit_width: u32,
42
43    /// The minimum depth at which the HAMT can store key-value pairs in a `Node`.
44    ///
45    /// Storing values in the nodes means we have to read and write larger chunks of data
46    /// whenever we're accessing something (be it a link or values) in any other bucket.
47    /// This is particularly costly in the root node, which is always retrieved as soon
48    /// as the HAMT is instantiated.
49    ///
50    /// This setting allows us to keep the root, and possibly a few more levels, free of
51    /// data, reserved for links. A sufficiently saturated tree will tend to contain only
52    /// links in the first levels anyway, once all the buckets have been filled and pushed
53    /// further down.
54    ///
55    /// A value of 0 means data can be put in the root node, which is the default behaviour.
56    ///
57    /// The setting makes most sense when the size of values outweigh the size of the link
58    /// pointing at them. When storing small, hash-sized values, it might not matter.
59    pub min_data_depth: u32,
60
61    /// Maximum number of key-value pairs in a bucket before it's pushed down.
62    pub max_array_width: usize,
63}
64
65impl Default for Config {
66    fn default() -> Self {
67        Self {
68            #[allow(deprecated)]
69            bit_width: DEFAULT_BIT_WIDTH,
70            min_data_depth: 0,
71            max_array_width: 3,
72        }
73    }
74}
75
76type HashedKey = [u8; 32];
77
78#[derive(Debug, Serialize, Deserialize, PartialEq)]
79struct KeyValuePair<K, V>(K, V);
80
81impl<K, V> KeyValuePair<K, V> {
82    pub fn key(&self) -> &K {
83        &self.0
84    }
85    pub fn value(&self) -> &V {
86        &self.1
87    }
88}
89
90impl<K, V> KeyValuePair<K, V> {
91    pub fn new(key: K, value: V) -> Self {
92        KeyValuePair(key, value)
93    }
94}