Skip to main content

smoldot/executor/
storage_diff.rs

1// Smoldot
2// Copyright (C) 2019-2021  Parity Technologies (UK) Ltd.
3// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0
4
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13// GNU General Public License for more details.
14
15// You should have received a copy of the GNU General Public License
16// along with this program.  If not, see <http://www.gnu.org/licenses/>.
17
18//! "Diff" between a trie and the next.
19//!
20//! Imagine two `HashMap<Vec<u8>, Vec<u8>>`s representing the content of a trie. This data
21//! structure contains the difference from one to the other.
22//!
23//! This data structure can be used in a variety of circumstances, such as storing the storage
24//! differences between a block and its child or storing on-going changes while a runtime call is
25//! being performed. It can also be used to store an entire trie, by representing a diff where
26//! the base is an empty trie.
27//!
28//! # About keys hashing
29//!
30//! This data structure internally uses a hash map. This hash map assumes that storage keys are
31//! already uniformly distributed and doesn't perform any additional hashing.
32//!
33//! You should be aware that a malicious runtime could perform hash collision attacks that
34//! considerably slow down this data structure.
35//!
36
37// TODO: is this module properly located?
38
39// TODO: more docs
40
41use alloc::{collections::BTreeMap, vec::Vec};
42use core::{borrow::Borrow, cmp, fmt, ops};
43use hashbrown::HashMap;
44
45#[derive(Clone)]
46pub struct TrieDiff<T = ()> {
47    /// Contains the same entries as [`TrieDiff::hashmap`], except that values are booleans
48    /// indicating whether the value updates (`true`) or deletes (`false`) the underlying
49    /// storage item.
50    btree: BTreeMap<Vec<u8>, bool>,
51
52    /// Actual diff. For each key, `Some` if the underlying storage item is updated by this diff,
53    /// and `None` if it is deleted.
54    ///
55    /// A FNV hasher is used because the runtime is supposed to guarantee a uniform distribution
56    /// of storage keys.
57    hashmap: HashMap<Vec<u8>, (Option<Vec<u8>>, T), fnv::FnvBuildHasher>,
58}
59
60impl<T> TrieDiff<T> {
61    /// Builds a new empty diff.
62    pub fn empty() -> Self {
63        Self {
64            btree: BTreeMap::default(),
65            // TODO: with_capacity?
66            hashmap: HashMap::with_capacity_and_hasher(0, Default::default()),
67        }
68    }
69
70    /// Removes all the entries within this diff.
71    pub fn clear(&mut self) {
72        self.hashmap.clear();
73        self.btree.clear();
74    }
75
76    /// Inserts the given key-value combination in the diff.
77    ///
78    /// Returns the value associated to this `key` that was previously in the diff, if any.
79    pub fn diff_insert(
80        &mut self,
81        key: impl Into<Vec<u8>>,
82        value: impl Into<Vec<u8>>,
83        user_data: T,
84    ) -> Option<(Option<Vec<u8>>, T)> {
85        let key = key.into();
86        // Note that we clone the key here. This is considered as a tolerable overhead.
87        let previous = self
88            .hashmap
89            .insert(key.clone(), (Some(value.into()), user_data));
90        match &previous {
91            Some((Some(_), _)) => {
92                // No need to update `btree`.
93                debug_assert_eq!(self.btree.get(&key), Some(&true));
94            }
95            None | Some((None, _)) => {
96                self.btree.insert(key, true);
97            }
98        }
99        previous
100    }
101
102    /// Inserts in the diff an entry at the given key that delete the value that is located in
103    /// the base storage.
104    ///
105    /// Returns the value associated to this `key` that was previously in the diff, if any.
106    pub fn diff_insert_erase(
107        &mut self,
108        key: impl Into<Vec<u8>>,
109        user_data: T,
110    ) -> Option<(Option<Vec<u8>>, T)> {
111        let key = key.into();
112        // Note that we clone the key here. This is considered as a tolerable overhead.
113        let previous = self.hashmap.insert(key.clone(), (None, user_data));
114        match &previous {
115            Some((None, _)) => {
116                // No need to update `btree`.
117                debug_assert_eq!(self.btree.get(&key), Some(&false));
118            }
119            None | Some((Some(_), _)) => {
120                self.btree.insert(key, false);
121            }
122        }
123        previous
124    }
125
126    /// Removes from the diff the entry corresponding to the given `key`.
127    ///
128    /// Returns the value associated to this `key` that was previously in the diff, if any.
129    pub fn diff_remove(&mut self, key: impl AsRef<[u8]>) -> Option<(Option<Vec<u8>>, T)> {
130        let previous = self.hashmap.remove(key.as_ref());
131        if let Some(_previous) = &previous {
132            let _in_btree = self.btree.remove(key.as_ref());
133            debug_assert_eq!(_in_btree, Some(_previous.0.is_some()));
134        }
135        previous
136    }
137
138    /// Returns the diff entry at the given key.
139    ///
140    /// Returns `None` if the diff doesn't have any entry for this key, and `Some((None, _))` if
141    /// the diff has an entry that deletes the storage item.
142    pub fn diff_get(&self, key: &[u8]) -> Option<(Option<&[u8]>, &T)> {
143        self.hashmap
144            .get(key)
145            .map(|(v, ud)| (v.as_ref().map(|v| &v[..]), ud))
146    }
147
148    /// Returns an iterator to all the entries in the diff.
149    ///
150    /// Each value is either `Some` if the diff overwrites this diff, or `None` if it erases the
151    /// underlying value.
152    pub fn diff_iter_unordered(
153        &self,
154    ) -> impl ExactSizeIterator<Item = (&[u8], Option<&[u8]>, &T)> + Clone {
155        self.hashmap
156            .iter()
157            .map(|(k, (v, ud))| (&k[..], v.as_ref().map(|v| &v[..]), ud))
158    }
159
160    /// Returns an iterator to all the entries in the diff.
161    ///
162    /// Each value is either `Some` if the diff overwrites this diff, or `None` if it erases the
163    /// underlying value.
164    pub fn diff_into_iter_unordered(
165        self,
166    ) -> impl ExactSizeIterator<Item = (Vec<u8>, Option<Vec<u8>>, T)> {
167        self.hashmap.into_iter().map(|(k, (v, ud))| (k, v, ud))
168    }
169
170    /// Returns an iterator to all the entries in the diff within a given range.
171    ///
172    /// Each iterator element is the given and a boolean. This boolean is `true` if the diff
173    /// overwrites this entry, or `false` if it erases the underlying value.
174    pub fn diff_range_ordered<U: ?Sized>(
175        &self,
176        range: impl ops::RangeBounds<U>,
177    ) -> impl Iterator<Item = (&[u8], bool)> + Clone
178    where
179        U: Ord,
180        Vec<u8>: Borrow<U>,
181    {
182        self.btree
183            .range(range)
184            .map(|(k, inserts)| (&k[..], *inserts))
185    }
186
187    /// Returns the storage key that immediately follows the provided `key`. Must be passed the
188    /// storage key that immediately follows the provided `key` according to the base storage this
189    /// diff is based upon.
190    ///
191    /// If [`StorageNextKey::Found`] is returned, it contains the desired key. If
192    /// [`StorageNextKey::NextOf`] is returned, then this function should be called again but by
193    /// passing the `key` found in the [`StorageNextKey::NextOf`] (and of course the corresponding
194    /// `in_parent_next_key`).
195    ///
196    /// # Panic
197    ///
198    /// Panics if `in_parent_next_key` is provided and is inferior or equal to `key`.
199    ///
200    pub fn storage_next_key<'a>(
201        &'a self,
202        key: &[u8],
203        in_parent_next_key: Option<&'a [u8]>,
204        or_equal: bool,
205    ) -> StorageNextKey<'a> {
206        if let Some(in_parent_next_key) = in_parent_next_key {
207            assert!(in_parent_next_key > key);
208        }
209
210        // Find the diff entry that immediately follows `key`.
211        let in_diff = self
212            .btree
213            .range::<[u8], _>((
214                if or_equal {
215                    ops::Bound::Included(key)
216                } else {
217                    ops::Bound::Excluded(key)
218                },
219                ops::Bound::Unbounded,
220            ))
221            .next();
222
223        match (in_parent_next_key, in_diff) {
224            (Some(a), Some((b, true))) if a <= &b[..] => StorageNextKey::Found(Some(a)),
225            (Some(a), Some((b, false))) if a < &b[..] => StorageNextKey::Found(Some(a)),
226            (Some(a), Some((b, false))) => {
227                debug_assert!(a >= &b[..]); // We actually expect `a == b` but let's be defensive.
228                debug_assert!(&b[..] > key || or_equal);
229
230                // The next key according to the parent storage has been erased in this diff. It
231                // is necessary to ask the user again, this time for the key after the one that
232                // has been erased.
233
234                // Note that there is probably something wrong here if `a != b`, but we ignore
235                // that here.
236
237                StorageNextKey::NextOf(b)
238            }
239            (Some(a), Some((b, true))) => {
240                debug_assert!(a >= &b[..]);
241                StorageNextKey::Found(Some(&b[..]))
242            }
243
244            (Some(a), None) => StorageNextKey::Found(Some(a)),
245            (None, Some((b, true))) => StorageNextKey::Found(Some(&b[..])),
246            (None, Some((b, false))) => {
247                debug_assert!(&b[..] > key || or_equal);
248                let found = self
249                    .btree
250                    .range::<[u8], _>((ops::Bound::Excluded(&b[..]), ops::Bound::Unbounded))
251                    .find(|(_, value)| **value)
252                    .map(|(key, _)| &key[..]);
253                StorageNextKey::Found(found)
254            }
255            (None, None) => StorageNextKey::Found(None),
256        }
257    }
258
259    /// Applies the given diff on top of the current one.
260    pub fn merge(&mut self, other: &TrieDiff<T>)
261    where
262        T: Clone,
263    {
264        self.merge_map(other, |v| v.clone())
265    }
266
267    /// Applies the given diff on top of the current one.
268    ///
269    /// Each user data in the other diff is first passed through the map.
270    pub fn merge_map<U>(&mut self, other: &TrieDiff<U>, mut map: impl FnMut(&U) -> T) {
271        // TODO: provide an alternative method that consumes `other` as well?
272        for (key, (value, user_data)) in &other.hashmap {
273            self.hashmap
274                .insert(key.clone(), (value.clone(), map(user_data)));
275            self.btree.insert(key.clone(), value.is_some());
276        }
277    }
278}
279
280impl<T> fmt::Debug for TrieDiff<T>
281where
282    T: fmt::Debug,
283{
284    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
285        // Delegate to `self.inner`
286        fmt::Debug::fmt(&self.hashmap, f)
287    }
288}
289
290// We implement `PartialEq` manually, because deriving it would mean that both the hash map and
291// the tree are compared.
292impl<T, U> cmp::PartialEq<TrieDiff<U>> for TrieDiff<T>
293where
294    T: cmp::PartialEq<U>,
295{
296    fn eq(&self, other: &TrieDiff<U>) -> bool {
297        // As of the writing of this code, HashMap<K, T> doesn't implement
298        // PartialEq<HashMap<K, U>> where T: PartialEq<U>, so we have to implement it manually. The
299        // way this is implemented matches what the underlying implementation of HashMap does.
300        if self.hashmap.len() != other.hashmap.len() {
301            return false;
302        }
303
304        self.hashmap.iter().all(|(key, (v1, u1))| {
305            other
306                .hashmap
307                .get(key)
308                .map_or(false, |(v2, u2)| *v1 == *v2 && *u1 == *u2)
309        })
310    }
311}
312
313impl<T> cmp::Eq for TrieDiff<T> where T: cmp::Eq {}
314
315impl<T> Default for TrieDiff<T> {
316    fn default() -> Self {
317        TrieDiff::empty()
318    }
319}
320
321impl<T> FromIterator<(Vec<u8>, Option<Vec<u8>>, T)> for TrieDiff<T> {
322    fn from_iter<I>(iter: I) -> Self
323    where
324        I: IntoIterator<Item = (Vec<u8>, Option<Vec<u8>>, T)>,
325    {
326        let hashmap = iter
327            .into_iter()
328            .map(|(k, v, ud)| (k, (v, ud)))
329            .collect::<HashMap<Vec<u8>, (Option<Vec<u8>>, T), fnv::FnvBuildHasher>>();
330        let btree = hashmap
331            .iter()
332            .map(|(k, (v, _))| (k.clone(), v.is_some()))
333            .collect();
334
335        Self { btree, hashmap }
336    }
337}
338
339pub enum StorageNextKey<'a> {
340    Found(Option<&'a [u8]>),
341    NextOf(&'a [u8]),
342}