smoldot/executor/storage_diff.rs
1// Smoldot
2// Copyright (C) 2019-2021 Parity Technologies (UK) Ltd.
3// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0
4
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU General Public License for more details.
14
15// You should have received a copy of the GNU General Public License
16// along with this program. If not, see <http://www.gnu.org/licenses/>.
17
18//! "Diff" between a trie and the next.
19//!
20//! Imagine two `HashMap<Vec<u8>, Vec<u8>>`s representing the content of a trie. This data
21//! structure contains the difference from one to the other.
22//!
23//! This data structure can be used in a variety of circumstances, such as storing the storage
24//! differences between a block and its child or storing on-going changes while a runtime call is
25//! being performed. It can also be used to store an entire trie, by representing a diff where
26//! the base is an empty trie.
27//!
28//! # About keys hashing
29//!
30//! This data structure internally uses a hash map. This hash map assumes that storage keys are
31//! already uniformly distributed and doesn't perform any additional hashing.
32//!
33//! You should be aware that a malicious runtime could perform hash collision attacks that
34//! considerably slow down this data structure.
35//!
36
37// TODO: is this module properly located?
38
39// TODO: more docs
40
41use alloc::{collections::BTreeMap, vec::Vec};
42use core::{borrow::Borrow, cmp, fmt, ops};
43use hashbrown::HashMap;
44
45#[derive(Clone)]
46pub struct TrieDiff<T = ()> {
47 /// Contains the same entries as [`TrieDiff::hashmap`], except that values are booleans
48 /// indicating whether the value updates (`true`) or deletes (`false`) the underlying
49 /// storage item.
50 btree: BTreeMap<Vec<u8>, bool>,
51
52 /// Actual diff. For each key, `Some` if the underlying storage item is updated by this diff,
53 /// and `None` if it is deleted.
54 ///
55 /// A FNV hasher is used because the runtime is supposed to guarantee a uniform distribution
56 /// of storage keys.
57 hashmap: HashMap<Vec<u8>, (Option<Vec<u8>>, T), fnv::FnvBuildHasher>,
58}
59
60impl<T> TrieDiff<T> {
61 /// Builds a new empty diff.
62 pub fn empty() -> Self {
63 Self {
64 btree: BTreeMap::default(),
65 // TODO: with_capacity?
66 hashmap: HashMap::with_capacity_and_hasher(0, Default::default()),
67 }
68 }
69
70 /// Removes all the entries within this diff.
71 pub fn clear(&mut self) {
72 self.hashmap.clear();
73 self.btree.clear();
74 }
75
76 /// Inserts the given key-value combination in the diff.
77 ///
78 /// Returns the value associated to this `key` that was previously in the diff, if any.
79 pub fn diff_insert(
80 &mut self,
81 key: impl Into<Vec<u8>>,
82 value: impl Into<Vec<u8>>,
83 user_data: T,
84 ) -> Option<(Option<Vec<u8>>, T)> {
85 let key = key.into();
86 // Note that we clone the key here. This is considered as a tolerable overhead.
87 let previous = self
88 .hashmap
89 .insert(key.clone(), (Some(value.into()), user_data));
90 match &previous {
91 Some((Some(_), _)) => {
92 // No need to update `btree`.
93 debug_assert_eq!(self.btree.get(&key), Some(&true));
94 }
95 None | Some((None, _)) => {
96 self.btree.insert(key, true);
97 }
98 }
99 previous
100 }
101
102 /// Inserts in the diff an entry at the given key that delete the value that is located in
103 /// the base storage.
104 ///
105 /// Returns the value associated to this `key` that was previously in the diff, if any.
106 pub fn diff_insert_erase(
107 &mut self,
108 key: impl Into<Vec<u8>>,
109 user_data: T,
110 ) -> Option<(Option<Vec<u8>>, T)> {
111 let key = key.into();
112 // Note that we clone the key here. This is considered as a tolerable overhead.
113 let previous = self.hashmap.insert(key.clone(), (None, user_data));
114 match &previous {
115 Some((None, _)) => {
116 // No need to update `btree`.
117 debug_assert_eq!(self.btree.get(&key), Some(&false));
118 }
119 None | Some((Some(_), _)) => {
120 self.btree.insert(key, false);
121 }
122 }
123 previous
124 }
125
126 /// Removes from the diff the entry corresponding to the given `key`.
127 ///
128 /// Returns the value associated to this `key` that was previously in the diff, if any.
129 pub fn diff_remove(&mut self, key: impl AsRef<[u8]>) -> Option<(Option<Vec<u8>>, T)> {
130 let previous = self.hashmap.remove(key.as_ref());
131 if let Some(_previous) = &previous {
132 let _in_btree = self.btree.remove(key.as_ref());
133 debug_assert_eq!(_in_btree, Some(_previous.0.is_some()));
134 }
135 previous
136 }
137
138 /// Returns the diff entry at the given key.
139 ///
140 /// Returns `None` if the diff doesn't have any entry for this key, and `Some((None, _))` if
141 /// the diff has an entry that deletes the storage item.
142 pub fn diff_get(&self, key: &[u8]) -> Option<(Option<&[u8]>, &T)> {
143 self.hashmap
144 .get(key)
145 .map(|(v, ud)| (v.as_ref().map(|v| &v[..]), ud))
146 }
147
148 /// Returns an iterator to all the entries in the diff.
149 ///
150 /// Each value is either `Some` if the diff overwrites this diff, or `None` if it erases the
151 /// underlying value.
152 pub fn diff_iter_unordered(
153 &self,
154 ) -> impl ExactSizeIterator<Item = (&[u8], Option<&[u8]>, &T)> + Clone {
155 self.hashmap
156 .iter()
157 .map(|(k, (v, ud))| (&k[..], v.as_ref().map(|v| &v[..]), ud))
158 }
159
160 /// Returns an iterator to all the entries in the diff.
161 ///
162 /// Each value is either `Some` if the diff overwrites this diff, or `None` if it erases the
163 /// underlying value.
164 pub fn diff_into_iter_unordered(
165 self,
166 ) -> impl ExactSizeIterator<Item = (Vec<u8>, Option<Vec<u8>>, T)> {
167 self.hashmap.into_iter().map(|(k, (v, ud))| (k, v, ud))
168 }
169
170 /// Returns an iterator to all the entries in the diff within a given range.
171 ///
172 /// Each iterator element is the given and a boolean. This boolean is `true` if the diff
173 /// overwrites this entry, or `false` if it erases the underlying value.
174 pub fn diff_range_ordered<U: ?Sized>(
175 &self,
176 range: impl ops::RangeBounds<U>,
177 ) -> impl Iterator<Item = (&[u8], bool)> + Clone
178 where
179 U: Ord,
180 Vec<u8>: Borrow<U>,
181 {
182 self.btree
183 .range(range)
184 .map(|(k, inserts)| (&k[..], *inserts))
185 }
186
187 /// Returns the storage key that immediately follows the provided `key`. Must be passed the
188 /// storage key that immediately follows the provided `key` according to the base storage this
189 /// diff is based upon.
190 ///
191 /// If [`StorageNextKey::Found`] is returned, it contains the desired key. If
192 /// [`StorageNextKey::NextOf`] is returned, then this function should be called again but by
193 /// passing the `key` found in the [`StorageNextKey::NextOf`] (and of course the corresponding
194 /// `in_parent_next_key`).
195 ///
196 /// # Panic
197 ///
198 /// Panics if `in_parent_next_key` is provided and is inferior or equal to `key`.
199 ///
200 pub fn storage_next_key<'a>(
201 &'a self,
202 key: &[u8],
203 in_parent_next_key: Option<&'a [u8]>,
204 or_equal: bool,
205 ) -> StorageNextKey<'a> {
206 if let Some(in_parent_next_key) = in_parent_next_key {
207 assert!(in_parent_next_key > key);
208 }
209
210 // Find the diff entry that immediately follows `key`.
211 let in_diff = self
212 .btree
213 .range::<[u8], _>((
214 if or_equal {
215 ops::Bound::Included(key)
216 } else {
217 ops::Bound::Excluded(key)
218 },
219 ops::Bound::Unbounded,
220 ))
221 .next();
222
223 match (in_parent_next_key, in_diff) {
224 (Some(a), Some((b, true))) if a <= &b[..] => StorageNextKey::Found(Some(a)),
225 (Some(a), Some((b, false))) if a < &b[..] => StorageNextKey::Found(Some(a)),
226 (Some(a), Some((b, false))) => {
227 debug_assert!(a >= &b[..]); // We actually expect `a == b` but let's be defensive.
228 debug_assert!(&b[..] > key || or_equal);
229
230 // The next key according to the parent storage has been erased in this diff. It
231 // is necessary to ask the user again, this time for the key after the one that
232 // has been erased.
233
234 // Note that there is probably something wrong here if `a != b`, but we ignore
235 // that here.
236
237 StorageNextKey::NextOf(b)
238 }
239 (Some(a), Some((b, true))) => {
240 debug_assert!(a >= &b[..]);
241 StorageNextKey::Found(Some(&b[..]))
242 }
243
244 (Some(a), None) => StorageNextKey::Found(Some(a)),
245 (None, Some((b, true))) => StorageNextKey::Found(Some(&b[..])),
246 (None, Some((b, false))) => {
247 debug_assert!(&b[..] > key || or_equal);
248 let found = self
249 .btree
250 .range::<[u8], _>((ops::Bound::Excluded(&b[..]), ops::Bound::Unbounded))
251 .find(|(_, value)| **value)
252 .map(|(key, _)| &key[..]);
253 StorageNextKey::Found(found)
254 }
255 (None, None) => StorageNextKey::Found(None),
256 }
257 }
258
259 /// Applies the given diff on top of the current one.
260 pub fn merge(&mut self, other: &TrieDiff<T>)
261 where
262 T: Clone,
263 {
264 self.merge_map(other, |v| v.clone())
265 }
266
267 /// Applies the given diff on top of the current one.
268 ///
269 /// Each user data in the other diff is first passed through the map.
270 pub fn merge_map<U>(&mut self, other: &TrieDiff<U>, mut map: impl FnMut(&U) -> T) {
271 // TODO: provide an alternative method that consumes `other` as well?
272 for (key, (value, user_data)) in &other.hashmap {
273 self.hashmap
274 .insert(key.clone(), (value.clone(), map(user_data)));
275 self.btree.insert(key.clone(), value.is_some());
276 }
277 }
278}
279
280impl<T> fmt::Debug for TrieDiff<T>
281where
282 T: fmt::Debug,
283{
284 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
285 // Delegate to `self.inner`
286 fmt::Debug::fmt(&self.hashmap, f)
287 }
288}
289
290// We implement `PartialEq` manually, because deriving it would mean that both the hash map and
291// the tree are compared.
292impl<T, U> cmp::PartialEq<TrieDiff<U>> for TrieDiff<T>
293where
294 T: cmp::PartialEq<U>,
295{
296 fn eq(&self, other: &TrieDiff<U>) -> bool {
297 // As of the writing of this code, HashMap<K, T> doesn't implement
298 // PartialEq<HashMap<K, U>> where T: PartialEq<U>, so we have to implement it manually. The
299 // way this is implemented matches what the underlying implementation of HashMap does.
300 if self.hashmap.len() != other.hashmap.len() {
301 return false;
302 }
303
304 self.hashmap.iter().all(|(key, (v1, u1))| {
305 other
306 .hashmap
307 .get(key)
308 .map_or(false, |(v2, u2)| *v1 == *v2 && *u1 == *u2)
309 })
310 }
311}
312
313impl<T> cmp::Eq for TrieDiff<T> where T: cmp::Eq {}
314
315impl<T> Default for TrieDiff<T> {
316 fn default() -> Self {
317 TrieDiff::empty()
318 }
319}
320
321impl<T> FromIterator<(Vec<u8>, Option<Vec<u8>>, T)> for TrieDiff<T> {
322 fn from_iter<I>(iter: I) -> Self
323 where
324 I: IntoIterator<Item = (Vec<u8>, Option<Vec<u8>>, T)>,
325 {
326 let hashmap = iter
327 .into_iter()
328 .map(|(k, v, ud)| (k, (v, ud)))
329 .collect::<HashMap<Vec<u8>, (Option<Vec<u8>>, T), fnv::FnvBuildHasher>>();
330 let btree = hashmap
331 .iter()
332 .map(|(k, (v, _))| (k.clone(), v.is_some()))
333 .collect();
334
335 Self { btree, hashmap }
336 }
337}
338
339pub enum StorageNextKey<'a> {
340 Found(Option<&'a [u8]>),
341 NextOf(&'a [u8]),
342}