tetsy_trie_db/
iter_build.rs

1// Copyright 2017, 2020 Parity Technologies
2//
3// Licensed under the Apache License, Version .0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Alternative tools for working with key value ordered iterator without recursion.
16//! This is iterative implementation of `tetsy_trie_root` algorithm, using `NodeCodec`
17//! implementation.
18//! See `trie_visit` function.
19
20use tetsy_hash_db::{Hasher, HashDB, Prefix};
21use crate::rstd::{cmp::max, marker::PhantomData, vec::Vec};
22use crate::triedbmut::{ChildReference};
23use crate::nibble::NibbleSlice;
24use crate::nibble::nibble_ops;
25use crate::node_codec::NodeCodec;
26use crate::{TrieLayout, TrieHash};
27
28macro_rules! exponential_out {
29	(@3, [$($inpp:expr),*]) => { exponential_out!(@2, [$($inpp,)* $($inpp),*]) };
30	(@2, [$($inpp:expr),*]) => { exponential_out!(@1, [$($inpp,)* $($inpp),*]) };
31	(@1, [$($inpp:expr),*]) => { [$($inpp,)* $($inpp),*] };
32}
33
34type CacheNode<HO> = Option<ChildReference<HO>>;
35
36#[inline(always)]
37fn new_vec_slice_buffer<HO>() -> [CacheNode<HO>; 16] {
38	exponential_out!(@3, [None, None])
39}
40
41type ArrayNode<T> = [CacheNode<TrieHash<T>>; 16];
42
43/// Struct containing iteration cache, can be at most the length of the lowest nibble.
44///
45/// Note that it is not memory optimal (all depth are allocated even if some are empty due
46/// to node partial).
47/// Three field are used, a cache over the children, an optional associated value and the depth.
48struct CacheAccum<T: TrieLayout, V> (Vec<(ArrayNode<T>, Option<V>, usize)>, PhantomData<T>);
49
50/// Initially allocated cache depth.
51const INITIAL_DEPTH: usize = 10;
52
53impl<T, V> CacheAccum<T, V>
54	where
55		T: TrieLayout,
56		V: AsRef<[u8]>,
57{
58
59	fn new() -> Self {
60		let v = Vec::with_capacity(INITIAL_DEPTH);
61		CacheAccum(v, PhantomData)
62	}
63
64	#[inline(always)]
65	fn set_cache_value(&mut self, depth:usize, value: Option<V>) {
66		if self.0.is_empty() || self.0[self.0.len() - 1].2 < depth {
67			self.0.push((new_vec_slice_buffer(), None, depth));
68		}
69		let last = self.0.len() - 1;
70		debug_assert!(self.0[last].2 <= depth);
71		self.0[last].1 = value;
72	}
73
74	#[inline(always)]
75	fn set_node(&mut self, depth: usize, nibble_index: usize, node: CacheNode<TrieHash<T>>) {
76		if self.0.is_empty() || self.0[self.0.len() - 1].2 < depth {
77			self.0.push((new_vec_slice_buffer(), None, depth));
78		}
79
80		let last = self.0.len() - 1;
81		debug_assert!(self.0[last].2 == depth);
82
83		self.0[last].0.as_mut()[nibble_index] = node;
84	}
85
86	#[inline(always)]
87	fn last_depth(&self) -> usize {
88		let ix = self.0.len();
89		if ix > 0 {
90			let last = ix - 1;
91			self.0[last].2
92		} else {
93			0
94		}
95	}
96
97	#[inline(always)]
98	fn last_last_depth(&self) -> usize {
99		let ix = self.0.len();
100		if ix > 1 {
101			let last = ix - 2;
102			self.0[last].2
103		} else {
104			0
105		}
106	}
107
108	#[inline(always)]
109	fn is_empty(&self) -> bool {
110		self.0.is_empty()
111	}
112	#[inline(always)]
113	fn is_one(&self) -> bool {
114		self.0.len() == 1
115	}
116
117	#[inline(always)]
118	fn reset_depth(&mut self, depth: usize) {
119		debug_assert!(self.0[self.0.len() - 1].2 == depth);
120		self.0.pop();
121	}
122
123	fn flush_value (
124		&mut self,
125		callback: &mut impl ProcessEncodedNode<TrieHash<T>>,
126		target_depth: usize,
127		(k2, v2): &(impl AsRef<[u8]>, impl AsRef<[u8]>),
128	) {
129		let nibble_value = nibble_ops::left_nibble_at(&k2.as_ref()[..], target_depth);
130		// is it a branch value (two candidate same ix)
131		let nkey = NibbleSlice::new_offset(&k2.as_ref()[..], target_depth + 1);
132		let encoded = T::Codec::leaf_node(nkey.right(), &v2.as_ref()[..]);
133		let pr = NibbleSlice::new_offset(
134			&k2.as_ref()[..],
135			k2.as_ref().len() * nibble_ops::NIBBLE_PER_BYTE - nkey.len(),
136		);
137		let hash = callback.process(pr.left(), encoded, false);
138
139		// insert hash in branch (first level branch only at this point)
140		self.set_node(target_depth, nibble_value as usize, Some(hash));
141	}
142
143	fn flush_branch(
144		&mut self,
145		no_extension: bool,
146		callback: &mut impl ProcessEncodedNode<TrieHash<T>>,
147		ref_branch: impl AsRef<[u8]> + Ord,
148		new_depth: usize,
149		is_last: bool,
150	) {
151
152		while self.last_depth() > new_depth || is_last && !self.is_empty() {
153
154			let lix = self.last_depth();
155			let llix = max(self.last_last_depth(), new_depth);
156
157			let (offset, slice_size, is_root) =
158				if llix == 0 && is_last && self.is_one() {
159				// branch root
160				(llix, lix - llix, true)
161			} else {
162				(llix + 1, lix - llix - 1, false)
163			};
164			let nkey = if slice_size > 0 {
165				Some((offset, slice_size))
166			} else {
167				None
168			};
169
170			let h = if no_extension {
171				// encode branch
172				self.no_extension(&ref_branch.as_ref()[..], callback, lix, is_root, nkey)
173			} else {
174				self.standard_extension(&ref_branch.as_ref()[..], callback, lix, is_root, nkey)
175			};
176			if !is_root {
177				// put hash in parent
178				let nibble: u8 = nibble_ops::left_nibble_at(&ref_branch.as_ref()[..], llix);
179				self.set_node(llix, nibble as usize, Some(h));
180			}
181		}
182	}
183
184	#[inline(always)]
185	fn standard_extension(
186		&mut self,
187		key_branch: &[u8],
188		callback: &mut impl ProcessEncodedNode<TrieHash<T>>,
189		branch_d: usize,
190		is_root: bool,
191		nkey: Option<(usize, usize)>,
192	) -> ChildReference<TrieHash<T>> {
193		let last = self.0.len() - 1;
194		assert_eq!(self.0[last].2, branch_d);
195
196		// encode branch
197		let v = self.0[last].1.take();
198		let encoded = T::Codec::branch_node(
199			self.0[last].0.as_ref().iter(),
200			v.as_ref().map(|v| v.as_ref()),
201		);
202		self.reset_depth(branch_d);
203		let pr = NibbleSlice::new_offset(&key_branch, branch_d);
204		let branch_hash = callback.process(pr.left(), encoded, is_root && nkey.is_none());
205
206		if let Some(nkeyix) = nkey {
207			let pr = NibbleSlice::new_offset(&key_branch, nkeyix.0);
208			let nib = pr.right_range_iter(nkeyix.1);
209			let encoded = T::Codec::extension_node(nib, nkeyix.1, branch_hash);
210			callback.process(pr.left(), encoded, is_root)
211		} else {
212			branch_hash
213		}
214	}
215
216	#[inline(always)]
217	fn no_extension(
218		&mut self,
219		key_branch: &[u8],
220		callback: &mut impl ProcessEncodedNode<TrieHash<T>>,
221		branch_d: usize,
222		is_root: bool,
223		nkey: Option<(usize, usize)>,
224		) -> ChildReference<TrieHash<T>> {
225		let last = self.0.len() - 1;
226		debug_assert!(self.0[last].2 == branch_d);
227		// encode branch
228		let v = self.0[last].1.take();
229		let nkeyix = nkey.unwrap_or((0, 0));
230		let pr = NibbleSlice::new_offset(&key_branch, nkeyix.0);
231		let encoded = T::Codec::branch_node_nibbled(
232			pr.right_range_iter(nkeyix.1),
233			nkeyix.1,
234			self.0[last].0.as_ref().iter(), v.as_ref().map(|v| v.as_ref()));
235		self.reset_depth(branch_d);
236		let ext_length = nkey.as_ref().map(|nkeyix| nkeyix.0).unwrap_or(0);
237		let pr = NibbleSlice::new_offset(
238			&key_branch,
239			branch_d - ext_length,
240		);
241		callback.process(pr.left(), encoded, is_root)
242	}
243
244}
245
246/// Function visiting trie from key value inputs with a `ProccessEncodedNode` callback.
247/// This is the main entry point of this module.
248/// Calls to each node occurs ordered by byte key value but with longest keys first (from node to
249/// branch to root), this differs from standard byte array ordering a bit.
250pub fn trie_visit<T, I, A, B, F>(input: I, callback: &mut F)
251	where
252		T: TrieLayout,
253		I: IntoIterator<Item = (A, B)>,
254		A: AsRef<[u8]> + Ord,
255		B: AsRef<[u8]>,
256		F: ProcessEncodedNode<TrieHash<T>>,
257{
258	let no_extension = !T::USE_EXTENSION;
259	let mut depth_queue = CacheAccum::<T, B>::new();
260	// compare iter ordering
261	let mut iter_input = input.into_iter();
262	if let Some(mut previous_value) = iter_input.next() {
263		// depth of last item
264		let mut last_depth = 0;
265
266		let mut single = true;
267		for (k, v) in iter_input {
268			single = false;
269			let common_depth = nibble_ops::biggest_depth(&previous_value.0.as_ref()[..], &k.as_ref()[..]);
270			// 0 is a reserved value : could use option
271			let depth_item = common_depth;
272			if common_depth == previous_value.0.as_ref().len() * nibble_ops::NIBBLE_PER_BYTE {
273				// the new key include the previous one : branch value case
274				// just stored value at branch depth
275				depth_queue.set_cache_value(common_depth, Some(previous_value.1));
276			} else if depth_item >= last_depth {
277				// put previous with next (common branch previous value can be flush)
278				depth_queue.flush_value(callback, depth_item, &previous_value);
279			} else if depth_item < last_depth {
280				// do not put with next, previous is last of a branch
281				depth_queue.flush_value(callback, last_depth, &previous_value);
282				let ref_branches = previous_value.0;
283				depth_queue.flush_branch(no_extension, callback, ref_branches, depth_item, false);
284			}
285
286			previous_value = (k, v);
287			last_depth = depth_item;
288		}
289		// last pendings
290		if single {
291			// one single element corner case
292			let (k2, v2) = previous_value;
293			let nkey = NibbleSlice::new_offset(&k2.as_ref()[..], last_depth);
294			let encoded = T::Codec::leaf_node(nkey.right(), &v2.as_ref()[..]);
295			let pr = NibbleSlice::new_offset(
296				&k2.as_ref()[..],
297				k2.as_ref().len() * nibble_ops::NIBBLE_PER_BYTE - nkey.len(),
298			);
299			callback.process(pr.left(), encoded, true);
300		} else {
301			depth_queue.flush_value(callback, last_depth, &previous_value);
302			let ref_branches = previous_value.0;
303			depth_queue.flush_branch(no_extension, callback, ref_branches, 0, true);
304		}
305	} else {
306		// nothing null root corner case
307		callback.process(tetsy_hash_db::EMPTY_PREFIX, T::Codec::empty_node().to_vec(), true);
308	}
309}
310
311/// Visitor trait to implement when using `trie_visit`.
312pub trait ProcessEncodedNode<HO> {
313	/// Function call with prefix, encoded value and a boolean indicating if the
314	/// node is the root for each node of the trie.
315	///
316	/// Note that the returned value can change depending on implementation,
317	/// but usually it should be the Hash of encoded node.
318	/// This is not something direcly related to encoding but is here for
319	/// optimisation purpose (builder tetsy_hash_db does return this value).
320	fn process(&mut self, prefix: Prefix, encoded_node: Vec<u8>, is_root: bool) -> ChildReference<HO>;
321}
322
323/// Get trie root and insert visited node in a tetsy_hash_db.
324/// As for all `ProcessEncodedNode` implementation, it
325/// is only for full trie parsing (not existing trie).
326pub struct TrieBuilder<'a, H, HO, V, DB> {
327	db: &'a mut DB,
328	pub root: Option<HO>,
329	_ph: PhantomData<(H, V)>,
330}
331
332impl<'a, H, HO, V, DB> TrieBuilder<'a, H, HO, V, DB> {
333	pub fn new(db: &'a mut DB) -> Self {
334		TrieBuilder { db, root: None, _ph: PhantomData }
335	}
336}
337
338impl<'a, H: Hasher, V, DB: HashDB<H, V>> ProcessEncodedNode<<H as Hasher>::Out>
339	for TrieBuilder<'a, H, <H as Hasher>::Out, V, DB> {
340	fn process(
341		&mut self,
342		prefix: Prefix,
343		encoded_node: Vec<u8>,
344		is_root: bool,
345	) -> ChildReference<<H as Hasher>::Out> {
346		let len = encoded_node.len();
347		if !is_root && len < <H as Hasher>::LENGTH {
348			let mut h = <<H as Hasher>::Out as Default>::default();
349			h.as_mut()[..len].copy_from_slice(&encoded_node[..len]);
350
351			return ChildReference::Inline(h, len);
352		}
353		let hash = self.db.insert(prefix, &encoded_node[..]);
354		if is_root {
355			self.root = Some(hash);
356		};
357		ChildReference::Hash(hash)
358	}
359}
360
361/// Calculate the trie root of the trie.
362pub struct TrieRoot<H, HO> {
363	/// The resulting root.
364	pub root: Option<HO>,
365	_ph: PhantomData<H>,
366}
367
368impl<H, HO> Default for TrieRoot<H, HO> {
369	fn default() -> Self {
370		TrieRoot { root: None, _ph: PhantomData }
371	}
372}
373
374impl<H: Hasher> ProcessEncodedNode<<H as Hasher>::Out> for TrieRoot<H, <H as Hasher>::Out> {
375	fn process(
376		&mut self,
377		_: Prefix,
378		encoded_node: Vec<u8>,
379		is_root: bool,
380	) -> ChildReference<<H as Hasher>::Out> {
381		let len = encoded_node.len();
382		if !is_root && len < <H as Hasher>::LENGTH {
383			let mut h = <<H as Hasher>::Out as Default>::default();
384			h.as_mut()[..len].copy_from_slice(&encoded_node[..len]);
385
386			return ChildReference::Inline(h, len);
387		}
388		let hash = <H as Hasher>::hash(&encoded_node[..]);
389		if is_root {
390			self.root = Some(hash);
391		};
392		ChildReference::Hash(hash)
393	}
394}
395
396/// Get the trie root node encoding.
397pub struct TrieRootUnhashed<H> {
398	/// The resulting encoded root.
399	pub root: Option<Vec<u8>>,
400	_ph: PhantomData<H>,
401}
402
403impl<H> Default for TrieRootUnhashed<H> {
404	fn default() -> Self {
405		TrieRootUnhashed { root: None, _ph: PhantomData }
406	}
407}
408
409#[cfg(feature = "std")]
410/// Calculate the trie root of the trie.
411/// Print a debug trace.
412pub struct TrieRootPrint<H, HO> {
413	/// The resulting root.
414	pub root: Option<HO>,
415	_ph: PhantomData<H>,
416}
417
418#[cfg(feature = "std")]
419impl<H, HO> Default for TrieRootPrint<H, HO> {
420	fn default() -> Self {
421		TrieRootPrint { root: None, _ph: PhantomData }
422	}
423}
424
425#[cfg(feature = "std")]
426impl<H: Hasher> ProcessEncodedNode<<H as Hasher>::Out> for TrieRootPrint<H, <H as Hasher>::Out> {
427	fn process(
428		&mut self,
429		p: Prefix,
430		encoded_node: Vec<u8>,
431		is_root: bool,
432	) -> ChildReference<<H as Hasher>::Out> {
433		println!("Encoded node: {:x?}", &encoded_node);
434		println!("	with prefix: {:x?}", &p);
435		let len = encoded_node.len();
436		if !is_root && len < <H as Hasher>::LENGTH {
437			let mut h = <<H as Hasher>::Out as Default>::default();
438			h.as_mut()[..len].copy_from_slice(&encoded_node[..len]);
439
440			println!("	inline len {}", len);
441			return ChildReference::Inline(h, len);
442		}
443		let hash = <H as Hasher>::hash(&encoded_node[..]);
444		if is_root {
445			self.root = Some(hash);
446		};
447		println!("	hashed to {:x?}", hash.as_ref());
448		ChildReference::Hash(hash)
449	}
450}
451
452impl<H: Hasher> ProcessEncodedNode<<H as Hasher>::Out> for TrieRootUnhashed<H> {
453	fn process(
454		&mut self,
455		_: Prefix,
456		encoded_node: Vec<u8>,
457		is_root: bool,
458	) -> ChildReference<<H as Hasher>::Out> {
459		let len = encoded_node.len();
460		if !is_root && len < <H as Hasher>::LENGTH {
461			let mut h = <<H as Hasher>::Out as Default>::default();
462			h.as_mut()[..len].copy_from_slice(&encoded_node[..len]);
463
464			return ChildReference::Inline(h, len);
465		}
466		let hash = <H as Hasher>::hash(&encoded_node[..]);
467		if is_root {
468			self.root = Some(encoded_node);
469		};
470		ChildReference::Hash(hash)
471	}
472}