tetsy_trie_db/
trie_codec.rs

1// Copyright 2019, 2020 Parity Technologies
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Compact encoding/decoding functions for partial Merkle-Patricia tries.
16//!
17//! A partial trie is a subset of the nodes in a complete trie, which can still be used to
18//! perform authenticated lookups on a subset of keys. A naive encoding is the set of encoded nodes
19//! in the partial trie. This, however, includes redundant hashes of other nodes in the partial
20//! trie which could be computed directly. The compact encoding strips out all hash child
21//! references to other nodes in the partial trie and replaces them with empty inline references,
22//! indicating that the child reference is omitted. The nodes are then ordered in pre-order
23//! traversal order so that the full nodes can be efficiently reconstructed recursively. Note that
24//! hash references to nodes not in the partial trie are left intact. The compact encoding can be
25//! expected to save roughly (n - 1) hashes in size where n is the number of nodes in the partial
26//! trie.
27
28use tetsy_hash_db::HashDB;
29use crate::{
30	CError, ChildReference, DBValue, NibbleVec, NodeCodec, Result,
31	TrieHash, TrieError, TrieDB, TrieDBNodeIterator, TrieLayout,
32	nibble_ops::NIBBLE_LENGTH, node::{Node, NodeHandle, NodeHandlePlan, NodePlan, OwnedNode},
33};
34use crate::rstd::{
35	boxed::Box, convert::TryInto, marker::PhantomData, rc::Rc, result, vec, vec::Vec,
36};
37
38struct EncoderStackEntry<C: NodeCodec> {
39	/// The prefix is the nibble path to the node in the trie.
40	prefix: NibbleVec,
41	node: Rc<OwnedNode<DBValue>>,
42	/// The next entry in the stack is a child of the preceding entry at this index. For branch
43	/// nodes, the index is in [0, NIBBLE_LENGTH] and for extension nodes, the index is in [0, 1].
44	child_index: usize,
45	/// Flags indicating whether each child is omitted in the encoded node.
46	omit_children: Vec<bool>,
47	/// The encoding of the subtrie nodes rooted at this entry, which is built up in
48	/// `encode_compact`.
49	output_index: usize,
50	_marker: PhantomData<C>,
51}
52
53impl<C: NodeCodec> EncoderStackEntry<C> {
54	/// Given the prefix of the next child node, identify its index and advance `child_index` to
55	/// that. For a given entry, this must be called sequentially only with strictly increasing
56	/// child prefixes. Returns an error if the child prefix is not a child of this entry or if
57	/// called with children out of order.
58	///
59	/// Preconditions:
60	/// - self.prefix + partial must be a prefix of child_prefix.
61	/// - if self.node is a branch, then child_prefix must be longer than self.prefix + partial.
62	fn advance_child_index(&mut self, child_prefix: &NibbleVec)
63		-> result::Result<(), &'static str>
64	{
65		match self.node.node_plan() {
66			NodePlan::Empty | NodePlan::Leaf { .. } =>
67				return Err("empty and leaf nodes have no children"),
68			NodePlan::Extension { .. } => {
69				if self.child_index != 0 {
70					return Err("extension node cannot have multiple children")
71				}
72			}
73			NodePlan::Branch { .. } => {
74				if child_prefix.len() <= self.prefix.len() {
75					return Err("child_prefix does not contain prefix");
76				}
77				let child_index = child_prefix.at(self.prefix.len()) as usize;
78				if child_index < self.child_index {
79					return Err("iterator returned children in non-ascending order by prefix");
80				}
81				self.child_index = child_index;
82			}
83			NodePlan::NibbledBranch { partial, .. } => {
84				if child_prefix.len() <= self.prefix.len() + partial.len() {
85					return Err("child_prefix does not contain prefix and node partial");
86				}
87				let child_index = child_prefix.at(self.prefix.len() + partial.len()) as usize;
88				if child_index < self.child_index {
89					return Err("iterator returned children in non-ascending order by prefix");
90				}
91				self.child_index = child_index;
92			}
93		}
94		Ok(())
95	}
96
97	/// Generates the encoding of the subtrie rooted at this entry.
98	fn encode_node(&self) -> Result<Vec<u8>, C::HashOut, C::Error> {
99		let node_data = self.node.data();
100		Ok(match self.node.node_plan() {
101			NodePlan::Empty | NodePlan::Leaf { .. } => node_data.to_vec(),
102			NodePlan::Extension { partial, child: _ } => {
103				if !self.omit_children[0] {
104					node_data.to_vec()
105				} else {
106					let partial = partial.build(node_data);
107					let empty_child = ChildReference::Inline(C::HashOut::default(), 0);
108					C::extension_node(partial.right_iter(), partial.len(), empty_child)
109				}
110			}
111			NodePlan::Branch { value, children } => {
112				C::branch_node(
113					Self::branch_children(node_data, &children, &self.omit_children)?.iter(),
114					value.clone().map(|range| &node_data[range])
115				)
116			}
117			NodePlan::NibbledBranch { partial, value, children } => {
118				let partial = partial.build(node_data);
119				C::branch_node_nibbled(
120					partial.right_iter(),
121					partial.len(),
122					Self::branch_children(node_data, &children, &self.omit_children)?.iter(),
123					value.clone().map(|range| &node_data[range])
124				)
125			}
126		})
127	}
128
129	/// Generate the list of child references for a branch node with certain children omitted.
130	///
131	/// Preconditions:
132	/// - omit_children has size NIBBLE_LENGTH.
133	/// - omit_children[i] is only true if child_handles[i] is Some
134	fn branch_children(
135		node_data: &[u8],
136		child_handles: &[Option<NodeHandlePlan>; NIBBLE_LENGTH],
137		omit_children: &[bool],
138	) -> Result<[Option<ChildReference<C::HashOut>>; NIBBLE_LENGTH], C::HashOut, C::Error>
139	{
140		let empty_child = ChildReference::Inline(C::HashOut::default(), 0);
141		let mut children = [None; NIBBLE_LENGTH];
142		for i in 0..NIBBLE_LENGTH {
143			children[i] = if omit_children[i] {
144				Some(empty_child)
145			} else if let Some(child_plan) = &child_handles[i] {
146				let child_ref = child_plan
147					.build(node_data)
148					.try_into()
149					.map_err(|hash| Box::new(
150						TrieError::InvalidHash(C::HashOut::default(), hash)
151					))?;
152				Some(child_ref)
153			} else {
154				None
155			};
156		}
157		Ok(children)
158	}
159}
160
161/// Generates a compact representation of the partial trie stored in the given DB. The encoding
162/// is a vector of mutated trie nodes with those child references omitted. The mutated trie nodes
163/// are listed in pre-order traversal order so that the full nodes can be efficiently
164/// reconstructed recursively.
165///
166/// This function makes the assumption that all child references in an inline trie node are inline
167/// references.
168pub fn encode_compact<L>(db: &TrieDB<L>) -> Result<Vec<Vec<u8>>, TrieHash<L>, CError<L>>
169	where
170		L: TrieLayout
171{
172	let mut output = Vec::new();
173
174	// The stack of nodes through a path in the trie. Each entry is a child node of the preceding
175	// entry.
176	let mut stack: Vec<EncoderStackEntry<L::Codec>> = Vec::new();
177
178	// TrieDBNodeIterator guarantees that:
179	// - It yields at least one node.
180	// - The first node yielded is the root node with an empty prefix and is not inline.
181	// - The prefixes yielded are in strictly increasing lexographic order.
182	let iter = TrieDBNodeIterator::new(db)?;
183
184	// Following from the guarantees about TrieDBNodeIterator, we guarantee that after the first
185	// iteration of the loop below, the stack always has at least one entry and the bottom (front)
186	// of the stack is the root node, which is not inline. Furthermore, the iterator is not empty,
187	// so at least one iteration always occurs.
188	for item in iter {
189		match item {
190			Ok((prefix, node_hash, node)) => {
191				// Skip inline nodes, as they cannot contain hash references to other nodes by
192				// assumption.
193				if node_hash.is_none() {
194					continue;
195				}
196
197				// Unwind the stack until the new entry is a child of the last entry on the stack.
198				// If the stack entry prefix is a prefix of the new entry prefix, then it must be a
199				// direct parent as the nodes are yielded from the iterator in pre-order traversal
200				// order.
201				while let Some(mut last_entry) = stack.pop() {
202					if prefix.starts_with(&last_entry.prefix) {
203						// advance_child_index preconditions are satisfied because of iterator
204						// correctness.
205						last_entry.advance_child_index(&prefix)
206							.expect(
207								"all errors from advance_child_index indicate bugs with \
208								TrieDBNodeIterator or this function"
209							);
210						last_entry.omit_children[last_entry.child_index] = true;
211						last_entry.child_index += 1;
212						stack.push(last_entry);
213						break;
214					} else {
215						output[last_entry.output_index] = last_entry.encode_node()?;
216					}
217				}
218
219				let children_len = match node.node_plan() {
220					NodePlan::Empty | NodePlan::Leaf { .. } => 0,
221					NodePlan::Extension { .. } => 1,
222					NodePlan::Branch { .. } | NodePlan::NibbledBranch { .. } => NIBBLE_LENGTH,
223				};
224				stack.push(EncoderStackEntry {
225					prefix,
226					node,
227					child_index: 0,
228					omit_children: vec![false; children_len],
229					output_index: output.len(),
230					_marker: PhantomData::default(),
231				});
232				// Insert a placeholder into output which will be replaced when this new entry is
233				// popped from the stack.
234				output.push(Vec::new());
235			}
236			Err(err) => match *err {
237				// If we hit an IncompleteDatabaseError, just ignore it and continue encoding the
238				// incomplete trie. This encoding must support partial tries, which can be used for
239				// space-efficient storage proofs.
240				TrieError::IncompleteDatabase(_) => {},
241				_ => return Err(err),
242			}
243		}
244	}
245
246	while let Some(entry) = stack.pop() {
247		output[entry.output_index] = entry.encode_node()?;
248	}
249
250	Ok(output)
251}
252
253struct DecoderStackEntry<'a, C: NodeCodec> {
254	node: Node<'a>,
255	/// The next entry in the stack is a child of the preceding entry at this index. For branch
256	/// nodes, the index is in [0, NIBBLE_LENGTH] and for extension nodes, the index is in [0, 1].
257	child_index: usize,
258	/// The reconstructed child references.
259	children: Vec<Option<ChildReference<C::HashOut>>>,
260	_marker: PhantomData<C>,
261}
262
263impl<'a, C: NodeCodec> DecoderStackEntry<'a, C> {
264	/// Advance the child index until either it exceeds the number of children or the child is
265	/// marked as omitted. Omitted children are indicated by an empty inline reference. For each
266	/// child that is passed over and not omitted, copy over the child reference from the node to
267	/// this entries `children` list.
268	///
269	/// Returns true if the child index is past the last child, meaning the `children` references
270	/// list is complete. If this returns true and the entry is an extension node, then
271	/// `children[0]` is guaranteed to be Some.
272	fn advance_child_index(&mut self) -> Result<bool, C::HashOut, C::Error> {
273		match self.node {
274			Node::Extension(_, child) if self.child_index == 0 => {
275				match child {
276					NodeHandle::Inline(data) if data.is_empty() =>
277						return Ok(false),
278					_ => {
279						let child_ref = child.try_into()
280							.map_err(|hash| Box::new(
281								TrieError::InvalidHash(C::HashOut::default(), hash)
282							))?;
283						self.children[self.child_index] = Some(child_ref);
284					}
285				}
286				self.child_index += 1;
287			}
288			Node::Branch(children, _) | Node::NibbledBranch(_, children, _) => {
289				while self.child_index < NIBBLE_LENGTH {
290					match children[self.child_index] {
291						Some(NodeHandle::Inline(data)) if data.is_empty() =>
292							return Ok(false),
293						Some(child) => {
294							let child_ref = child.try_into()
295								.map_err(|hash| Box::new(
296									TrieError::InvalidHash(C::HashOut::default(), hash)
297								))?;
298							self.children[self.child_index] = Some(child_ref);
299						}
300						None => {}
301					}
302					self.child_index += 1;
303				}
304			}
305			_ => {}
306		}
307		Ok(true)
308	}
309
310	/// Push the partial key of this entry's node (including the branch nibble) to the given
311	/// prefix.
312	fn push_to_prefix(&self, prefix: &mut NibbleVec) {
313		match self.node {
314			Node::Empty => {}
315			Node::Leaf(partial, _) | Node::Extension(partial, _) => {
316				prefix.append_partial(partial.right());
317			}
318			Node::Branch(_, _) => {
319				prefix.push(self.child_index as u8);
320			}
321			Node::NibbledBranch(partial, _, _) => {
322				prefix.append_partial(partial.right());
323				prefix.push(self.child_index as u8);
324			}
325		}
326	}
327
328	/// Pop the partial key of this entry's node (including the branch nibble) from the given
329	/// prefix.
330	fn pop_from_prefix(&self, prefix: &mut NibbleVec) {
331		match self.node {
332			Node::Empty => {}
333			Node::Leaf(partial, _) | Node::Extension(partial, _) => {
334				prefix.drop_lasts(partial.len());
335			}
336			Node::Branch(_, _) => {
337				prefix.pop();
338			}
339			Node::NibbledBranch(partial, _, _) => {
340				prefix.pop();
341				prefix.drop_lasts(partial.len());
342			}
343		}
344	}
345
346	/// Reconstruct the encoded full trie node from the node and the entry's child references.
347	///
348	/// Preconditions:
349	/// - if node is an extension node, then `children[0]` is Some.
350	fn encode_node(self) -> Vec<u8> {
351		match self.node {
352			Node::Empty =>
353				C::empty_node().to_vec(),
354			Node::Leaf(partial, value) =>
355				C::leaf_node(partial.right(), value),
356			Node::Extension(partial, _) =>
357				C::extension_node(
358					partial.right_iter(),
359					partial.len(),
360					self.children[0]
361						.expect("required by method precondition; qed"),
362				),
363			Node::Branch(_, value) =>
364				C::branch_node(self.children.into_iter(), value),
365			Node::NibbledBranch(partial, _, value) =>
366				C::branch_node_nibbled(
367					partial.right_iter(),
368					partial.len(),
369					self.children.iter(),
370					value,
371				),
372		}
373	}
374}
375
376/// Reconstructs a partial trie DB from a compact representation. The encoding is a vector of
377/// mutated trie nodes with those child references omitted. The decode function reads them in order
378/// from the given slice, reconstructing the full nodes and inserting them into the given `HashDB`.
379/// It stops after fully constructing one partial trie and returns the root hash and the number of
380/// nodes read. If an error occurs during decoding, there are no guarantees about which entries
381/// were or were not added to the DB.
382///
383/// The number of nodes read may be fewer than the total number of items in `encoded`. This allows
384/// one to concatenate multiple compact encodings together and still reconstruct them all.
385//
386/// This function makes the assumption that all child references in an inline trie node are inline
387/// references.
388pub fn decode_compact<L, DB, T>(db: &mut DB, encoded: &[Vec<u8>])
389	-> Result<(TrieHash<L>, usize), TrieHash<L>, CError<L>>
390	where
391		L: TrieLayout,
392		DB: HashDB<L::Hash, T>,
393{
394	decode_compact_from_iter::<L, DB, T, _>(db, encoded.iter().map(Vec::as_slice))
395}
396
397/// Variant of 'decode_compact' that accept an iterator of encoded nodes as input.
398pub fn decode_compact_from_iter<'a, L, DB, T, I>(db: &mut DB, encoded: I)
399	-> Result<(TrieHash<L>, usize), TrieHash<L>, CError<L>>
400	where
401		L: TrieLayout,
402		DB: HashDB<L::Hash, T>,
403		I: IntoIterator<Item = &'a [u8]>,
404{
405	// The stack of nodes through a path in the trie. Each entry is a child node of the preceding
406	// entry.
407	let mut stack: Vec<DecoderStackEntry<L::Codec>> = Vec::new();
408
409	// The prefix of the next item to be read from the slice of encoded items.
410	let mut prefix = NibbleVec::new();
411
412	for (i, encoded_node) in encoded.into_iter().enumerate() {
413		let node = L::Codec::decode(encoded_node)
414			.map_err(|err| Box::new(TrieError::DecoderError(<TrieHash<L>>::default(), err)))?;
415
416		let children_len = match node {
417			Node::Empty | Node::Leaf(..) => 0,
418			Node::Extension(..) => 1,
419			Node::Branch(..) | Node::NibbledBranch(..) => NIBBLE_LENGTH,
420		};
421		let mut last_entry = DecoderStackEntry {
422			node,
423			child_index: 0,
424			children: vec![None; children_len],
425			_marker: PhantomData::default(),
426		};
427
428		loop {
429			if !last_entry.advance_child_index()? {
430				last_entry.push_to_prefix(&mut prefix);
431				stack.push(last_entry);
432				break;
433			}
434
435			// Since `advance_child_index` returned true, the preconditions for `encode_node` are
436			// satisfied.
437			let node_data = last_entry.encode_node();
438			let node_hash = db.insert(prefix.as_prefix(), node_data.as_ref());
439
440			if let Some(entry) = stack.pop() {
441				last_entry = entry;
442				last_entry.pop_from_prefix(&mut prefix);
443				last_entry.children[last_entry.child_index] =
444					Some(ChildReference::Hash(node_hash));
445				last_entry.child_index += 1;
446			} else {
447				return Ok((node_hash, i + 1));
448			}
449		}
450	}
451
452	Err(Box::new(TrieError::IncompleteDatabase(<TrieHash<L>>::default())))
453}