polkadot_erasure_coding/
lib.rs

// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.

// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Polkadot.  If not, see <http://www.gnu.org/licenses/>.

//! As part of Polkadot's availability system, certain pieces of data
//! for each block are required to be kept available.
//!
//! The way we accomplish this is by erasure-coding the data into n chunks
//! and constructing a merkle root over those chunks.
//!
//! Each of the n validators stores one chunk of the data. We assume `n = 3f + k`, `0 < k ≤ 3`,
//! where `f` is the maximum number of faulty validators in the system.
//! The data is coded so that any `f + 1` chunks can be used to reconstruct the full data.
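//!
//! # Example
//!
//! A minimal round-trip sketch using the v1 helpers below (marked `ignore`
//! because building an [`AvailableData`] needs `polkadot_node_primitives`
//! test fixtures; the validator count of 10 is illustrative):
//!
//! ```ignore
//! let chunks = obtain_chunks_v1(10, &available_data)?;
//! // Any `recovery_threshold(10)? == 4` of the 10 chunks suffice:
//! let recovered = reconstruct_v1(
//! 	10,
//! 	chunks.iter().enumerate().take(4).map(|(i, c)| (&c[..], i)),
//! )?;
//! assert_eq!(recovered, available_data);
//! ```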

use codec::{Decode, Encode};
use polkadot_node_primitives::{AvailableData, Proof};
use polkadot_primitives::{BlakeTwo256, Hash as H256, HashT};
use sp_core::Blake2Hasher;
use sp_trie::{
	trie_types::{TrieDBBuilder, TrieDBMutBuilderV0 as TrieDBMutBuilder},
	LayoutV0, MemoryDB, Trie, TrieMut, EMPTY_PREFIX,
};
use thiserror::Error;

use novelpoly::{CodeParams, WrappedShard};

// We are limited to the field order of GF(2^16), which is 65536.
const MAX_VALIDATORS: usize = novelpoly::f2e16::FIELD_SIZE;

/// Errors in erasure coding.
#[derive(Debug, Clone, PartialEq, Error)]
pub enum Error {
	/// Returned when there are too many validators.
	#[error("There are too many validators")]
	TooManyValidators,
	/// Cannot encode something for zero or one validator.
	#[error("Expected at least 2 validators")]
	NotEnoughValidators,
	/// Cannot reconstruct: wrong number of validators.
	#[error("Validator count mismatches between encoding and decoding")]
	WrongValidatorCount,
	/// Not enough chunks present.
	#[error("Not enough chunks to reconstruct message")]
	NotEnoughChunks,
	/// Too many chunks present.
	#[error("Too many chunks present")]
	TooManyChunks,
	/// Chunks are not of uniform length, or some chunks are empty.
	#[error("Chunks are not uniform: length mismatch or zero-sized chunks")]
	NonUniformChunks,
	/// An uneven byte-length of a shard is not valid for `GF(2^16)` encoding.
	#[error("Uneven length is not valid for field GF(2^16)")]
	UnevenLength,
	/// Chunk index out of bounds.
	#[error("Chunk is out of bounds: {chunk_index} not included in 0..{n_validators}")]
	ChunkIndexOutOfBounds { chunk_index: usize, n_validators: usize },
	/// Bad payload in reconstructed bytes.
	#[error("Reconstructed payload invalid")]
	BadPayload,
	/// Unable to decode reconstructed bytes.
	#[error("Unable to decode reconstructed payload: {0}")]
	Decode(#[source] codec::Error),
	/// Invalid branch proof.
	#[error("Invalid branch proof")]
	InvalidBranchProof,
	/// Branch out of bounds.
	#[error("Branch is out of bounds")]
	BranchOutOfBounds,
	/// Unknown error during reconstruction.
	#[error("An unknown error has appeared when reconstructing erasure code chunks")]
	UnknownReconstruction,
	/// Unknown error while deriving code parameters.
	#[error("An unknown error has appeared when deriving code parameters from validator count")]
	UnknownCodeParam,
}

impl From<novelpoly::Error> for Error {
	fn from(error: novelpoly::Error) -> Self {
		match error {
			novelpoly::Error::NeedMoreShards { .. } => Self::NotEnoughChunks,
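			// NB: `ParamterMustBePowerOf2` (sic) is the variant's spelling upstream in `novelpoly`.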
			novelpoly::Error::ParamterMustBePowerOf2 { .. } => Self::UnevenLength,
			novelpoly::Error::WantedShardCountTooHigh(_) => Self::TooManyValidators,
			novelpoly::Error::WantedShardCountTooLow(_) => Self::NotEnoughValidators,
			novelpoly::Error::PayloadSizeIsZero { .. } => Self::BadPayload,
			novelpoly::Error::InconsistentShardLengths { .. } => Self::NonUniformChunks,
			_ => Self::UnknownReconstruction,
		}
	}
}

/// Obtain a threshold of chunks that should be enough to recover the data.
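///
/// A usage sketch; the values follow from the `(n_validators - 1) / 3 + 1`
/// computation below (the crate is assumed to be importable as
/// `polkadot_erasure_coding`):
///
/// ```
/// use polkadot_erasure_coding::recovery_threshold;
///
/// assert_eq!(recovery_threshold(10), Ok(4));
/// assert_eq!(recovery_threshold(1000), Ok(334));
/// ```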
pub const fn recovery_threshold(n_validators: usize) -> Result<usize, Error> {
	if n_validators > MAX_VALIDATORS {
		return Err(Error::TooManyValidators)
	}
	if n_validators <= 1 {
		return Err(Error::NotEnoughValidators)
	}

	let needed = n_validators.saturating_sub(1) / 3;
	Ok(needed + 1)
}

/// Obtain the threshold of systematic chunks that should be enough to recover the data.
///
/// If the regular `recovery_threshold` is a power of two, then it returns the same value.
/// Otherwise, it returns the next lower power of two.
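///
/// A sketch of the rule stated above: `recovery_threshold(10) == 4` is already a
/// power of two, while `recovery_threshold(1000) == 334` rounds down to 256:
///
/// ```
/// use polkadot_erasure_coding::systematic_recovery_threshold;
///
/// assert_eq!(systematic_recovery_threshold(10), Ok(4));
/// assert_eq!(systematic_recovery_threshold(1000), Ok(256));
/// ```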
pub fn systematic_recovery_threshold(n_validators: usize) -> Result<usize, Error> {
	code_params(n_validators).map(|params| params.k())
}

fn code_params(n_validators: usize) -> Result<CodeParams, Error> {
	// We need to be able to reconstruct from 1/3 - eps of the chunks.

	let n_wanted = n_validators;
	let k_wanted = recovery_threshold(n_wanted)?;

	if n_wanted > MAX_VALIDATORS {
		return Err(Error::TooManyValidators)
	}

	CodeParams::derive_parameters(n_wanted, k_wanted).map_err(|e| match e {
		novelpoly::Error::WantedShardCountTooHigh(_) => Error::TooManyValidators,
		novelpoly::Error::WantedShardCountTooLow(_) => Error::NotEnoughValidators,
		_ => Error::UnknownCodeParam,
	})
}

/// Reconstruct the v1 available data from the set of systematic chunks.
///
/// Provide a vector containing chunk data. If too few chunks are provided, recovery is not
/// possible.
pub fn reconstruct_from_systematic_v1(
	n_validators: usize,
	chunks: Vec<Vec<u8>>,
) -> Result<AvailableData, Error> {
	reconstruct_from_systematic(n_validators, chunks)
}

/// Reconstruct the available data from the set of systematic chunks.
///
/// Provide a vector containing the first k chunks in order. If too few chunks are provided,
/// recovery is not possible.
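///
/// A round-trip sketch (marked `ignore` because building an `AvailableData`
/// needs test fixtures; the validator count of 10 is illustrative):
///
/// ```ignore
/// let chunks = obtain_chunks(10, &available_data)?;
/// let k = systematic_recovery_threshold(10)?;
/// // The first `k` chunks are the systematic ones and carry the payload verbatim.
/// let recovered: AvailableData =
/// 	reconstruct_from_systematic(10, chunks.into_iter().take(k).collect())?;
/// assert_eq!(recovered, available_data);
/// ```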
pub fn reconstruct_from_systematic<T: Decode>(
	n_validators: usize,
	chunks: Vec<Vec<u8>>,
) -> Result<T, Error> {
	let code_params = code_params(n_validators)?;
	let k = code_params.k();

	for chunk_data in chunks.iter().take(k) {
		if chunk_data.len() % 2 != 0 {
			return Err(Error::UnevenLength)
		}
	}

	let bytes = code_params
		.make_encoder()
		.reconstruct_from_systematic(chunks.into_iter().take(k).map(WrappedShard::new).collect())?;

	Decode::decode(&mut &bytes[..]).map_err(Error::Decode)
}

/// Obtain erasure-coded chunks for v1 `AvailableData`, one for each validator.
///
/// Works only up to 65536 validators, and `n_validators` must be non-zero.
pub fn obtain_chunks_v1(n_validators: usize, data: &AvailableData) -> Result<Vec<Vec<u8>>, Error> {
	obtain_chunks(n_validators, data)
}

/// Obtain erasure-coded chunks, one for each validator.
///
/// Works only up to 65536 validators, and `n_validators` must be non-zero.
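///
/// A usage sketch (any `Encode` value works; the `u64` payload and the
/// validator count of 10 are illustrative):
///
/// ```
/// let chunks = polkadot_erasure_coding::obtain_chunks(10, &42u64).unwrap();
///
/// // One chunk per validator, each of even byte length (whole GF(2^16) symbols).
/// assert_eq!(chunks.len(), 10);
/// assert!(chunks.iter().all(|chunk| chunk.len() % 2 == 0));
/// ```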
pub fn obtain_chunks<T: Encode>(n_validators: usize, data: &T) -> Result<Vec<Vec<u8>>, Error> {
	let params = code_params(n_validators)?;
	let encoded = data.encode();

	if encoded.is_empty() {
		return Err(Error::BadPayload)
	}

	let shards = params
		.make_encoder()
		.encode::<WrappedShard>(&encoded[..])
		.expect("Payload non-empty, shard sizes are uniform, and validator numbers checked; qed");

	Ok(shards.into_iter().map(|w: WrappedShard| w.into_inner()).collect())
}

/// Reconstruct the v1 available data from a set of chunks.
///
/// Provide an iterator containing chunk data and the corresponding index.
/// The indices of the present chunks must be indicated. If too few chunks
/// are provided, recovery is not possible.
///
/// Works only up to 65536 validators, and `n_validators` must be non-zero.
pub fn reconstruct_v1<'a, I: 'a>(n_validators: usize, chunks: I) -> Result<AvailableData, Error>
where
	I: IntoIterator<Item = (&'a [u8], usize)>,
{
	reconstruct(n_validators, chunks)
}

/// Reconstruct decodable data from a set of chunks.
///
/// Provide an iterator containing chunk data and the corresponding index.
/// The indices of the present chunks must be indicated. If too few chunks
/// are provided, recovery is not possible.
///
/// Works only up to 65536 validators, and `n_validators` must be non-zero.
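///
/// A usage sketch (marked `ignore`; `chunks` is assumed to come from
/// [`obtain_chunks`] with 10 validators, for which the recovery threshold is 4):
///
/// ```ignore
/// let value: u64 = reconstruct(
/// 	10,
/// 	chunks.iter().enumerate().take(4).map(|(i, chunk)| (&chunk[..], i)),
/// )?;
/// ```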
pub fn reconstruct<'a, I: 'a, T: Decode>(n_validators: usize, chunks: I) -> Result<T, Error>
where
	I: IntoIterator<Item = (&'a [u8], usize)>,
{
	let params = code_params(n_validators)?;
	let mut received_shards: Vec<Option<WrappedShard>> = vec![None; n_validators];
	for (chunk_data, chunk_idx) in chunks.into_iter().take(n_validators) {
		if chunk_data.len() % 2 != 0 {
			return Err(Error::UnevenLength)
		}

		// An out-of-range index would panic on the indexing below; report it as an error instead.
		if chunk_idx >= n_validators {
			return Err(Error::ChunkIndexOutOfBounds { chunk_index: chunk_idx, n_validators })
		}

		received_shards[chunk_idx] = Some(WrappedShard::new(chunk_data.to_vec()));
	}

	let payload_bytes = params.make_encoder().reconstruct(received_shards)?;

	Decode::decode(&mut &payload_bytes[..]).map_err(|_| Error::BadPayload)
}

/// An iterator that yields merkle branches and chunk data for all chunks to
/// be sent to other validators.
pub struct Branches<'a, I> {
	trie_storage: MemoryDB<Blake2Hasher>,
	root: H256,
	chunks: &'a [I],
	current_pos: usize,
}

impl<'a, I: AsRef<[u8]>> Branches<'a, I> {
	/// Get the trie root.
	pub fn root(&self) -> H256 {
		self.root
	}
}

impl<'a, I: AsRef<[u8]>> Iterator for Branches<'a, I> {
	type Item = (Proof, &'a [u8]);

	fn next(&mut self) -> Option<Self::Item> {
		use sp_trie::Recorder;

		let mut recorder = Recorder::<LayoutV0<Blake2Hasher>>::new();
		let res = {
			let trie = TrieDBBuilder::new(&self.trie_storage, &self.root)
				.with_recorder(&mut recorder)
				.build();

			(self.current_pos as u32).using_encoded(|s| trie.get(s))
		};

		match res.expect("all nodes in trie present; qed") {
			Some(_) => {
				let nodes: Vec<Vec<u8>> = recorder.drain().into_iter().map(|r| r.data).collect();
				let chunk = self.chunks.get(self.current_pos).expect(
					"there is a one-to-one mapping of chunks to valid merkle branches; qed",
				);
				self.current_pos += 1;
				Proof::try_from(nodes).ok().map(|proof| (proof, chunk.as_ref()))
			},
			None => None,
		}
	}
}

/// Construct a trie from chunks of an erasure-coded value. This returns the root hash and an
/// iterator of merkle proofs, one for each validator.
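///
/// A usage sketch (marked `ignore`; `chunks` is assumed to come from
/// [`obtain_chunks`]), pairing each proof with [`branch_hash`] for verification:
///
/// ```ignore
/// let branches = branches(chunks.as_slice());
/// let root = branches.root();
/// for (index, (proof, chunk)) in branches.enumerate() {
/// 	// Each proof commits the chunk's hash to `root` at `index`.
/// 	assert_eq!(branch_hash(&root, &proof, index)?, BlakeTwo256::hash(chunk));
/// }
/// ```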
pub fn branches<'a, I: 'a>(chunks: &'a [I]) -> Branches<'a, I>
where
	I: AsRef<[u8]>,
{
	let mut trie_storage: MemoryDB<Blake2Hasher> = MemoryDB::default();
	let mut root = H256::default();

	// Construct a trie mapping each chunk's index to its hash.
	{
		let mut trie = TrieDBMutBuilder::new(&mut trie_storage, &mut root).build();
		for (i, chunk) in chunks.as_ref().iter().enumerate() {
			(i as u32).using_encoded(|encoded_index| {
				let chunk_hash = BlakeTwo256::hash(chunk.as_ref());
				trie.insert(encoded_index, chunk_hash.as_ref())
					.expect("a fresh trie stored in memory cannot have errors loading nodes; qed");
			})
		}
	}

	Branches { trie_storage, root, chunks, current_pos: 0 }
}

/// Verify a merkle branch, yielding the chunk hash meant to be present at that
/// index.
pub fn branch_hash(root: &H256, branch_nodes: &Proof, index: usize) -> Result<H256, Error> {
	let mut trie_storage: MemoryDB<Blake2Hasher> = MemoryDB::default();
	for node in branch_nodes.iter() {
		(&mut trie_storage as &mut sp_trie::HashDB<_>).insert(EMPTY_PREFIX, node);
	}

	let trie = TrieDBBuilder::new(&trie_storage, &root).build();
	let res = (index as u32).using_encoded(|key| {
		trie.get_with(key, |raw_hash: &[u8]| H256::decode(&mut &raw_hash[..]))
	});

	match res {
		Ok(Some(Ok(hash))) => Ok(hash),
		Ok(Some(Err(_))) => Err(Error::InvalidBranchProof), // hash failed to decode
		Ok(None) => Err(Error::BranchOutOfBounds),
		Err(_) => Err(Error::InvalidBranchProof),
	}
}

#[cfg(test)]
mod tests {
	use std::sync::Arc;

	use super::*;
	use polkadot_node_primitives::{AvailableData, BlockData, PoV};
	use polkadot_primitives::{HeadData, PersistedValidationData};
	use quickcheck::{Arbitrary, Gen, QuickCheck};

	// In order to adequately compute the number of entries in the Merkle
	// trie, we must account for the fixed 16-ary trie structure.
	const KEY_INDEX_NIBBLE_SIZE: usize = 4;

	#[derive(Clone, Debug)]
	struct ArbitraryAvailableData(AvailableData);

	impl Arbitrary for ArbitraryAvailableData {
		fn arbitrary(g: &mut Gen) -> Self {
			// Limit the PoV length to 1 MiB, otherwise the test will take forever.
			let pov_len = (u32::arbitrary(g) % (1024 * 1024)).max(2);

			let pov = (0..pov_len).map(|_| u8::arbitrary(g)).collect();

			let pvd = PersistedValidationData {
				parent_head: HeadData((0..u16::arbitrary(g)).map(|_| u8::arbitrary(g)).collect()),
				relay_parent_number: u32::arbitrary(g),
				relay_parent_storage_root: [u8::arbitrary(g); 32].into(),
				max_pov_size: u32::arbitrary(g),
			};

			ArbitraryAvailableData(AvailableData {
				pov: Arc::new(PoV { block_data: BlockData(pov) }),
				validation_data: pvd,
			})
		}
	}

	#[test]
	fn field_order_is_right_size() {
		assert_eq!(MAX_VALIDATORS, 65536);
	}

	#[test]
	fn round_trip_works() {
		let pov = PoV { block_data: BlockData((0..255).collect()) };

		let available_data = AvailableData { pov: pov.into(), validation_data: Default::default() };
		let chunks = obtain_chunks(10, &available_data).unwrap();

		assert_eq!(chunks.len(), 10);

		// Any 4 chunks should work.
		let reconstructed: AvailableData = reconstruct(
			10,
			[(&*chunks[1], 1), (&*chunks[4], 4), (&*chunks[6], 6), (&*chunks[9], 9)]
				.iter()
				.cloned(),
		)
		.unwrap();

		assert_eq!(reconstructed, available_data);
	}

	#[test]
	fn round_trip_systematic_works() {
		fn property(available_data: ArbitraryAvailableData, n_validators: u16) {
			let n_validators = n_validators.max(2);
			let kpow2 = systematic_recovery_threshold(n_validators as usize).unwrap();
			let chunks = obtain_chunks(n_validators as usize, &available_data.0).unwrap();
			assert_eq!(
				reconstruct_from_systematic_v1(
					n_validators as usize,
					chunks.into_iter().take(kpow2).collect()
				)
				.unwrap(),
				available_data.0
			);
		}

		QuickCheck::new().quickcheck(property as fn(ArbitraryAvailableData, u16))
	}

	#[test]
	fn reconstruct_does_not_panic_on_low_validator_count() {
		let reconstructed = reconstruct_v1(1, [].iter().cloned());
		assert_eq!(reconstructed, Err(Error::NotEnoughValidators));
	}

	fn generate_trie_and_generate_proofs(magnitude: u32) {
		let n_validators = 2_u32.pow(magnitude) as usize;
		let pov = PoV { block_data: BlockData(vec![2; n_validators / KEY_INDEX_NIBBLE_SIZE]) };

		let available_data = AvailableData { pov: pov.into(), validation_data: Default::default() };

		let chunks = obtain_chunks(magnitude as usize, &available_data).unwrap();

		assert_eq!(chunks.len() as u32, magnitude);

		let branches = branches(chunks.as_ref());
		let root = branches.root();

		let proofs: Vec<_> = branches.map(|(proof, _)| proof).collect();
		assert_eq!(proofs.len() as u32, magnitude);
		for (i, proof) in proofs.into_iter().enumerate() {
			let encode = Encode::encode(&proof);
			let decode = Decode::decode(&mut &encode[..]).unwrap();
			assert_eq!(proof, decode);
			assert_eq!(encode, Encode::encode(&decode));

			assert_eq!(branch_hash(&root, &proof, i).unwrap(), BlakeTwo256::hash(&chunks[i]));
		}
	}

	#[test]
	fn roundtrip_proof_encoding() {
		for i in 2..16 {
			generate_trie_and_generate_proofs(i);
		}
	}
}
451}