commonware_coding/lib.rs
1//! Encode data to enable recovery from a subset of fragments.
2//!
3//! # Status
4//!
5//! Stability varies by primitive. See [README](https://github.com/commonwarexyz/monorepo#stability) for details.
6
7#![doc(
8 html_logo_url = "https://commonware.xyz/imgs/rustdoc_logo.svg",
9 html_favicon_url = "https://commonware.xyz/favicon.ico"
10)]
11
12commonware_macros::stability_scope!(ALPHA {
13 use bytes::Buf;
14 use commonware_codec::{Codec, FixedSize, Read, Write};
15 use commonware_cryptography::Digest;
16 use commonware_parallel::Strategy;
17 use std::{fmt::Debug, num::NonZeroU16};
18
19 mod no_coding;
20 pub use no_coding::{Error as NoCodingError, NoCoding};
21
22 mod reed_solomon;
23 pub use reed_solomon::{Error as ReedSolomonError, ReedSolomon};
24
25 mod zoda;
26 pub use zoda::{Error as ZodaError, Zoda};
27
28 /// Configuration common to all encoding schemes.
29 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
30 pub struct Config {
31 /// The minimum number of shards needed to encode the data.
32 pub minimum_shards: NonZeroU16,
33 /// Extra shards beyond the minimum number.
34 ///
35 /// Alternatively, one can think of the configuration as having a total number
36 /// `N = extra_shards + minimum_shards`, but by specifying the `extra_shards`
37 /// rather than `N`, we avoid needing to check that `minimum_shards <= N`.
38 pub extra_shards: NonZeroU16,
39 }
40
41 impl Config {
42 /// Returns the total number of shards produced by this configuration.
43 pub fn total_shards(&self) -> u32 {
44 u32::from(self.minimum_shards.get()) + u32::from(self.extra_shards.get())
45 }
46 }
47
48 impl FixedSize for Config {
49 const SIZE: usize = 2 * <NonZeroU16 as FixedSize>::SIZE;
50 }
51
52 impl Write for Config {
53 fn write(&self, buf: &mut impl bytes::BufMut) {
54 self.minimum_shards.write(buf);
55 self.extra_shards.write(buf);
56 }
57 }
58
59 impl Read for Config {
60 type Cfg = ();
61
62 fn read_cfg(buf: &mut impl Buf, cfg: &Self::Cfg) -> Result<Self, commonware_codec::Error> {
63 Ok(Self {
64 minimum_shards: NonZeroU16::read_cfg(buf, cfg)?,
65 extra_shards: NonZeroU16::read_cfg(buf, cfg)?,
66 })
67 }
68 }
69
70 #[cfg(feature = "arbitrary")]
71 impl<'a> arbitrary::Arbitrary<'a> for Config {
72 fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
73 let minimum_shards = commonware_utils::NZU16!(u.int_in_range(1..=u16::MAX)?);
74 let extra_shards = commonware_utils::NZU16!(u.int_in_range(1..=u16::MAX)?);
75 Ok(Self {
76 minimum_shards,
77 extra_shards,
78 })
79 }
80 }
81
82 /// The configuration for decoding shard data.
83 #[derive(Clone, Debug)]
84 pub struct CodecConfig {
85 /// The maximum number of bytes a shard is expected to contain.
86 ///
87 /// This can be an upper bound, and only constrains the non-fixed-size portion
88 /// of shard data.
89 pub maximum_shard_size: usize,
90 }
91
92 /// A scheme for encoding data into pieces, and recovering the data from those pieces.
93 ///
94 /// # Example
95 /// ```
96 /// use commonware_coding::{Config, ReedSolomon, Scheme as _};
97 /// use commonware_cryptography::Sha256;
98 /// use commonware_parallel::Sequential;
99 /// use commonware_utils::NZU16;
100 ///
101 /// const STRATEGY: Sequential = Sequential;
102 ///
103 /// type RS = ReedSolomon<Sha256>;
104 ///
105 /// let config = Config {
106 /// minimum_shards: NZU16!(2),
107 /// extra_shards: NZU16!(1),
108 /// };
109 /// let data = b"Hello!";
110 /// // Turn the data into shards, and a commitment to those shards.
111 /// let (commitment, shards) =
112 /// RS::encode(&config, data.as_slice(), &STRATEGY).unwrap();
113 ///
114 /// // Each person produces weak shards, their own checked shard, and checking data
115 /// // to check other peoples weak shards.
116 /// let (mut checking_data_w_shard, weak_shards): (Vec<_>, Vec<_>) = shards
117 /// .into_iter()
118 /// .enumerate()
119 /// .map(|(i, shard)| {
120 /// let (checking_data, checked_shard, weak_shard) = RS::weaken(&config, &commitment, i as u16, shard).unwrap();
121 /// ((checking_data, checked_shard), weak_shard)
122 /// })
123 /// .collect();
124 /// // Let's pretend that the last item is "ours"
125 /// let (checking_data, checked_shard) = checking_data_w_shard.pop().unwrap();
126 /// // We can use this checking_data to check the other shards.
127 /// let mut checked_shards = Vec::new();
128 /// checked_shards.push(checked_shard);
129 /// for (i, weak_shard) in weak_shards.into_iter().enumerate().skip(1) {
130 /// checked_shards.push(RS::check(&config, &commitment, &checking_data, i as u16, weak_shard).unwrap())
131 /// }
132 ///
133 /// let data2 = RS::decode(&config, &commitment, checking_data, &checked_shards[..2], &STRATEGY).unwrap();
134 /// assert_eq!(&data[..], &data2[..]);
135 ///
136 /// // Decoding works with different shards, with a guarantee to get the same result.
137 /// let data3 = RS::decode(&config, &commitment, checking_data, &checked_shards[1..], &STRATEGY).unwrap();
138 /// assert_eq!(&data[..], &data3[..]);
139 /// ```
140 ///
141 /// # Guarantees
142 ///
143 /// Here are additional properties that implementors of this trait need to
144 /// consider, and that users of this trait can rely on.
145 ///
146 /// ## Weaken vs Check
147 ///
148 /// [`Scheme::weaken`] and [`Scheme::check`] should agree, even for malicious encoders.
149 ///
150 /// It should not be possible for parties A and B to call `weaken` successfully,
151 /// but then have either of them fail on the other's shard when calling `check`.
152 ///
153 /// In other words, if an honest party considers their shard to be correctly
154 /// formed, then other honest parties which have successfully constructed their
155 /// checking data will also agree with the shard being correct.
156 ///
157 /// A violation of this property would be, for example, if a malicious payload
158 /// could convince two parties that they both have valid shards, but then the
159 /// checking data they produce from the malicious payload reports issues with
160 /// those shards.
161 pub trait Scheme: Debug + Clone + Send + Sync + 'static {
162 /// A commitment attesting to the shards of data.
163 type Commitment: Digest;
164 /// A strong shard of data, to be received by a participant.
165 type StrongShard: Clone + Debug + Eq + Codec<Cfg = CodecConfig> + Send + Sync + 'static;
166 /// A weak shard shared with other participants, to aid them in reconstruction.
167 ///
168 /// In most cases, this will be the same as `StrongShard`, but some schemes might
169 /// have extra information in `StrongShard` that may not be necessary to reconstruct
170 /// the data.
171 type WeakShard: Clone + Debug + Eq + Codec<Cfg = CodecConfig> + Send + Sync + 'static;
172 /// Data which can assist in checking shards.
173 type CheckingData: Clone + Send + Sync;
174 /// A shard that has been checked for inclusion in the commitment.
175 ///
176 /// This allows excluding [Scheme::WeakShard]s which are invalid, and shouldn't
177 /// be considered as progress towards meeting the minimum number of shards.
178 type CheckedShard: Clone + Send + Sync;
179 /// The type of errors that can occur during encoding, weakening, checking, and decoding.
180 type Error: std::fmt::Debug + Send;
181
182 /// Encode a piece of data, returning a commitment, along with shards, and proofs.
183 ///
184 /// Each shard and proof is intended for exactly one participant. The number of shards returned
185 /// should equal `config.minimum_shards + config.extra_shards`.
186 #[allow(clippy::type_complexity)]
187 fn encode(
188 config: &Config,
189 data: impl Buf,
190 strategy: &impl Strategy,
191 ) -> Result<(Self::Commitment, Vec<Self::StrongShard>), Self::Error>;
192
193 /// Take your own shard, check it, and produce a [Scheme::WeakShard] to forward to others.
194 ///
195 /// This takes in an index, which is the index you expect the shard to be.
196 ///
197 /// This will produce a [Scheme::CheckedShard] which counts towards the minimum
198 /// number of shards you need to reconstruct the data, in [Scheme::decode].
199 ///
200 /// You also get [Scheme::CheckingData], which has information you can use to check
201 /// the shards you receive from others.
202 #[allow(clippy::type_complexity)]
203 fn weaken(
204 config: &Config,
205 commitment: &Self::Commitment,
206 index: u16,
207 shard: Self::StrongShard,
208 ) -> Result<(Self::CheckingData, Self::CheckedShard, Self::WeakShard), Self::Error>;
209
210 /// Check the integrity of a weak shard, producing a checked shard.
211 ///
212 /// This requires the [Scheme::CheckingData] produced by [Scheme::weaken].
213 ///
214 /// This takes in an index, to make sure that the weak shard you're checking
215 /// is associated with the participant you expect it to be.
216 fn check(
217 config: &Config,
218 commitment: &Self::Commitment,
219 checking_data: &Self::CheckingData,
220 index: u16,
221 weak_shard: Self::WeakShard,
222 ) -> Result<Self::CheckedShard, Self::Error>;
223
224 /// Decode the data from shards received from other participants.
225 ///
226 /// The data must be decodeable with as few as `config.minimum_shards`,
227 /// including your own shard.
228 ///
229 /// Calls to this function with the same commitment, but with different shards,
230 /// or shards in a different should also result in the same output data, or in failure.
231 /// In other words, when using the decoding function in a broader system, you
232 /// get a guarantee that every participant decoding will see the same final
233 /// data, even if they receive different shards, or receive them in a different order.
234 fn decode(
235 config: &Config,
236 commitment: &Self::Commitment,
237 checking_data: Self::CheckingData,
238 shards: &[Self::CheckedShard],
239 strategy: &impl Strategy,
240 ) -> Result<Vec<u8>, Self::Error>;
241 }
242
243 /// A marker trait indicating that [Scheme::check] proves validity of the encoding.
244 ///
245 /// In more detail, this means that upon a successful call to [Scheme::check],
246 /// guarantees that the shard results from a valid encoding of the data, and thus,
247 /// if other participants also call check, then the data is guaranteed to be reconstructable.
248 pub trait ValidatingScheme: Scheme {}
249});
250
251#[cfg(test)]
252mod test {
253 use super::*;
254 use crate::reed_solomon::ReedSolomon;
255 use arbitrary::Unstructured;
256 use commonware_codec::Encode;
257 use commonware_cryptography::Sha256;
258 use commonware_invariants::minifuzz;
259 use commonware_macros::test_group;
260 use commonware_parallel::Sequential;
261 use commonware_utils::NZU16;
262
263 const MAX_SHARD_SIZE: usize = 1 << 31;
264 const MAX_SHARDS: u16 = 32;
265 const MAX_DATA: usize = 1024;
266 const MIN_EXTRA_SHARDS: u16 = 1;
267
268 fn roundtrip<S: Scheme>(config: &Config, data: &[u8], selected: &[u16]) {
269 // Encode data into shards.
270 let (commitment, shards) = S::encode(config, data, &Sequential).unwrap();
271 let read_cfg = CodecConfig {
272 maximum_shard_size: MAX_SHARD_SIZE,
273 };
274 for (i, shard) in shards.iter().enumerate() {
275 // Strong shard codec roundtrip.
276 let decoded_shard = S::StrongShard::read_cfg(&mut shard.encode(), &read_cfg).unwrap();
277 assert_eq!(decoded_shard, *shard);
278
279 // Weak shard codec roundtrip.
280 let (_, _, weak_shard) =
281 S::weaken(config, &commitment, i as u16, shard.clone()).unwrap();
282 let decoded_weak_shard =
283 S::WeakShard::read_cfg(&mut weak_shard.encode(), &read_cfg).unwrap();
284 assert_eq!(decoded_weak_shard, weak_shard);
285 }
286
287 // Collect selected shards for decoding. The first selected shard
288 // goes through `weaken`, the rest go through `check`.
289 let mut checked_shards = Vec::new();
290 let mut checking_data = None;
291 for (i, shard) in shards.into_iter().enumerate() {
292 if !selected.contains(&(i as u16)) {
293 continue;
294 }
295 let (cd, checked, weak_shard) =
296 S::weaken(config, &commitment, i as u16, shard).unwrap();
297 if let Some(cd) = &checking_data {
298 let checked = S::check(config, &commitment, cd, i as u16, weak_shard).unwrap();
299 checked_shards.push(checked);
300 } else {
301 checking_data = Some(cd);
302 checked_shards.push(checked);
303 }
304 }
305
306 // Shuffle the checked shards to verify decode is order-independent.
307 checked_shards.reverse();
308
309 // Decode from the selected shards and verify data integrity.
310 let decoded = S::decode(
311 config,
312 &commitment,
313 checking_data.unwrap(),
314 &checked_shards,
315 &Sequential,
316 )
317 .unwrap();
318 assert_eq!(decoded, data);
319 }
320
321 fn generate_case(u: &mut Unstructured<'_>) -> arbitrary::Result<(Config, Vec<u8>, Vec<u16>)> {
322 let minimum_shards = (u.arbitrary::<u16>()? % MAX_SHARDS) + 1;
323 let extra_shards =
324 MIN_EXTRA_SHARDS + (u.arbitrary::<u16>()? % (MAX_SHARDS - MIN_EXTRA_SHARDS + 1));
325 let total_shards = minimum_shards + extra_shards;
326
327 let data_len = usize::from(u.arbitrary::<u16>()?) % (MAX_DATA + 1);
328 let data = u.bytes(data_len)?.to_vec();
329
330 let selected_len = usize::from(minimum_shards)
331 + (usize::from(u.arbitrary::<u16>()?) % (usize::from(extra_shards) + 1));
332 let mut selected: Vec<u16> = (0..total_shards).collect();
333 for i in 0..selected_len {
334 let remaining = usize::from(total_shards) - i;
335 let j = i + (usize::from(u.arbitrary::<u16>()?) % remaining);
336 selected.swap(i, j);
337 }
338 selected.truncate(selected_len);
339
340 Ok((
341 Config {
342 minimum_shards: NZU16!(minimum_shards),
343 extra_shards: NZU16!(extra_shards),
344 },
345 data,
346 selected,
347 ))
348 }
349
350 #[test]
351 fn roundtrip_empty_data() {
352 let config = Config {
353 minimum_shards: NZU16!(30),
354 extra_shards: NZU16!(70),
355 };
356 let selected: Vec<u16> = (0..30).collect();
357
358 roundtrip::<ReedSolomon<Sha256>>(&config, b"", &selected);
359 roundtrip::<NoCoding<Sha256>>(&config, b"", &selected);
360 roundtrip::<Zoda<Sha256>>(&config, b"", &selected);
361 }
362
363 // This exercises an edge case in ZODA, but is also useful for other schemes.
364 #[test]
365 fn roundtrip_2_pow_16_25_total_shards() {
366 let config = Config {
367 minimum_shards: NZU16!(8),
368 extra_shards: NZU16!(17),
369 };
370 let data = vec![0x67; 1 << 16];
371 let selected: Vec<u16> = (0..8).collect();
372
373 roundtrip::<ReedSolomon<Sha256>>(&config, &data, &selected);
374 roundtrip::<NoCoding<Sha256>>(&config, &data, &selected);
375 roundtrip::<Zoda<Sha256>>(&config, &data, &selected);
376 }
377
378 #[test]
379 fn minifuzz_roundtrip_reed_solomon() {
380 minifuzz::Builder::default()
381 .with_seed(0)
382 .with_search_limit(64)
383 .test(|u| {
384 let (config, data, selected) = generate_case(u)?;
385 roundtrip::<ReedSolomon<Sha256>>(&config, &data, &selected);
386 Ok(())
387 });
388 }
389
390 #[test]
391 fn minifuzz_roundtrip_no_coding() {
392 minifuzz::Builder::default()
393 .with_seed(0)
394 .with_search_limit(64)
395 .test(|u| {
396 let (config, data, selected) = generate_case(u)?;
397 roundtrip::<NoCoding<Sha256>>(&config, &data, &selected);
398 Ok(())
399 });
400 }
401
402 #[test_group("slow")]
403 #[test]
404 fn minifuzz_roundtrip_zoda() {
405 minifuzz::Builder::default()
406 .with_seed(0)
407 .with_search_limit(64)
408 .test(|u| {
409 let (config, data, selected) = generate_case(u)?;
410 roundtrip::<Zoda<Sha256>>(&config, &data, &selected);
411 Ok(())
412 });
413 }
414
415 #[cfg(feature = "arbitrary")]
416 mod conformance {
417 use super::*;
418 use commonware_codec::conformance::CodecConformance;
419
420 commonware_conformance::conformance_tests! {
421 CodecConformance<Config>,
422 }
423 }
424}