commonware_coding/lib.rs
1//! Encode data to enable recovery from a subset of fragments.
2//!
3//! # Status
4//!
5//! Stability varies by primitive. See [README](https://github.com/commonwarexyz/monorepo#stability) for details.
6
7#![doc(
8 html_logo_url = "https://commonware.xyz/imgs/rustdoc_logo.svg",
9 html_favicon_url = "https://commonware.xyz/favicon.ico"
10)]
11
12commonware_macros::stability_scope!(ALPHA {
13 use bytes::Buf;
14 use commonware_codec::{Codec, FixedSize, Read, Write};
15 use commonware_cryptography::Digest;
16 use commonware_parallel::Strategy;
17 use std::fmt::Debug;
18
19 mod no_coding;
20 pub use no_coding::{Error as NoCodingError, NoCoding};
21
22 mod reed_solomon;
23 pub use reed_solomon::{Error as ReedSolomonError, ReedSolomon};
24
25 mod zoda;
26 pub use zoda::{Error as ZodaError, Zoda};
27
28 /// Configuration common to all encoding schemes.
29 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
30 #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
31 pub struct Config {
32 /// The minimum number of shards needed to encode the data.
33 pub minimum_shards: u16,
34 /// Extra shards beyond the minimum number.
35 ///
36 /// Alternatively, one can think of the configuration as having a total number
37 /// `N = extra_shards + minimum_shards`, but by specifying the `extra_shards`
38 /// rather than `N`, we avoid needing to check that `minimum_shards <= N`.
39 pub extra_shards: u16,
40 }
41
42 impl Config {
43 /// Returns the total number of shards produced by this configuration.
44 pub fn total_shards(&self) -> u32 {
45 u32::from(self.minimum_shards) + u32::from(self.extra_shards)
46 }
47 }
48
49 impl FixedSize for Config {
50 const SIZE: usize = 2 * <u16 as FixedSize>::SIZE;
51 }
52
53 impl Write for Config {
54 fn write(&self, buf: &mut impl bytes::BufMut) {
55 self.minimum_shards.write(buf);
56 self.extra_shards.write(buf);
57 }
58 }
59
60 impl Read for Config {
61 type Cfg = ();
62
63 fn read_cfg(buf: &mut impl Buf, cfg: &Self::Cfg) -> Result<Self, commonware_codec::Error> {
64 Ok(Self {
65 minimum_shards: u16::read_cfg(buf, cfg)?,
66 extra_shards: u16::read_cfg(buf, cfg)?,
67 })
68 }
69 }
70
71 /// The configuration for decoding shard data.
72 #[derive(Clone, Debug)]
73 pub struct CodecConfig {
74 /// The maximum number of bytes a shard is expected to contain.
75 ///
76 /// This can be an upper bound, and only constrains the non-fixed-size portion
77 /// of shard data.
78 pub maximum_shard_size: usize,
79 }
80
81 /// A scheme for encoding data into pieces, and recovering the data from those pieces.
82 ///
83 /// # Example
84 /// ```
85 /// use commonware_coding::{Config, ReedSolomon, Scheme as _};
86 /// use commonware_cryptography::Sha256;
87 /// use commonware_parallel::Sequential;
88 ///
89 /// const STRATEGY: Sequential = Sequential;
90 ///
91 /// type RS = ReedSolomon<Sha256>;
92 ///
93 /// let config = Config { minimum_shards: 2, extra_shards: 1 };
94 /// let data = b"Hello!";
95 /// // Turn the data into shards, and a commitment to those shards.
96 /// let (commitment, shards) =
97 /// RS::encode(&config, data.as_slice(), &STRATEGY).unwrap();
98 ///
99 /// // Each person produces reshards, their own checked shard, and checking data
100 /// // to check other peoples reshards.
101 /// let (mut checking_data_w_shard, reshards): (Vec<_>, Vec<_>) = shards
102 /// .into_iter()
103 /// .enumerate()
104 /// .map(|(i, shard)| {
105 /// let (checking_data, checked_shard, reshard) = RS::reshard(&config, &commitment, i as u16, shard).unwrap();
106 /// ((checking_data, checked_shard), reshard)
107 /// })
108 /// .collect();
109 /// // Let's pretend that the last item is "ours"
110 /// let (checking_data, checked_shard) = checking_data_w_shard.pop().unwrap();
111 /// // We can use this checking_data to check the other shards.
112 /// let mut checked_shards = Vec::new();
113 /// checked_shards.push(checked_shard);
114 /// for (i, reshard) in reshards.into_iter().enumerate().skip(1) {
115 /// checked_shards.push(RS::check(&config, &commitment, &checking_data, i as u16, reshard).unwrap())
116 /// }
117 ///
118 /// let data2 = RS::decode(&config, &commitment, checking_data, &checked_shards[..2], &STRATEGY).unwrap();
119 /// assert_eq!(&data[..], &data2[..]);
120 ///
121 /// // Decoding works with different shards, with a guarantee to get the same result.
122 /// let data3 = RS::decode(&config, &commitment, checking_data, &checked_shards[1..], &STRATEGY).unwrap();
123 /// assert_eq!(&data[..], &data3[..]);
124 /// ```
125 pub trait Scheme: Debug + Clone + Send + Sync + 'static {
126 /// A commitment attesting to the shards of data.
127 type Commitment: Digest;
128 /// A shard of data, to be received by a participant.
129 type Shard: Clone + Eq + Codec<Cfg = CodecConfig> + Send + Sync + 'static;
130 /// A shard shared with other participants, to aid them in reconstruction.
131 ///
132 /// In most cases, this will be the same as `Shard`, but some schemes might
133 /// have extra information in `Shard` that may not be necessary to reconstruct
134 /// the data.
135 type ReShard: Clone + Eq + Codec<Cfg = CodecConfig> + Send + Sync + 'static;
136 /// Data which can assist in checking shards.
137 type CheckingData: Clone + Send;
138 /// A shard that has been checked for inclusion in the commitment.
139 ///
140 /// This allows excluding [Scheme::ReShard]s which are invalid, and shouldn't
141 /// be considered as progress towards meeting the minimum number of shards.
142 type CheckedShard;
143 type Error: std::fmt::Debug;
144
145 /// Encode a piece of data, returning a commitment, along with shards, and proofs.
146 ///
147 /// Each shard and proof is intended for exactly one participant. The number of shards returned
148 /// should equal `config.minimum_shards + config.extra_shards`.
149 #[allow(clippy::type_complexity)]
150 fn encode(
151 config: &Config,
152 data: impl Buf,
153 strategy: &impl Strategy,
154 ) -> Result<(Self::Commitment, Vec<Self::Shard>), Self::Error>;
155
156 /// Take your own shard, check it, and produce a [Scheme::ReShard] to forward to others.
157 ///
158 /// This takes in an index, which is the index you expect the shard to be.
159 ///
160 /// This will produce a [Scheme::CheckedShard] which counts towards the minimum
161 /// number of shards you need to reconstruct the data, in [Scheme::decode].
162 ///
163 /// You also get [Scheme::CheckingData], which has information you can use to check
164 /// the shards you receive from others.
165 #[allow(clippy::type_complexity)]
166 fn reshard(
167 config: &Config,
168 commitment: &Self::Commitment,
169 index: u16,
170 shard: Self::Shard,
171 ) -> Result<(Self::CheckingData, Self::CheckedShard, Self::ReShard), Self::Error>;
172
173 /// Check the integrity of a reshard, producing a checked shard.
174 ///
175 /// This requires the [Scheme::CheckingData] produced by [Scheme::reshard].
176 ///
177 /// This takes in an index, to make sure that the reshard you're checking
178 /// is associated with the participant you expect it to be.
179 fn check(
180 config: &Config,
181 commitment: &Self::Commitment,
182 checking_data: &Self::CheckingData,
183 index: u16,
184 reshard: Self::ReShard,
185 ) -> Result<Self::CheckedShard, Self::Error>;
186
187 /// Decode the data from shards received from other participants.
188 ///
189 /// The data must be decodeable with as few as `config.minimum_shards`,
190 /// including your own shard.
191 ///
192 /// Calls to this function with the same commitment, but with different shards,
193 /// or shards in a different should also result in the same output data, or in failure.
194 /// In other words, when using the decoding function in a broader system, you
195 /// get a guarantee that every participant decoding will see the same final
196 /// data, even if they receive different shards, or receive them in a different order.
197 fn decode(
198 config: &Config,
199 commitment: &Self::Commitment,
200 checking_data: Self::CheckingData,
201 shards: &[Self::CheckedShard],
202 strategy: &impl Strategy,
203 ) -> Result<Vec<u8>, Self::Error>;
204 }
205
206 /// A marker trait indicating that [Scheme::check] proves validity of the encoding.
207 ///
208 /// In more detail, this means that upon a successful call to [Scheme::check],
209 /// guarantees that the shard results from a valid encoding of the data, and thus,
210 /// if other participants also call check, then the data is guaranteed to be reconstructable.
211 pub trait ValidatingScheme: Scheme {}
212});
213
214#[cfg(test)]
215mod test {
216 use super::*;
217 use crate::reed_solomon::ReedSolomon;
218 use commonware_codec::Encode;
219 use commonware_cryptography::Sha256;
220 use commonware_parallel::Sequential;
221 use std::cmp::Reverse;
222
223 const MAX_DATA_BYTES: usize = 1 << 31;
224
225 fn general_test<S: Scheme>(
226 name: &str,
227 data: &[u8],
228 min_shards: u16,
229 total_shards: u16,
230 indices: &[u16],
231 ) {
232 // If the indices reference some larger shard, use that as the total.
233 let total_shards = indices
234 .iter()
235 .map(|&x| x + 1)
236 .max()
237 .map_or(total_shards, |x| x.max(total_shards));
238 assert!(min_shards >= 1, "min_shards must be at least 1");
239 assert!(
240 indices.len() >= min_shards as usize,
241 "you need to supply at least {min_shards} indices"
242 );
243 assert!(
244 total_shards >= min_shards,
245 "total_shards ({total_shards}) must be >= min_shards ({min_shards})"
246 );
247
248 let config = Config {
249 minimum_shards: min_shards,
250 extra_shards: total_shards - min_shards,
251 };
252 let read_cfg = CodecConfig {
253 maximum_shard_size: MAX_DATA_BYTES,
254 };
255 let (commitment, shards) = S::encode(&config, data, &Sequential).unwrap();
256 // Pick out the packets we want, in reverse order.
257 let ((_, _, checking_data, my_checked_shard, _), other_packets) = {
258 let mut out = shards
259 .into_iter()
260 .enumerate()
261 .filter_map(|(i, shard)| {
262 let shard = S::Shard::read_cfg(&mut shard.encode(), &read_cfg).unwrap();
263 let i = i as u16;
264 let pos_of_i = indices.iter().position(|&x| x == i)?;
265 let (x0, x1, x2) = S::reshard(&config, &commitment, i, shard).unwrap();
266 Some((pos_of_i, i, x0, x1, x2))
267 })
268 .collect::<Vec<_>>();
269 out.sort_by_key(|&(pos_of_i, _, _, _, _)| Reverse(pos_of_i));
270 let first = out.pop().unwrap();
271 (first, out)
272 };
273 let checked_shards = {
274 let mut others = other_packets
275 .into_iter()
276 .map(|(_, i, _, _, reshard)| {
277 let reshard = S::ReShard::read_cfg(&mut reshard.encode(), &read_cfg).unwrap();
278 S::check(&config, &commitment, &checking_data, i, reshard).unwrap()
279 })
280 .collect::<Vec<_>>();
281 others.push(my_checked_shard);
282 others
283 };
284 let decoded = S::decode(
285 &config,
286 &commitment,
287 checking_data,
288 &checked_shards,
289 &Sequential,
290 )
291 .unwrap();
292 assert_eq!(&decoded, data, "{name} failed");
293 }
294
295 fn test_basic<S: Scheme>() {
296 general_test::<S>("test_basic", b"Hello, Reed-Solomon!", 4, 7, &[0, 1, 2, 3]);
297 }
298
299 fn test_moderate<S: Scheme>() {
300 general_test::<S>(
301 "test_moderate",
302 b"Testing with more pieces than minimum",
303 4,
304 10,
305 &[0, 1, 2, 8, 9],
306 );
307 }
308
309 fn test_odd_shard_len<S: Scheme>() {
310 general_test::<S>("test_odd_shard_len", b"?", 2, 3, &[0, 1]);
311 }
312
313 fn test_recovery<S: Scheme>() {
314 general_test::<S>(
315 "test_recovery",
316 b"Testing recovery pieces",
317 3,
318 8,
319 &[5, 6, 7],
320 );
321 }
322
323 fn test_empty_data<S: Scheme>() {
324 general_test::<S>(
325 "test_empty_data",
326 b"",
327 30,
328 100,
329 (0..30u16).collect::<Vec<_>>().as_slice(),
330 );
331 }
332
333 fn test_large_data<S: Scheme>() {
334 general_test::<S>(
335 "test_large_data",
336 vec![42u8; 1000].as_slice(),
337 3,
338 4,
339 &[0, 1, 2],
340 );
341 }
342
343 fn test_no_data_two_shards<S: Scheme>() {
344 general_test::<S>("test_no_data_one_shard", b"", 1, 2, &[0]);
345 }
346
347 // This exercises an edge case in ZODA, but is also useful for other schemes.
348 fn test_2_pow_16_25_total_shards<S: Scheme>() {
349 general_test::<S>(
350 "test_2_pow_16_25_total_shards",
351 vec![0x67; 1 << 16].as_slice(),
352 8,
353 25,
354 &(0..8).collect::<Vec<_>>(),
355 );
356 }
357
358 fn test_suite<S: Scheme>() {
359 test_basic::<S>();
360 test_moderate::<S>();
361 test_odd_shard_len::<S>();
362 test_recovery::<S>();
363 test_empty_data::<S>();
364 test_large_data::<S>();
365 test_no_data_two_shards::<S>();
366 test_2_pow_16_25_total_shards::<S>();
367 }
368
369 #[test]
370 fn test_suite_reed_solomon() {
371 test_suite::<ReedSolomon<Sha256>>();
372 }
373
374 #[test]
375 fn test_suite_no_coding() {
376 test_suite::<NoCoding<Sha256>>();
377 }
378
379 #[test]
380 fn test_suite_zoda() {
381 test_suite::<Zoda<Sha256>>();
382 }
383
384 #[cfg(feature = "arbitrary")]
385 mod conformance {
386 use super::*;
387 use commonware_codec::conformance::CodecConformance;
388
389 commonware_conformance::conformance_tests! {
390 CodecConformance<Config>,
391 }
392 }
393}