commonware_coding/lib.rs
1//! Encode data to enable recovery from a subset of fragments.
2//!
3//! # Status
4//!
5//! `commonware-coding` is **ALPHA** software and is not yet recommended for production use. Developers should
6//! expect breaking changes and occasional instability.
7
8#![doc(
9 html_logo_url = "https://commonware.xyz/imgs/rustdoc_logo.svg",
10 html_favicon_url = "https://commonware.xyz/favicon.ico"
11)]
12
13use bytes::Buf;
14use commonware_codec::{Codec, FixedSize, Read, Write};
15use commonware_parallel::Strategy;
16use std::fmt::Debug;
17
18mod reed_solomon;
19use commonware_cryptography::Digest;
20pub use reed_solomon::{Error as ReedSolomonError, ReedSolomon};
21
22mod no_coding;
23pub use no_coding::{Error as NoCodingError, NoCoding};
24
25mod zoda;
26pub use zoda::{Error as ZodaError, Zoda};
27
28/// Configuration common to all encoding schemes.
29#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
30#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
31pub struct Config {
32 /// The minimum number of shards needed to encode the data.
33 pub minimum_shards: u16,
34 /// Extra shards beyond the minimum number.
35 ///
36 /// Alternatively, one can think of the configuration as having a total number
37 /// `N = extra_shards + minimum_shards`, but by specifying the `extra_shards`
38 /// rather than `N`, we avoid needing to check that `minimum_shards <= N`.
39 pub extra_shards: u16,
40}
41
42impl Config {
43 /// Returns the total number of shards produced by this configuration.
44 pub fn total_shards(&self) -> u32 {
45 u32::from(self.minimum_shards) + u32::from(self.extra_shards)
46 }
47}
48
49impl FixedSize for Config {
50 const SIZE: usize = 2 * <u16 as FixedSize>::SIZE;
51}
52
53impl Write for Config {
54 fn write(&self, buf: &mut impl bytes::BufMut) {
55 self.minimum_shards.write(buf);
56 self.extra_shards.write(buf);
57 }
58}
59
60impl Read for Config {
61 type Cfg = ();
62
63 fn read_cfg(buf: &mut impl Buf, cfg: &Self::Cfg) -> Result<Self, commonware_codec::Error> {
64 Ok(Self {
65 minimum_shards: u16::read_cfg(buf, cfg)?,
66 extra_shards: u16::read_cfg(buf, cfg)?,
67 })
68 }
69}
70
71/// The configuration for decoding shard data.
72#[derive(Clone, Debug)]
73pub struct CodecConfig {
74 /// The maximum number of bytes a shard is expected to contain.
75 ///
76 /// This can be an upper bound, and only constrains the non-fixed-size portion
77 /// of shard data.
78 pub maximum_shard_size: usize,
79}
80
81/// A scheme for encoding data into pieces, and recovering the data from those pieces.
82///
83/// # Example
84/// ```
85/// use commonware_coding::{Config, ReedSolomon, Scheme as _};
86/// use commonware_cryptography::Sha256;
87/// use commonware_parallel::Sequential;
88///
89/// const STRATEGY: Sequential = Sequential;
90///
91/// type RS = ReedSolomon<Sha256>;
92///
93/// let config = Config { minimum_shards: 2, extra_shards: 1 };
94/// let data = b"Hello!";
95/// // Turn the data into shards, and a commitment to those shards.
96/// let (commitment, shards) =
97/// RS::encode(&config, data.as_slice(), &STRATEGY).unwrap();
98///
99/// // Each person produces reshards, their own checked shard, and checking data
100/// // to check other peoples reshards.
101/// let (mut checking_data_w_shard, reshards): (Vec<_>, Vec<_>) = shards
102/// .into_iter()
103/// .enumerate()
104/// .map(|(i, shard)| {
105/// let (checking_data, checked_shard, reshard) = RS::reshard(&config, &commitment, i as u16, shard).unwrap();
106/// ((checking_data, checked_shard), reshard)
107/// })
108/// .collect();
109/// // Let's pretend that the last item is "ours"
110/// let (checking_data, checked_shard) = checking_data_w_shard.pop().unwrap();
111/// // We can use this checking_data to check the other shards.
112/// let mut checked_shards = Vec::new();
113/// checked_shards.push(checked_shard);
114/// for (i, reshard) in reshards.into_iter().enumerate().skip(1) {
115/// checked_shards.push(RS::check(&config, &commitment, &checking_data, i as u16, reshard).unwrap())
116/// }
117///
118/// let data2 = RS::decode(&config, &commitment, checking_data, &checked_shards[..2], &STRATEGY).unwrap();
119/// assert_eq!(&data[..], &data2[..]);
120///
121/// // Decoding works with different shards, with a guarantee to get the same result.
122/// let data3 = RS::decode(&config, &commitment, checking_data, &checked_shards[1..], &STRATEGY).unwrap();
123/// assert_eq!(&data[..], &data3[..]);
124/// ```
125pub trait Scheme: Debug + Clone + Send + Sync + 'static {
126 /// A commitment attesting to the shards of data.
127 type Commitment: Digest;
128 /// A shard of data, to be received by a participant.
129 type Shard: Clone + Eq + Codec<Cfg = CodecConfig> + Send + Sync + 'static;
130 /// A shard shared with other participants, to aid them in reconstruction.
131 ///
132 /// In most cases, this will be the same as `Shard`, but some schemes might
133 /// have extra information in `Shard` that may not be necessary to reconstruct
134 /// the data.
135 type ReShard: Clone + Eq + Codec<Cfg = CodecConfig> + Send + Sync + 'static;
136 /// Data which can assist in checking shards.
137 type CheckingData: Clone + Send;
138 /// A shard that has been checked for inclusion in the commitment.
139 ///
140 /// This allows excluding [Scheme::ReShard]s which are invalid, and shouldn't
141 /// be considered as progress towards meeting the minimum number of shards.
142 type CheckedShard;
143 type Error: std::fmt::Debug;
144
145 /// Encode a piece of data, returning a commitment, along with shards, and proofs.
146 ///
147 /// Each shard and proof is intended for exactly one participant. The number of shards returned
148 /// should equal `config.minimum_shards + config.extra_shards`.
149 #[allow(clippy::type_complexity)]
150 fn encode(
151 config: &Config,
152 data: impl Buf,
153 strategy: &impl Strategy,
154 ) -> Result<(Self::Commitment, Vec<Self::Shard>), Self::Error>;
155
156 /// Take your own shard, check it, and produce a [Scheme::ReShard] to forward to others.
157 ///
158 /// This takes in an index, which is the index you expect the shard to be.
159 ///
160 /// This will produce a [Scheme::CheckedShard] which counts towards the minimum
161 /// number of shards you need to reconstruct the data, in [Scheme::decode].
162 ///
163 /// You also get [Scheme::CheckingData], which has information you can use to check
164 /// the shards you receive from others.
165 #[allow(clippy::type_complexity)]
166 fn reshard(
167 config: &Config,
168 commitment: &Self::Commitment,
169 index: u16,
170 shard: Self::Shard,
171 ) -> Result<(Self::CheckingData, Self::CheckedShard, Self::ReShard), Self::Error>;
172
173 /// Check the integrity of a reshard, producing a checked shard.
174 ///
175 /// This requires the [Scheme::CheckingData] produced by [Scheme::reshard].
176 ///
177 /// This takes in an index, to make sure that the reshard you're checking
178 /// is associated with the participant you expect it to be.
179 fn check(
180 config: &Config,
181 commitment: &Self::Commitment,
182 checking_data: &Self::CheckingData,
183 index: u16,
184 reshard: Self::ReShard,
185 ) -> Result<Self::CheckedShard, Self::Error>;
186
187 /// Decode the data from shards received from other participants.
188 ///
189 /// The data must be decodeable with as few as `config.minimum_shards`,
190 /// including your own shard.
191 ///
192 /// Calls to this function with the same commitment, but with different shards,
193 /// or shards in a different should also result in the same output data, or in failure.
194 /// In other words, when using the decoding function in a broader system, you
195 /// get a guarantee that every participant decoding will see the same final
196 /// data, even if they receive different shards, or receive them in a different order.
197 fn decode(
198 config: &Config,
199 commitment: &Self::Commitment,
200 checking_data: Self::CheckingData,
201 shards: &[Self::CheckedShard],
202 strategy: &impl Strategy,
203 ) -> Result<Vec<u8>, Self::Error>;
204}
205
206/// A marker trait indicating that [Scheme::check] proves validity of the encoding.
207///
208/// In more detail, this means that upon a successful call to [Scheme::check],
209/// guarantees that the shard results from a valid encoding of the data, and thus,
210/// if other participants also call check, then the data is guaranteed to be reconstructable.
211pub trait ValidatingScheme: Scheme {}
212
213#[cfg(test)]
214mod test {
215 use super::*;
216 use crate::reed_solomon::ReedSolomon;
217 use commonware_codec::Encode;
218 use commonware_cryptography::Sha256;
219 use commonware_parallel::Sequential;
220 use std::cmp::Reverse;
221
222 const MAX_DATA_BYTES: usize = 1 << 31;
223
224 fn general_test<S: Scheme>(
225 name: &str,
226 data: &[u8],
227 min_shards: u16,
228 total_shards: u16,
229 indices: &[u16],
230 ) {
231 // If the indices reference some larger shard, use that as the total.
232 let total_shards = indices
233 .iter()
234 .map(|&x| x + 1)
235 .max()
236 .map_or(total_shards, |x| x.max(total_shards));
237 assert!(min_shards >= 1, "min_shards must be at least 1");
238 assert!(
239 indices.len() >= min_shards as usize,
240 "you need to supply at least {min_shards} indices"
241 );
242 assert!(
243 total_shards >= min_shards,
244 "total_shards ({total_shards}) must be >= min_shards ({min_shards})"
245 );
246
247 let config = Config {
248 minimum_shards: min_shards,
249 extra_shards: total_shards - min_shards,
250 };
251 let read_cfg = CodecConfig {
252 maximum_shard_size: MAX_DATA_BYTES,
253 };
254 let (commitment, shards) = S::encode(&config, data, &Sequential).unwrap();
255 // Pick out the packets we want, in reverse order.
256 let ((_, _, checking_data, my_checked_shard, _), other_packets) = {
257 let mut out = shards
258 .into_iter()
259 .enumerate()
260 .filter_map(|(i, shard)| {
261 let shard = S::Shard::read_cfg(&mut shard.encode(), &read_cfg).unwrap();
262 let i = i as u16;
263 let pos_of_i = indices.iter().position(|&x| x == i)?;
264 let (x0, x1, x2) = S::reshard(&config, &commitment, i, shard).unwrap();
265 Some((pos_of_i, i, x0, x1, x2))
266 })
267 .collect::<Vec<_>>();
268 out.sort_by_key(|&(pos_of_i, _, _, _, _)| Reverse(pos_of_i));
269 let first = out.pop().unwrap();
270 (first, out)
271 };
272 let checked_shards = {
273 let mut others = other_packets
274 .into_iter()
275 .map(|(_, i, _, _, reshard)| {
276 let reshard = S::ReShard::read_cfg(&mut reshard.encode(), &read_cfg).unwrap();
277 S::check(&config, &commitment, &checking_data, i, reshard).unwrap()
278 })
279 .collect::<Vec<_>>();
280 others.push(my_checked_shard);
281 others
282 };
283 let decoded = S::decode(
284 &config,
285 &commitment,
286 checking_data,
287 &checked_shards,
288 &Sequential,
289 )
290 .unwrap();
291 assert_eq!(&decoded, data, "{name} failed");
292 }
293
294 fn test_basic<S: Scheme>() {
295 general_test::<S>("test_basic", b"Hello, Reed-Solomon!", 4, 7, &[0, 1, 2, 3]);
296 }
297
298 fn test_moderate<S: Scheme>() {
299 general_test::<S>(
300 "test_moderate",
301 b"Testing with more pieces than minimum",
302 4,
303 10,
304 &[0, 1, 2, 8, 9],
305 );
306 }
307
308 fn test_odd_shard_len<S: Scheme>() {
309 general_test::<S>("test_odd_shard_len", b"?", 2, 3, &[0, 1]);
310 }
311
312 fn test_recovery<S: Scheme>() {
313 general_test::<S>(
314 "test_recovery",
315 b"Testing recovery pieces",
316 3,
317 8,
318 &[5, 6, 7],
319 );
320 }
321
322 fn test_empty_data<S: Scheme>() {
323 general_test::<S>(
324 "test_empty_data",
325 b"",
326 30,
327 100,
328 (0..30u16).collect::<Vec<_>>().as_slice(),
329 );
330 }
331
332 fn test_large_data<S: Scheme>() {
333 general_test::<S>(
334 "test_large_data",
335 vec![42u8; 1000].as_slice(),
336 3,
337 4,
338 &[0, 1, 2],
339 );
340 }
341
342 fn test_no_data_two_shards<S: Scheme>() {
343 general_test::<S>("test_no_data_one_shard", b"", 1, 2, &[0]);
344 }
345
346 // This exercises an edge case in ZODA, but is also useful for other schemes.
347 fn test_2_pow_16_25_total_shards<S: Scheme>() {
348 general_test::<S>(
349 "test_2_pow_16_25_total_shards",
350 vec![0x67; 1 << 16].as_slice(),
351 8,
352 25,
353 &(0..8).collect::<Vec<_>>(),
354 );
355 }
356
357 fn test_suite<S: Scheme>() {
358 test_basic::<S>();
359 test_moderate::<S>();
360 test_odd_shard_len::<S>();
361 test_recovery::<S>();
362 test_empty_data::<S>();
363 test_large_data::<S>();
364 test_no_data_two_shards::<S>();
365 test_2_pow_16_25_total_shards::<S>();
366 }
367
368 #[test]
369 fn test_suite_reed_solomon() {
370 test_suite::<ReedSolomon<Sha256>>();
371 }
372
373 #[test]
374 fn test_suite_no_coding() {
375 test_suite::<NoCoding<Sha256>>();
376 }
377
378 #[test]
379 fn test_suite_zoda() {
380 test_suite::<Zoda<Sha256>>();
381 }
382
383 #[cfg(feature = "arbitrary")]
384 mod conformance {
385 use super::*;
386 use commonware_codec::conformance::CodecConformance;
387
388 commonware_conformance::conformance_tests! {
389 CodecConformance<Config>,
390 }
391 }
392}