commonware_coding/lib.rs
1//! Encode data to enable recovery from a subset of fragments.
2//!
3//! # Status
4//!
5//! `commonware-coding` is **ALPHA** software and is not yet recommended for production use. Developers should
6//! expect breaking changes and occasional instability.
7
8#![doc(
9 html_logo_url = "https://commonware.xyz/imgs/rustdoc_logo.svg",
10 html_favicon_url = "https://commonware.xyz/favicon.ico"
11)]
12
13use bytes::Buf;
14use commonware_codec::{Codec, FixedSize, Read, Write};
15use std::fmt::Debug;
16
17mod reed_solomon;
18use commonware_cryptography::Digest;
19pub use reed_solomon::{Error as ReedSolomonError, ReedSolomon};
20
21mod no_coding;
22pub use no_coding::{Error as NoCodingError, NoCoding};
23
24mod zoda;
25pub use zoda::{Error as ZodaError, Zoda};
26
27/// Configuration common to all encoding schemes.
28#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
29#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
30pub struct Config {
31 /// The minimum number of shards needed to encode the data.
32 pub minimum_shards: u16,
33 /// Extra shards beyond the minimum number.
34 ///
35 /// Alternatively, one can think of the configuration as having a total number
36 /// `N = extra_shards + minimum_shards`, but by specifying the `extra_shards`
37 /// rather than `N`, we avoid needing to check that `minimum_shards <= N`.
38 pub extra_shards: u16,
39}
40
41impl Config {
42 /// Returns the total number of shards produced by this configuration.
43 pub fn total_shards(&self) -> u32 {
44 u32::from(self.minimum_shards) + u32::from(self.extra_shards)
45 }
46}
47
48impl FixedSize for Config {
49 const SIZE: usize = 2 * <u16 as FixedSize>::SIZE;
50}
51
52impl Write for Config {
53 fn write(&self, buf: &mut impl bytes::BufMut) {
54 self.minimum_shards.write(buf);
55 self.extra_shards.write(buf);
56 }
57}
58
59impl Read for Config {
60 type Cfg = ();
61
62 fn read_cfg(buf: &mut impl Buf, cfg: &Self::Cfg) -> Result<Self, commonware_codec::Error> {
63 Ok(Self {
64 minimum_shards: u16::read_cfg(buf, cfg)?,
65 extra_shards: u16::read_cfg(buf, cfg)?,
66 })
67 }
68}
69
70/// The configuration for decoding shard data.
71#[derive(Clone, Debug)]
72pub struct CodecConfig {
73 /// The maximum number of bytes a shard is expected to contain.
74 ///
75 /// This can be an upper bound, and only constrains the non-fixed-size portion
76 /// of shard data.
77 pub maximum_shard_size: usize,
78}
79
80/// A scheme for encoding data into pieces, and recovering the data from those pieces.
81///
82/// # Example
83/// ```
84/// use commonware_coding::{Config, ReedSolomon, Scheme as _};
85/// use commonware_cryptography::Sha256;
86///
87/// const CONCURRENCY: usize = 1;
88///
89/// type RS = ReedSolomon<Sha256>;
90///
91/// let config = Config { minimum_shards: 2, extra_shards: 1 };
92/// let data = b"Hello!";
93/// // Turn the data into shards, and a commitment to those shards.
94/// let (commitment, shards) =
95/// RS::encode(&config, data.as_slice(), CONCURRENCY).unwrap();
96///
97/// // Each person produces reshards, their own checked shard, and checking data
98/// // to check other peoples reshards.
99/// let (mut checking_data_w_shard, reshards): (Vec<_>, Vec<_>) = shards
100/// .into_iter()
101/// .enumerate()
102/// .map(|(i, shard)| {
103/// let (checking_data, checked_shard, reshard) = RS::reshard(&config, &commitment, i as u16, shard).unwrap();
104/// ((checking_data, checked_shard), reshard)
105/// })
106/// .collect();
107/// // Let's pretend that the last item is "ours"
108/// let (checking_data, checked_shard) = checking_data_w_shard.pop().unwrap();
109/// // We can use this checking_data to check the other shards.
110/// let mut checked_shards = Vec::new();
111/// checked_shards.push(checked_shard);
112/// for (i, reshard) in reshards.into_iter().enumerate().skip(1) {
113/// checked_shards.push(RS::check(&config, &commitment, &checking_data, i as u16, reshard).unwrap())
114/// }
115///
116/// let data2 = RS::decode(&config, &commitment, checking_data, &checked_shards[..2], CONCURRENCY).unwrap();
117/// assert_eq!(&data[..], &data2[..]);
118///
119/// // Decoding works with different shards, with a guarantee to get the same result.
120/// let data3 = RS::decode(&config, &commitment, checking_data, &checked_shards[1..], CONCURRENCY).unwrap();
121/// assert_eq!(&data[..], &data3[..]);
122/// ```
123pub trait Scheme: Debug + Clone + Send + Sync + 'static {
124 /// A commitment attesting to the shards of data.
125 type Commitment: Digest;
126 /// A shard of data, to be received by a participant.
127 type Shard: Clone + Eq + Codec<Cfg = CodecConfig> + Send + Sync + 'static;
128 /// A shard shared with other participants, to aid them in reconstruction.
129 ///
130 /// In most cases, this will be the same as `Shard`, but some schemes might
131 /// have extra information in `Shard` that may not be necessary to reconstruct
132 /// the data.
133 type ReShard: Clone + Eq + Codec<Cfg = CodecConfig> + Send + Sync + 'static;
134 /// Data which can assist in checking shards.
135 type CheckingData: Clone + Send;
136 /// A shard that has been checked for inclusion in the commitment.
137 ///
138 /// This allows excluding [Scheme::ReShard]s which are invalid, and shouldn't
139 /// be considered as progress towards meeting the minimum number of shards.
140 type CheckedShard;
141 type Error: std::fmt::Debug;
142
143 /// Encode a piece of data, returning a commitment, along with shards, and proofs.
144 ///
145 /// Each shard and proof is intended for exactly one participant. The number of shards returned
146 /// should equal `config.minimum_shards + config.extra_shards`.
147 #[allow(clippy::type_complexity)]
148 fn encode(
149 config: &Config,
150 data: impl Buf,
151 concurrency: usize,
152 ) -> Result<(Self::Commitment, Vec<Self::Shard>), Self::Error>;
153
154 /// Take your own shard, check it, and produce a [Scheme::ReShard] to forward to others.
155 ///
156 /// This takes in an index, which is the index you expect the shard to be.
157 ///
158 /// This will produce a [Scheme::CheckedShard] which counts towards the minimum
159 /// number of shards you need to reconstruct the data, in [Scheme::decode].
160 ///
161 /// You also get [Scheme::CheckingData], which has information you can use to check
162 /// the shards you receive from others.
163 #[allow(clippy::type_complexity)]
164 fn reshard(
165 config: &Config,
166 commitment: &Self::Commitment,
167 index: u16,
168 shard: Self::Shard,
169 ) -> Result<(Self::CheckingData, Self::CheckedShard, Self::ReShard), Self::Error>;
170
171 /// Check the integrity of a reshard, producing a checked shard.
172 ///
173 /// This requires the [Scheme::CheckingData] produced by [Scheme::reshard].
174 ///
175 /// This takes in an index, to make sure that the reshard you're checking
176 /// is associated with the participant you expect it to be.
177 fn check(
178 config: &Config,
179 commitment: &Self::Commitment,
180 checking_data: &Self::CheckingData,
181 index: u16,
182 reshard: Self::ReShard,
183 ) -> Result<Self::CheckedShard, Self::Error>;
184
185 /// Decode the data from shards received from other participants.
186 ///
187 /// The data must be decodeable with as few as `config.minimum_shards`,
188 /// including your own shard.
189 ///
190 /// Calls to this function with the same commitment, but with different shards,
191 /// or shards in a different should also result in the same output data, or in failure.
192 /// In other words, when using the decoding function in a broader system, you
193 /// get a guarantee that every participant decoding will see the same final
194 /// data, even if they receive different shards, or receive them in a different order.
195 fn decode(
196 config: &Config,
197 commitment: &Self::Commitment,
198 checking_data: Self::CheckingData,
199 shards: &[Self::CheckedShard],
200 concurrency: usize,
201 ) -> Result<Vec<u8>, Self::Error>;
202}
203
204/// A marker trait indicating that [Scheme::check] proves validity of the encoding.
205///
206/// In more detail, this means that upon a successful call to [Scheme::check],
207/// guarantees that the shard results from a valid encoding of the data, and thus,
208/// if other participants also call check, then the data is guaranteed to be reconstructable.
209pub trait ValidatingScheme: Scheme {}
210
211#[cfg(test)]
212mod test {
213 use super::*;
214 use crate::reed_solomon::ReedSolomon;
215 use commonware_codec::Encode;
216 use commonware_cryptography::Sha256;
217 use std::cmp::Reverse;
218
219 const CONCURRENCY: usize = 1;
220 const MAX_DATA_BYTES: usize = 1 << 31;
221
222 fn general_test<S: Scheme>(
223 name: &str,
224 data: &[u8],
225 min_shards: u16,
226 total_shards: u16,
227 indices: &[u16],
228 ) {
229 // If the indices reference some larger shard, use that as the total.
230 let total_shards = indices
231 .iter()
232 .map(|&x| x + 1)
233 .max()
234 .map_or(total_shards, |x| x.max(total_shards));
235 assert!(min_shards >= 1, "min_shards must be at least 1");
236 assert!(
237 indices.len() >= min_shards as usize,
238 "you need to supply at least {min_shards} indices"
239 );
240 assert!(
241 total_shards >= min_shards,
242 "total_shards ({total_shards}) must be >= min_shards ({min_shards})"
243 );
244
245 let config = Config {
246 minimum_shards: min_shards,
247 extra_shards: total_shards - min_shards,
248 };
249 let read_cfg = CodecConfig {
250 maximum_shard_size: MAX_DATA_BYTES,
251 };
252 let (commitment, shards) = S::encode(&config, data, CONCURRENCY).unwrap();
253 // Pick out the packets we want, in reverse order.
254 let ((_, _, checking_data, my_checked_shard, _), other_packets) = {
255 let mut out = shards
256 .into_iter()
257 .enumerate()
258 .filter_map(|(i, shard)| {
259 let shard = S::Shard::read_cfg(&mut shard.encode(), &read_cfg).unwrap();
260 let i = i as u16;
261 let pos_of_i = indices.iter().position(|&x| x == i)?;
262 let (x0, x1, x2) = S::reshard(&config, &commitment, i, shard).unwrap();
263 Some((pos_of_i, i, x0, x1, x2))
264 })
265 .collect::<Vec<_>>();
266 out.sort_by_key(|&(pos_of_i, _, _, _, _)| Reverse(pos_of_i));
267 let first = out.pop().unwrap();
268 (first, out)
269 };
270 let checked_shards = {
271 let mut others = other_packets
272 .into_iter()
273 .map(|(_, i, _, _, reshard)| {
274 let reshard = S::ReShard::read_cfg(&mut reshard.encode(), &read_cfg).unwrap();
275 S::check(&config, &commitment, &checking_data, i, reshard).unwrap()
276 })
277 .collect::<Vec<_>>();
278 others.push(my_checked_shard);
279 others
280 };
281 let decoded = S::decode(
282 &config,
283 &commitment,
284 checking_data,
285 &checked_shards,
286 CONCURRENCY,
287 )
288 .unwrap();
289 assert_eq!(&decoded, data, "{name} failed");
290 }
291
292 fn test_basic<S: Scheme>() {
293 general_test::<S>("test_basic", b"Hello, Reed-Solomon!", 4, 7, &[0, 1, 2, 3]);
294 }
295
296 fn test_moderate<S: Scheme>() {
297 general_test::<S>(
298 "test_moderate",
299 b"Testing with more pieces than minimum",
300 4,
301 10,
302 &[0, 1, 2, 8, 9],
303 );
304 }
305
306 fn test_odd_shard_len<S: Scheme>() {
307 general_test::<S>("test_odd_shard_len", b"?", 2, 3, &[0, 1]);
308 }
309
310 fn test_recovery<S: Scheme>() {
311 general_test::<S>(
312 "test_recovery",
313 b"Testing recovery pieces",
314 3,
315 8,
316 &[5, 6, 7],
317 );
318 }
319
320 fn test_empty_data<S: Scheme>() {
321 general_test::<S>(
322 "test_empty_data",
323 b"",
324 30,
325 100,
326 (0..30u16).collect::<Vec<_>>().as_slice(),
327 );
328 }
329
330 fn test_large_data<S: Scheme>() {
331 general_test::<S>(
332 "test_large_data",
333 vec![42u8; 1000].as_slice(),
334 3,
335 4,
336 &[0, 1, 2],
337 );
338 }
339
340 fn test_no_data_two_shards<S: Scheme>() {
341 general_test::<S>("test_no_data_one_shard", b"", 1, 2, &[0]);
342 }
343
344 // This exercises an edge case in ZODA, but is also useful for other schemes.
345 fn test_2_pow_16_25_total_shards<S: Scheme>() {
346 general_test::<S>(
347 "test_2_pow_16_25_total_shards",
348 vec![0x67; 1 << 16].as_slice(),
349 8,
350 25,
351 &(0..8).collect::<Vec<_>>(),
352 );
353 }
354
355 fn test_suite<S: Scheme>() {
356 test_basic::<S>();
357 test_moderate::<S>();
358 test_odd_shard_len::<S>();
359 test_recovery::<S>();
360 test_empty_data::<S>();
361 test_large_data::<S>();
362 test_no_data_two_shards::<S>();
363 test_2_pow_16_25_total_shards::<S>();
364 }
365
366 #[test]
367 fn test_suite_reed_solomon() {
368 test_suite::<ReedSolomon<Sha256>>();
369 }
370
371 #[test]
372 fn test_suite_no_coding() {
373 test_suite::<NoCoding<Sha256>>();
374 }
375
376 #[test]
377 fn test_suite_zoda() {
378 test_suite::<Zoda<Sha256>>();
379 }
380
381 #[cfg(feature = "arbitrary")]
382 mod conformance {
383 use super::*;
384 use commonware_codec::conformance::CodecConformance;
385
386 commonware_conformance::conformance_tests! {
387 CodecConformance<Config>,
388 }
389 }
390}