commonware_coding/
lib.rs

1//! Encode data to enable recovery from a subset of fragments.
2//!
3//! # Status
4//!
5//! `commonware-coding` is **ALPHA** software and is not yet recommended for production use. Developers should
6//! expect breaking changes and occasional instability.
7
8#![doc(
9    html_logo_url = "https://commonware.xyz/imgs/rustdoc_logo.svg",
10    html_favicon_url = "https://commonware.xyz/favicon.ico"
11)]
12
13use bytes::Buf;
14use commonware_codec::{Codec, FixedSize, Read, Write};
15use std::fmt::Debug;
16
17mod reed_solomon;
18use commonware_cryptography::Digest;
19pub use reed_solomon::{Error as ReedSolomonError, ReedSolomon};
20
21mod no_coding;
22pub use no_coding::{Error as NoCodingError, NoCoding};
23
24mod zoda;
25pub use zoda::{Error as ZodaError, Zoda};
26
27/// Configuration common to all encoding schemes.
28#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
29#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
30pub struct Config {
31    /// The minimum number of shards needed to encode the data.
32    pub minimum_shards: u16,
33    /// Extra shards beyond the minimum number.
34    ///
35    /// Alternatively, one can think of the configuration as having a total number
36    /// `N = extra_shards + minimum_shards`, but by specifying the `extra_shards`
37    /// rather than `N`, we avoid needing to check that `minimum_shards <= N`.
38    pub extra_shards: u16,
39}
40
41impl Config {
42    /// Returns the total number of shards produced by this configuration.
43    pub fn total_shards(&self) -> u32 {
44        u32::from(self.minimum_shards) + u32::from(self.extra_shards)
45    }
46}
47
48impl FixedSize for Config {
49    const SIZE: usize = 2 * <u16 as FixedSize>::SIZE;
50}
51
52impl Write for Config {
53    fn write(&self, buf: &mut impl bytes::BufMut) {
54        self.minimum_shards.write(buf);
55        self.extra_shards.write(buf);
56    }
57}
58
59impl Read for Config {
60    type Cfg = ();
61
62    fn read_cfg(buf: &mut impl Buf, cfg: &Self::Cfg) -> Result<Self, commonware_codec::Error> {
63        Ok(Self {
64            minimum_shards: u16::read_cfg(buf, cfg)?,
65            extra_shards: u16::read_cfg(buf, cfg)?,
66        })
67    }
68}
69
70/// The configuration for decoding shard data.
71#[derive(Clone, Debug)]
72pub struct CodecConfig {
73    /// The maximum number of bytes a shard is expected to contain.
74    ///
75    /// This can be an upper bound, and only constrains the non-fixed-size portion
76    /// of shard data.
77    pub maximum_shard_size: usize,
78}
79
80/// A scheme for encoding data into pieces, and recovering the data from those pieces.
81///
82/// # Example
83/// ```
84/// use commonware_coding::{Config, ReedSolomon, Scheme as _};
85/// use commonware_cryptography::Sha256;
86///
87/// const CONCURRENCY: usize = 1;
88///
89/// type RS = ReedSolomon<Sha256>;
90///
91/// let config = Config { minimum_shards: 2, extra_shards: 1 };
92/// let data = b"Hello!";
93/// // Turn the data into shards, and a commitment to those shards.
94/// let (commitment, shards) =
95///      RS::encode(&config, data.as_slice(), CONCURRENCY).unwrap();
96///
97/// // Each person produces reshards, their own checked shard, and checking data
98/// // to check other peoples reshards.
99/// let (mut checking_data_w_shard, reshards): (Vec<_>, Vec<_>) = shards
100///         .into_iter()
101///         .enumerate()
102///         .map(|(i, shard)| {
103///             let (checking_data, checked_shard, reshard) = RS::reshard(&config, &commitment, i as u16, shard).unwrap();
104///             ((checking_data, checked_shard), reshard)
105///         })
106///         .collect();
107/// // Let's pretend that the last item is "ours"
108/// let (checking_data, checked_shard) = checking_data_w_shard.pop().unwrap();
109/// // We can use this checking_data to check the other shards.
110/// let mut checked_shards = Vec::new();
111/// checked_shards.push(checked_shard);
112/// for (i, reshard) in reshards.into_iter().enumerate().skip(1) {
113///   checked_shards.push(RS::check(&config, &commitment, &checking_data, i as u16, reshard).unwrap())
114/// }
115///
116/// let data2 = RS::decode(&config, &commitment, checking_data, &checked_shards[..2], CONCURRENCY).unwrap();
117/// assert_eq!(&data[..], &data2[..]);
118///
119/// // Decoding works with different shards, with a guarantee to get the same result.
120/// let data3 = RS::decode(&config, &commitment, checking_data, &checked_shards[1..], CONCURRENCY).unwrap();
121/// assert_eq!(&data[..], &data3[..]);
122/// ```
123pub trait Scheme: Debug + Clone + Send + Sync + 'static {
124    /// A commitment attesting to the shards of data.
125    type Commitment: Digest;
126    /// A shard of data, to be received by a participant.
127    type Shard: Clone + Eq + Codec<Cfg = CodecConfig> + Send + Sync + 'static;
128    /// A shard shared with other participants, to aid them in reconstruction.
129    ///
130    /// In most cases, this will be the same as `Shard`, but some schemes might
131    /// have extra information in `Shard` that may not be necessary to reconstruct
132    /// the data.
133    type ReShard: Clone + Eq + Codec<Cfg = CodecConfig> + Send + Sync + 'static;
134    /// Data which can assist in checking shards.
135    type CheckingData: Clone + Send;
136    /// A shard that has been checked for inclusion in the commitment.
137    ///
138    /// This allows excluding [Scheme::ReShard]s which are invalid, and shouldn't
139    /// be considered as progress towards meeting the minimum number of shards.
140    type CheckedShard;
141    type Error: std::fmt::Debug;
142
143    /// Encode a piece of data, returning a commitment, along with shards, and proofs.
144    ///
145    /// Each shard and proof is intended for exactly one participant. The number of shards returned
146    /// should equal `config.minimum_shards + config.extra_shards`.
147    #[allow(clippy::type_complexity)]
148    fn encode(
149        config: &Config,
150        data: impl Buf,
151        concurrency: usize,
152    ) -> Result<(Self::Commitment, Vec<Self::Shard>), Self::Error>;
153
154    /// Take your own shard, check it, and produce a [Scheme::ReShard] to forward to others.
155    ///
156    /// This takes in an index, which is the index you expect the shard to be.
157    ///
158    /// This will produce a [Scheme::CheckedShard] which counts towards the minimum
159    /// number of shards you need to reconstruct the data, in [Scheme::decode].
160    ///
161    /// You also get [Scheme::CheckingData], which has information you can use to check
162    /// the shards you receive from others.
163    #[allow(clippy::type_complexity)]
164    fn reshard(
165        config: &Config,
166        commitment: &Self::Commitment,
167        index: u16,
168        shard: Self::Shard,
169    ) -> Result<(Self::CheckingData, Self::CheckedShard, Self::ReShard), Self::Error>;
170
171    /// Check the integrity of a reshard, producing a checked shard.
172    ///
173    /// This requires the [Scheme::CheckingData] produced by [Scheme::reshard].
174    ///
175    /// This takes in an index, to make sure that the reshard you're checking
176    /// is associated with the participant you expect it to be.
177    fn check(
178        config: &Config,
179        commitment: &Self::Commitment,
180        checking_data: &Self::CheckingData,
181        index: u16,
182        reshard: Self::ReShard,
183    ) -> Result<Self::CheckedShard, Self::Error>;
184
185    /// Decode the data from shards received from other participants.
186    ///
187    /// The data must be decodeable with as few as `config.minimum_shards`,
188    /// including your own shard.
189    ///
190    /// Calls to this function with the same commitment, but with different shards,
191    /// or shards in a different should also result in the same output data, or in failure.
192    /// In other words, when using the decoding function in a broader system, you
193    /// get a guarantee that every participant decoding will see the same final
194    /// data, even if they receive different shards, or receive them in a different order.
195    fn decode(
196        config: &Config,
197        commitment: &Self::Commitment,
198        checking_data: Self::CheckingData,
199        shards: &[Self::CheckedShard],
200        concurrency: usize,
201    ) -> Result<Vec<u8>, Self::Error>;
202}
203
204/// A marker trait indicating that [Scheme::check] proves validity of the encoding.
205///
206/// In more detail, this means that upon a successful call to [Scheme::check],
207/// guarantees that the shard results from a valid encoding of the data, and thus,
208/// if other participants also call check, then the data is guaranteed to be reconstructable.
209pub trait ValidatingScheme: Scheme {}
210
211#[cfg(test)]
212mod test {
213    use super::*;
214    use crate::reed_solomon::ReedSolomon;
215    use commonware_codec::Encode;
216    use commonware_cryptography::Sha256;
217    use std::cmp::Reverse;
218
219    const CONCURRENCY: usize = 1;
220    const MAX_DATA_BYTES: usize = 1 << 31;
221
222    fn general_test<S: Scheme>(
223        name: &str,
224        data: &[u8],
225        min_shards: u16,
226        total_shards: u16,
227        indices: &[u16],
228    ) {
229        // If the indices reference some larger shard, use that as the total.
230        let total_shards = indices
231            .iter()
232            .map(|&x| x + 1)
233            .max()
234            .map_or(total_shards, |x| x.max(total_shards));
235        assert!(min_shards >= 1, "min_shards must be at least 1");
236        assert!(
237            indices.len() >= min_shards as usize,
238            "you need to supply at least {min_shards} indices"
239        );
240        assert!(
241            total_shards >= min_shards,
242            "total_shards ({total_shards}) must be >= min_shards ({min_shards})"
243        );
244
245        let config = Config {
246            minimum_shards: min_shards,
247            extra_shards: total_shards - min_shards,
248        };
249        let read_cfg = CodecConfig {
250            maximum_shard_size: MAX_DATA_BYTES,
251        };
252        let (commitment, shards) = S::encode(&config, data, CONCURRENCY).unwrap();
253        // Pick out the packets we want, in reverse order.
254        let ((_, _, checking_data, my_checked_shard, _), other_packets) = {
255            let mut out = shards
256                .into_iter()
257                .enumerate()
258                .filter_map(|(i, shard)| {
259                    let shard = S::Shard::read_cfg(&mut shard.encode(), &read_cfg).unwrap();
260                    let i = i as u16;
261                    let pos_of_i = indices.iter().position(|&x| x == i)?;
262                    let (x0, x1, x2) = S::reshard(&config, &commitment, i, shard).unwrap();
263                    Some((pos_of_i, i, x0, x1, x2))
264                })
265                .collect::<Vec<_>>();
266            out.sort_by_key(|&(pos_of_i, _, _, _, _)| Reverse(pos_of_i));
267            let first = out.pop().unwrap();
268            (first, out)
269        };
270        let checked_shards = {
271            let mut others = other_packets
272                .into_iter()
273                .map(|(_, i, _, _, reshard)| {
274                    let reshard = S::ReShard::read_cfg(&mut reshard.encode(), &read_cfg).unwrap();
275                    S::check(&config, &commitment, &checking_data, i, reshard).unwrap()
276                })
277                .collect::<Vec<_>>();
278            others.push(my_checked_shard);
279            others
280        };
281        let decoded = S::decode(
282            &config,
283            &commitment,
284            checking_data,
285            &checked_shards,
286            CONCURRENCY,
287        )
288        .unwrap();
289        assert_eq!(&decoded, data, "{name} failed");
290    }
291
292    fn test_basic<S: Scheme>() {
293        general_test::<S>("test_basic", b"Hello, Reed-Solomon!", 4, 7, &[0, 1, 2, 3]);
294    }
295
296    fn test_moderate<S: Scheme>() {
297        general_test::<S>(
298            "test_moderate",
299            b"Testing with more pieces than minimum",
300            4,
301            10,
302            &[0, 1, 2, 8, 9],
303        );
304    }
305
306    fn test_odd_shard_len<S: Scheme>() {
307        general_test::<S>("test_odd_shard_len", b"?", 2, 3, &[0, 1]);
308    }
309
310    fn test_recovery<S: Scheme>() {
311        general_test::<S>(
312            "test_recovery",
313            b"Testing recovery pieces",
314            3,
315            8,
316            &[5, 6, 7],
317        );
318    }
319
320    fn test_empty_data<S: Scheme>() {
321        general_test::<S>(
322            "test_empty_data",
323            b"",
324            30,
325            100,
326            (0..30u16).collect::<Vec<_>>().as_slice(),
327        );
328    }
329
330    fn test_large_data<S: Scheme>() {
331        general_test::<S>(
332            "test_large_data",
333            vec![42u8; 1000].as_slice(),
334            3,
335            4,
336            &[0, 1, 2],
337        );
338    }
339
340    fn test_no_data_two_shards<S: Scheme>() {
341        general_test::<S>("test_no_data_one_shard", b"", 1, 2, &[0]);
342    }
343
344    // This exercises an edge case in ZODA, but is also useful for other schemes.
345    fn test_2_pow_16_25_total_shards<S: Scheme>() {
346        general_test::<S>(
347            "test_2_pow_16_25_total_shards",
348            vec![0x67; 1 << 16].as_slice(),
349            8,
350            25,
351            &(0..8).collect::<Vec<_>>(),
352        );
353    }
354
355    fn test_suite<S: Scheme>() {
356        test_basic::<S>();
357        test_moderate::<S>();
358        test_odd_shard_len::<S>();
359        test_recovery::<S>();
360        test_empty_data::<S>();
361        test_large_data::<S>();
362        test_no_data_two_shards::<S>();
363        test_2_pow_16_25_total_shards::<S>();
364    }
365
366    #[test]
367    fn test_suite_reed_solomon() {
368        test_suite::<ReedSolomon<Sha256>>();
369    }
370
371    #[test]
372    fn test_suite_no_coding() {
373        test_suite::<NoCoding<Sha256>>();
374    }
375
376    #[test]
377    fn test_suite_zoda() {
378        test_suite::<Zoda<Sha256>>();
379    }
380
381    #[cfg(feature = "arbitrary")]
382    mod conformance {
383        use super::*;
384        use commonware_codec::conformance::CodecConformance;
385
386        commonware_conformance::conformance_tests! {
387            CodecConformance<Config>,
388        }
389    }
390}