Skip to main content

commonware_coding/
lib.rs

1//! Encode data to enable recovery from a subset of fragments.
2//!
3//! # Status
4//!
5//! Stability varies by primitive. See [README](https://github.com/commonwarexyz/monorepo#stability) for details.
6
7#![doc(
8    html_logo_url = "https://commonware.xyz/imgs/rustdoc_logo.svg",
9    html_favicon_url = "https://commonware.xyz/favicon.ico"
10)]
11
12commonware_macros::stability_scope!(ALPHA {
13    use bytes::Buf;
14    use commonware_codec::{Codec, FixedSize, Read, Write};
15    use commonware_cryptography::Digest;
16    use commonware_parallel::Strategy;
17    use std::fmt::Debug;
18
19    mod no_coding;
20    pub use no_coding::{Error as NoCodingError, NoCoding};
21
22    mod reed_solomon;
23    pub use reed_solomon::{Error as ReedSolomonError, ReedSolomon};
24
25    mod zoda;
26    pub use zoda::{Error as ZodaError, Zoda};
27
28    /// Configuration common to all encoding schemes.
29    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
30    #[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
31    pub struct Config {
32        /// The minimum number of shards needed to encode the data.
33        pub minimum_shards: u16,
34        /// Extra shards beyond the minimum number.
35        ///
36        /// Alternatively, one can think of the configuration as having a total number
37        /// `N = extra_shards + minimum_shards`, but by specifying the `extra_shards`
38        /// rather than `N`, we avoid needing to check that `minimum_shards <= N`.
39        pub extra_shards: u16,
40    }
41
42    impl Config {
43        /// Returns the total number of shards produced by this configuration.
44        pub fn total_shards(&self) -> u32 {
45            u32::from(self.minimum_shards) + u32::from(self.extra_shards)
46        }
47    }
48
49    impl FixedSize for Config {
50        const SIZE: usize = 2 * <u16 as FixedSize>::SIZE;
51    }
52
53    impl Write for Config {
54        fn write(&self, buf: &mut impl bytes::BufMut) {
55            self.minimum_shards.write(buf);
56            self.extra_shards.write(buf);
57        }
58    }
59
60    impl Read for Config {
61        type Cfg = ();
62
63        fn read_cfg(buf: &mut impl Buf, cfg: &Self::Cfg) -> Result<Self, commonware_codec::Error> {
64            Ok(Self {
65                minimum_shards: u16::read_cfg(buf, cfg)?,
66                extra_shards: u16::read_cfg(buf, cfg)?,
67            })
68        }
69    }
70
71    /// The configuration for decoding shard data.
72    #[derive(Clone, Debug)]
73    pub struct CodecConfig {
74        /// The maximum number of bytes a shard is expected to contain.
75        ///
76        /// This can be an upper bound, and only constrains the non-fixed-size portion
77        /// of shard data.
78        pub maximum_shard_size: usize,
79    }
80
81    /// A scheme for encoding data into pieces, and recovering the data from those pieces.
82    ///
83    /// # Example
84    /// ```
85    /// use commonware_coding::{Config, ReedSolomon, Scheme as _};
86    /// use commonware_cryptography::Sha256;
87    /// use commonware_parallel::Sequential;
88    ///
89    /// const STRATEGY: Sequential = Sequential;
90    ///
91    /// type RS = ReedSolomon<Sha256>;
92    ///
93    /// let config = Config { minimum_shards: 2, extra_shards: 1 };
94    /// let data = b"Hello!";
95    /// // Turn the data into shards, and a commitment to those shards.
96    /// let (commitment, shards) =
97    ///      RS::encode(&config, data.as_slice(), &STRATEGY).unwrap();
98    ///
99    /// // Each person produces reshards, their own checked shard, and checking data
100    /// // to check other peoples reshards.
101    /// let (mut checking_data_w_shard, reshards): (Vec<_>, Vec<_>) = shards
102    ///         .into_iter()
103    ///         .enumerate()
104    ///         .map(|(i, shard)| {
105    ///             let (checking_data, checked_shard, reshard) = RS::reshard(&config, &commitment, i as u16, shard).unwrap();
106    ///             ((checking_data, checked_shard), reshard)
107    ///         })
108    ///         .collect();
109    /// // Let's pretend that the last item is "ours"
110    /// let (checking_data, checked_shard) = checking_data_w_shard.pop().unwrap();
111    /// // We can use this checking_data to check the other shards.
112    /// let mut checked_shards = Vec::new();
113    /// checked_shards.push(checked_shard);
114    /// for (i, reshard) in reshards.into_iter().enumerate().skip(1) {
115    ///   checked_shards.push(RS::check(&config, &commitment, &checking_data, i as u16, reshard).unwrap())
116    /// }
117    ///
118    /// let data2 = RS::decode(&config, &commitment, checking_data, &checked_shards[..2], &STRATEGY).unwrap();
119    /// assert_eq!(&data[..], &data2[..]);
120    ///
121    /// // Decoding works with different shards, with a guarantee to get the same result.
122    /// let data3 = RS::decode(&config, &commitment, checking_data, &checked_shards[1..], &STRATEGY).unwrap();
123    /// assert_eq!(&data[..], &data3[..]);
124    /// ```
125    pub trait Scheme: Debug + Clone + Send + Sync + 'static {
126        /// A commitment attesting to the shards of data.
127        type Commitment: Digest;
128        /// A shard of data, to be received by a participant.
129        type Shard: Clone + Eq + Codec<Cfg = CodecConfig> + Send + Sync + 'static;
130        /// A shard shared with other participants, to aid them in reconstruction.
131        ///
132        /// In most cases, this will be the same as `Shard`, but some schemes might
133        /// have extra information in `Shard` that may not be necessary to reconstruct
134        /// the data.
135        type ReShard: Clone + Eq + Codec<Cfg = CodecConfig> + Send + Sync + 'static;
136        /// Data which can assist in checking shards.
137        type CheckingData: Clone + Send;
138        /// A shard that has been checked for inclusion in the commitment.
139        ///
140        /// This allows excluding [Scheme::ReShard]s which are invalid, and shouldn't
141        /// be considered as progress towards meeting the minimum number of shards.
142        type CheckedShard;
143        type Error: std::fmt::Debug;
144
145        /// Encode a piece of data, returning a commitment, along with shards, and proofs.
146        ///
147        /// Each shard and proof is intended for exactly one participant. The number of shards returned
148        /// should equal `config.minimum_shards + config.extra_shards`.
149        #[allow(clippy::type_complexity)]
150        fn encode(
151            config: &Config,
152            data: impl Buf,
153            strategy: &impl Strategy,
154        ) -> Result<(Self::Commitment, Vec<Self::Shard>), Self::Error>;
155
156        /// Take your own shard, check it, and produce a [Scheme::ReShard] to forward to others.
157        ///
158        /// This takes in an index, which is the index you expect the shard to be.
159        ///
160        /// This will produce a [Scheme::CheckedShard] which counts towards the minimum
161        /// number of shards you need to reconstruct the data, in [Scheme::decode].
162        ///
163        /// You also get [Scheme::CheckingData], which has information you can use to check
164        /// the shards you receive from others.
165        #[allow(clippy::type_complexity)]
166        fn reshard(
167            config: &Config,
168            commitment: &Self::Commitment,
169            index: u16,
170            shard: Self::Shard,
171        ) -> Result<(Self::CheckingData, Self::CheckedShard, Self::ReShard), Self::Error>;
172
173        /// Check the integrity of a reshard, producing a checked shard.
174        ///
175        /// This requires the [Scheme::CheckingData] produced by [Scheme::reshard].
176        ///
177        /// This takes in an index, to make sure that the reshard you're checking
178        /// is associated with the participant you expect it to be.
179        fn check(
180            config: &Config,
181            commitment: &Self::Commitment,
182            checking_data: &Self::CheckingData,
183            index: u16,
184            reshard: Self::ReShard,
185        ) -> Result<Self::CheckedShard, Self::Error>;
186
187        /// Decode the data from shards received from other participants.
188        ///
189        /// The data must be decodeable with as few as `config.minimum_shards`,
190        /// including your own shard.
191        ///
192        /// Calls to this function with the same commitment, but with different shards,
193        /// or shards in a different should also result in the same output data, or in failure.
194        /// In other words, when using the decoding function in a broader system, you
195        /// get a guarantee that every participant decoding will see the same final
196        /// data, even if they receive different shards, or receive them in a different order.
197        fn decode(
198            config: &Config,
199            commitment: &Self::Commitment,
200            checking_data: Self::CheckingData,
201            shards: &[Self::CheckedShard],
202            strategy: &impl Strategy,
203        ) -> Result<Vec<u8>, Self::Error>;
204    }
205
206    /// A marker trait indicating that [Scheme::check] proves validity of the encoding.
207    ///
208    /// In more detail, this means that upon a successful call to [Scheme::check],
209    /// guarantees that the shard results from a valid encoding of the data, and thus,
210    /// if other participants also call check, then the data is guaranteed to be reconstructable.
211    pub trait ValidatingScheme: Scheme {}
212});
213
214#[cfg(test)]
215mod test {
216    use super::*;
217    use crate::reed_solomon::ReedSolomon;
218    use commonware_codec::Encode;
219    use commonware_cryptography::Sha256;
220    use commonware_parallel::Sequential;
221    use std::cmp::Reverse;
222
223    const MAX_DATA_BYTES: usize = 1 << 31;
224
225    fn general_test<S: Scheme>(
226        name: &str,
227        data: &[u8],
228        min_shards: u16,
229        total_shards: u16,
230        indices: &[u16],
231    ) {
232        // If the indices reference some larger shard, use that as the total.
233        let total_shards = indices
234            .iter()
235            .map(|&x| x + 1)
236            .max()
237            .map_or(total_shards, |x| x.max(total_shards));
238        assert!(min_shards >= 1, "min_shards must be at least 1");
239        assert!(
240            indices.len() >= min_shards as usize,
241            "you need to supply at least {min_shards} indices"
242        );
243        assert!(
244            total_shards >= min_shards,
245            "total_shards ({total_shards}) must be >= min_shards ({min_shards})"
246        );
247
248        let config = Config {
249            minimum_shards: min_shards,
250            extra_shards: total_shards - min_shards,
251        };
252        let read_cfg = CodecConfig {
253            maximum_shard_size: MAX_DATA_BYTES,
254        };
255        let (commitment, shards) = S::encode(&config, data, &Sequential).unwrap();
256        // Pick out the packets we want, in reverse order.
257        let ((_, _, checking_data, my_checked_shard, _), other_packets) = {
258            let mut out = shards
259                .into_iter()
260                .enumerate()
261                .filter_map(|(i, shard)| {
262                    let shard = S::Shard::read_cfg(&mut shard.encode(), &read_cfg).unwrap();
263                    let i = i as u16;
264                    let pos_of_i = indices.iter().position(|&x| x == i)?;
265                    let (x0, x1, x2) = S::reshard(&config, &commitment, i, shard).unwrap();
266                    Some((pos_of_i, i, x0, x1, x2))
267                })
268                .collect::<Vec<_>>();
269            out.sort_by_key(|&(pos_of_i, _, _, _, _)| Reverse(pos_of_i));
270            let first = out.pop().unwrap();
271            (first, out)
272        };
273        let checked_shards = {
274            let mut others = other_packets
275                .into_iter()
276                .map(|(_, i, _, _, reshard)| {
277                    let reshard = S::ReShard::read_cfg(&mut reshard.encode(), &read_cfg).unwrap();
278                    S::check(&config, &commitment, &checking_data, i, reshard).unwrap()
279                })
280                .collect::<Vec<_>>();
281            others.push(my_checked_shard);
282            others
283        };
284        let decoded = S::decode(
285            &config,
286            &commitment,
287            checking_data,
288            &checked_shards,
289            &Sequential,
290        )
291        .unwrap();
292        assert_eq!(&decoded, data, "{name} failed");
293    }
294
295    fn test_basic<S: Scheme>() {
296        general_test::<S>("test_basic", b"Hello, Reed-Solomon!", 4, 7, &[0, 1, 2, 3]);
297    }
298
299    fn test_moderate<S: Scheme>() {
300        general_test::<S>(
301            "test_moderate",
302            b"Testing with more pieces than minimum",
303            4,
304            10,
305            &[0, 1, 2, 8, 9],
306        );
307    }
308
309    fn test_odd_shard_len<S: Scheme>() {
310        general_test::<S>("test_odd_shard_len", b"?", 2, 3, &[0, 1]);
311    }
312
313    fn test_recovery<S: Scheme>() {
314        general_test::<S>(
315            "test_recovery",
316            b"Testing recovery pieces",
317            3,
318            8,
319            &[5, 6, 7],
320        );
321    }
322
323    fn test_empty_data<S: Scheme>() {
324        general_test::<S>(
325            "test_empty_data",
326            b"",
327            30,
328            100,
329            (0..30u16).collect::<Vec<_>>().as_slice(),
330        );
331    }
332
333    fn test_large_data<S: Scheme>() {
334        general_test::<S>(
335            "test_large_data",
336            vec![42u8; 1000].as_slice(),
337            3,
338            4,
339            &[0, 1, 2],
340        );
341    }
342
343    fn test_no_data_two_shards<S: Scheme>() {
344        general_test::<S>("test_no_data_one_shard", b"", 1, 2, &[0]);
345    }
346
347    // This exercises an edge case in ZODA, but is also useful for other schemes.
348    fn test_2_pow_16_25_total_shards<S: Scheme>() {
349        general_test::<S>(
350            "test_2_pow_16_25_total_shards",
351            vec![0x67; 1 << 16].as_slice(),
352            8,
353            25,
354            &(0..8).collect::<Vec<_>>(),
355        );
356    }
357
358    fn test_suite<S: Scheme>() {
359        test_basic::<S>();
360        test_moderate::<S>();
361        test_odd_shard_len::<S>();
362        test_recovery::<S>();
363        test_empty_data::<S>();
364        test_large_data::<S>();
365        test_no_data_two_shards::<S>();
366        test_2_pow_16_25_total_shards::<S>();
367    }
368
369    #[test]
370    fn test_suite_reed_solomon() {
371        test_suite::<ReedSolomon<Sha256>>();
372    }
373
374    #[test]
375    fn test_suite_no_coding() {
376        test_suite::<NoCoding<Sha256>>();
377    }
378
379    #[test]
380    fn test_suite_zoda() {
381        test_suite::<Zoda<Sha256>>();
382    }
383
384    #[cfg(feature = "arbitrary")]
385    mod conformance {
386        use super::*;
387        use commonware_codec::conformance::CodecConformance;
388
389        commonware_conformance::conformance_tests! {
390            CodecConformance<Config>,
391        }
392    }
393}