Skip to main content

commonware_coding/
lib.rs

1//! Encode data to enable recovery from a subset of fragments.
2//!
3//! # Status
4//!
5//! Stability varies by primitive. See [README](https://github.com/commonwarexyz/monorepo#stability) for details.
6
7#![doc(
8    html_logo_url = "https://commonware.xyz/imgs/rustdoc_logo.svg",
9    html_favicon_url = "https://commonware.xyz/favicon.ico"
10)]
11
12commonware_macros::stability_scope!(ALPHA {
13    use bytes::Buf;
14    use commonware_codec::{Codec, FixedSize, Read, Write};
15    use commonware_cryptography::Digest;
16    use commonware_parallel::Strategy;
17    use std::{fmt::Debug, num::NonZeroU16};
18
19    mod no_coding;
20    pub use no_coding::{Error as NoCodingError, NoCoding};
21
22    mod reed_solomon;
23    pub use reed_solomon::{Error as ReedSolomonError, ReedSolomon};
24
25    mod zoda;
26    pub use zoda::{Error as ZodaError, Zoda};
27
28    /// Configuration common to all encoding schemes.
29    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
30    pub struct Config {
31        /// The minimum number of shards needed to encode the data.
32        pub minimum_shards: NonZeroU16,
33        /// Extra shards beyond the minimum number.
34        ///
35        /// Alternatively, one can think of the configuration as having a total number
36        /// `N = extra_shards + minimum_shards`, but by specifying the `extra_shards`
37        /// rather than `N`, we avoid needing to check that `minimum_shards <= N`.
38        pub extra_shards: NonZeroU16,
39    }
40
41    impl Config {
42        /// Returns the total number of shards produced by this configuration.
43        pub fn total_shards(&self) -> u32 {
44            u32::from(self.minimum_shards.get()) + u32::from(self.extra_shards.get())
45        }
46    }
47
48    impl FixedSize for Config {
49        const SIZE: usize = 2 * <NonZeroU16 as FixedSize>::SIZE;
50    }
51
52    impl Write for Config {
53        fn write(&self, buf: &mut impl bytes::BufMut) {
54            self.minimum_shards.write(buf);
55            self.extra_shards.write(buf);
56        }
57    }
58
59    impl Read for Config {
60        type Cfg = ();
61
62        fn read_cfg(buf: &mut impl Buf, cfg: &Self::Cfg) -> Result<Self, commonware_codec::Error> {
63            Ok(Self {
64                minimum_shards: NonZeroU16::read_cfg(buf, cfg)?,
65                extra_shards: NonZeroU16::read_cfg(buf, cfg)?,
66            })
67        }
68    }
69
70    #[cfg(feature = "arbitrary")]
71    impl<'a> arbitrary::Arbitrary<'a> for Config {
72        fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
73            let minimum_shards = commonware_utils::NZU16!(u.int_in_range(1..=u16::MAX)?);
74            let extra_shards = commonware_utils::NZU16!(u.int_in_range(1..=u16::MAX)?);
75            Ok(Self {
76                minimum_shards,
77                extra_shards,
78            })
79        }
80    }
81
82    /// The configuration for decoding shard data.
83    #[derive(Clone, Debug)]
84    pub struct CodecConfig {
85        /// The maximum number of bytes a shard is expected to contain.
86        ///
87        /// This can be an upper bound, and only constrains the non-fixed-size portion
88        /// of shard data.
89        pub maximum_shard_size: usize,
90    }
91
92    /// A scheme for encoding data into pieces, and recovering the data from those pieces.
93    ///
94    /// # Example
95    /// ```
96    /// use commonware_coding::{Config, ReedSolomon, Scheme as _};
97    /// use commonware_cryptography::Sha256;
98    /// use commonware_parallel::Sequential;
99    /// use commonware_utils::NZU16;
100    ///
101    /// const STRATEGY: Sequential = Sequential;
102    ///
103    /// type RS = ReedSolomon<Sha256>;
104    ///
105    /// let config = Config {
106    ///     minimum_shards: NZU16!(2),
107    ///     extra_shards: NZU16!(1),
108    /// };
109    /// let data = b"Hello!";
110    /// // Turn the data into shards, and a commitment to those shards.
111    /// let (commitment, shards) =
112    ///      RS::encode(&config, data.as_slice(), &STRATEGY).unwrap();
113    ///
114    /// // Each person produces weak shards, their own checked shard, and checking data
115    /// // to check other peoples weak shards.
116    /// let (mut checking_data_w_shard, weak_shards): (Vec<_>, Vec<_>) = shards
117    ///         .into_iter()
118    ///         .enumerate()
119    ///         .map(|(i, shard)| {
120    ///             let (checking_data, checked_shard, weak_shard) = RS::weaken(&config, &commitment, i as u16, shard).unwrap();
121    ///             ((checking_data, checked_shard), weak_shard)
122    ///         })
123    ///         .collect();
124    /// // Let's pretend that the last item is "ours"
125    /// let (checking_data, checked_shard) = checking_data_w_shard.pop().unwrap();
126    /// // We can use this checking_data to check the other shards.
127    /// let mut checked_shards = Vec::new();
128    /// checked_shards.push(checked_shard);
129    /// for (i, weak_shard) in weak_shards.into_iter().enumerate().skip(1) {
130    ///   checked_shards.push(RS::check(&config, &commitment, &checking_data, i as u16, weak_shard).unwrap())
131    /// }
132    ///
133    /// let data2 = RS::decode(&config, &commitment, checking_data, &checked_shards[..2], &STRATEGY).unwrap();
134    /// assert_eq!(&data[..], &data2[..]);
135    ///
136    /// // Decoding works with different shards, with a guarantee to get the same result.
137    /// let data3 = RS::decode(&config, &commitment, checking_data, &checked_shards[1..], &STRATEGY).unwrap();
138    /// assert_eq!(&data[..], &data3[..]);
139    /// ```
140    ///
141    /// # Guarantees
142    ///
143    /// Here are additional properties that implementors of this trait need to
144    /// consider, and that users of this trait can rely on.
145    ///
146    /// ## Weaken vs Check
147    ///
148    /// [`Scheme::weaken`] and [`Scheme::check`] should agree, even for malicious encoders.
149    ///
150    /// It should not be possible for parties A and B to call `weaken` successfully,
151    /// but then have either of them fail on the other's shard when calling `check`.
152    ///
153    /// In other words, if an honest party considers their shard to be correctly
154    /// formed, then other honest parties which have successfully constructed their
155    /// checking data will also agree with the shard being correct.
156    ///
157    /// A violation of this property would be, for example, if a malicious payload
158    /// could convince two parties that they both have valid shards, but then the
159    /// checking data they produce from the malicious payload reports issues with
160    /// those shards.
161    pub trait Scheme: Debug + Clone + Send + Sync + 'static {
162        /// A commitment attesting to the shards of data.
163        type Commitment: Digest;
164        /// A strong shard of data, to be received by a participant.
165        type StrongShard: Clone + Debug + Eq + Codec<Cfg = CodecConfig> + Send + Sync + 'static;
166        /// A weak shard shared with other participants, to aid them in reconstruction.
167        ///
168        /// In most cases, this will be the same as `StrongShard`, but some schemes might
169        /// have extra information in `StrongShard` that may not be necessary to reconstruct
170        /// the data.
171        type WeakShard: Clone + Debug + Eq + Codec<Cfg = CodecConfig> + Send + Sync + 'static;
172        /// Data which can assist in checking shards.
173        type CheckingData: Clone + Send + Sync;
174        /// A shard that has been checked for inclusion in the commitment.
175        ///
176        /// This allows excluding [Scheme::WeakShard]s which are invalid, and shouldn't
177        /// be considered as progress towards meeting the minimum number of shards.
178        type CheckedShard: Clone + Send + Sync;
179        /// The type of errors that can occur during encoding, weakening, checking, and decoding.
180        type Error: std::fmt::Debug + Send;
181
182        /// Encode a piece of data, returning a commitment, along with shards, and proofs.
183        ///
184        /// Each shard and proof is intended for exactly one participant. The number of shards returned
185        /// should equal `config.minimum_shards + config.extra_shards`.
186        #[allow(clippy::type_complexity)]
187        fn encode(
188            config: &Config,
189            data: impl Buf,
190            strategy: &impl Strategy,
191        ) -> Result<(Self::Commitment, Vec<Self::StrongShard>), Self::Error>;
192
193        /// Take your own shard, check it, and produce a [Scheme::WeakShard] to forward to others.
194        ///
195        /// This takes in an index, which is the index you expect the shard to be.
196        ///
197        /// This will produce a [Scheme::CheckedShard] which counts towards the minimum
198        /// number of shards you need to reconstruct the data, in [Scheme::decode].
199        ///
200        /// You also get [Scheme::CheckingData], which has information you can use to check
201        /// the shards you receive from others.
202        #[allow(clippy::type_complexity)]
203        fn weaken(
204            config: &Config,
205            commitment: &Self::Commitment,
206            index: u16,
207            shard: Self::StrongShard,
208        ) -> Result<(Self::CheckingData, Self::CheckedShard, Self::WeakShard), Self::Error>;
209
210        /// Check the integrity of a weak shard, producing a checked shard.
211        ///
212        /// This requires the [Scheme::CheckingData] produced by [Scheme::weaken].
213        ///
214        /// This takes in an index, to make sure that the weak shard you're checking
215        /// is associated with the participant you expect it to be.
216        fn check(
217            config: &Config,
218            commitment: &Self::Commitment,
219            checking_data: &Self::CheckingData,
220            index: u16,
221            weak_shard: Self::WeakShard,
222        ) -> Result<Self::CheckedShard, Self::Error>;
223
224        /// Decode the data from shards received from other participants.
225        ///
226        /// The data must be decodeable with as few as `config.minimum_shards`,
227        /// including your own shard.
228        ///
229        /// Calls to this function with the same commitment, but with different shards,
230        /// or shards in a different should also result in the same output data, or in failure.
231        /// In other words, when using the decoding function in a broader system, you
232        /// get a guarantee that every participant decoding will see the same final
233        /// data, even if they receive different shards, or receive them in a different order.
234        fn decode(
235            config: &Config,
236            commitment: &Self::Commitment,
237            checking_data: Self::CheckingData,
238            shards: &[Self::CheckedShard],
239            strategy: &impl Strategy,
240        ) -> Result<Vec<u8>, Self::Error>;
241    }
242
243    /// A marker trait indicating that [Scheme::check] proves validity of the encoding.
244    ///
245    /// In more detail, this means that upon a successful call to [Scheme::check],
246    /// guarantees that the shard results from a valid encoding of the data, and thus,
247    /// if other participants also call check, then the data is guaranteed to be reconstructable.
248    pub trait ValidatingScheme: Scheme {}
249});
250
251#[cfg(test)]
252mod test {
253    use super::*;
254    use crate::reed_solomon::ReedSolomon;
255    use arbitrary::Unstructured;
256    use commonware_codec::Encode;
257    use commonware_cryptography::Sha256;
258    use commonware_invariants::minifuzz;
259    use commonware_macros::test_group;
260    use commonware_parallel::Sequential;
261    use commonware_utils::NZU16;
262
263    const MAX_SHARD_SIZE: usize = 1 << 31;
264    const MAX_SHARDS: u16 = 32;
265    const MAX_DATA: usize = 1024;
266    const MIN_EXTRA_SHARDS: u16 = 1;
267
268    fn roundtrip<S: Scheme>(config: &Config, data: &[u8], selected: &[u16]) {
269        // Encode data into shards.
270        let (commitment, shards) = S::encode(config, data, &Sequential).unwrap();
271        let read_cfg = CodecConfig {
272            maximum_shard_size: MAX_SHARD_SIZE,
273        };
274        for (i, shard) in shards.iter().enumerate() {
275            // Strong shard codec roundtrip.
276            let decoded_shard = S::StrongShard::read_cfg(&mut shard.encode(), &read_cfg).unwrap();
277            assert_eq!(decoded_shard, *shard);
278
279            // Weak shard codec roundtrip.
280            let (_, _, weak_shard) =
281                S::weaken(config, &commitment, i as u16, shard.clone()).unwrap();
282            let decoded_weak_shard =
283                S::WeakShard::read_cfg(&mut weak_shard.encode(), &read_cfg).unwrap();
284            assert_eq!(decoded_weak_shard, weak_shard);
285        }
286
287        // Collect selected shards for decoding. The first selected shard
288        // goes through `weaken`, the rest go through `check`.
289        let mut checked_shards = Vec::new();
290        let mut checking_data = None;
291        for (i, shard) in shards.into_iter().enumerate() {
292            if !selected.contains(&(i as u16)) {
293                continue;
294            }
295            let (cd, checked, weak_shard) =
296                S::weaken(config, &commitment, i as u16, shard).unwrap();
297            if let Some(cd) = &checking_data {
298                let checked = S::check(config, &commitment, cd, i as u16, weak_shard).unwrap();
299                checked_shards.push(checked);
300            } else {
301                checking_data = Some(cd);
302                checked_shards.push(checked);
303            }
304        }
305
306        // Shuffle the checked shards to verify decode is order-independent.
307        checked_shards.reverse();
308
309        // Decode from the selected shards and verify data integrity.
310        let decoded = S::decode(
311            config,
312            &commitment,
313            checking_data.unwrap(),
314            &checked_shards,
315            &Sequential,
316        )
317        .unwrap();
318        assert_eq!(decoded, data);
319    }
320
321    fn generate_case(u: &mut Unstructured<'_>) -> arbitrary::Result<(Config, Vec<u8>, Vec<u16>)> {
322        let minimum_shards = (u.arbitrary::<u16>()? % MAX_SHARDS) + 1;
323        let extra_shards =
324            MIN_EXTRA_SHARDS + (u.arbitrary::<u16>()? % (MAX_SHARDS - MIN_EXTRA_SHARDS + 1));
325        let total_shards = minimum_shards + extra_shards;
326
327        let data_len = usize::from(u.arbitrary::<u16>()?) % (MAX_DATA + 1);
328        let data = u.bytes(data_len)?.to_vec();
329
330        let selected_len = usize::from(minimum_shards)
331            + (usize::from(u.arbitrary::<u16>()?) % (usize::from(extra_shards) + 1));
332        let mut selected: Vec<u16> = (0..total_shards).collect();
333        for i in 0..selected_len {
334            let remaining = usize::from(total_shards) - i;
335            let j = i + (usize::from(u.arbitrary::<u16>()?) % remaining);
336            selected.swap(i, j);
337        }
338        selected.truncate(selected_len);
339
340        Ok((
341            Config {
342                minimum_shards: NZU16!(minimum_shards),
343                extra_shards: NZU16!(extra_shards),
344            },
345            data,
346            selected,
347        ))
348    }
349
350    #[test]
351    fn roundtrip_empty_data() {
352        let config = Config {
353            minimum_shards: NZU16!(30),
354            extra_shards: NZU16!(70),
355        };
356        let selected: Vec<u16> = (0..30).collect();
357
358        roundtrip::<ReedSolomon<Sha256>>(&config, b"", &selected);
359        roundtrip::<NoCoding<Sha256>>(&config, b"", &selected);
360        roundtrip::<Zoda<Sha256>>(&config, b"", &selected);
361    }
362
363    // This exercises an edge case in ZODA, but is also useful for other schemes.
364    #[test]
365    fn roundtrip_2_pow_16_25_total_shards() {
366        let config = Config {
367            minimum_shards: NZU16!(8),
368            extra_shards: NZU16!(17),
369        };
370        let data = vec![0x67; 1 << 16];
371        let selected: Vec<u16> = (0..8).collect();
372
373        roundtrip::<ReedSolomon<Sha256>>(&config, &data, &selected);
374        roundtrip::<NoCoding<Sha256>>(&config, &data, &selected);
375        roundtrip::<Zoda<Sha256>>(&config, &data, &selected);
376    }
377
378    #[test]
379    fn minifuzz_roundtrip_reed_solomon() {
380        minifuzz::Builder::default()
381            .with_seed(0)
382            .with_search_limit(64)
383            .test(|u| {
384                let (config, data, selected) = generate_case(u)?;
385                roundtrip::<ReedSolomon<Sha256>>(&config, &data, &selected);
386                Ok(())
387            });
388    }
389
390    #[test]
391    fn minifuzz_roundtrip_no_coding() {
392        minifuzz::Builder::default()
393            .with_seed(0)
394            .with_search_limit(64)
395            .test(|u| {
396                let (config, data, selected) = generate_case(u)?;
397                roundtrip::<NoCoding<Sha256>>(&config, &data, &selected);
398                Ok(())
399            });
400    }
401
402    #[test_group("slow")]
403    #[test]
404    fn minifuzz_roundtrip_zoda() {
405        minifuzz::Builder::default()
406            .with_seed(0)
407            .with_search_limit(64)
408            .test(|u| {
409                let (config, data, selected) = generate_case(u)?;
410                roundtrip::<Zoda<Sha256>>(&config, &data, &selected);
411                Ok(())
412            });
413    }
414
415    #[cfg(feature = "arbitrary")]
416    mod conformance {
417        use super::*;
418        use commonware_codec::conformance::CodecConformance;
419
420        commonware_conformance::conformance_tests! {
421            CodecConformance<Config>,
422        }
423    }
424}