commonware_coding/
lib.rs

1//! Encode data to enable recovery from a subset of fragments.
2//!
3//! # Status
4//!
5//! `commonware-coding` is **ALPHA** software and is not yet recommended for production use. Developers should
6//! expect breaking changes and occasional instability.
7
8#![doc(
9    html_logo_url = "https://commonware.xyz/imgs/rustdoc_logo.svg",
10    html_favicon_url = "https://commonware.xyz/favicon.ico"
11)]
12
13use bytes::Buf;
14use commonware_codec::{Codec, FixedSize, Read, Write};
15use commonware_parallel::Strategy;
16use std::fmt::Debug;
17
18mod reed_solomon;
19use commonware_cryptography::Digest;
20pub use reed_solomon::{Error as ReedSolomonError, ReedSolomon};
21
22mod no_coding;
23pub use no_coding::{Error as NoCodingError, NoCoding};
24
25mod zoda;
26pub use zoda::{Error as ZodaError, Zoda};
27
28/// Configuration common to all encoding schemes.
29#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
30#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]
31pub struct Config {
32    /// The minimum number of shards needed to encode the data.
33    pub minimum_shards: u16,
34    /// Extra shards beyond the minimum number.
35    ///
36    /// Alternatively, one can think of the configuration as having a total number
37    /// `N = extra_shards + minimum_shards`, but by specifying the `extra_shards`
38    /// rather than `N`, we avoid needing to check that `minimum_shards <= N`.
39    pub extra_shards: u16,
40}
41
42impl Config {
43    /// Returns the total number of shards produced by this configuration.
44    pub fn total_shards(&self) -> u32 {
45        u32::from(self.minimum_shards) + u32::from(self.extra_shards)
46    }
47}
48
49impl FixedSize for Config {
50    const SIZE: usize = 2 * <u16 as FixedSize>::SIZE;
51}
52
53impl Write for Config {
54    fn write(&self, buf: &mut impl bytes::BufMut) {
55        self.minimum_shards.write(buf);
56        self.extra_shards.write(buf);
57    }
58}
59
60impl Read for Config {
61    type Cfg = ();
62
63    fn read_cfg(buf: &mut impl Buf, cfg: &Self::Cfg) -> Result<Self, commonware_codec::Error> {
64        Ok(Self {
65            minimum_shards: u16::read_cfg(buf, cfg)?,
66            extra_shards: u16::read_cfg(buf, cfg)?,
67        })
68    }
69}
70
71/// The configuration for decoding shard data.
72#[derive(Clone, Debug)]
73pub struct CodecConfig {
74    /// The maximum number of bytes a shard is expected to contain.
75    ///
76    /// This can be an upper bound, and only constrains the non-fixed-size portion
77    /// of shard data.
78    pub maximum_shard_size: usize,
79}
80
81/// A scheme for encoding data into pieces, and recovering the data from those pieces.
82///
83/// # Example
84/// ```
85/// use commonware_coding::{Config, ReedSolomon, Scheme as _};
86/// use commonware_cryptography::Sha256;
87/// use commonware_parallel::Sequential;
88///
89/// const STRATEGY: Sequential = Sequential;
90///
91/// type RS = ReedSolomon<Sha256>;
92///
93/// let config = Config { minimum_shards: 2, extra_shards: 1 };
94/// let data = b"Hello!";
95/// // Turn the data into shards, and a commitment to those shards.
96/// let (commitment, shards) =
97///      RS::encode(&config, data.as_slice(), &STRATEGY).unwrap();
98///
99/// // Each person produces reshards, their own checked shard, and checking data
100/// // to check other peoples reshards.
101/// let (mut checking_data_w_shard, reshards): (Vec<_>, Vec<_>) = shards
102///         .into_iter()
103///         .enumerate()
104///         .map(|(i, shard)| {
105///             let (checking_data, checked_shard, reshard) = RS::reshard(&config, &commitment, i as u16, shard).unwrap();
106///             ((checking_data, checked_shard), reshard)
107///         })
108///         .collect();
109/// // Let's pretend that the last item is "ours"
110/// let (checking_data, checked_shard) = checking_data_w_shard.pop().unwrap();
111/// // We can use this checking_data to check the other shards.
112/// let mut checked_shards = Vec::new();
113/// checked_shards.push(checked_shard);
114/// for (i, reshard) in reshards.into_iter().enumerate().skip(1) {
115///   checked_shards.push(RS::check(&config, &commitment, &checking_data, i as u16, reshard).unwrap())
116/// }
117///
118/// let data2 = RS::decode(&config, &commitment, checking_data, &checked_shards[..2], &STRATEGY).unwrap();
119/// assert_eq!(&data[..], &data2[..]);
120///
121/// // Decoding works with different shards, with a guarantee to get the same result.
122/// let data3 = RS::decode(&config, &commitment, checking_data, &checked_shards[1..], &STRATEGY).unwrap();
123/// assert_eq!(&data[..], &data3[..]);
124/// ```
125pub trait Scheme: Debug + Clone + Send + Sync + 'static {
126    /// A commitment attesting to the shards of data.
127    type Commitment: Digest;
128    /// A shard of data, to be received by a participant.
129    type Shard: Clone + Eq + Codec<Cfg = CodecConfig> + Send + Sync + 'static;
130    /// A shard shared with other participants, to aid them in reconstruction.
131    ///
132    /// In most cases, this will be the same as `Shard`, but some schemes might
133    /// have extra information in `Shard` that may not be necessary to reconstruct
134    /// the data.
135    type ReShard: Clone + Eq + Codec<Cfg = CodecConfig> + Send + Sync + 'static;
136    /// Data which can assist in checking shards.
137    type CheckingData: Clone + Send;
138    /// A shard that has been checked for inclusion in the commitment.
139    ///
140    /// This allows excluding [Scheme::ReShard]s which are invalid, and shouldn't
141    /// be considered as progress towards meeting the minimum number of shards.
142    type CheckedShard;
143    type Error: std::fmt::Debug;
144
145    /// Encode a piece of data, returning a commitment, along with shards, and proofs.
146    ///
147    /// Each shard and proof is intended for exactly one participant. The number of shards returned
148    /// should equal `config.minimum_shards + config.extra_shards`.
149    #[allow(clippy::type_complexity)]
150    fn encode(
151        config: &Config,
152        data: impl Buf,
153        strategy: &impl Strategy,
154    ) -> Result<(Self::Commitment, Vec<Self::Shard>), Self::Error>;
155
156    /// Take your own shard, check it, and produce a [Scheme::ReShard] to forward to others.
157    ///
158    /// This takes in an index, which is the index you expect the shard to be.
159    ///
160    /// This will produce a [Scheme::CheckedShard] which counts towards the minimum
161    /// number of shards you need to reconstruct the data, in [Scheme::decode].
162    ///
163    /// You also get [Scheme::CheckingData], which has information you can use to check
164    /// the shards you receive from others.
165    #[allow(clippy::type_complexity)]
166    fn reshard(
167        config: &Config,
168        commitment: &Self::Commitment,
169        index: u16,
170        shard: Self::Shard,
171    ) -> Result<(Self::CheckingData, Self::CheckedShard, Self::ReShard), Self::Error>;
172
173    /// Check the integrity of a reshard, producing a checked shard.
174    ///
175    /// This requires the [Scheme::CheckingData] produced by [Scheme::reshard].
176    ///
177    /// This takes in an index, to make sure that the reshard you're checking
178    /// is associated with the participant you expect it to be.
179    fn check(
180        config: &Config,
181        commitment: &Self::Commitment,
182        checking_data: &Self::CheckingData,
183        index: u16,
184        reshard: Self::ReShard,
185    ) -> Result<Self::CheckedShard, Self::Error>;
186
187    /// Decode the data from shards received from other participants.
188    ///
189    /// The data must be decodeable with as few as `config.minimum_shards`,
190    /// including your own shard.
191    ///
192    /// Calls to this function with the same commitment, but with different shards,
193    /// or shards in a different should also result in the same output data, or in failure.
194    /// In other words, when using the decoding function in a broader system, you
195    /// get a guarantee that every participant decoding will see the same final
196    /// data, even if they receive different shards, or receive them in a different order.
197    fn decode(
198        config: &Config,
199        commitment: &Self::Commitment,
200        checking_data: Self::CheckingData,
201        shards: &[Self::CheckedShard],
202        strategy: &impl Strategy,
203    ) -> Result<Vec<u8>, Self::Error>;
204}
205
206/// A marker trait indicating that [Scheme::check] proves validity of the encoding.
207///
208/// In more detail, this means that upon a successful call to [Scheme::check],
209/// guarantees that the shard results from a valid encoding of the data, and thus,
210/// if other participants also call check, then the data is guaranteed to be reconstructable.
211pub trait ValidatingScheme: Scheme {}
212
213#[cfg(test)]
214mod test {
215    use super::*;
216    use crate::reed_solomon::ReedSolomon;
217    use commonware_codec::Encode;
218    use commonware_cryptography::Sha256;
219    use commonware_parallel::Sequential;
220    use std::cmp::Reverse;
221
222    const MAX_DATA_BYTES: usize = 1 << 31;
223
224    fn general_test<S: Scheme>(
225        name: &str,
226        data: &[u8],
227        min_shards: u16,
228        total_shards: u16,
229        indices: &[u16],
230    ) {
231        // If the indices reference some larger shard, use that as the total.
232        let total_shards = indices
233            .iter()
234            .map(|&x| x + 1)
235            .max()
236            .map_or(total_shards, |x| x.max(total_shards));
237        assert!(min_shards >= 1, "min_shards must be at least 1");
238        assert!(
239            indices.len() >= min_shards as usize,
240            "you need to supply at least {min_shards} indices"
241        );
242        assert!(
243            total_shards >= min_shards,
244            "total_shards ({total_shards}) must be >= min_shards ({min_shards})"
245        );
246
247        let config = Config {
248            minimum_shards: min_shards,
249            extra_shards: total_shards - min_shards,
250        };
251        let read_cfg = CodecConfig {
252            maximum_shard_size: MAX_DATA_BYTES,
253        };
254        let (commitment, shards) = S::encode(&config, data, &Sequential).unwrap();
255        // Pick out the packets we want, in reverse order.
256        let ((_, _, checking_data, my_checked_shard, _), other_packets) = {
257            let mut out = shards
258                .into_iter()
259                .enumerate()
260                .filter_map(|(i, shard)| {
261                    let shard = S::Shard::read_cfg(&mut shard.encode(), &read_cfg).unwrap();
262                    let i = i as u16;
263                    let pos_of_i = indices.iter().position(|&x| x == i)?;
264                    let (x0, x1, x2) = S::reshard(&config, &commitment, i, shard).unwrap();
265                    Some((pos_of_i, i, x0, x1, x2))
266                })
267                .collect::<Vec<_>>();
268            out.sort_by_key(|&(pos_of_i, _, _, _, _)| Reverse(pos_of_i));
269            let first = out.pop().unwrap();
270            (first, out)
271        };
272        let checked_shards = {
273            let mut others = other_packets
274                .into_iter()
275                .map(|(_, i, _, _, reshard)| {
276                    let reshard = S::ReShard::read_cfg(&mut reshard.encode(), &read_cfg).unwrap();
277                    S::check(&config, &commitment, &checking_data, i, reshard).unwrap()
278                })
279                .collect::<Vec<_>>();
280            others.push(my_checked_shard);
281            others
282        };
283        let decoded = S::decode(
284            &config,
285            &commitment,
286            checking_data,
287            &checked_shards,
288            &Sequential,
289        )
290        .unwrap();
291        assert_eq!(&decoded, data, "{name} failed");
292    }
293
294    fn test_basic<S: Scheme>() {
295        general_test::<S>("test_basic", b"Hello, Reed-Solomon!", 4, 7, &[0, 1, 2, 3]);
296    }
297
298    fn test_moderate<S: Scheme>() {
299        general_test::<S>(
300            "test_moderate",
301            b"Testing with more pieces than minimum",
302            4,
303            10,
304            &[0, 1, 2, 8, 9],
305        );
306    }
307
308    fn test_odd_shard_len<S: Scheme>() {
309        general_test::<S>("test_odd_shard_len", b"?", 2, 3, &[0, 1]);
310    }
311
312    fn test_recovery<S: Scheme>() {
313        general_test::<S>(
314            "test_recovery",
315            b"Testing recovery pieces",
316            3,
317            8,
318            &[5, 6, 7],
319        );
320    }
321
322    fn test_empty_data<S: Scheme>() {
323        general_test::<S>(
324            "test_empty_data",
325            b"",
326            30,
327            100,
328            (0..30u16).collect::<Vec<_>>().as_slice(),
329        );
330    }
331
332    fn test_large_data<S: Scheme>() {
333        general_test::<S>(
334            "test_large_data",
335            vec![42u8; 1000].as_slice(),
336            3,
337            4,
338            &[0, 1, 2],
339        );
340    }
341
342    fn test_no_data_two_shards<S: Scheme>() {
343        general_test::<S>("test_no_data_one_shard", b"", 1, 2, &[0]);
344    }
345
346    // This exercises an edge case in ZODA, but is also useful for other schemes.
347    fn test_2_pow_16_25_total_shards<S: Scheme>() {
348        general_test::<S>(
349            "test_2_pow_16_25_total_shards",
350            vec![0x67; 1 << 16].as_slice(),
351            8,
352            25,
353            &(0..8).collect::<Vec<_>>(),
354        );
355    }
356
357    fn test_suite<S: Scheme>() {
358        test_basic::<S>();
359        test_moderate::<S>();
360        test_odd_shard_len::<S>();
361        test_recovery::<S>();
362        test_empty_data::<S>();
363        test_large_data::<S>();
364        test_no_data_two_shards::<S>();
365        test_2_pow_16_25_total_shards::<S>();
366    }
367
368    #[test]
369    fn test_suite_reed_solomon() {
370        test_suite::<ReedSolomon<Sha256>>();
371    }
372
373    #[test]
374    fn test_suite_no_coding() {
375        test_suite::<NoCoding<Sha256>>();
376    }
377
378    #[test]
379    fn test_suite_zoda() {
380        test_suite::<Zoda<Sha256>>();
381    }
382
383    #[cfg(feature = "arbitrary")]
384    mod conformance {
385        use super::*;
386        use commonware_codec::conformance::CodecConformance;
387
388        commonware_conformance::conformance_tests! {
389            CodecConformance<Config>,
390        }
391    }
392}