// decds_lib/chunkset.rs
1use crate::{
2    chunk::{self, Chunk},
3    consts::DECDS_NUM_ERASURE_CODED_SHARES,
4    errors::DecdsError,
5    merkle_tree::MerkleTree,
6};
7
/// Represents a fixed set (= 16) of erasure-coded chunks, along with its Merkle root commitment.
/// This structure is used for encoding a fixed size (10MB = 10 * 2^20 bytes) portion of the original
/// blob data into `NUM_ERASURE_CODED_CHUNKS` (= 16) erasure-coded verifiable chunks, each carrying
/// a merkle proof of inclusion in both this chunkset and the blob.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct ChunkSet {
    /// Merkle root computed over the digests of all erasure-coded chunks in this set.
    commitment: blake3::Hash,
    /// The erasure-coded chunks, each bundled with its Merkle inclusion proof.
    chunks: Vec<chunk::ProofCarryingChunk>,
}
17
18impl ChunkSet {
19    pub const NUM_ORIGINAL_CHUNKS: usize = 10;
20    pub const BYTE_LENGTH: usize = Self::NUM_ORIGINAL_CHUNKS * Chunk::BYTE_LENGTH;
21    pub const NUM_ERASURE_CODED_CHUNKS: usize = DECDS_NUM_ERASURE_CODED_SHARES;
22    pub const PROOF_SIZE: usize = usize::ilog2(Self::NUM_ERASURE_CODED_CHUNKS) as usize;
23
24    /// Creates a new `ChunkSet` by taking a fixed sized block of data, splits into 10 equal sized chunks,
25    /// each of 1MB, RLNC encoding them into 16 erasure-coded chunks, and building a Merkle tree over these chunks.
26    ///
27    /// # Arguments
28    ///
29    /// * `chunkset_id` - The unique identifier for this chunkset.
30    /// * `data` - The raw data (10MB) to be erasure-coded into chunks for this chunkset.
31    ///
32    /// # Returns
33    ///
34    /// Returns a `Result` which is:
35    /// - `Ok(ChunkSet)` containing the newly created `ChunkSet` if successful.
36    /// - `Err(DecdsError::InvalidChunksetSize)` if the `data` length does not match `ChunkSet::BYTE_LENGTH`.
37    pub fn new(chunkset_id: usize, data: Vec<u8>) -> Result<ChunkSet, DecdsError> {
38        if data.len() != Self::BYTE_LENGTH {
39            return Err(DecdsError::InvalidChunksetSize(data.len()));
40        }
41
42        let mut rng = rand::rng();
43        let encoder = unsafe { rlnc::full::encoder::Encoder::new(data, Self::NUM_ORIGINAL_CHUNKS).unwrap_unchecked() };
44
45        let chunks = (0..Self::NUM_ERASURE_CODED_CHUNKS)
46            .map(|i| {
47                let chunk_id = chunkset_id * Self::NUM_ERASURE_CODED_CHUNKS + i;
48                let erasure_coded_data = encoder.code(&mut rng);
49
50                chunk::Chunk::new(chunkset_id, chunk_id, erasure_coded_data)
51            })
52            .collect::<Vec<Chunk>>();
53
54        let merkle_leaves = chunks.iter().map(|chunk| chunk.digest()).collect::<Vec<blake3::Hash>>();
55        let merkle_tree = unsafe { MerkleTree::new(merkle_leaves).unwrap_unchecked() };
56
57        let commitment = merkle_tree.get_root_commitment();
58
59        let proof_carrying_chunks = chunks
60            .into_iter()
61            .enumerate()
62            .map(|(leaf_idx, chunk)| chunk::ProofCarryingChunk::new(chunk, unsafe { merkle_tree.generate_proof(leaf_idx).unwrap_unchecked() }))
63            .collect::<Vec<chunk::ProofCarryingChunk>>();
64
65        Ok(ChunkSet {
66            commitment,
67            chunks: proof_carrying_chunks,
68        })
69    }
70
71    /// Returns the Merkle root commitment of this `ChunkSet`.
72    pub fn get_root_commitment(&self) -> blake3::Hash {
73        self.commitment
74    }
75
76    /// Retrieves a specific `ProofCarryingChunk` from the `ChunkSet` by its local chunk ID.
77    ///
78    /// # Arguments
79    ///
80    /// * `chunk_id` - The local ID (`>= 0 && < Self::NUM_ERASURE_CODED_CHUNKS`) of the chunk to retrieve within this chunkset.
81    ///
82    /// # Returns
83    ///
84    /// Returns a `Result` which is:
85    /// - `Ok(&chunk::ProofCarryingChunk)` containing a reference to the chunk if found.
86    /// - `Err(DecdsError::InvalidErasureCodedShareId)` if `chunk_id` is out of bounds for this chunkset.
87    pub fn get_chunk(&self, chunk_id: usize) -> Result<&chunk::ProofCarryingChunk, DecdsError> {
88        self.chunks.get(chunk_id).ok_or(DecdsError::InvalidErasureCodedShareId(chunk_id))
89    }
90}
91
/// A structure designed to help incrementally reconstruct the original data of a `ChunkSet`
/// by collecting enough erasure-coded chunks, verifying their integrity, and performing RLNC decoding.
pub struct RepairingChunkSet {
    /// ID of the chunkset being repaired; incoming chunks must carry the same ID.
    chunkset_id: usize,
    /// Expected Merkle root commitment, used to validate each chunk's inclusion proof.
    commitment: blake3::Hash,
    /// RLNC decoder that accumulates useful coded chunks until reconstruction is possible.
    decoder: rlnc::full::decoder::Decoder,
}
99
100impl RepairingChunkSet {
101    /// The padded byte length of individual chunks used in RLNC encoding.
102    /// It ensures that the total chunkset size is a multiple of `NUM_ORIGINAL_CHUNKS`,
103    /// after appending a single byte end-of-data marker.
104    const PADDED_CHUNK_BYTE_LEN: usize = (ChunkSet::BYTE_LENGTH + 1).div_ceil(ChunkSet::NUM_ORIGINAL_CHUNKS);
105
106    /// Creates a new `RepairingChunkSet` instance.
107    ///
108    /// # Arguments
109    ///
110    /// * `chunkset_id` - The ID of the chunkset being repaired.
111    /// * `commitment` - The expected Merkle root commitment of the chunkset, used for validating chunk inclusion in chunkset.
112    ///
113    /// # Returns
114    ///
115    /// A new `RepairingChunkSet` instance.
116    pub fn new(chunkset_id: usize, commitment: blake3::Hash) -> Self {
117        RepairingChunkSet {
118            chunkset_id,
119            commitment,
120            decoder: unsafe { rlnc::full::decoder::Decoder::new(Self::PADDED_CHUNK_BYTE_LEN, ChunkSet::NUM_ORIGINAL_CHUNKS).unwrap_unchecked() },
121        }
122    }
123
124    /// Adds a `ProofCarryingChunk` to the `RepairingChunkSet` after validating its Merkle proof.
125    /// The chunk's inclusion proof in this chunkset is verified against the `commitment` stored in `RepairingChunkSet`.
126    ///
127    /// # Arguments
128    ///
129    /// * `chunk` - The `ProofCarryingChunk` to add.
130    ///
131    /// # Returns
132    ///
133    /// Returns a `Result` which is:
134    /// - `Ok(())` if the chunk is successfully added and validated.
135    /// - `Err(DecdsError::InvalidProofInChunk)` if the chunk's inclusion proof is invalid for this chunkset.
136    /// - `Err(DecdsError::InvalidChunkMetadata)` if the chunk's `chunkset_id` does not match this `RepairingChunkSet`.
137    /// - `Err(DecdsError::ChunkDecodingFailed)` if the underlying RLNC decoding operation fails.
138    pub fn add_chunk(&mut self, chunk: &chunk::ProofCarryingChunk) -> Result<(), DecdsError> {
139        if chunk.validate_inclusion_in_chunkset(self.commitment) {
140            self.add_chunk_unvalidated(chunk)
141        } else {
142            Err(DecdsError::InvalidProofInChunk(chunk.get_chunkset_id()))
143        }
144    }
145
146    /// Adds a `ProofCarryingChunk` to the `RepairingChunkSet` without validating its Merkle proof.
147    /// This method is intended for use when proof validation has already occurred.
148    ///
149    /// # Arguments
150    ///
151    /// * `chunk` - The `ProofCarryingChunk` to add.
152    ///
153    /// # Returns
154    ///
155    /// Returns a `Result` which is:
156    /// - `Ok(())` if the chunk is successfully added.
157    /// - `Err(DecdsError::InvalidChunkMetadata)` if the chunk's `chunkset_id` does not match this `RepairingChunkSet`.
158    /// - `Err(DecdsError::ChunksetReadyToRepair)` if the chunkset is ready to repair, no more chunks are required. Just call `repair`.
159    /// - `Err(DecdsError::ChunkDecodingFailed)` if the underlying RLNC decoding operation fails.
160    pub fn add_chunk_unvalidated(&mut self, chunk: &chunk::ProofCarryingChunk) -> Result<(), DecdsError> {
161        if self.chunkset_id != chunk.get_chunkset_id() {
162            return Err(DecdsError::InvalidChunkMetadata(chunk.get_chunkset_id()));
163        }
164        if self.is_ready_to_repair() {
165            return Err(DecdsError::ChunksetReadyToRepair(self.chunkset_id));
166        }
167
168        self.decoder
169            .decode(chunk.get_erasure_coded_data())
170            .map_err(|err| DecdsError::ChunkDecodingFailed(chunk.get_chunkset_id(), err.to_string()))
171    }
172
173    /// Checks if enough useful erasure-coded chunks have been collected to repair the original data for this chunkset.
174    pub fn is_ready_to_repair(&self) -> bool {
175        self.decoder.is_already_decoded()
176    }
177
178    /// Repairs the original data of the chunkset if enough chunks have been collected.
179    /// This consumes the `RepairingChunkSet` as the decoding process is final.
180    ///
181    /// # Returns
182    ///
183    /// Returns a `Result` which is:
184    /// - `Ok(Vec<u8>)` containing the repaired original data if successful.
185    /// - `Err(DecdsError::ChunksetNotYetReadyToRepair)` if not enough chunks have been added yet.
186    /// - `Err(DecdsError::ChunksetRepairingFailed)` if an error occurs during the RLNC decoding process.
187    pub fn repair(self) -> Result<Vec<u8>, DecdsError> {
188        if self.is_ready_to_repair() {
189            self.decoder
190                .get_decoded_data()
191                .map_err(|err| DecdsError::ChunksetRepairingFailed(self.chunkset_id, format!("RLNC Decoding error: {}", err)))
192        } else {
193            Err(DecdsError::ChunksetNotYetReadyToRepair(self.chunkset_id))
194        }
195    }
196}
197
#[cfg(test)]
mod tests {
    use crate::{
        DecdsError,
        chunk::ProofCarryingChunk,
        chunkset::{ChunkSet, RepairingChunkSet},
        merkle_tree::tests::flip_a_bit,
    };
    use rand::{Rng, seq::SliceRandom};

    /// Flips one randomly chosen bit in the serialized proof-carrying chunk, returning the mutated bytes.
    /// Empty input is returned untouched.
    fn flip_a_single_bit_in_proof_carrying_chunk<R: Rng + ?Sized>(mut chunk_bytes: Vec<u8>, rng: &mut R) -> Vec<u8> {
        if chunk_bytes.is_empty() {
            return chunk_bytes;
        }

        let random_byte_index = rng.random_range(0..chunk_bytes.len());
        let random_bit_index = rng.random_range(0..u8::BITS) as usize;

        chunk_bytes[random_byte_index] = flip_a_bit(chunk_bytes[random_byte_index], random_bit_index);
        chunk_bytes
    }

    /// Every encoded chunk must carry a valid inclusion proof, and any single bit-flip in
    /// its serialized form must either fail deserialization or fail proof validation.
    #[test]
    fn prop_test_erasure_coding_chunks_and_validating_proofs_work() {
        const NUM_TEST_ITERATIONS: usize = 10;
        let mut rng = rand::rng();

        (0..NUM_TEST_ITERATIONS).for_each(|_| {
            let data = (0..ChunkSet::BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
            let chunkset = ChunkSet::new(0, data).expect("Must be able to build erasure-coded ChunkSet");

            for i in 0..ChunkSet::NUM_ERASURE_CODED_CHUNKS {
                let chunk = chunkset.get_chunk(i).expect("Must be able to lookup chunk by id");
                assert!(chunk.validate_inclusion_in_chunkset(chunkset.get_root_commitment()));

                let chunk_bytes = chunk.to_bytes().expect("Must be able to serialize proof-carrying chunk as bytes");
                let bit_flipped_chunk_bytes = flip_a_single_bit_in_proof_carrying_chunk(chunk_bytes, &mut rng);

                match ProofCarryingChunk::from_bytes(&bit_flipped_chunk_bytes) {
                    Ok((bit_flipped_chunk, _)) => assert!(!bit_flipped_chunk.validate_inclusion_in_chunkset(chunkset.get_root_commitment())),
                    Err(e) => assert!(e.to_string().starts_with("failed to deserialize proof carrying chunk: ")),
                }
            }
        });
    }

    /// Feeding shuffled coded chunks into a `RepairingChunkSet` must reconstruct the original data exactly.
    #[test]
    fn prop_test_repairing_erasure_coded_chunks_work() {
        const NUM_TEST_ITERATIONS: usize = 10;
        let mut rng = rand::rng();

        (0..NUM_TEST_ITERATIONS).for_each(|_| {
            let data = (0..ChunkSet::BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
            let data_copy = data.clone();

            let chunkset = ChunkSet::new(0, data).expect("Must be able to build erasure-coded ChunkSet");
            let mut repairing_chunkset = RepairingChunkSet::new(0, chunkset.get_root_commitment());

            let mut chunks = (0..ChunkSet::NUM_ERASURE_CODED_CHUNKS)
                .map(|i| chunkset.get_chunk(i).expect("Must be able to lookup chunk by id"))
                .collect::<Vec<&ProofCarryingChunk>>();
            chunks.shuffle(&mut rng);

            let mut chunk_idx = 0;
            while !repairing_chunkset.is_ready_to_repair() {
                let _ = repairing_chunkset.add_chunk(chunks[chunk_idx]);
                chunk_idx += 1;
            }

            let repaired_data = repairing_chunkset.repair().expect("Data must be reconstructed by this point!");
            assert_eq!(data_copy, repaired_data);
        });
    }

    /// `ChunkSet::new` must reject any input whose length differs from `ChunkSet::BYTE_LENGTH`.
    #[test]
    fn test_chunkset_new_invalid_size() {
        let data_too_small = vec![0u8; ChunkSet::BYTE_LENGTH - 1];
        let data_too_large = vec![0u8; ChunkSet::BYTE_LENGTH + 1];

        assert_eq!(
            ChunkSet::new(0, data_too_small),
            Err(DecdsError::InvalidChunksetSize(ChunkSet::BYTE_LENGTH - 1))
        );
        assert_eq!(
            ChunkSet::new(0, data_too_large),
            Err(DecdsError::InvalidChunksetSize(ChunkSet::BYTE_LENGTH + 1))
        );
    }

    /// Looking up a chunk id outside `0..NUM_ERASURE_CODED_CHUNKS` must return an error carrying that id.
    #[test]
    fn test_chunkset_get_chunk_out_of_bounds() {
        let mut rng = rand::rng();

        let data = (0..ChunkSet::BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
        let chunkset = ChunkSet::new(0, data).expect("Must be able to build erasure-coded ChunkSet");

        assert_eq!(
            chunkset.get_chunk(ChunkSet::NUM_ERASURE_CODED_CHUNKS),
            Err(DecdsError::InvalidErasureCodedShareId(ChunkSet::NUM_ERASURE_CODED_CHUNKS))
        );
        assert_eq!(
            chunkset.get_chunk(ChunkSet::NUM_ERASURE_CODED_CHUNKS + 100),
            Err(DecdsError::InvalidErasureCodedShareId(ChunkSet::NUM_ERASURE_CODED_CHUNKS + 100))
        );
    }

    /// A freshly constructed `RepairingChunkSet` stores its inputs and is not yet ready to repair.
    #[test]
    fn test_repairing_chunkset_new() {
        let mut rng = rand::rng();

        let data = (0..ChunkSet::BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
        let chunkset = ChunkSet::new(0, data).expect("Must be able to build erasure-coded ChunkSet");
        let commitment = chunkset.get_root_commitment();
        let chunkset_id = 0;

        let repairing_chunkset = RepairingChunkSet::new(chunkset_id, commitment);

        assert_eq!(repairing_chunkset.chunkset_id, chunkset_id);
        assert_eq!(repairing_chunkset.commitment, commitment);
        assert!(!repairing_chunkset.is_ready_to_repair());
    }

    /// A chunk whose proof does not match the repairer's commitment must be rejected.
    #[test]
    fn test_repairing_chunkset_add_chunk_invalid_proof_in_chunk() {
        let mut rng = rand::rng();

        let data = (0..ChunkSet::BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
        let chunkset = ChunkSet::new(0, data).expect("Must be able to build erasure-coded ChunkSet");

        // Create a repairing chunkset with a *different* commitment
        let tampered_commitment = blake3::hash(b"tampered_commitment");
        let mut repairing_chunkset = RepairingChunkSet::new(0, tampered_commitment);

        // Get a valid chunk from the original chunkset
        let valid_chunk = chunkset.get_chunk(0).unwrap();

        // Adding this valid chunk to a repairing_chunkset with a tampered commitment should fail
        assert_eq!(
            repairing_chunkset.add_chunk(valid_chunk).unwrap_err(),
            DecdsError::InvalidProofInChunk(valid_chunk.get_chunkset_id())
        );
    }

    /// A chunk carrying a mismatched chunkset id must be rejected even without proof validation.
    #[test]
    fn test_repairing_chunkset_add_chunk_unvalidated_invalid_chunk_metadata() {
        let mut rng = rand::rng();

        let data = (0..ChunkSet::BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
        let chunkset = ChunkSet::new(0, data).expect("Must be able to build erasure-coded ChunkSet");

        let chunk_from_chunkset_0 = chunkset.get_chunk(0).unwrap();

        // Create a repairing chunkset for a different ID (e.g., ID 1 instead of 0)
        let mut repairing_chunkset = RepairingChunkSet::new(1, chunkset.get_root_commitment());

        // Attempt to add a chunk that belongs to chunkset_id 0 to a repairing_chunkset for chunkset_id 1
        assert_eq!(
            repairing_chunkset.add_chunk_unvalidated(chunk_from_chunkset_0).unwrap_err(),
            DecdsError::InvalidChunkMetadata(chunk_from_chunkset_0.get_chunkset_id())
        );
    }

    /// `repair` must fail while fewer than `NUM_ORIGINAL_CHUNKS` useful chunks have been added.
    #[test]
    fn test_repairing_chunkset_repair_when_not_ready() {
        let mut rng = rand::rng();

        let data = (0..ChunkSet::BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
        let chunkset = ChunkSet::new(0, data).expect("Must be able to build erasure-coded ChunkSet");
        let mut repairing_chunkset = RepairingChunkSet::new(0, chunkset.get_root_commitment());

        // Add fewer than NUM_ORIGINAL_CHUNKS chunks
        let mut chunk_idx = 0;
        let mut useful_count = 0;

        while chunk_idx < ChunkSet::NUM_ERASURE_CODED_CHUNKS {
            let chunk = chunkset.get_chunk(chunk_idx).expect("Must be able to lookup chunk by id");
            // `.is_ok()` instead of `if let Ok(_)` — clippy::redundant_pattern_matching
            if repairing_chunkset.add_chunk(chunk).is_ok() {
                useful_count += 1;
            }

            chunk_idx += 1;
            if useful_count == ChunkSet::NUM_ORIGINAL_CHUNKS - 1 {
                break;
            }
        }

        assert!(!repairing_chunkset.is_ready_to_repair());
        assert_eq!(repairing_chunkset.repair(), Err(DecdsError::ChunksetNotYetReadyToRepair(0)));
    }

    /// Once decoding is complete, further `add_chunk` calls must fail with `ChunksetReadyToRepair`,
    /// and `repair` must still return the original data.
    #[test]
    fn test_repairing_chunkset_add_chunk_after_ready_to_repair() {
        let mut rng = rand::rng();

        let data = (0..ChunkSet::BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
        let chunkset = ChunkSet::new(0, data.clone()).expect("Must be able to build erasure-coded ChunkSet");
        let mut repairing_chunkset = RepairingChunkSet::new(0, chunkset.get_root_commitment());

        let mut chunk_idx = 0;
        while !repairing_chunkset.is_ready_to_repair() && chunk_idx < ChunkSet::NUM_ERASURE_CODED_CHUNKS {
            let chunk = chunkset.get_chunk(chunk_idx).expect("Must be able to lookup chunk by id");
            let _ = repairing_chunkset.add_chunk(chunk);

            chunk_idx += 1;
        }

        while chunk_idx < ChunkSet::NUM_ERASURE_CODED_CHUNKS {
            let chunk = chunkset.get_chunk(chunk_idx).expect("Must be able to lookup chunk by id");
            assert_eq!(repairing_chunkset.add_chunk(chunk), Err(DecdsError::ChunksetReadyToRepair(0)));

            chunk_idx += 1;
        }

        let repaired_chunkset = repairing_chunkset.repair().expect("Must be able to repair chunkset");
        assert_eq!(repaired_chunkset, data);
    }
}