decds_lib/
blob.rs

1use crate::{
2    RepairingChunkSet,
3    chunk::{self, ProofCarryingChunk},
4    chunkset::{self, ChunkSet},
5    consts::DECDS_BINCODE_CONFIG,
6    errors::DecdsError,
7    merkle_tree::MerkleTree,
8};
9use blake3;
10use rayon::prelude::*;
11use serde::{Deserialize, Serialize};
12use std::{collections::HashMap, ops::RangeBounds, usize};
13
14/// Represents the header of a `Blob`, containing essential metadata about the blob's
15/// structure and cryptographic commitments. This is essentially what is used during
16/// validity checking and repairing of erasure-coded chunks.
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq)]
pub struct BlobHeader {
    /// Original (unpadded) blob length in bytes.
    byte_length: usize,
    /// Number of fixed-size chunksets the blob was split into.
    num_chunksets: usize,
    /// BLAKE3 digest of the original, unpadded blob data.
    digest: blake3::Hash,
    /// Merkle root commitment computed over all chunkset root commitments.
    root_commitment: blake3::Hash,
    /// Per-chunkset Merkle root commitments, indexed by chunkset id.
    /// Invariant: `chunkset_root_commitments.len() == num_chunksets` (checked on deserialization).
    chunkset_root_commitments: Vec<blake3::Hash>,
}
25
26impl BlobHeader {
27    /// Returns the original byte length of the blob data before padding.
28    pub fn get_blob_size(&self) -> usize {
29        self.byte_length
30    }
31
32    /// Returns the total number of chunksets that comprise the blob.
33    pub fn get_num_chunksets(&self) -> usize {
34        self.num_chunksets
35    }
36
37    /// Returns the total number of erasure-coded chunks across all chunksets in the blob.
38    pub fn get_num_chunks(&self) -> usize {
39        self.get_num_chunksets() * chunkset::ChunkSet::NUM_ERASURE_CODED_CHUNKS
40    }
41
42    /// Returns the BLAKE3 digest of the original, unpadded blob data.
43    pub fn get_blob_digest(&self) -> blake3::Hash {
44        self.digest
45    }
46
47    /// Returns the Merkle root commitment of the entire blob.
48    ///
49    /// This commitment is derived from the Merkle tree of all chunksets in the blob.
50    pub fn get_root_commitment(&self) -> blake3::Hash {
51        self.root_commitment
52    }
53
54    /// Returns the Merkle root commitment of a specific chunkset within the blob.
55    ///
56    /// # Arguments
57    ///
58    /// * `chunkset_id` - The ID of the chunkset whose commitment is to be retrieved.
59    ///
60    /// # Returns
61    ///
62    /// Returns a `Result` which is:
63    /// - `Ok(blake3::Hash)` containing the root commitment of the specified chunkset if successful.
64    /// - `Err(DecdsError::InvalidChunksetId)` if `chunkset_id` is out of bounds.
65    pub fn get_chunkset_commitment(&self, chunkset_id: usize) -> Result<blake3::Hash, DecdsError> {
66        self.chunkset_root_commitments
67            .get(chunkset_id)
68            .and_then(|&v| Some(v))
69            .ok_or(DecdsError::InvalidChunksetId(chunkset_id, self.get_num_chunksets()))
70    }
71
72    /// Calculates the effective byte length of a specific chunkset within the blob.
73    /// This accounts for the last chunkset potentially being smaller than `ChunkSet::BYTE_LENGTH`.
74    ///
75    /// # Arguments
76    ///
77    /// * `chunkset_id` - The ID of the chunkset whose size is to be determined.
78    ///
79    /// # Returns
80    ///
81    /// Returns a `Result` which is:
82    /// - `Ok(usize)` containing the effective byte length of the chunkset if successful.
83    /// - `Err(DecdsError::InvalidChunksetId)` if `chunkset_id` is out of bounds.
84    pub fn get_chunkset_size(&self, chunkset_id: usize) -> Result<usize, DecdsError> {
85        if chunkset_id < self.get_num_chunksets() {
86            let from = chunkset_id * ChunkSet::BYTE_LENGTH;
87            let to = (from + ChunkSet::BYTE_LENGTH).min(self.get_blob_size());
88            let effective_len = to - from;
89
90            Ok(effective_len)
91        } else {
92            Err(DecdsError::InvalidChunksetId(chunkset_id, self.get_num_chunksets()))
93        }
94    }
95
96    /// Returns the full byte range `[start, end)` of a specific chunkset as it would appear
97    /// in the zero-padded blob data.
98    ///
99    /// # Arguments
100    ///
101    /// * `chunkset_id` - The ID of the chunkset whose byte range is to be retrieved.
102    ///
103    /// # Returns
104    ///
105    /// Returns a `Result` which is:
106    /// - `Ok((usize, usize))` containing a tuple `[start_byte_idx, end_byte_idx)` if successful.
107    /// - `Err(DecdsError::InvalidChunksetId)` if `chunkset_id` is out of bounds.
108    pub fn get_byte_range_for_chunkset(&self, chunkset_id: usize) -> Result<(usize, usize), DecdsError> {
109        if chunkset_id < self.get_num_chunksets() {
110            let from = chunkset_id * ChunkSet::BYTE_LENGTH;
111            let to = (from + ChunkSet::BYTE_LENGTH).min(self.get_blob_size());
112
113            Ok((from, to))
114        } else {
115            Err(DecdsError::InvalidChunksetId(chunkset_id, self.get_num_chunksets()))
116        }
117    }
118
119    /// Determines the IDs of all chunksets that overlap with a given byte range within the blob.
120    ///
121    /// # Arguments
122    ///
123    /// * `byte_range` - A range `impl RangeBounds<usize>` specifying the byte range.
124    ///
125    /// # Returns
126    ///
127    /// Returns a `Result` which is:
128    /// - `Ok(Vec<usize>)` containing a vector of chunkset IDs if successful.
129    /// - `Err(DecdsError::InvalidStartBound)` if the start bound of the range is not valid.
130    /// - `Err(DecdsError::InvalidEndBound)` if the end bound of the range is not valid (e.g., 0 for an `Excluded` bound or `usize::MAX`).
131    /// - `Err(DecdsError::InvalidChunksetId)` if the calculated `end_chunkset_id` is out of bounds.
132    pub fn get_chunkset_ids_for_byte_range(&self, byte_range: impl RangeBounds<usize>) -> Result<Vec<usize>, DecdsError> {
133        let start = match byte_range.start_bound() {
134            std::ops::Bound::Unbounded => 0,
135            std::ops::Bound::Included(&x) => x,
136            _ => return Err(DecdsError::InvalidStartBound),
137        };
138
139        let end = match byte_range.end_bound() {
140            std::ops::Bound::Included(&x) => x,
141            std::ops::Bound::Excluded(&x) => {
142                if x == 0 {
143                    return Err(DecdsError::InvalidEndBound(x));
144                }
145
146                x - 1
147            }
148            _ => return Err(DecdsError::InvalidEndBound(usize::MAX)),
149        };
150
151        let start_chunkset_id = start / ChunkSet::BYTE_LENGTH;
152        let end_chunkset_id = end / ChunkSet::BYTE_LENGTH;
153
154        if end_chunkset_id >= self.get_num_chunksets() {
155            return Err(DecdsError::InvalidChunksetId(end_chunkset_id, self.get_num_chunksets()));
156        }
157
158        Ok((start_chunkset_id..=end_chunkset_id).collect())
159    }
160
161    /// Serializes the `BlobHeader` into a vector of bytes using `bincode`.
162    ///
163    /// # Returns
164    ///
165    /// Returns a `Result` which is:
166    /// - `Ok(Vec<u8>)` containing the serialized bytes if successful.
167    /// - `Err(DecdsError::BlobHeaderSerializationFailed)` if `bincode` serialization fails.
168    pub fn to_bytes(&self) -> Result<Vec<u8>, DecdsError> {
169        bincode::serde::encode_to_vec(self, DECDS_BINCODE_CONFIG).map_err(|err| DecdsError::BlobHeaderSerializationFailed(err.to_string()))
170    }
171
172    /// Deserializes a `BlobHeader` from a byte slice using `bincode`.
173    ///
174    /// # Arguments
175    ///
176    /// * `bytes` - The byte slice from which to deserialize the header.
177    ///
178    /// # Returns
179    ///
180    /// Returns a `Result` which is:
181    /// - `Ok((Self, usize))` containing the deserialized `BlobHeader` and the number of bytes read if successful.
182    /// - `Err(DecdsError::BlobHeaderDeserializationFailed)` if `bincode` deserialization fails, or if the number
183    ///   of chunksets in the header does not match the number of root commitments.
184    pub fn from_bytes(bytes: &[u8]) -> Result<(Self, usize), DecdsError> {
185        match bincode::serde::decode_from_slice::<BlobHeader, bincode::config::Configuration>(bytes, DECDS_BINCODE_CONFIG) {
186            Ok((header, n)) => {
187                if header.num_chunksets != header.chunkset_root_commitments.len() {
188                    return Err(DecdsError::BlobHeaderDeserializationFailed(
189                        "number of chunksets and root commitments do not match".to_string(),
190                    ));
191                }
192
193                Ok((header, n))
194            }
195            Err(err) => Err(DecdsError::BlobHeaderDeserializationFailed(err.to_string())),
196        }
197    }
198
199    /// Validates a `ProofCarryingChunk` against the `BlobHeader`'s commitments.
200    ///
201    /// This checks if the chunk is correctly included in the blob (via blob root commitment)
202    /// and its respective chunkset (via chunkset root commitment).
203    ///
204    /// # Arguments
205    ///
206    /// * `chunk` - A reference to the `ProofCarryingChunk` to validate.
207    ///
208    /// # Returns
209    ///
210    /// Returns `true` if the chunk is valid and its proofs are consistent with the blob header, `false` otherwise.
211    pub fn validate_chunk(&self, chunk: &chunk::ProofCarryingChunk) -> bool {
212        chunk.validate_inclusion_in_blob(self.root_commitment)
213            && (chunk.get_chunkset_id() < self.num_chunksets)
214            && chunk.validate_inclusion_in_chunkset(self.chunkset_root_commitments[chunk.get_chunkset_id()])
215    }
216}
217
218/// `BlobBuilder` provides an incremental way to construct a `Blob` from a stream of data.
219///
220/// This builder handles the division of input data into fixed-size `ChunkSet`s, prepares RLNC-based erasure-coded chunks,
221/// computes BLAKE3 digest of blob, and generates Merkle inclusion (in respective `Chunkset`s) proof for proof-carrying chunks.
pub struct BlobBuilder {
    /// Incremental BLAKE3 hasher over every byte absorbed via `update`.
    hasher: blake3::Hasher,
    /// Total number of input bytes absorbed so far (the blob's unpadded length).
    num_bytes_absorbed: usize,
    /// Number of chunksets fully processed so far; also the id of the next chunkset.
    num_chunksets: usize,
    /// Number of valid bytes currently buffered in `buffer` (always < `ChunkSet::BYTE_LENGTH`).
    offset: usize,
    /// Staging buffer of exactly `ChunkSet::BYTE_LENGTH` bytes for a partially-filled chunkset.
    buffer: Vec<u8>,
    /// Root commitment of each processed chunkset, in chunkset-id order.
    chunkset_root_commitments: Vec<blake3::Hash>,
}
230
231impl BlobBuilder {
232    /// Initializes a new `BlobBuilder` - ready to build a blob.
233    pub fn init() -> Self {
234        BlobBuilder {
235            hasher: blake3::Hasher::new(),
236            num_bytes_absorbed: 0,
237            num_chunksets: 0,
238            offset: 0,
239            buffer: vec![0u8; ChunkSet::BYTE_LENGTH],
240            chunkset_root_commitments: vec![],
241        }
242    }
243
244    pub fn num_bytes_absorbed_so_far(&self) -> usize {
245        self.num_bytes_absorbed
246    }
247
248    /// Updates the `BlobBuilder` with new data.
249    ///
250    /// This method absorbs the provided `data` into the internal buffer. If enough
251    /// data accumulates to form a complete `ChunkSet`, it is processed (erasure-coded,
252    /// Merkle-proofed) and its resulting `ProofCarryingChunk`s are returned.
253    ///
254    /// You can call this method arbitrary number of times, before calling `Self::finalize`.
255    /// Note, you must call this method atleast once with non-empty input data, to not get
256    /// an error from `Self::finalize` - as you can't build a blob over empty input data.
257    ///
258    /// # Arguments
259    ///
260    /// * `data` - A byte slice containing the new data to be processed.
261    ///
262    /// # Returns
263    ///
264    /// An `Option<Vec<ProofCarryingChunk>>`.
265    /// - `Some(Vec<ProofCarryingChunk>)` if one or more `ChunkSet`s were completed and their chunks generated. These chunks carry Merkle proof-of-inclusion in respective Chunkset.
266    /// - `None` if no complete `ChunkSet` was formed or if the input `data` was empty.
267    pub fn update(&mut self, data: &[u8]) -> Option<Vec<ProofCarryingChunk>> {
268        if data.is_empty() {
269            return None;
270        }
271
272        self.hasher.update(data);
273        self.num_bytes_absorbed += data.len();
274
275        let total_num_bytes = self.offset + data.len();
276        let num_chunksets = total_num_bytes / ChunkSet::BYTE_LENGTH;
277
278        if num_chunksets == 0 {
279            self.buffer[self.offset..total_num_bytes].copy_from_slice(data);
280            self.offset = total_num_bytes;
281
282            return None;
283        } else {
284            let remaining_num_bytes = total_num_bytes - num_chunksets * ChunkSet::BYTE_LENGTH;
285            let dont_use_from_idx = data.len() - remaining_num_bytes;
286
287            let mut chunks = Vec::with_capacity(num_chunksets * ChunkSet::NUM_ERASURE_CODED_CHUNKS);
288
289            if num_chunksets == 1 {
290                self.buffer[self.offset..].copy_from_slice(&data[..dont_use_from_idx]);
291
292                let chunkset_id = self.num_chunksets;
293                let owned_buffer = std::mem::replace(&mut self.buffer, vec![0u8; ChunkSet::BYTE_LENGTH]);
294                let chunkset = unsafe { chunkset::ChunkSet::new(chunkset_id, owned_buffer).unwrap_unchecked() };
295
296                chunks.extend((0..ChunkSet::NUM_ERASURE_CODED_CHUNKS).map(|chunk_id| unsafe { chunkset.get_chunk(chunk_id).unwrap_unchecked().clone() }));
297                self.chunkset_root_commitments.push(chunkset.get_root_commitment());
298
299                self.num_chunksets += 1;
300            } else {
301                let mut working_mem = vec![0u8; num_chunksets * ChunkSet::BYTE_LENGTH];
302                working_mem[..self.offset].copy_from_slice(&self.buffer[..self.offset]);
303                working_mem[self.offset..].copy_from_slice(&data[..dont_use_from_idx]);
304
305                let mut chunkset_root_commitments = Vec::with_capacity(num_chunksets);
306                let mut nested_chunks: Vec<Vec<ProofCarryingChunk>> = Vec::with_capacity(num_chunksets);
307
308                working_mem
309                    .par_chunks_exact(ChunkSet::BYTE_LENGTH)
310                    .enumerate()
311                    .map(|(data_chunk_idx, data_chunk)| {
312                        let chunkset_id = self.num_chunksets + data_chunk_idx;
313                        let chunkset = unsafe { chunkset::ChunkSet::new(chunkset_id, data_chunk.to_vec()).unwrap_unchecked() };
314
315                        (
316                            chunkset.get_root_commitment(),
317                            (0..ChunkSet::NUM_ERASURE_CODED_CHUNKS)
318                                .map(|chunk_id| unsafe { chunkset.get_chunk(chunk_id).unwrap_unchecked().clone() })
319                                .collect(),
320                        )
321                    })
322                    .unzip_into_vecs(&mut chunkset_root_commitments, &mut nested_chunks);
323
324                self.chunkset_root_commitments.append(&mut chunkset_root_commitments);
325                chunks.extend(nested_chunks.into_iter().flatten());
326
327                self.num_chunksets += num_chunksets;
328            }
329
330            if remaining_num_bytes > 0 {
331                self.buffer[..remaining_num_bytes].copy_from_slice(&data[dont_use_from_idx..]);
332                self.offset = remaining_num_bytes;
333            }
334
335            Some(chunks)
336        }
337    }
338
339    /// Finalizes the `BlobBuilder`, processing any remaining buffered data
340    /// and constructing the `BlobHeader`.
341    ///
342    /// This method pads any incomplete `ChunkSet` in the buffer with zeros,
343    /// processes it, computes the final blob digest, and builds the top-level
344    /// Merkle tree over chunkset root commitments, yielding the `BlobHeader`
345    /// and at max 16 proof-carrying chunks, if there was an incomplete chunkset,
346    /// which needed to be built.
347    ///
348    /// # Returns
349    ///
350    /// Returns a `Result` which is:
351    /// - `Ok((Vec<ProofCarryingChunk>, BlobHeader))` containing either 0 or 16 `ProofCarryingChunk`s from last chunkset and the `BlobHeader` for the complete blob.
352    /// - `Err(DecdsError::EmptyDataForBlob)` if no data was ever absorbed by the builder.
353    /// - Other `DecdsError` types may be returned from underlying `MerkleTree::new` calls.
354    pub fn finalize(mut self) -> Result<(Vec<ProofCarryingChunk>, BlobHeader), DecdsError> {
355        if self.num_bytes_absorbed == 0 {
356            return Err(DecdsError::EmptyDataForBlob);
357        }
358
359        let chunks = if self.offset != 0 {
360            self.buffer[self.offset..].fill(0);
361
362            let chunkset_id = self.num_chunksets;
363            let chunkset = unsafe { chunkset::ChunkSet::new(chunkset_id, self.buffer).unwrap_unchecked() };
364
365            self.chunkset_root_commitments.push(chunkset.get_root_commitment());
366            self.num_chunksets += 1;
367
368            (0..ChunkSet::NUM_ERASURE_CODED_CHUNKS)
369                .map(|chunk_id| unsafe { chunkset.get_chunk(chunk_id).unwrap_unchecked().clone() })
370                .collect()
371        } else {
372            Vec::new()
373        };
374
375        let blob_digest = self.hasher.finalize();
376
377        let merkle_tree = MerkleTree::new(self.chunkset_root_commitments.clone())?;
378        let blob_root_commitment = merkle_tree.get_root_commitment();
379
380        Ok((
381            chunks,
382            BlobHeader {
383                byte_length: self.num_bytes_absorbed,
384                num_chunksets: self.num_chunksets,
385                digest: blob_digest,
386                root_commitment: blob_root_commitment,
387                chunkset_root_commitments: self.chunkset_root_commitments,
388            },
389        ))
390    }
391}
392
393/// Represents a blob that is in the process of being incrementally repaired or reconstructed
394/// from received `ProofCarryingChunk`s.
pub struct RepairingBlob {
    /// Header of the blob being repaired; supplies chunkset commitments and sizes.
    header: BlobHeader,
    /// Per-chunkset repair state, keyed by chunkset id.
    /// `Some(..)` = still collecting chunks; `None` = already repaired and retrieved.
    body: HashMap<usize, Option<chunkset::RepairingChunkSet>>,
}
399
400impl RepairingBlob {
401    /// Creates a new `RepairingBlob` instance from a `BlobHeader`.
402    ///
403    /// This initializes an empty `RepairingChunkSet` for each chunkset indicated in the header,
404    /// ready to receive chunks for repair.
405    ///
406    /// # Arguments
407    ///
408    /// * `header` - The `BlobHeader` of the blob to be repaired. This header provides the necessary
409    ///   metadata, including chunkset commitments, for the repair process.
410    ///
411    /// # Returns
412    ///
413    /// A new `RepairingBlob` instance, prepared to accept chunks for reconstruction.
414    pub fn new(header: BlobHeader) -> Self {
415        RepairingBlob {
416            body: HashMap::from_iter((0..header.get_num_chunksets()).map(|chunkset_id| {
417                (
418                    chunkset_id,
419                    Some(RepairingChunkSet::new(chunkset_id, unsafe {
420                        header.get_chunkset_commitment(chunkset_id).unwrap_unchecked()
421                    })),
422                )
423            })),
424            header: header,
425        }
426    }
427
428    /// Adds a `ProofCarryingChunk` to the appropriate `RepairingChunkSet` within the blob.
429    ///
430    /// This method first validates the chunk's inclusion using the blob header, then attempts
431    /// to add it to the relevant chunkset's decoder.
432    ///
433    /// # Arguments
434    ///
435    /// * `chunk` - A reference to the `ProofCarryingChunk` to add.
436    ///
437    /// # Returns
438    ///
439    /// Returns a `Result` which is:
440    /// - `Ok(())` if the chunk is successfully added.
441    /// - `Err(DecdsError::InvalidChunksetId)` if the chunk's `chunkset_id` does not exist in this blob.
442    /// - `Err(DecdsError::ChunksetAlreadyRepaired)` if the target chunkset has already been repaired.
443    /// - `Err(DecdsError::InvalidProofInChunk)` if the chunk's proof of inclusion in the blob or chunkset is invalid.
444    /// - `Err(DecdsError::ChunksetReadyToRepair)` if the chunkset is already ready to repair (and thus cannot accept more chunks).
445    /// - Other `DecdsError` types may be returned from `RepairingChunkSet::add_chunk_unvalidated`.
446    pub fn add_chunk(&mut self, chunk: &chunk::ProofCarryingChunk) -> Result<(), DecdsError> {
447        let chunkset_id = chunk.get_chunkset_id();
448
449        match self
450            .body
451            .get_mut(&chunkset_id)
452            .ok_or(DecdsError::InvalidChunksetId(chunkset_id, self.header.get_num_chunksets()))?
453        {
454            Some(chunkset) => {
455                if self.header.validate_chunk(chunk) {
456                    if !chunkset.is_ready_to_repair() {
457                        chunkset.add_chunk_unvalidated(chunk)
458                    } else {
459                        Err(DecdsError::ChunksetReadyToRepair(chunkset_id))
460                    }
461                } else {
462                    Err(DecdsError::InvalidProofInChunk(chunkset_id))
463                }
464            }
465            None => Err(DecdsError::ChunksetAlreadyRepaired(chunkset_id)),
466        }
467    }
468
469    /// Checks if a specific chunkset within the blob is ready to be repaired (reconstructed).
470    ///
471    /// # Arguments
472    ///
473    /// * `chunkset_id` - The ID of the chunkset to check.
474    ///
475    /// # Returns
476    ///
477    /// Returns a `Result` which is:
478    /// - `Ok(bool)`: `true` if the chunkset is ready for repair, `false` otherwise.
479    /// - `Err(DecdsError::InvalidChunksetId)` if `chunkset_id` is out of bounds.
480    pub fn is_chunkset_ready_to_repair(&self, chunkset_id: usize) -> Result<bool, DecdsError> {
481        Ok(self
482            .body
483            .get(&chunkset_id)
484            .ok_or(DecdsError::InvalidChunksetId(chunkset_id, self.header.get_num_chunksets()))?
485            .as_ref()
486            .is_some_and(|x| x.is_ready_to_repair()))
487    }
488
489    /// Checks if a specific chunkset within the blob has already been successfully repaired.
490    ///
491    /// # Arguments
492    ///
493    /// * `chunkset_id` - The ID of the chunkset to check.
494    ///
495    /// # Returns
496    ///
497    /// Returns a `Result` which is:
498    /// - `Ok(bool)`: `true` if the chunkset has already been repaired, `false` otherwise.
499    /// - `Err(DecdsError::InvalidChunksetId)` if `chunkset_id` is out of bounds.
500    pub fn is_chunkset_already_repaired(&self, chunkset_id: usize) -> Result<bool, DecdsError> {
501        Ok(self
502            .body
503            .get(&chunkset_id)
504            .ok_or(DecdsError::InvalidChunksetId(chunkset_id, self.header.get_num_chunksets()))?
505            .is_none())
506    }
507
508    /// Retrieves the repaired (reconstructed) data for a specific chunkset.
509    /// This method consumes the `RepairingChunkSet` for the given ID once successful,
510    /// as the data is fully reconstructed.
511    ///
512    /// # Arguments
513    ///
514    /// * `chunkset_id` - The ID of the chunkset to retrieve repaired data for.
515    ///
516    /// # Returns
517    ///
518    /// Returns a `Result` which is:
519    /// - `Ok(Vec<u8>)` containing the repaired chunkset data if successful.
520    /// - `Err(DecdsError::ChunksetAlreadyRepaired)` if the chunkset has already been repaired and retrieved.
521    /// - `Err(DecdsError::ChunksetNotYetReadyToRepair)` if not enough chunks have been added to repair the chunkset.
522    /// - `Err(DecdsError::InvalidChunksetId)` if `chunkset_id` is out of bounds.
523    /// - `Err(DecdsError::ChunksetRepairingFailed)` if an error occurs during the underlying chunkset repair process.
524    pub fn get_repaired_chunkset(&mut self, chunkset_id: usize) -> Result<Vec<u8>, DecdsError> {
525        self.is_chunkset_already_repaired(chunkset_id).and_then(|yes| {
526            if yes {
527                Err(DecdsError::ChunksetAlreadyRepaired(chunkset_id))
528            } else {
529                self.is_chunkset_ready_to_repair(chunkset_id).and_then(|yes| unsafe {
530                    if yes {
531                        self.body
532                            .insert(chunkset_id, None)
533                            .unwrap_unchecked()
534                            .unwrap_unchecked()
535                            .repair()
536                            .map(|mut repaired| {
537                                repaired.truncate(self.header.get_chunkset_size(chunkset_id).unwrap_unchecked());
538                                repaired
539                            })
540                    } else {
541                        Err(DecdsError::ChunksetNotYetReadyToRepair(chunkset_id))
542                    }
543                })
544            }
545        })
546    }
547}
548
549#[cfg(test)]
550mod tests {
551    use crate::{BlobHeader, RepairingBlob, blob::BlobBuilder, chunkset::ChunkSet, errors::DecdsError, merkle_tree::MerkleTree};
552    use blake3;
553    use rand::Rng;
554    use rayon::prelude::*;
555    use std::collections::HashMap;
556
557    #[test]
558    fn prop_test_blob_preparation_and_commitment_works() {
559        const NUM_TEST_ITERATIONS: usize = 10;
560
561        const MIN_BLOB_DATA_BYTE_LEN: usize = 1usize;
562        const MAX_BLOB_DATA_BYTE_LEN: usize = 1usize << 30;
563
564        let mut rng = rand::rng();
565
566        (0..NUM_TEST_ITERATIONS).for_each(|_| {
567            let blob_byte_len = rng.random_range(MIN_BLOB_DATA_BYTE_LEN..=MAX_BLOB_DATA_BYTE_LEN);
568            let blob_data = (0..blob_byte_len).map(|_| rng.random()).collect::<Vec<u8>>();
569
570            let (mut chunks, blob_header) = {
571                let mut all_chunks = Vec::new();
572
573                let mut blob_builder = BlobBuilder::init();
574                if let Some(chunks) = blob_builder.update(&blob_data) {
575                    all_chunks.extend(chunks);
576                }
577
578                let (chunks, blob_header) = blob_builder.finalize().expect("Must be able to prepare blob");
579                all_chunks.extend(chunks);
580
581                (all_chunks, blob_header)
582            };
583
584            let chunkset_root_commitments = (0..blob_header.get_num_chunksets())
585                .map(|chunkset_id| unsafe { blob_header.get_chunkset_commitment(chunkset_id).unwrap_unchecked() })
586                .collect();
587
588            let merkle_tree = MerkleTree::new(chunkset_root_commitments).expect("Must be able to build Merkle tree");
589            let merkle_proofs = (0..blob_header.get_num_chunksets())
590                .into_par_iter()
591                .map(|chunkset_id| unsafe { (chunkset_id, merkle_tree.generate_proof(chunkset_id).unwrap_unchecked()) })
592                .collect::<HashMap<usize, Vec<blake3::Hash>>>();
593
594            chunks.par_iter_mut().for_each(|chunk| {
595                chunk.append_proof_to_blob_root(&merkle_proofs[&chunk.get_chunkset_id()]);
596            });
597
598            assert!(chunks.iter().all(|chunk| { blob_header.validate_chunk(chunk) }));
599        });
600    }
601
602    #[test]
603    fn test_get_chunkset_commitment() {
604        let mut rng = rand::rng();
605
606        let blob_byte_len = ChunkSet::BYTE_LENGTH * 2 + ChunkSet::BYTE_LENGTH / 2;
607        let blob_data = (0..blob_byte_len).map(|_| rng.random()).collect::<Vec<u8>>();
608
609        let (_, header) = {
610            let mut all_chunks = Vec::new();
611
612            let mut blob_builder = BlobBuilder::init();
613            if let Some(chunks) = blob_builder.update(&blob_data) {
614                all_chunks.extend(chunks);
615            }
616
617            let (chunks, header) = blob_builder.finalize().expect("Must be able to prepare blob");
618            all_chunks.extend(chunks);
619
620            (all_chunks, header)
621        };
622
623        // Valid chunkset ID
624        let commitment = header.get_chunkset_commitment(0);
625        assert!(commitment.is_ok());
626
627        let commitment = header.get_chunkset_commitment(1);
628        assert!(commitment.is_ok());
629
630        // Invalid chunkset ID
631        let err = header.get_chunkset_commitment(header.get_num_chunksets());
632        assert_eq!(err, Err(DecdsError::InvalidChunksetId(header.get_num_chunksets(), header.get_num_chunksets())));
633    }
634
635    #[test]
636    fn test_get_chunkset_size() {
637        let mut rng = rand::rng();
638
639        // Blob size: 2.5 chunksets -> 2 full, 1 half
640        let blob_byte_len = ChunkSet::BYTE_LENGTH * 2 + ChunkSet::BYTE_LENGTH / 2;
641        let blob_data = (0..blob_byte_len).map(|_| rng.random()).collect::<Vec<u8>>();
642
643        let (_, header) = {
644            let mut all_chunks = Vec::new();
645
646            let mut blob_builder = BlobBuilder::init();
647            if let Some(chunks) = blob_builder.update(&blob_data) {
648                all_chunks.extend(chunks);
649            }
650
651            let (chunks, header) = blob_builder.finalize().expect("Must be able to prepare blob");
652            all_chunks.extend(chunks);
653
654            (all_chunks, header)
655        };
656
657        // Full chunkset
658        assert_eq!(header.get_chunkset_size(0).unwrap(), ChunkSet::BYTE_LENGTH);
659        assert_eq!(header.get_chunkset_size(1).unwrap(), ChunkSet::BYTE_LENGTH);
660
661        // Partial chunkset
662        assert_eq!(header.get_chunkset_size(2).unwrap(), ChunkSet::BYTE_LENGTH / 2);
663
664        // Invalid chunkset ID
665        assert_eq!(
666            header.get_chunkset_size(header.get_num_chunksets()).unwrap_err(),
667            DecdsError::InvalidChunksetId(header.get_num_chunksets(), header.get_num_chunksets())
668        );
669    }
670
671    #[test]
672    fn test_get_byte_range_for_chunkset() {
673        let mut rng = rand::rng();
674
675        let blob_byte_len = ChunkSet::BYTE_LENGTH * 2 + ChunkSet::BYTE_LENGTH / 2;
676        let blob_data = (0..blob_byte_len).map(|_| rng.random()).collect::<Vec<u8>>();
677
678        let (_, header) = {
679            let mut all_chunks = Vec::new();
680
681            let mut blob_builder = BlobBuilder::init();
682            if let Some(chunks) = blob_builder.update(&blob_data) {
683                all_chunks.extend(chunks);
684            }
685
686            let (chunks, header) = blob_builder.finalize().expect("Must be able to prepare blob");
687            all_chunks.extend(chunks);
688
689            (all_chunks, header)
690        };
691
692        // First chunkset
693        assert_eq!(header.get_byte_range_for_chunkset(0).unwrap(), (0, ChunkSet::BYTE_LENGTH));
694
695        // Second chunkset
696        assert_eq!(
697            header.get_byte_range_for_chunkset(1).unwrap(),
698            (ChunkSet::BYTE_LENGTH, ChunkSet::BYTE_LENGTH * 2)
699        );
700
701        // Last (partial) chunkset
702        assert_eq!(header.get_byte_range_for_chunkset(2).unwrap(), (ChunkSet::BYTE_LENGTH * 2, blob_byte_len));
703
704        // Invalid chunkset ID
705        assert_eq!(
706            header.get_byte_range_for_chunkset(header.get_num_chunksets()).unwrap_err(),
707            DecdsError::InvalidChunksetId(header.get_num_chunksets(), header.get_num_chunksets())
708        );
709    }
710
711    #[test]
712    fn test_get_chunkset_ids_for_byte_range() {
713        let mut rng = rand::rng();
714
715        let blob_byte_len = ChunkSet::BYTE_LENGTH * 2 + ChunkSet::BYTE_LENGTH / 2;
716        let blob_data = (0..blob_byte_len).map(|_| rng.random()).collect::<Vec<u8>>();
717
718        let (_, header) = {
719            let mut all_chunks = Vec::new();
720
721            let mut blob_builder = BlobBuilder::init();
722            if let Some(chunks) = blob_builder.update(&blob_data) {
723                all_chunks.extend(chunks);
724            }
725
726            let (chunks, header) = blob_builder.finalize().expect("Must be able to prepare blob");
727            all_chunks.extend(chunks);
728
729            (all_chunks, header)
730        };
731
732        // Range within a single chunkset
733        assert_eq!(header.get_chunkset_ids_for_byte_range(0..10).unwrap(), vec![0]);
734        assert_eq!(
735            header
736                .get_chunkset_ids_for_byte_range(ChunkSet::BYTE_LENGTH + 10..ChunkSet::BYTE_LENGTH + 20)
737                .unwrap(),
738            vec![1]
739        );
740
741        // Range spanning multiple chunksets
742        assert_eq!(
743            header.get_chunkset_ids_for_byte_range(10..(ChunkSet::BYTE_LENGTH * 1 + 10)).unwrap(),
744            vec![0, 1]
745        );
746        assert_eq!(header.get_chunkset_ids_for_byte_range(10..blob_byte_len).unwrap(), vec![0, 1, 2]);
747
748        // Range exactly matching chunkset boundaries
749        assert_eq!(header.get_chunkset_ids_for_byte_range(0..ChunkSet::BYTE_LENGTH).unwrap(), vec![0]);
750        assert_eq!(header.get_chunkset_ids_for_byte_range(0..=(ChunkSet::BYTE_LENGTH - 1)).unwrap(), vec![0]);
751
752        // Edge cases for bounds
753        assert_eq!(header.get_chunkset_ids_for_byte_range(0..0).unwrap_err(), DecdsError::InvalidEndBound(0));
754        assert_eq!(header.get_chunkset_ids_for_byte_range(0..=0).unwrap(), vec![0]); // Covers first byte of first chunkset
755
756        // Invalid end bound (range beyond blob size)
757        let end_beyond_blob = header.get_blob_size() + ChunkSet::BYTE_LENGTH;
758        let expected_end_chunkset_id = end_beyond_blob.saturating_sub(1) / ChunkSet::BYTE_LENGTH;
759        assert_eq!(
760            header.get_chunkset_ids_for_byte_range(0..end_beyond_blob).unwrap_err(),
761            DecdsError::InvalidChunksetId(expected_end_chunkset_id, header.get_num_chunksets())
762        );
763
764        // Test for `InvalidEndBound(usize::MAX)` for unbounded ranges
765        assert_eq!(header.get_chunkset_ids_for_byte_range(..).unwrap_err(), DecdsError::InvalidEndBound(usize::MAX));
766        assert_eq!(
767            header.get_chunkset_ids_for_byte_range(0..).unwrap_err(),
768            DecdsError::InvalidEndBound(usize::MAX)
769        );
770    }
771
772    #[test]
773    fn test_blob_header_serialization_deserialization() {
774        let mut rng = rand::rng();
775
776        let blob_byte_len = ChunkSet::BYTE_LENGTH * 3;
777        let blob_data = (0..blob_byte_len).map(|_| rng.random()).collect::<Vec<u8>>();
778
779        let (_, original_header) = {
780            let mut all_chunks = Vec::new();
781
782            let mut blob_builder = BlobBuilder::init();
783            if let Some(chunks) = blob_builder.update(&blob_data) {
784                all_chunks.extend(chunks);
785            }
786
787            let (chunks, header) = blob_builder.finalize().expect("Must be able to prepare blob");
788            all_chunks.extend(chunks);
789
790            (all_chunks, header)
791        };
792
793        let serialized_header = original_header.to_bytes().expect("Header serialization failed");
794        let (deserialized_header, bytes_read) = BlobHeader::from_bytes(&serialized_header).expect("Header deserialization failed");
795
796        assert_eq!(original_header, deserialized_header);
797        assert_eq!(serialized_header.len(), bytes_read);
798
799        // Test deserialization failure with lesser bytes
800        assert!(BlobHeader::from_bytes(&serialized_header[..(serialized_header.len() / 2)]).is_err());
801    }
802
803    #[test]
804    fn test_blob_new_empty_data() {
805        assert_eq!(BlobBuilder::init().finalize().err(), Some(DecdsError::EmptyDataForBlob));
806    }
807
808    #[test]
809    fn test_repairing_blob_new() {
810        let mut rng = rand::rng();
811
812        let blob_byte_len = ChunkSet::BYTE_LENGTH * 2 + ChunkSet::BYTE_LENGTH / 2;
813        let blob_data: Vec<u8> = (0..blob_byte_len).map(|_| rng.random()).collect();
814
815        let (_, header) = {
816            let mut all_chunks = Vec::new();
817
818            let mut blob_builder = BlobBuilder::init();
819            if let Some(chunks) = blob_builder.update(&blob_data) {
820                all_chunks.extend(chunks);
821            }
822
823            let (chunks, header) = blob_builder.finalize().expect("Must be able to prepare blob");
824            all_chunks.extend(chunks);
825
826            (all_chunks, header)
827        };
828
829        let repairer = RepairingBlob::new(header.clone());
830
831        assert_eq!(repairer.header.get_blob_size(), header.get_blob_size());
832        assert_eq!(repairer.header.get_num_chunksets(), header.get_num_chunksets());
833        assert_eq!(repairer.body.len(), header.get_num_chunksets());
834
835        for i in 0..header.get_num_chunksets() {
836            assert!(repairer.body.get(&i).unwrap().is_some());
837
838            assert!(!repairer.is_chunkset_ready_to_repair(i).unwrap());
839            assert!(!repairer.is_chunkset_already_repaired(i).unwrap());
840        }
841    }
842
843    #[test]
844    fn test_repairing_blob_add_chunk() {
845        let mut rng = rand::rng();
846
847        let blob_data: Vec<u8> = (0..ChunkSet::BYTE_LENGTH * 2).map(|_| rng.random()).collect(); // Two full chunksets
848
849        let (mut chunks, blob_header) = {
850            let mut all_chunks = Vec::new();
851
852            let mut blob_builder = BlobBuilder::init();
853            if let Some(chunks) = blob_builder.update(&blob_data) {
854                all_chunks.extend(chunks);
855            }
856
857            let (chunks, header) = blob_builder.finalize().expect("Must be able to prepare blob");
858            all_chunks.extend(chunks);
859
860            (all_chunks, header)
861        };
862
863        let chunkset_root_commitments = (0..blob_header.get_num_chunksets())
864            .map(|chunkset_id| unsafe { blob_header.get_chunkset_commitment(chunkset_id).unwrap_unchecked() })
865            .collect();
866
867        let merkle_tree = MerkleTree::new(chunkset_root_commitments).expect("Must be able to build Merkle tree");
868        let merkle_proofs = (0..blob_header.get_num_chunksets())
869            .into_par_iter()
870            .map(|chunkset_id| unsafe { (chunkset_id, merkle_tree.generate_proof(chunkset_id).unwrap_unchecked()) })
871            .collect::<HashMap<usize, Vec<blake3::Hash>>>();
872
873        chunks.par_iter_mut().for_each(|chunk| {
874            chunk.append_proof_to_blob_root(&merkle_proofs[&chunk.get_chunkset_id()]);
875        });
876
877        let mut repairer = RepairingBlob::new(blob_header.clone());
878
879        // Test valid chunk addition
880        let chunk_to_add = &chunks[0];
881        assert!(repairer.add_chunk(chunk_to_add).is_ok());
882
883        // Simulate an invalid chunk proof by creating a new header with a different root commitment
884        let mut invalid_header = blob_header.clone();
885        invalid_header.root_commitment = blake3::hash(b"fake_root_commitment");
886
887        let mut repairer_invalid_header = RepairingBlob::new(invalid_header);
888        assert_eq!(
889            repairer_invalid_header.add_chunk(chunk_to_add).unwrap_err(),
890            DecdsError::InvalidProofInChunk(chunk_to_add.get_chunkset_id())
891        );
892
893        // Add enough chunks to make a chunkset ready for repair
894        let mut repairer_ready = RepairingBlob::new(blob_header.clone());
895        let chunkset_id = chunks[0].get_chunkset_id();
896
897        for chunk in &chunks {
898            if chunk.get_chunkset_id() == chunkset_id {
899                let _ = repairer_ready.add_chunk(chunk);
900
901                if repairer_ready.is_chunkset_ready_to_repair(chunkset_id).unwrap() {
902                    break;
903                }
904            }
905        }
906
907        assert!(repairer_ready.is_chunkset_ready_to_repair(chunkset_id).unwrap());
908
909        // Try adding another chunk to a chunkset already ready for repair
910        let extra_chunk = &chunks
911            .iter()
912            .find(|c| c.get_chunkset_id() == chunkset_id && c.get_global_chunk_id() != chunks[0].get_global_chunk_id())
913            .unwrap();
914
915        assert_eq!(
916            repairer_ready.add_chunk(extra_chunk).unwrap_err(),
917            DecdsError::ChunksetReadyToRepair(chunkset_id)
918        );
919
920        // Repair the chunkset, then try adding a chunk to it
921        repairer_ready.get_repaired_chunkset(chunkset_id).unwrap();
922
923        assert!(!repairer_ready.is_chunkset_ready_to_repair(chunkset_id).unwrap());
924        assert!(repairer_ready.is_chunkset_already_repaired(chunkset_id).unwrap());
925        assert_eq!(
926            repairer_ready.add_chunk(chunk_to_add).unwrap_err(),
927            DecdsError::ChunksetAlreadyRepaired(chunkset_id)
928        );
929    }
930
931    #[test]
932    fn test_repairing_blob_get_repaired_chunkset() {
933        let mut rng = rand::rng();
934
935        let blob_data: Vec<u8> = (0..(ChunkSet::BYTE_LENGTH * 2 + ChunkSet::BYTE_LENGTH / 2)).map(|_| rng.random()).collect();
936        let original_blob_data_copy = blob_data.clone();
937
938        let (mut chunks, blob_header) = {
939            let mut all_chunks = Vec::new();
940
941            let mut blob_builder = BlobBuilder::init();
942            if let Some(chunks) = blob_builder.update(&blob_data) {
943                all_chunks.extend(chunks);
944            }
945
946            let (chunks, header) = blob_builder.finalize().expect("Must be able to prepare blob");
947            all_chunks.extend(chunks);
948
949            (all_chunks, header)
950        };
951
952        let chunkset_root_commitments = (0..blob_header.get_num_chunksets())
953            .map(|chunkset_id| unsafe { blob_header.get_chunkset_commitment(chunkset_id).unwrap_unchecked() })
954            .collect();
955
956        let merkle_tree = MerkleTree::new(chunkset_root_commitments).expect("Must be able to build Merkle tree");
957        let merkle_proofs = (0..blob_header.get_num_chunksets())
958            .into_par_iter()
959            .map(|chunkset_id| unsafe { (chunkset_id, merkle_tree.generate_proof(chunkset_id).unwrap_unchecked()) })
960            .collect::<HashMap<usize, Vec<blake3::Hash>>>();
961
962        chunks.par_iter_mut().for_each(|chunk| {
963            chunk.append_proof_to_blob_root(&merkle_proofs[&chunk.get_chunkset_id()]);
964        });
965
966        let mut repairer = RepairingBlob::new(blob_header.clone());
967
968        // Test `ChunksetNotYetReadyToRepair`
969        let chunkset_id_0 = 0;
970        assert_eq!(
971            repairer.get_repaired_chunkset(chunkset_id_0).unwrap_err(),
972            DecdsError::ChunksetNotYetReadyToRepair(chunkset_id_0)
973        );
974
975        // Add enough chunks for the first chunkset
976        for chunk in &chunks {
977            if chunk.get_chunkset_id() == chunkset_id_0 {
978                let _ = repairer.add_chunk(chunk);
979
980                if repairer.is_chunkset_ready_to_repair(chunkset_id_0).unwrap() {
981                    break;
982                }
983            }
984        }
985        assert!(repairer.is_chunkset_ready_to_repair(chunkset_id_0).unwrap());
986
987        // Test successful repair
988        let repaired_data_0 = repairer.get_repaired_chunkset(chunkset_id_0).unwrap();
989        let expected_data_0 = original_blob_data_copy[0..ChunkSet::BYTE_LENGTH].to_vec();
990
991        assert_eq!(repaired_data_0, expected_data_0);
992        assert!(repairer.is_chunkset_already_repaired(chunkset_id_0).unwrap());
993
994        // Test `ChunksetAlreadyRepaired`
995        assert_eq!(
996            repairer.get_repaired_chunkset(chunkset_id_0).unwrap_err(),
997            DecdsError::ChunksetAlreadyRepaired(chunkset_id_0)
998        );
999
1000        // Test for a partial last chunkset
1001        let chunkset_id_2 = 2;
1002
1003        for chunk in &chunks {
1004            if chunk.get_chunkset_id() == chunkset_id_2 {
1005                let _ = repairer.add_chunk(chunk);
1006
1007                if repairer.is_chunkset_ready_to_repair(chunkset_id_2).unwrap() {
1008                    break;
1009                }
1010            }
1011        }
1012        assert!(repairer.is_chunkset_ready_to_repair(chunkset_id_2).unwrap());
1013
1014        let repaired_data_2 = repairer.get_repaired_chunkset(chunkset_id_2).unwrap();
1015        let expected_data_2 = original_blob_data_copy[ChunkSet::BYTE_LENGTH * 2..].to_vec();
1016        assert_eq!(repaired_data_2, expected_data_2);
1017
1018        // Test invalid chunkset ID
1019        let invalid_chunkset_id = blob_header.get_num_chunksets();
1020        assert_eq!(
1021            repairer.get_repaired_chunkset(invalid_chunkset_id).unwrap_err(),
1022            DecdsError::InvalidChunksetId(invalid_chunkset_id, blob_header.get_num_chunksets())
1023        );
1024    }
1025}