decds-lib 0.2.0

A library for a distributed erasure-coded data storage system
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
use crate::{
    chunk::{self, Chunk},
    consts::DECDS_NUM_ERASURE_CODED_SHARES,
    errors::DecdsError,
    merkle_tree::MerkleTree,
};

/// Represents a fixed set (= 16) of erasure-coded chunks, along with its Merkle root commitment.
/// This structure is used for encoding a fixed size (10MB = 10 * 2^20 bytes) portion of the original
/// blob data into `NUM_ERASURE_CODED_CHUNKS` (= 16) erasure-coded verifiable chunks, each carrying
/// a merkle proof of inclusion in both this chunkset and the blob.
#[derive(Clone, Debug, PartialEq)]
pub(crate) struct ChunkSet {
    // Merkle root over the BLAKE3 digests of all erasure-coded chunks in this set;
    // used by receivers to validate each chunk's inclusion proof.
    commitment: blake3::Hash,
    // The `NUM_ERASURE_CODED_CHUNKS` erasure-coded chunks, each bundled with its
    // Merkle inclusion proof against `commitment`.
    chunks: Vec<chunk::ProofCarryingChunk>,
}

impl ChunkSet {
    /// Number of original (pre-encoding) chunks the raw data is split into.
    pub const NUM_ORIGINAL_CHUNKS: usize = 10;
    /// Total byte length of raw data covered by one chunkset (10 chunks of `Chunk::BYTE_LENGTH` each).
    pub const BYTE_LENGTH: usize = Self::NUM_ORIGINAL_CHUNKS * Chunk::BYTE_LENGTH;
    /// Number of erasure-coded chunks produced per chunkset.
    pub const NUM_ERASURE_CODED_CHUNKS: usize = DECDS_NUM_ERASURE_CODED_SHARES;
    /// Length (in hashes) of a Merkle inclusion proof for one chunk: the tree depth.
    pub const PROOF_SIZE: usize = usize::ilog2(Self::NUM_ERASURE_CODED_CHUNKS) as usize;

    /// Creates a new `ChunkSet` by taking a fixed sized block of data, splits into 10 equal sized chunks,
    /// each of 1MB, RLNC encoding them into 16 erasure-coded chunks, and building a Merkle tree over these chunks.
    ///
    /// # Arguments
    ///
    /// * `chunkset_id` - The unique identifier for this chunkset.
    /// * `data` - The raw data (10MB) to be erasure-coded into chunks for this chunkset.
    ///
    /// # Returns
    ///
    /// Returns a `Result` which is:
    /// - `Ok(ChunkSet)` containing the newly created `ChunkSet` if successful.
    /// - `Err(DecdsError::InvalidChunksetSize)` if the `data` length does not match `ChunkSet::BYTE_LENGTH`.
    pub fn new(chunkset_id: usize, data: Vec<u8>) -> Result<ChunkSet, DecdsError> {
        if data.len() != Self::BYTE_LENGTH {
            return Err(DecdsError::InvalidChunksetSize(data.len()));
        }

        let mut rng = rand::rng();
        // Length was validated above, so encoder construction can only fail on a broken
        // internal invariant — surface that as a panic instead of risking UB.
        let encoder =
            rlnc::full::encoder::Encoder::new(data, Self::NUM_ORIGINAL_CHUNKS).expect("encoder construction must succeed for validated chunkset-sized input");

        let chunks = (0..Self::NUM_ERASURE_CODED_CHUNKS)
            .map(|i| {
                // Chunk IDs are globally unique: each chunkset owns a contiguous range of
                // `NUM_ERASURE_CODED_CHUNKS` IDs.
                let chunk_id = chunkset_id * Self::NUM_ERASURE_CODED_CHUNKS + i;
                let erasure_coded_data = encoder.code(&mut rng);

                chunk::Chunk::new(chunkset_id, chunk_id, erasure_coded_data)
            })
            .collect::<Vec<Chunk>>();

        let merkle_leaves = chunks.iter().map(|chunk| chunk.digest()).collect::<Vec<blake3::Hash>>();
        // Exactly `NUM_ERASURE_CODED_CHUNKS` (= 16, a non-zero power of two) leaves are
        // supplied, so tree construction cannot fail for a well-formed input.
        let merkle_tree = MerkleTree::new(merkle_leaves).expect("merkle tree construction must succeed for a fixed non-empty set of leaves");

        let commitment = merkle_tree.get_root_commitment();

        let proof_carrying_chunks = chunks
            .into_iter()
            .enumerate()
            .map(|(leaf_idx, chunk)| {
                // `leaf_idx` is always in `0..NUM_ERASURE_CODED_CHUNKS`, i.e. a valid leaf index.
                let proof = merkle_tree.generate_proof(leaf_idx).expect("proof generation must succeed for an in-range leaf index");
                chunk::ProofCarryingChunk::new(chunk, proof)
            })
            .collect::<Vec<chunk::ProofCarryingChunk>>();

        Ok(ChunkSet {
            commitment,
            chunks: proof_carrying_chunks,
        })
    }

    /// Returns the Merkle root commitment of this `ChunkSet`.
    pub fn get_root_commitment(&self) -> blake3::Hash {
        self.commitment
    }

    /// Retrieves a specific `ProofCarryingChunk` from the `ChunkSet` by its local chunk ID.
    ///
    /// # Arguments
    ///
    /// * `chunk_id` - The local ID (`>= 0 && < Self::NUM_ERASURE_CODED_CHUNKS`) of the chunk to retrieve within this chunkset.
    ///
    /// # Returns
    ///
    /// Returns a `Result` which is:
    /// - `Ok(&chunk::ProofCarryingChunk)` containing a reference to the chunk if found.
    /// - `Err(DecdsError::InvalidErasureCodedShareId)` if `chunk_id` is out of bounds for this chunkset.
    pub fn get_chunk(&self, chunk_id: usize) -> Result<&chunk::ProofCarryingChunk, DecdsError> {
        self.chunks.get(chunk_id).ok_or(DecdsError::InvalidErasureCodedShareId(chunk_id))
    }
}

/// A structure designed to help incrementally reconstruct the original data of a `ChunkSet`
/// by collecting enough erasure-coded chunks, verifying their integrity, and performing RLNC decoding.
pub struct RepairingChunkSet {
    // ID of the chunkset being repaired; incoming chunks must carry the same ID.
    chunkset_id: usize,
    // Expected Merkle root commitment; `add_chunk` validates each chunk's inclusion
    // proof against this value before feeding it to the decoder.
    commitment: blake3::Hash,
    // Incremental RLNC decoder; accumulates useful coded chunks until the original
    // data can be reconstructed.
    decoder: rlnc::full::decoder::Decoder,
}

impl RepairingChunkSet {
    /// The padded byte length of individual chunks used in RLNC encoding.
    /// It ensures that the total chunkset size is a multiple of `NUM_ORIGINAL_CHUNKS`,
    /// after appending a single byte end-of-data marker.
    const PADDED_CHUNK_BYTE_LEN: usize = (ChunkSet::BYTE_LENGTH + 1).div_ceil(ChunkSet::NUM_ORIGINAL_CHUNKS);

    /// Creates a new `RepairingChunkSet` instance.
    ///
    /// # Arguments
    ///
    /// * `chunkset_id` - The ID of the chunkset being repaired.
    /// * `commitment` - The expected Merkle root commitment of the chunkset, used for validating chunk inclusion in chunkset.
    ///
    /// # Returns
    ///
    /// A new `RepairingChunkSet` instance.
    pub fn new(chunkset_id: usize, commitment: blake3::Hash) -> Self {
        RepairingChunkSet {
            chunkset_id,
            commitment,
            // Both arguments are non-zero compile-time constants, so decoder construction
            // can only fail on a broken invariant — panic with context rather than risk UB.
            decoder: rlnc::full::decoder::Decoder::new(Self::PADDED_CHUNK_BYTE_LEN, ChunkSet::NUM_ORIGINAL_CHUNKS)
                .expect("decoder construction must succeed for non-zero constant parameters"),
        }
    }

    /// Adds a `ProofCarryingChunk` to the `RepairingChunkSet` after validating its Merkle proof.
    /// The chunk's inclusion proof in this chunkset is verified against the `commitment` stored in `RepairingChunkSet`.
    ///
    /// # Arguments
    ///
    /// * `chunk` - The `ProofCarryingChunk` to add.
    ///
    /// # Returns
    ///
    /// Returns a `Result` which is:
    /// - `Ok(())` if the chunk is successfully added and validated.
    /// - `Err(DecdsError::InvalidProofInChunk)` if the chunk's inclusion proof is invalid for this chunkset.
    /// - `Err(DecdsError::InvalidChunkMetadata)` if the chunk's `chunkset_id` does not match this `RepairingChunkSet`.
    /// - `Err(DecdsError::ChunksetReadyToRepair)` if the chunkset is already ready to repair, no more chunks are required. Just call `repair`.
    /// - `Err(DecdsError::ChunkDecodingFailed)` if the underlying RLNC decoding operation fails.
    pub fn add_chunk(&mut self, chunk: &chunk::ProofCarryingChunk) -> Result<(), DecdsError> {
        if chunk.validate_inclusion_in_chunkset(self.commitment) {
            self.add_chunk_unvalidated(chunk)
        } else {
            Err(DecdsError::InvalidProofInChunk(chunk.get_chunkset_id()))
        }
    }

    /// Adds a `ProofCarryingChunk` to the `RepairingChunkSet` without validating its Merkle proof.
    /// This method is intended for use when proof validation has already occurred.
    ///
    /// # Arguments
    ///
    /// * `chunk` - The `ProofCarryingChunk` to add.
    ///
    /// # Returns
    ///
    /// Returns a `Result` which is:
    /// - `Ok(())` if the chunk is successfully added.
    /// - `Err(DecdsError::InvalidChunkMetadata)` if the chunk's `chunkset_id` does not match this `RepairingChunkSet`.
    /// - `Err(DecdsError::ChunksetReadyToRepair)` if the chunkset is ready to repair, no more chunks are required. Just call `repair`.
    /// - `Err(DecdsError::ChunkDecodingFailed)` if the underlying RLNC decoding operation fails.
    pub fn add_chunk_unvalidated(&mut self, chunk: &chunk::ProofCarryingChunk) -> Result<(), DecdsError> {
        if self.chunkset_id != chunk.get_chunkset_id() {
            return Err(DecdsError::InvalidChunkMetadata(chunk.get_chunkset_id()));
        }
        if self.is_ready_to_repair() {
            return Err(DecdsError::ChunksetReadyToRepair(self.chunkset_id));
        }

        self.decoder
            .decode(chunk.get_erasure_coded_data())
            .map_err(|err| DecdsError::ChunkDecodingFailed(chunk.get_chunkset_id(), err.to_string()))
    }

    /// Checks if enough useful erasure-coded chunks have been collected to repair the original data for this chunkset.
    pub fn is_ready_to_repair(&self) -> bool {
        self.decoder.is_already_decoded()
    }

    /// Repairs the original data of the chunkset if enough chunks have been collected.
    /// This consumes the `RepairingChunkSet` as the decoding process is final.
    ///
    /// # Returns
    ///
    /// Returns a `Result` which is:
    /// - `Ok(Vec<u8>)` containing the repaired original data if successful.
    /// - `Err(DecdsError::ChunksetNotYetReadyToRepair)` if not enough chunks have been added yet.
    /// - `Err(DecdsError::ChunksetRepairingFailed)` if an error occurs during the RLNC decoding process.
    pub fn repair(self) -> Result<Vec<u8>, DecdsError> {
        if self.is_ready_to_repair() {
            self.decoder
                .get_decoded_data()
                .map_err(|err| DecdsError::ChunksetRepairingFailed(self.chunkset_id, format!("RLNC Decoding error: {}", err)))
        } else {
            Err(DecdsError::ChunksetNotYetReadyToRepair(self.chunkset_id))
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::{
        DecdsError,
        chunk::ProofCarryingChunk,
        chunkset::{ChunkSet, RepairingChunkSet},
        merkle_tree::tests::flip_a_bit,
    };
    use rand::{Rng, seq::SliceRandom};

    /// Flips one randomly chosen bit in the serialized chunk bytes, used to simulate corruption.
    fn flip_a_single_bit_in_proof_carrying_chunk<R: Rng + ?Sized>(mut chunk_bytes: Vec<u8>, rng: &mut R) -> Vec<u8> {
        if chunk_bytes.is_empty() {
            return chunk_bytes;
        }

        let random_byte_index = rng.random_range(0..chunk_bytes.len());
        let random_bit_index = rng.random_range(0..u8::BITS) as usize;

        chunk_bytes[random_byte_index] = flip_a_bit(chunk_bytes[random_byte_index], random_bit_index);
        chunk_bytes
    }

    #[test]
    fn prop_test_erasure_coding_chunks_and_validating_proofs_work() {
        const NUM_TEST_ITERATIONS: usize = 10;
        let mut rng = rand::rng();

        (0..NUM_TEST_ITERATIONS).for_each(|_| {
            let data = (0..ChunkSet::BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
            let chunkset = ChunkSet::new(0, data).expect("Must be able to build erasure-coded ChunkSet");

            for i in 0..ChunkSet::NUM_ERASURE_CODED_CHUNKS {
                let chunk = chunkset.get_chunk(i).expect("Must be able to lookup chunk by id");
                assert!(chunk.validate_inclusion_in_chunkset(chunkset.get_root_commitment()));

                // A single flipped bit must either break deserialization or invalidate the proof.
                let chunk_bytes = chunk.to_bytes().expect("Must be able to serialize proof-carrying chunk as bytes");
                let bit_flipped_chunk_bytes = flip_a_single_bit_in_proof_carrying_chunk(chunk_bytes, &mut rng);

                match ProofCarryingChunk::from_bytes(&bit_flipped_chunk_bytes) {
                    Ok((bit_flipped_chunk, _)) => assert!(!bit_flipped_chunk.validate_inclusion_in_chunkset(chunkset.get_root_commitment())),
                    Err(e) => assert!(e.to_string().starts_with("failed to deserialize proof carrying chunk: ")),
                }
            }
        });
    }

    #[test]
    fn prop_test_repairing_erasure_coded_chunks_work() {
        const NUM_TEST_ITERATIONS: usize = 10;
        let mut rng = rand::rng();

        (0..NUM_TEST_ITERATIONS).for_each(|_| {
            let data = (0..ChunkSet::BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
            let data_copy = data.clone();

            let chunkset = ChunkSet::new(0, data).expect("Must be able to build erasure-coded ChunkSet");
            let mut repairing_chunkset = RepairingChunkSet::new(0, chunkset.get_root_commitment());

            // Feed chunks in random order to exercise order-independence of RLNC decoding.
            let mut chunks = (0..ChunkSet::NUM_ERASURE_CODED_CHUNKS)
                .map(|i| chunkset.get_chunk(i).expect("Must be able to lookup chunk by id"))
                .collect::<Vec<&ProofCarryingChunk>>();
            chunks.shuffle(&mut rng);

            let mut chunk_idx = 0;
            while !repairing_chunkset.is_ready_to_repair() {
                let _ = repairing_chunkset.add_chunk(chunks[chunk_idx]);
                chunk_idx += 1;
            }

            let repaired_data = repairing_chunkset.repair().expect("Data must be reconstructed by this point!");
            assert_eq!(data_copy, repaired_data);
        });
    }

    #[test]
    fn test_chunkset_new_invalid_size() {
        let data_too_small = vec![0u8; ChunkSet::BYTE_LENGTH - 1];
        let data_too_large = vec![0u8; ChunkSet::BYTE_LENGTH + 1];

        assert_eq!(
            ChunkSet::new(0, data_too_small),
            Err(DecdsError::InvalidChunksetSize(ChunkSet::BYTE_LENGTH - 1))
        );
        assert_eq!(
            ChunkSet::new(0, data_too_large),
            Err(DecdsError::InvalidChunksetSize(ChunkSet::BYTE_LENGTH + 1))
        );
    }

    #[test]
    fn test_chunkset_get_chunk_out_of_bounds() {
        let mut rng = rand::rng();

        let data = (0..ChunkSet::BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
        let chunkset = ChunkSet::new(0, data).expect("Must be able to build erasure-coded ChunkSet");

        assert_eq!(
            chunkset.get_chunk(ChunkSet::NUM_ERASURE_CODED_CHUNKS),
            Err(DecdsError::InvalidErasureCodedShareId(ChunkSet::NUM_ERASURE_CODED_CHUNKS))
        );
        assert_eq!(
            chunkset.get_chunk(ChunkSet::NUM_ERASURE_CODED_CHUNKS + 100),
            Err(DecdsError::InvalidErasureCodedShareId(ChunkSet::NUM_ERASURE_CODED_CHUNKS + 100))
        );
    }

    #[test]
    fn test_repairing_chunkset_new() {
        let mut rng = rand::rng();

        let data = (0..ChunkSet::BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
        let chunkset = ChunkSet::new(0, data).expect("Must be able to build erasure-coded ChunkSet");
        let commitment = chunkset.get_root_commitment();
        let chunkset_id = 0;

        let repairing_chunkset = RepairingChunkSet::new(chunkset_id, commitment);

        assert_eq!(repairing_chunkset.chunkset_id, chunkset_id);
        assert_eq!(repairing_chunkset.commitment, commitment);
        assert!(!repairing_chunkset.is_ready_to_repair());
    }

    #[test]
    fn test_repairing_chunkset_add_chunk_invalid_proof_in_chunk() {
        let mut rng = rand::rng();

        let data = (0..ChunkSet::BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
        let chunkset = ChunkSet::new(0, data).expect("Must be able to build erasure-coded ChunkSet");

        // Create a repairing chunkset with a *different* commitment
        let tampered_commitment = blake3::hash(b"tampered_commitment");
        let mut repairing_chunkset = RepairingChunkSet::new(0, tampered_commitment);

        // Get a valid chunk from the original chunkset
        let valid_chunk = chunkset.get_chunk(0).unwrap();

        // Adding this valid chunk to a repairing_chunkset with a tampered commitment should fail
        assert_eq!(
            repairing_chunkset.add_chunk(valid_chunk).unwrap_err(),
            DecdsError::InvalidProofInChunk(valid_chunk.get_chunkset_id())
        );
    }

    #[test]
    fn test_repairing_chunkset_add_chunk_unvalidated_invalid_chunk_metadata() {
        let mut rng = rand::rng();

        let data = (0..ChunkSet::BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
        let chunkset = ChunkSet::new(0, data).expect("Must be able to build erasure-coded ChunkSet");

        let chunk_from_chunkset_0 = chunkset.get_chunk(0).unwrap();

        // Create a repairing chunkset for a different ID (e.g., ID 1 instead of 0)
        let mut repairing_chunkset = RepairingChunkSet::new(1, chunkset.get_root_commitment());

        // Attempt to add a chunk that belongs to chunkset_id 0 to a repairing_chunkset for chunkset_id 1
        assert_eq!(
            repairing_chunkset.add_chunk_unvalidated(chunk_from_chunkset_0).unwrap_err(),
            DecdsError::InvalidChunkMetadata(chunk_from_chunkset_0.get_chunkset_id())
        );
    }

    #[test]
    fn test_repairing_chunkset_repair_when_not_ready() {
        let mut rng = rand::rng();

        let data = (0..ChunkSet::BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
        let chunkset = ChunkSet::new(0, data).expect("Must be able to build erasure-coded ChunkSet");
        let mut repairing_chunkset = RepairingChunkSet::new(0, chunkset.get_root_commitment());

        // Add fewer than NUM_ORIGINAL_CHUNKS chunks
        let mut chunk_idx = 0;
        let mut useful_count = 0;

        while chunk_idx < ChunkSet::NUM_ERASURE_CODED_CHUNKS {
            let chunk = chunkset.get_chunk(chunk_idx).expect("Must be able to lookup chunk by id");
            // `.is_ok()` instead of `if let Ok(_)` — Clippy's redundant_pattern_matching.
            if repairing_chunkset.add_chunk(chunk).is_ok() {
                useful_count += 1;
            }

            chunk_idx += 1;
            if useful_count == ChunkSet::NUM_ORIGINAL_CHUNKS - 1 {
                break;
            }
        }

        assert!(!repairing_chunkset.is_ready_to_repair());
        assert_eq!(repairing_chunkset.repair(), Err(DecdsError::ChunksetNotYetReadyToRepair(0)));
    }

    #[test]
    fn test_repairing_chunkset_add_chunk_after_ready_to_repair() {
        let mut rng = rand::rng();

        let data = (0..ChunkSet::BYTE_LENGTH).map(|_| rng.random()).collect::<Vec<u8>>();
        let chunkset = ChunkSet::new(0, data.clone()).expect("Must be able to build erasure-coded ChunkSet");
        let mut repairing_chunkset = RepairingChunkSet::new(0, chunkset.get_root_commitment());

        let mut chunk_idx = 0;
        while !repairing_chunkset.is_ready_to_repair() && chunk_idx < ChunkSet::NUM_ERASURE_CODED_CHUNKS {
            let chunk = chunkset.get_chunk(chunk_idx).expect("Must be able to lookup chunk by id");
            let _ = repairing_chunkset.add_chunk(chunk);

            chunk_idx += 1;
        }

        // Every remaining chunk must now be rejected: the decoder already has full rank.
        while chunk_idx < ChunkSet::NUM_ERASURE_CODED_CHUNKS {
            let chunk = chunkset.get_chunk(chunk_idx).expect("Must be able to lookup chunk by id");
            assert_eq!(repairing_chunkset.add_chunk(chunk), Err(DecdsError::ChunksetReadyToRepair(0)));

            chunk_idx += 1;
        }

        let repaired_chunkset = repairing_chunkset.repair().expect("Must be able to repair chunkset");
        assert_eq!(repaired_chunkset, data);
    }
}