// File: decds_lib/lib.rs

//! # DECDS-lib: Decentralized Erasure-Coded Data Storage Library
//!
//! `decds-lib` provides functionalities for disseminating, verifying and reconstructing arbitrary size data blobs
//! using Random Linear Network Coding (RLNC) based erasure-coding and binary Merkle trees for data integrity verification.
//!
//! This library is designed to enable decentralized data storage solutions by breaking down
//! large data into smaller, verifiable, and reconstructible chunks.
//!
//! ## How to Use
//!
//! To build a blob, first initialize the `BlobBuilder` struct, and incrementally keep putting data into it
//! by calling the `BlobBuilder::update` function arbitrarily many times, possibly returning some `ProofCarryingChunk`s.
//! Once all data has been absorbed by the `BlobBuilder` instance, it can be finalized, which returns the `BlobHeader`,
//! holding all necessary metadata for chunk validation and reconstruction.
//!
//! Note, the `ProofCarryingChunk`s which are generated during blob building only carry proof of
//! inclusion in their respective `ChunkSet`s. Once the full blob is built, the proof carried by each chunk
//! needs to be extended for validating blob inclusion, during reconstruction. The following example demonstrates that.
//!
//! To reconstruct the original blob data, you need to collect *enough* `ProofCarryingChunk`s for all chunksets.
//! You initialize a `RepairingBlob` with the `BlobHeader`, which is the source of truth for validating `ProofCarryingChunk`s,
//! and then add chunks to it. Once *enough* chunks for a specific chunkset are collected, you can retrieve
//! its repaired data. Each chunkset requires at least 10 valid chunks to be recovered; hence, to recover the full blob,
//! you need to collect at least 10 valid chunks per chunkset.
//!
//! For more details see the README in the `decds` repository @ <https://github.com/itzmeanjan/decds>.
//!
//! ```rust
//! use decds_lib::{BlobBuilder, BlobHeader, ProofCarryingChunk, RepairingBlob, DECDS_NUM_ERASURE_CODED_SHARES, DecdsError, MerkleTree};
//! use rand::{Rng, seq::SliceRandom};
//! use rayon::prelude::*;
//! use std::collections::HashMap;
//!
//! let mut rng = rand::rng();
//!
//! const ONE_MB: usize = 1usize << 20;
//! let original_data: Vec<u8> = (0..42 * ONE_MB).map(|_| rng.random()).collect(); // 42MB of random data
//! let original_data_copy = original_data.clone();
//!
//!
//! // Build the blob and collect all generated chunks.
//! // Remember these proof-carrying chunks don't carry a proof-of-inclusion
//! // in the blob, rather they carry only proof-of-inclusion in corresponding chunkset
//! // that they belong to.
//! let (mut chunks, blob_header) = {
//!     let mut all_chunks = Vec::new();
//!
//!     let mut blob_builder = BlobBuilder::init();
//!     if let Some(chunks) = blob_builder.update(&original_data) {
//!         all_chunks.extend(chunks);
//!     }
//!
//!     let (final_chunks, blob_header) = blob_builder.finalize().expect("Failed to finalize blob");
//!     all_chunks.extend(final_chunks);
//!
//!     (all_chunks, blob_header)
//! };
//!
//! // We have to collect root commitments of all chunksets, as we will build a Merkle tree on them,
//! // considering they are the leaf nodes of the Merkle tree.
//! let chunkset_root_commitments = (0..blob_header.get_num_chunksets())
//!     .map(|chunkset_id| unsafe { blob_header.get_chunkset_commitment(chunkset_id).unwrap_unchecked() })
//!     .collect();
//!
//! // This is the Merkle tree built on chunkset root commitments, giving us a blob level root commitment.
//! let merkle_tree = MerkleTree::new(chunkset_root_commitments).expect("Must be able to build Merkle tree");
//! // Let's generate Merkle proof-of-inclusion for each chunkset. We will extend each chunk's proof to include
//! // corresponding chunkset's inclusion proof in the blob root commitment.
//! let merkle_proofs = (0..blob_header.get_num_chunksets())
//!     .into_par_iter()
//!     .map(|chunkset_id| unsafe { (chunkset_id, merkle_tree.generate_proof(chunkset_id).unwrap_unchecked()) })
//!     .collect::<HashMap<usize, Vec<blake3::Hash>>>();
//! // Extend each proof-carrying chunk to include proof-of-inclusion in the blob.
//! // And this completes blob building phase.
//! chunks.par_iter_mut().for_each(|chunk| {
//!     chunk.append_proof_to_blob_root(&merkle_proofs[&chunk.get_chunkset_id()]);
//! });
//!
//! // Simulate data loss and reordering by shuffling and taking only a subset (but enough for repair).
//! // In a real scenario, you'd receive chunks from various sources.
//! chunks.shuffle(&mut rng);
//!
//! // Let's try to repair the full blob.
//! let mut repairer = RepairingBlob::new(blob_header.clone());
//! let num_chunksets = blob_header.get_num_chunksets();
//!
//! // Add chunks to the repairer until all chunksets are repaired.
//! let mut chunk_idx = 0;
//! let mut repaired_chunksets_count = 0;
//!
//! while repaired_chunksets_count < num_chunksets {
//!     if chunk_idx >= chunks.len() {
//!         println!("Not enough chunks to repair the entire blob!");
//!         break;
//!     }
//!
//!     let chunk = &chunks[chunk_idx];
//!     let chunkset_id = chunk.get_chunkset_id();
//!
//!     // Try to add the chunk, handling various repair states.
//!     match repairer.add_chunk(chunk) {
//!         Ok(_) => {
//!             if repairer.is_chunkset_ready_to_repair(chunkset_id).expect("Failed to check chunkset repair status") {
//!                 repaired_chunksets_count += 1;
//!                 println!("Repaired chunkset {}!", chunkset_id);
//!             }
//!         },
//!         Err(e) => {
//!             // Handle cases where the chunk is not useful or chunkset is already repaired
//!             match e {
//!                 DecdsError::ChunksetReadyToRepair(_) | DecdsError::ChunksetAlreadyRepaired(_) | DecdsError::InvalidProofInChunk(_) => {
//!                     // Chunk is redundant, already repaired, or invalid; simply skip it.
//!                     // In case we get invalid proof-of-inclusion, it means some intermediary must have tampered with it.
//!                 },
//!                 _ => {
//!                     eprintln!("Error adding chunk: {:?}", e);
//!                     std::process::exit(1);
//!                 },
//!             }
//!         },
//!     }
//!
//!     chunk_idx += 1;
//! }
//!
//! // As we didn't tamper with any proof-carrying chunks, it must be possible for us to recover the
//! // whole blob. Let's recover and compare.
//! let final_repaired_data = (0..blob_header.get_num_chunksets()).flat_map(|chunkset_id| {
//!     repairer.get_repaired_chunkset(chunkset_id).expect("Failed to get repaired chunkset")
//! }).collect::<Vec<u8>>();
//!
//! assert_eq!(original_data_copy, final_repaired_data);
//! println!("Blob successfully repaired and verified!");
//! ```

136mod blob;
137mod chunk;
138mod chunkset;
139mod consts;
140mod errors;
141mod merkle_tree;
142
143#[cfg(test)]
144mod tests;
145
146pub use blob::{BlobBuilder, BlobHeader, RepairingBlob};
147pub use chunk::ProofCarryingChunk;
148pub use chunkset::RepairingChunkSet;
149pub use consts::DECDS_NUM_ERASURE_CODED_SHARES;
150pub use errors::DecdsError;
151pub use merkle_tree::MerkleTree;