Skip to main content

cdx_core/provenance/
block_index.rs

//! Block index for content integrity verification.
//!
//! The block index (`content/block-index.json`) stores individual block hashes
//! for Merkle proof generation and selective disclosure.
5
6use serde::{Deserialize, Serialize};
7
8use crate::content::Content;
9use crate::{DocumentId, HashAlgorithm, Hasher};
10
11/// Index of content blocks with their hashes.
12///
13/// This structure is stored at `content/block-index.json` and enables:
14/// - Merkle proof generation for individual blocks
15/// - Efficient verification of specific content sections
16/// - Selective disclosure without revealing full content
17#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
18#[serde(rename_all = "camelCase")]
19pub struct BlockIndex {
20    /// Version of the block index format.
21    pub version: String,
22
23    /// Hash algorithm used for all hashes.
24    pub algorithm: HashAlgorithm,
25
26    /// Merkle root hash of all blocks.
27    pub root: DocumentId,
28
29    /// Individual block entries with their hashes.
30    pub blocks: Vec<BlockHashEntry>,
31}
32
33impl BlockIndex {
34    /// Current version of the block index format.
35    pub const VERSION: &'static str = "0.1";
36
37    /// Create a block index from document content.
38    ///
39    /// This computes hashes for each block and builds a Merkle tree
40    /// to derive the root hash.
41    ///
42    /// # Errors
43    ///
44    /// Returns an error if the content has no blocks.
45    pub fn from_content(content: &Content, algorithm: HashAlgorithm) -> crate::Result<Self> {
46        let blocks = &content.blocks;
47        if blocks.is_empty() {
48            return Err(crate::Error::InvalidManifest {
49                reason: "Cannot create block index from empty content".to_string(),
50            });
51        }
52
53        // Compute hash for each block
54        let mut entries = Vec::with_capacity(blocks.len());
55        let mut hashes = Vec::with_capacity(blocks.len());
56
57        for (index, block) in blocks.iter().enumerate() {
58            // Serialize block to canonical JSON for hashing
59            let block_json = serde_json::to_vec(block)?;
60            let canonical =
61                json_canon::to_string(&serde_json::from_slice::<serde_json::Value>(&block_json)?)?;
62            let hash = Hasher::hash(algorithm, canonical.as_bytes());
63
64            entries.push(BlockHashEntry {
65                id: block
66                    .id()
67                    .map_or_else(|| format!("block-{index}"), String::from),
68                hash: hash.clone(),
69                index,
70            });
71            hashes.push(hash);
72        }
73
74        // Build Merkle tree to get root
75        let tree = super::MerkleTree::from_hashes(&hashes, algorithm)?;
76
77        Ok(Self {
78            version: Self::VERSION.to_string(),
79            algorithm,
80            root: tree.root_hash().clone(),
81            blocks: entries,
82        })
83    }
84
85    /// Get the Merkle root hash.
86    #[must_use]
87    pub fn merkle_root(&self) -> &DocumentId {
88        &self.root
89    }
90
91    /// Get the number of blocks.
92    #[must_use]
93    pub fn block_count(&self) -> usize {
94        self.blocks.len()
95    }
96
97    /// Find a block entry by ID.
98    #[must_use]
99    pub fn find_block(&self, id: &str) -> Option<&BlockHashEntry> {
100        self.blocks.iter().find(|b| b.id == id)
101    }
102
103    /// Find a block entry by index.
104    #[must_use]
105    pub fn get_block(&self, index: usize) -> Option<&BlockHashEntry> {
106        self.blocks.get(index)
107    }
108
109    /// Get all block hashes in order.
110    #[must_use]
111    pub fn hashes(&self) -> Vec<&DocumentId> {
112        self.blocks.iter().map(|b| &b.hash).collect()
113    }
114
115    /// Serialize to JSON.
116    ///
117    /// # Errors
118    ///
119    /// Returns an error if serialization fails.
120    pub fn to_json(&self) -> crate::Result<String> {
121        serde_json::to_string_pretty(self).map_err(Into::into)
122    }
123
124    /// Deserialize from JSON.
125    ///
126    /// # Errors
127    ///
128    /// Returns an error if deserialization fails.
129    pub fn from_json(json: &str) -> crate::Result<Self> {
130        serde_json::from_str(json).map_err(Into::into)
131    }
132}
133
134/// Entry for a single block in the index.
135#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
136#[serde(rename_all = "camelCase")]
137pub struct BlockHashEntry {
138    /// Block identifier.
139    pub id: String,
140
141    /// Hash of the canonicalized block JSON.
142    pub hash: DocumentId,
143
144    /// Position in the block list (0-indexed).
145    pub index: usize,
146}
147
#[cfg(test)]
mod tests {
    use super::*;
    use crate::content::{Block, Text};

    /// Three-block fixture: one heading followed by two paragraphs.
    fn sample_content() -> Content {
        let blocks = vec![
            Block::heading(1, vec![Text::plain("Title")]),
            Block::paragraph(vec![Text::plain("First paragraph.")]),
            Block::paragraph(vec![Text::plain("Second paragraph.")]),
        ];
        Content::new(blocks)
    }

    /// Content consisting of a single paragraph holding `text`.
    fn single_paragraph(text: &str) -> Content {
        Content::new(vec![Block::paragraph(vec![Text::plain(text)])])
    }

    /// Build a SHA-256 index for `content`, panicking on failure.
    fn sha256_index(content: &Content) -> BlockIndex {
        BlockIndex::from_content(content, HashAlgorithm::Sha256).unwrap()
    }

    #[test]
    fn test_block_index_creation() {
        let index = sha256_index(&sample_content());

        assert_eq!(index.version, "0.1");
        assert_eq!(index.algorithm, HashAlgorithm::Sha256);
        assert_eq!(index.blocks.len(), 3);
        assert!(!index.root.is_pending());
    }

    #[test]
    fn test_block_index_deterministic() {
        let content = sample_content();
        let first = sha256_index(&content);
        let second = sha256_index(&content);

        assert_eq!(first.root, second.root);
        assert_eq!(first.blocks, second.blocks);
    }

    #[test]
    fn test_block_index_find_block() {
        let index = sha256_index(&sample_content());

        // Positional lookup returns the matching entry with a real hash.
        let entry = index.get_block(1).unwrap();
        assert_eq!(entry.index, 1);
        assert!(!entry.hash.is_pending());
    }

    #[test]
    fn test_block_index_empty_content_fails() {
        let empty = Content::new(vec![]);
        assert!(BlockIndex::from_content(&empty, HashAlgorithm::Sha256).is_err());
    }

    #[test]
    fn test_block_index_serialization() {
        let index = sha256_index(&sample_content());

        let json = index.to_json().unwrap();
        assert!(json.contains("\"version\": \"0.1\""));
        assert!(json.contains("\"algorithm\": \"sha256\""));

        let restored = BlockIndex::from_json(&json).unwrap();
        assert_eq!(restored.root, index.root);
        assert_eq!(restored.blocks.len(), index.blocks.len());
    }

    #[test]
    fn test_block_index_content_changes_root() {
        let hello = sha256_index(&single_paragraph("Hello"));
        let world = sha256_index(&single_paragraph("World"));

        assert_ne!(hello.root, world.root);
    }

    #[test]
    fn test_block_index_version() {
        assert_eq!(BlockIndex::VERSION, "0.1");
    }

    #[test]
    fn test_block_index_merkle_root() {
        let index = sha256_index(&sample_content());

        let root = index.merkle_root();
        assert!(!root.is_pending());
        assert_eq!(root, &index.root);
    }

    #[test]
    fn test_block_index_block_count() {
        let index = sha256_index(&sample_content());

        assert_eq!(index.block_count(), 3);
        assert_eq!(index.block_count(), index.blocks.len());
    }

    #[test]
    fn test_block_index_find_block_not_found() {
        let index = sha256_index(&sample_content());

        assert!(index.find_block("nonexistent").is_none());
    }

    #[test]
    fn test_block_index_get_block_out_of_bounds() {
        let index = sha256_index(&sample_content());

        assert!(index.get_block(100).is_none());
    }

    #[test]
    fn test_block_index_hashes() {
        let index = sha256_index(&sample_content());

        let hashes = index.hashes();
        assert_eq!(hashes.len(), 3);
        for hash in hashes {
            assert!(!hash.is_pending());
        }
    }

    #[test]
    fn test_block_index_different_algorithms() {
        let content = sample_content();
        let with_sha256 = sha256_index(&content);
        let with_sha384 = BlockIndex::from_content(&content, HashAlgorithm::Sha384).unwrap();

        assert_eq!(with_sha256.algorithm, HashAlgorithm::Sha256);
        assert_eq!(with_sha384.algorithm, HashAlgorithm::Sha384);
        assert_ne!(with_sha256.root, with_sha384.root);
    }

    #[test]
    fn test_block_hash_entry_fields() {
        let index = sha256_index(&sample_content());

        let first = index.get_block(0).unwrap();
        assert_eq!(first.index, 0);
        assert!(!first.id.is_empty());
        assert!(!first.hash.is_pending());
    }

    #[test]
    fn test_block_index_single_block() {
        let index = sha256_index(&single_paragraph("Only one"));

        assert_eq!(index.block_count(), 1);
        assert!(!index.root.is_pending());
    }

    #[test]
    fn test_block_index_from_json_invalid() {
        let malformed = "{ invalid }";
        assert!(BlockIndex::from_json(malformed).is_err());
    }
}