Skip to main content

cdx_core/provenance/
merkle.rs

1//! Merkle tree implementation for content integrity.
2
3use serde::{Deserialize, Serialize};
4
5use crate::{DocumentId, HashAlgorithm, Hasher, Result};
6
7/// A node in a Merkle tree.
8#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
9#[serde(rename_all = "camelCase")]
10pub struct MerkleNode {
11    /// Hash of this node.
12    pub hash: DocumentId,
13
14    /// Left child hash (None for leaf nodes).
15    #[serde(default, skip_serializing_if = "Option::is_none")]
16    pub left: Option<Box<MerkleNode>>,
17
18    /// Right child hash (None for leaf nodes).
19    #[serde(default, skip_serializing_if = "Option::is_none")]
20    pub right: Option<Box<MerkleNode>>,
21}
22
23impl MerkleNode {
24    /// Create a leaf node from data.
25    #[must_use]
26    pub fn leaf(hash: DocumentId) -> Self {
27        Self {
28            hash,
29            left: None,
30            right: None,
31        }
32    }
33
34    /// Create a branch node from two children.
35    #[must_use]
36    pub fn branch(left: MerkleNode, right: MerkleNode, algorithm: HashAlgorithm) -> Self {
37        // Combine child hashes to compute parent hash
38        let combined = format!("{}{}", left.hash.hex_digest(), right.hash.hex_digest());
39        let hash = Hasher::hash(algorithm, combined.as_bytes());
40
41        Self {
42            hash,
43            left: Some(Box::new(left)),
44            right: Some(Box::new(right)),
45        }
46    }
47
48    /// Check if this is a leaf node.
49    #[must_use]
50    pub fn is_leaf(&self) -> bool {
51        self.left.is_none() && self.right.is_none()
52    }
53}
54
55/// A Merkle tree for content blocks.
56///
57/// Merkle trees enable efficient verification of content integrity
58/// and support selective disclosure proofs.
59#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
60#[serde(rename_all = "camelCase")]
61pub struct MerkleTree {
62    /// Root node of the tree.
63    root: MerkleNode,
64
65    /// Hash algorithm used.
66    algorithm: HashAlgorithm,
67
68    /// Number of leaf nodes.
69    leaf_count: usize,
70}
71
72impl MerkleTree {
73    /// Build a Merkle tree from a list of data items.
74    ///
75    /// # Errors
76    ///
77    /// Returns an error if the items list is empty.
78    ///
79    /// # Panics
80    ///
81    /// This function will not panic if the items list is non-empty.
82    pub fn from_items<T: AsRef<[u8]>>(items: &[T], algorithm: HashAlgorithm) -> Result<Self> {
83        if items.is_empty() {
84            return Err(crate::Error::InvalidManifest {
85                reason: "Cannot build Merkle tree from empty items".to_string(),
86            });
87        }
88
89        let leaf_count = items.len();
90
91        // Create leaf nodes
92        let mut nodes: Vec<MerkleNode> = items
93            .iter()
94            .map(|item| MerkleNode::leaf(Hasher::hash(algorithm, item.as_ref())))
95            .collect();
96
97        // Build tree bottom-up
98        while nodes.len() > 1 {
99            let mut next_level = Vec::with_capacity(nodes.len().div_ceil(2));
100            let mut iter = nodes.into_iter();
101
102            while let Some(left) = iter.next() {
103                let right = iter.next().unwrap_or_else(|| left.clone());
104                next_level.push(MerkleNode::branch(left, right, algorithm));
105            }
106
107            nodes = next_level;
108        }
109
110        Ok(Self {
111            root: nodes.into_iter().next().expect("nodes should not be empty"),
112            algorithm,
113            leaf_count,
114        })
115    }
116
117    /// Build a Merkle tree from pre-computed hashes.
118    ///
119    /// # Errors
120    ///
121    /// Returns an error if the hashes list is empty.
122    ///
123    /// # Panics
124    ///
125    /// This function will not panic if the hashes list is non-empty.
126    pub fn from_hashes(hashes: &[DocumentId], algorithm: HashAlgorithm) -> Result<Self> {
127        if hashes.is_empty() {
128            return Err(crate::Error::InvalidManifest {
129                reason: "Cannot build Merkle tree from empty hashes".to_string(),
130            });
131        }
132
133        let leaf_count = hashes.len();
134
135        // Create leaf nodes (clone needed since hashes is borrowed)
136        let mut nodes: Vec<MerkleNode> =
137            hashes.iter().map(|h| MerkleNode::leaf(h.clone())).collect();
138
139        // Build tree bottom-up
140        while nodes.len() > 1 {
141            let mut next_level = Vec::with_capacity(nodes.len().div_ceil(2));
142            let mut iter = nodes.into_iter();
143
144            while let Some(left) = iter.next() {
145                let right = iter.next().unwrap_or_else(|| left.clone());
146                next_level.push(MerkleNode::branch(left, right, algorithm));
147            }
148
149            nodes = next_level;
150        }
151
152        Ok(Self {
153            root: nodes.into_iter().next().expect("nodes should not be empty"),
154            algorithm,
155            leaf_count,
156        })
157    }
158
159    /// Get the root hash of the tree.
160    #[must_use]
161    pub fn root_hash(&self) -> &DocumentId {
162        &self.root.hash
163    }
164
165    /// Get the root node.
166    #[must_use]
167    pub fn root(&self) -> &MerkleNode {
168        &self.root
169    }
170
171    /// Get the hash algorithm used.
172    #[must_use]
173    pub fn algorithm(&self) -> HashAlgorithm {
174        self.algorithm
175    }
176
177    /// Get the number of leaf nodes.
178    #[must_use]
179    pub fn leaf_count(&self) -> usize {
180        self.leaf_count
181    }
182
183    /// Generate a proof for a leaf at the given index.
184    ///
185    /// # Errors
186    ///
187    /// Returns an error if the index is out of bounds.
188    pub fn prove(&self, index: usize) -> Result<super::BlockProof> {
189        if index >= self.leaf_count {
190            return Err(crate::Error::InvalidManifest {
191                reason: format!(
192                    "Index {} out of bounds for tree with {} leaves",
193                    index, self.leaf_count
194                ),
195            });
196        }
197
198        let mut path = Vec::new();
199
200        // Collect sibling hashes along the path to root
201        collect_proof_path(&self.root, index, 0, self.leaf_count, &mut path);
202
203        Ok(super::BlockProof {
204            index,
205            path,
206            root_hash: self.root.hash.clone(),
207            algorithm: self.algorithm,
208        })
209    }
210}
211
212fn collect_proof_path(
213    node: &MerkleNode,
214    target_index: usize,
215    current_start: usize,
216    level_size: usize,
217    path: &mut Vec<(DocumentId, bool)>,
218) {
219    if node.is_leaf() {
220        return;
221    }
222
223    let mid = current_start + level_size / 2;
224    let left = node
225        .left
226        .as_ref()
227        .expect("branch node should have left child");
228    let right = node
229        .right
230        .as_ref()
231        .expect("branch node should have right child");
232
233    if target_index < mid {
234        // Target is in left subtree, recurse first then add sibling
235        collect_proof_path(left, target_index, current_start, level_size / 2, path);
236        // Add right sibling to path (sibling is on right)
237        path.push((right.hash.clone(), true));
238    } else {
239        // Target is in right subtree, recurse first then add sibling
240        collect_proof_path(right, target_index, mid, level_size / 2, path);
241        // Add left sibling to path (sibling is on left)
242        path.push((left.hash.clone(), false));
243    }
244}
245
246#[cfg(test)]
247mod tests {
248    use super::*;
249
250    #[test]
251    fn test_merkle_tree_from_items() {
252        let items = vec!["item1", "item2", "item3", "item4"];
253        let tree = MerkleTree::from_items(&items, HashAlgorithm::Sha256).unwrap();
254
255        assert_eq!(tree.leaf_count(), 4);
256        assert!(!tree.root_hash().is_pending());
257    }
258
259    #[test]
260    fn test_merkle_tree_odd_count() {
261        let items = vec!["item1", "item2", "item3"];
262        let tree = MerkleTree::from_items(&items, HashAlgorithm::Sha256).unwrap();
263
264        assert_eq!(tree.leaf_count(), 3);
265    }
266
267    #[test]
268    fn test_merkle_tree_single_item() {
269        let items = vec!["single"];
270        let tree = MerkleTree::from_items(&items, HashAlgorithm::Sha256).unwrap();
271
272        assert_eq!(tree.leaf_count(), 1);
273        // Root should be hash of the single item (doubled for branch)
274    }
275
276    #[test]
277    fn test_merkle_tree_empty_fails() {
278        let items: Vec<&str> = vec![];
279        let result = MerkleTree::from_items(&items, HashAlgorithm::Sha256);
280        assert!(result.is_err());
281    }
282
283    #[test]
284    fn test_merkle_tree_deterministic() {
285        let items = vec!["a", "b", "c", "d"];
286        let tree1 = MerkleTree::from_items(&items, HashAlgorithm::Sha256).unwrap();
287        let tree2 = MerkleTree::from_items(&items, HashAlgorithm::Sha256).unwrap();
288
289        assert_eq!(tree1.root_hash(), tree2.root_hash());
290    }
291
292    #[test]
293    fn test_merkle_tree_changes_with_content() {
294        let items1 = vec!["a", "b", "c", "d"];
295        let items2 = vec!["a", "b", "c", "e"];
296
297        let tree1 = MerkleTree::from_items(&items1, HashAlgorithm::Sha256).unwrap();
298        let tree2 = MerkleTree::from_items(&items2, HashAlgorithm::Sha256).unwrap();
299
300        assert_ne!(tree1.root_hash(), tree2.root_hash());
301    }
302
303    #[test]
304    fn test_generate_proof() {
305        let items = vec!["item0", "item1", "item2", "item3"];
306        let tree = MerkleTree::from_items(&items, HashAlgorithm::Sha256).unwrap();
307
308        let proof = tree.prove(2).unwrap();
309        assert_eq!(proof.index, 2);
310        assert!(!proof.path.is_empty());
311    }
312
313    #[test]
314    fn test_proof_out_of_bounds() {
315        let items = vec!["a", "b"];
316        let tree = MerkleTree::from_items(&items, HashAlgorithm::Sha256).unwrap();
317
318        let result = tree.prove(5);
319        assert!(result.is_err());
320    }
321
322    #[test]
323    fn test_merkle_node_leaf() {
324        let hash = Hasher::hash(HashAlgorithm::Sha256, b"test");
325        let node = MerkleNode::leaf(hash.clone());
326
327        assert!(node.is_leaf());
328        assert_eq!(node.hash, hash);
329        assert!(node.left.is_none());
330        assert!(node.right.is_none());
331    }
332
333    #[test]
334    fn test_merkle_node_branch() {
335        let left = MerkleNode::leaf(Hasher::hash(HashAlgorithm::Sha256, b"left"));
336        let right = MerkleNode::leaf(Hasher::hash(HashAlgorithm::Sha256, b"right"));
337        let branch = MerkleNode::branch(left.clone(), right.clone(), HashAlgorithm::Sha256);
338
339        assert!(!branch.is_leaf());
340        assert!(branch.left.is_some());
341        assert!(branch.right.is_some());
342        // Branch hash should differ from children
343        assert_ne!(branch.hash, left.hash);
344        assert_ne!(branch.hash, right.hash);
345    }
346
347    #[test]
348    fn test_merkle_tree_from_hashes() {
349        let hashes = vec![
350            Hasher::hash(HashAlgorithm::Sha256, b"a"),
351            Hasher::hash(HashAlgorithm::Sha256, b"b"),
352            Hasher::hash(HashAlgorithm::Sha256, b"c"),
353            Hasher::hash(HashAlgorithm::Sha256, b"d"),
354        ];
355        let tree = MerkleTree::from_hashes(&hashes, HashAlgorithm::Sha256).unwrap();
356
357        assert_eq!(tree.leaf_count(), 4);
358        assert!(!tree.root_hash().is_pending());
359    }
360
361    #[test]
362    fn test_merkle_tree_from_hashes_empty_fails() {
363        let hashes: Vec<DocumentId> = vec![];
364        let result = MerkleTree::from_hashes(&hashes, HashAlgorithm::Sha256);
365        assert!(result.is_err());
366    }
367
368    #[test]
369    fn test_merkle_tree_serialization_roundtrip() {
370        let items = vec!["a", "b", "c", "d"];
371        let tree = MerkleTree::from_items(&items, HashAlgorithm::Sha256).unwrap();
372
373        let json = serde_json::to_string(&tree).unwrap();
374        let deserialized: MerkleTree = serde_json::from_str(&json).unwrap();
375
376        assert_eq!(tree.root_hash(), deserialized.root_hash());
377        assert_eq!(tree.leaf_count(), deserialized.leaf_count());
378        assert_eq!(tree.algorithm(), deserialized.algorithm());
379    }
380
381    #[test]
382    fn test_merkle_tree_different_algorithms() {
383        let items = vec!["test1", "test2"];
384        let tree_sha256 = MerkleTree::from_items(&items, HashAlgorithm::Sha256).unwrap();
385        let tree_sha384 = MerkleTree::from_items(&items, HashAlgorithm::Sha384).unwrap();
386        let tree_sha512 = MerkleTree::from_items(&items, HashAlgorithm::Sha512).unwrap();
387
388        // Different algorithms produce different root hashes
389        assert_ne!(tree_sha256.root_hash(), tree_sha384.root_hash());
390        assert_ne!(tree_sha256.root_hash(), tree_sha512.root_hash());
391        assert_ne!(tree_sha384.root_hash(), tree_sha512.root_hash());
392
393        // Algorithm is correctly stored
394        assert_eq!(tree_sha256.algorithm(), HashAlgorithm::Sha256);
395        assert_eq!(tree_sha384.algorithm(), HashAlgorithm::Sha384);
396        assert_eq!(tree_sha512.algorithm(), HashAlgorithm::Sha512);
397    }
398
399    #[test]
400    fn test_merkle_tree_large() {
401        // Test with 100 items to ensure tree builds correctly at scale
402        let items: Vec<String> = (0..100).map(|i| format!("item{i}")).collect();
403        let tree = MerkleTree::from_items(&items, HashAlgorithm::Sha256).unwrap();
404
405        assert_eq!(tree.leaf_count(), 100);
406
407        // Every index should be provable
408        for i in 0..100 {
409            let proof = tree.prove(i);
410            assert!(proof.is_ok(), "Failed to generate proof for index {i}");
411        }
412    }
413
414    #[test]
415    fn test_merkle_tree_two_items() {
416        let items = vec!["left", "right"];
417        let tree = MerkleTree::from_items(&items, HashAlgorithm::Sha256).unwrap();
418
419        assert_eq!(tree.leaf_count(), 2);
420        assert!(!tree.root().is_leaf());
421
422        // Both proofs should work
423        let proof0 = tree.prove(0).unwrap();
424        let proof1 = tree.prove(1).unwrap();
425
426        // Each proof should have one sibling
427        assert_eq!(proof0.path.len(), 1);
428        assert_eq!(proof1.path.len(), 1);
429    }
430
431    #[test]
432    fn test_merkle_tree_root_accessor() {
433        let items = vec!["a", "b"];
434        let tree = MerkleTree::from_items(&items, HashAlgorithm::Sha256).unwrap();
435
436        let root = tree.root();
437        assert!(!root.is_leaf());
438        assert_eq!(&root.hash, tree.root_hash());
439    }
440}