Skip to main content

cp_sync/
merkle.rs

1//! Merkle tree for cognitive state
2
3/// Merkle tree for computing state roots
4pub struct MerkleTree {
5    /// Root hash of the tree
6    root: Option<[u8; 32]>,
7    /// Leaf hashes
8    leaves: Vec<[u8; 32]>,
9}
10
11impl MerkleTree {
12    /// Create an empty Merkle tree
13    pub fn new() -> Self {
14        Self {
15            root: None,
16            leaves: Vec::new(),
17        }
18    }
19
20    /// Add a leaf to the tree
21    pub fn add_leaf(&mut self, data: &[u8]) {
22        let hash = blake3::hash(data);
23        self.leaves.push(*hash.as_bytes());
24        self.root = None; // Invalidate cached root
25    }
26
27    /// Compute the root hash
28    pub fn root(&mut self) -> [u8; 32] {
29        if let Some(root) = self.root {
30            return root;
31        }
32
33        if self.leaves.is_empty() {
34            return [0u8; 32];
35        }
36
37        let root = Self::compute_root(&self.leaves);
38        self.root = Some(root);
39        root
40    }
41
42    /// Recursively compute root from leaves
43    fn compute_root(hashes: &[[u8; 32]]) -> [u8; 32] {
44        if hashes.is_empty() {
45            return [0u8; 32];
46        }
47        if hashes.len() == 1 {
48            return hashes[0];
49        }
50
51        let mut next_level = Vec::with_capacity(hashes.len().div_ceil(2));
52
53        for chunk in hashes.chunks(2) {
54            let mut hasher = blake3::Hasher::new();
55            hasher.update(&chunk[0]);
56            if chunk.len() > 1 {
57                hasher.update(&chunk[1]);
58            } else {
59                // Duplicate last hash if odd number
60                hasher.update(&chunk[0]);
61            }
62            next_level.push(*hasher.finalize().as_bytes());
63        }
64
65        Self::compute_root(&next_level)
66    }
67
68    /// Get number of leaves
69    pub fn len(&self) -> usize {
70        self.leaves.len()
71    }
72
73    /// Check if tree is empty
74    pub fn is_empty(&self) -> bool {
75        self.leaves.is_empty()
76    }
77
78    /// Clear all leaves
79    pub fn clear(&mut self) {
80        self.leaves.clear();
81        self.root = None;
82    }
83
84    /// Generate a Merkle inclusion proof for the leaf at the given index.
85    ///
86    /// Returns a vector of sibling hashes from leaf to root. Each entry is
87    /// (`sibling_hash`, `is_left`) where `is_left` indicates the sibling is on the
88    /// left side of the pair.
89    pub fn generate_proof(&mut self, leaf_index: usize) -> Option<Vec<([u8; 32], bool)>> {
90        if leaf_index >= self.leaves.len() {
91            return None;
92        }
93
94        // Ensure root is computed (builds the tree)
95        let _ = self.root();
96
97        let mut proof = Vec::new();
98        let mut level = self.leaves.clone();
99        let mut index = leaf_index;
100
101        while level.len() > 1 {
102            let sibling_index = if index.is_multiple_of(2) {
103                // Even index: sibling is to the right
104                if index + 1 < level.len() {
105                    index + 1
106                } else {
107                    index // odd number of nodes, duplicate self
108                }
109            } else {
110                // Odd index: sibling is to the left
111                index - 1
112            };
113
114            let is_left = sibling_index < index;
115            proof.push((level[sibling_index], is_left));
116
117            // Build next level
118            let mut next_level = Vec::with_capacity(level.len().div_ceil(2));
119            for chunk in level.chunks(2) {
120                let mut hasher = blake3::Hasher::new();
121                hasher.update(&chunk[0]);
122                if chunk.len() > 1 {
123                    hasher.update(&chunk[1]);
124                } else {
125                    hasher.update(&chunk[0]);
126                }
127                next_level.push(*hasher.finalize().as_bytes());
128            }
129
130            index /= 2;
131            level = next_level;
132        }
133
134        Some(proof)
135    }
136
137    /// Verify a Merkle inclusion proof for a given leaf hash against a root.
138    ///
139    /// `proof` is a sequence of (`sibling_hash`, `is_left`) from leaf to root.
140    pub fn verify_proof(
141        leaf_hash: &[u8; 32],
142        proof: &[([u8; 32], bool)],
143        expected_root: &[u8; 32],
144    ) -> bool {
145        let mut current = *leaf_hash;
146
147        for (sibling, is_left) in proof {
148            let mut hasher = blake3::Hasher::new();
149            if *is_left {
150                hasher.update(sibling);
151                hasher.update(&current);
152            } else {
153                hasher.update(&current);
154                hasher.update(sibling);
155            }
156            current = *hasher.finalize().as_bytes();
157        }
158
159        current == *expected_root
160    }
161}
162
163impl Default for MerkleTree {
164    fn default() -> Self {
165        Self::new()
166    }
167}
168
169/// Compute a Merkle root from a set of items (used in tests)
170#[cfg(test)]
171pub fn compute_merkle_root<T, F>(items: &[T], hash_fn: F) -> [u8; 32]
172where
173    F: Fn(&T) -> [u8; 32],
174{
175    let mut tree = MerkleTree::new();
176    for item in items {
177        let hash = hash_fn(item);
178        tree.leaves.push(hash);
179    }
180    tree.root()
181}
182
183#[cfg(test)]
184mod tests {
185    use super::*;
186    use cp_core::{Chunk, CognitiveDiff, Document, Edge, EdgeKind, Embedding, Hlc};
187    use uuid::Uuid;
188
189    // Helper to create a simple embedding from f32 vector
190    fn create_test_embedding(chunk_id: Uuid) -> Embedding {
191        // Create a simple f32 vector for testing
192        let vector: Vec<f32> = vec![0.0; 1536];
193        Embedding::new(chunk_id, &vector, [0u8; 32], 0)
194    }
195
196    #[test]
197    fn test_empty_tree() {
198        let mut tree = MerkleTree::new();
199        assert_eq!(tree.root(), [0u8; 32]);
200    }
201
202    #[test]
203    fn test_single_leaf() {
204        let mut tree = MerkleTree::new();
205        tree.add_leaf(b"hello");
206
207        let expected = blake3::hash(b"hello");
208        assert_eq!(tree.root(), *expected.as_bytes());
209    }
210
211    #[test]
212    fn test_two_leaves() {
213        let mut tree = MerkleTree::new();
214        tree.add_leaf(b"hello");
215        tree.add_leaf(b"world");
216
217        let root = tree.root();
218        assert_ne!(root, [0u8; 32]);
219    }
220
221    #[test]
222    fn test_deterministic() {
223        let mut tree1 = MerkleTree::new();
224        tree1.add_leaf(b"a");
225        tree1.add_leaf(b"b");
226        tree1.add_leaf(b"c");
227
228        let mut tree2 = MerkleTree::new();
229        tree2.add_leaf(b"a");
230        tree2.add_leaf(b"b");
231        tree2.add_leaf(b"c");
232
233        assert_eq!(tree1.root(), tree2.root());
234    }
235
236    // Additional comprehensive tests for MerkleTree
237
238    #[test]
239    fn test_merkle_three_leaves() {
240        let mut tree = MerkleTree::new();
241        tree.add_leaf(b"first");
242        tree.add_leaf(b"second");
243        tree.add_leaf(b"third");
244
245        let root = tree.root();
246        assert_ne!(root, [0u8; 32]);
247        assert_eq!(tree.len(), 3);
248    }
249
250    #[test]
251    fn test_merkle_four_leaves() {
252        let mut tree = MerkleTree::new();
253        tree.add_leaf(b"one");
254        tree.add_leaf(b"two");
255        tree.add_leaf(b"three");
256        tree.add_leaf(b"four");
257
258        let root = tree.root();
259        assert_ne!(root, [0u8; 32]);
260        assert_eq!(tree.len(), 4);
261    }
262
263    #[test]
264    fn test_merkle_many_leaves() {
265        let mut tree = MerkleTree::new();
266        for i in 0..100 {
267            tree.add_leaf(format!("item{i}").as_bytes());
268        }
269
270        let root = tree.root();
271        assert_ne!(root, [0u8; 32]);
272        assert_eq!(tree.len(), 100);
273        assert!(!tree.is_empty());
274    }
275
276    #[test]
277    fn test_merkle_clear() {
278        let mut tree = MerkleTree::new();
279        tree.add_leaf(b"test");
280        assert_eq!(tree.len(), 1);
281
282        tree.clear();
283        assert_eq!(tree.len(), 0);
284        assert!(tree.is_empty());
285        assert_eq!(tree.root(), [0u8; 32]);
286    }
287
288    #[test]
289    fn test_merkle_root_caching() {
290        let mut tree = MerkleTree::new();
291        tree.add_leaf(b"data");
292
293        let root1 = tree.root();
294        let root2 = tree.root();
295
296        // Should return cached root
297        assert_eq!(root1, root2);
298    }
299
300    #[test]
301    fn test_merkle_root_invalidated_on_new_leaf() {
302        let mut tree = MerkleTree::new();
303        tree.add_leaf(b"first");
304
305        let root1 = tree.root();
306
307        // Adding new leaf should invalidate cache
308        tree.add_leaf(b"second");
309
310        let root2 = tree.root();
311
312        assert_ne!(root1, root2);
313    }
314
315    #[test]
316    fn test_merkle_different_data_different_root() {
317        let mut tree1 = MerkleTree::new();
318        tree1.add_leaf(b"hello");
319        tree1.add_leaf(b"world");
320
321        let mut tree2 = MerkleTree::new();
322        tree2.add_leaf(b"foo");
323        tree2.add_leaf(b"bar");
324
325        assert_ne!(tree1.root(), tree2.root());
326    }
327
328    #[test]
329    fn test_merkle_order_matters() {
330        let mut tree1 = MerkleTree::new();
331        tree1.add_leaf(b"a");
332        tree1.add_leaf(b"b");
333
334        let mut tree2 = MerkleTree::new();
335        tree2.add_leaf(b"b");
336        tree2.add_leaf(b"a");
337
338        // Different order should produce different root
339        assert_ne!(tree1.root(), tree2.root());
340    }
341
342    #[test]
343    fn test_compute_merkle_root_function() {
344        let items = vec![[1u8; 32], [2u8; 32], [3u8; 32]];
345
346        let root = compute_merkle_root(&items, |item| *item);
347        assert_ne!(root, [0u8; 32]);
348    }
349
350    #[test]
351    fn test_compute_merkle_root_empty() {
352        let items: Vec<[u8; 32]> = vec![];
353        let root = compute_merkle_root(&items, |item| *item);
354        assert_eq!(root, [0u8; 32]);
355    }
356
357    #[test]
358    fn test_compute_merkle_root_single_item() {
359        let items = vec![[42u8; 32]];
360        let root = compute_merkle_root(&items, |item| *item);
361        assert_eq!(root, [42u8; 32]);
362    }
363
364    // Tests for diff-related functionality using CognitiveDiff
365
366    #[test]
367    fn test_merkle_diff_empty() {
368        // Test that an empty diff produces expected results
369        let diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
370
371        assert!(diff.is_empty());
372        assert_eq!(diff.change_count(), 0);
373    }
374
375    #[test]
376    fn test_merkle_diff_document_added() {
377        let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
378
379        let doc = Document::new(std::path::PathBuf::from("test.md"), b"test content", 0);
380        diff.added_docs.push(doc);
381
382        assert!(!diff.is_empty());
383        assert_eq!(diff.change_count(), 1);
384        assert_eq!(diff.added_docs.len(), 1);
385    }
386
387    #[test]
388    fn test_merkle_diff_document_modified() {
389        // Document modification is modeled as remove + add
390        let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
391
392        let old_doc_id = Uuid::new_v4();
393        diff.removed_doc_ids.push(old_doc_id);
394
395        let new_doc = Document::new(std::path::PathBuf::from("test.md"), b"updated content", 0);
396        diff.added_docs.push(new_doc);
397
398        assert!(!diff.is_empty());
399        assert_eq!(diff.change_count(), 2);
400    }
401
402    #[test]
403    fn test_merkle_diff_document_deleted() {
404        let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
405
406        let doc_id = Uuid::new_v4();
407        diff.removed_doc_ids.push(doc_id);
408
409        assert!(!diff.is_empty());
410        assert_eq!(diff.change_count(), 1);
411        assert_eq!(diff.removed_doc_ids.len(), 1);
412    }
413
414    #[test]
415    fn test_merkle_diff_chunk_changes() {
416        let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
417
418        let doc_id = Uuid::new_v4();
419        let chunk = Chunk::new(doc_id, "test chunk content", 0, 0);
420        diff.added_chunks.push(chunk);
421
422        let removed_chunk_id = Uuid::new_v4();
423        diff.removed_chunk_ids.push(removed_chunk_id);
424
425        assert_eq!(diff.change_count(), 2);
426        assert_eq!(diff.added_chunks.len(), 1);
427        assert_eq!(diff.removed_chunk_ids.len(), 1);
428    }
429
430    #[test]
431    fn test_merkle_diff_embedding_changes() {
432        let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
433
434        let chunk_id = Uuid::new_v4();
435        let embedding = create_test_embedding(chunk_id);
436        diff.added_embeddings.push(embedding);
437
438        let removed_embedding_id = Uuid::new_v4();
439        diff.removed_embedding_ids.push(removed_embedding_id);
440
441        assert_eq!(diff.change_count(), 2);
442    }
443
444    #[test]
445    fn test_merkle_diff_edge_changes() {
446        let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
447
448        let edge = Edge::new(Uuid::new_v4(), Uuid::new_v4(), EdgeKind::DocToChunk);
449        diff.added_edges.push(edge);
450
451        let removed_edge = (Uuid::new_v4(), Uuid::new_v4(), EdgeKind::ChunkToEmbedding);
452        diff.removed_edges.push(removed_edge);
453
454        assert_eq!(diff.change_count(), 2);
455    }
456
457    #[test]
458    fn test_merkle_diff_multiple_changes() {
459        let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
460
461        // Add document
462        let doc = Document::new(std::path::PathBuf::from("test.md"), b"content", 0);
463        diff.added_docs.push(doc);
464
465        // Add chunk
466        let chunk = Chunk::new(Uuid::new_v4(), "content", 0, 0);
467        diff.added_chunks.push(chunk);
468
469        // Add embedding
470        let embedding = create_test_embedding(Uuid::new_v4());
471        diff.added_embeddings.push(embedding);
472
473        // Add edge
474        let edge = Edge::new(Uuid::new_v4(), Uuid::new_v4(), EdgeKind::DocToChunk);
475        diff.added_edges.push(edge);
476
477        assert_eq!(diff.change_count(), 4);
478    }
479
480    #[test]
481    fn test_merkle_diff_serialization() {
482        use crate::{deserialize_diff, serialize_diff};
483
484        let diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
485
486        let serialized = serialize_diff(&diff).unwrap();
487        assert!(!serialized.is_empty());
488
489        let deserialized = deserialize_diff(&serialized).unwrap();
490        assert_eq!(diff.metadata, deserialized.metadata);
491    }
492
493    #[test]
494    fn test_merkle_diff_serialization_with_content() {
495        use crate::{deserialize_diff, serialize_diff};
496
497        let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
498
499        let doc = Document::new(std::path::PathBuf::from("test.md"), b"Test content", 0);
500        diff.added_docs.push(doc);
501
502        let serialized = serialize_diff(&diff).unwrap();
503        let deserialized = deserialize_diff(&serialized).unwrap();
504
505        assert_eq!(diff.added_docs.len(), deserialized.added_docs.len());
506        assert_eq!(diff.added_docs[0].path, deserialized.added_docs[0].path);
507    }
508
509    #[test]
510    fn test_merkle_diff_estimated_size() {
511        let diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
512
513        let size = diff.estimated_size();
514        assert!(size > 0);
515    }
516
517    #[test]
518    fn test_merkle_diff_apply_order() {
519        // Tests that changes are tracked in the correct order
520        let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
521
522        // Add in specific order
523        for i in 0..5 {
524            let doc = Document::new(
525                std::path::PathBuf::from(format!("doc{i}.md")),
526                format!("content{i}").as_bytes(),
527                0,
528            );
529            diff.added_docs.push(doc);
530        }
531
532        assert_eq!(diff.added_docs.len(), 5);
533        // Verify order is preserved
534        for (i, doc) in diff.added_docs.iter().enumerate() {
535            assert!(doc.path.to_string_lossy().contains(&format!("{i}")));
536        }
537    }
538
539    #[test]
540    fn test_merkle_diff_idempotent() {
541        // Test that adding the same item twice creates a diff that would
542        // contain both additions (idempotency is handled at application time)
543        let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
544
545        let doc = Document::new(std::path::PathBuf::from("test.md"), b"content", 0);
546
547        // Add same document type twice (in real use, this would be deduplicated at apply time)
548        diff.added_docs.push(doc.clone());
549        diff.added_docs.push(doc);
550
551        assert_eq!(diff.change_count(), 2);
552    }
553
554    #[test]
555    fn test_merkle_comparison_using_blake3() {
556        // Test using Merkle tree to compare two states
557        let mut old_tree = MerkleTree::new();
558        old_tree.add_leaf(b"document1");
559        old_tree.add_leaf(b"document2");
560
561        let mut new_tree = MerkleTree::new();
562        new_tree.add_leaf(b"document1");
563        new_tree.add_leaf(b"document2");
564        new_tree.add_leaf(b"document3"); // New document
565
566        let old_root = old_tree.root();
567        let new_root = new_tree.root();
568
569        // Roots should be different because states are different
570        assert_ne!(old_root, new_root);
571    }
572
573    #[test]
574    fn test_merkle_proof_single_leaf() {
575        let mut tree = MerkleTree::new();
576        tree.add_leaf(b"only");
577
578        let root = tree.root();
579        let proof = tree.generate_proof(0).unwrap();
580        assert!(proof.is_empty(), "Single leaf proof should be empty");
581
582        let leaf_hash = blake3::hash(b"only");
583        assert!(MerkleTree::verify_proof(
584            leaf_hash.as_bytes(),
585            &proof,
586            &root
587        ));
588    }
589
590    #[test]
591    fn test_merkle_proof_two_leaves() {
592        let mut tree = MerkleTree::new();
593        tree.add_leaf(b"left");
594        tree.add_leaf(b"right");
595
596        let root = tree.root();
597
598        // Proof for leaf 0
599        let proof0 = tree.generate_proof(0).unwrap();
600        let leaf0 = blake3::hash(b"left");
601        assert!(MerkleTree::verify_proof(leaf0.as_bytes(), &proof0, &root));
602
603        // Proof for leaf 1
604        let proof1 = tree.generate_proof(1).unwrap();
605        let leaf1 = blake3::hash(b"right");
606        assert!(MerkleTree::verify_proof(leaf1.as_bytes(), &proof1, &root));
607    }
608
609    #[test]
610    fn test_merkle_proof_many_leaves() {
611        let mut tree = MerkleTree::new();
612        let data: Vec<Vec<u8>> = (0..17).map(|i| format!("leaf_{i}").into_bytes()).collect();
613        for d in &data {
614            tree.add_leaf(d);
615        }
616
617        let root = tree.root();
618
619        for (i, d) in data.iter().enumerate() {
620            let proof = tree.generate_proof(i).unwrap();
621            let leaf_hash = blake3::hash(d);
622            assert!(
623                MerkleTree::verify_proof(leaf_hash.as_bytes(), &proof, &root),
624                "Proof failed for leaf {i}"
625            );
626        }
627    }
628
629    #[test]
630    fn test_merkle_proof_wrong_leaf_fails() {
631        let mut tree = MerkleTree::new();
632        tree.add_leaf(b"a");
633        tree.add_leaf(b"b");
634        tree.add_leaf(b"c");
635
636        let root = tree.root();
637
638        let proof = tree.generate_proof(0).unwrap();
639        let wrong_leaf = blake3::hash(b"wrong");
640        assert!(!MerkleTree::verify_proof(
641            wrong_leaf.as_bytes(),
642            &proof,
643            &root
644        ));
645    }
646
647    #[test]
648    fn test_merkle_proof_out_of_bounds() {
649        let mut tree = MerkleTree::new();
650        tree.add_leaf(b"a");
651        assert!(tree.generate_proof(1).is_none());
652        assert!(tree.generate_proof(100).is_none());
653    }
654
655    #[test]
656    fn test_merkle_prove_change_detection() {
657        // Test that Merkle tree can detect changes
658        let mut tree1 = MerkleTree::new();
659        tree1.add_leaf(b"a");
660        tree1.add_leaf(b"b");
661
662        let mut tree2 = MerkleTree::new();
663        tree2.add_leaf(b"a");
664        tree2.add_leaf(b"c"); // Changed from b to c
665
666        assert_ne!(tree1.root(), tree2.root());
667    }
668}