Skip to main content

cp_sync/
merkle.rs

1//! Merkle tree for cognitive state
2
3use cp_core::Result;
4
5/// Merkle tree for computing state roots
6pub struct MerkleTree {
7    /// Root hash of the tree
8    root: Option<[u8; 32]>,
9    /// Leaf hashes
10    leaves: Vec<[u8; 32]>,
11}
12
13impl MerkleTree {
14    /// Create an empty Merkle tree
15    pub fn new() -> Self {
16        Self {
17            root: None,
18            leaves: Vec::new(),
19        }
20    }
21
22    /// Add a leaf to the tree
23    pub fn add_leaf(&mut self, data: &[u8]) {
24        let hash = blake3::hash(data);
25        self.leaves.push(*hash.as_bytes());
26        self.root = None; // Invalidate cached root
27    }
28
29    /// Compute the root hash
30    pub fn root(&mut self) -> [u8; 32] {
31        if let Some(root) = self.root {
32            return root;
33        }
34
35        if self.leaves.is_empty() {
36            return [0u8; 32];
37        }
38
39        let root = self.compute_root(&self.leaves);
40        self.root = Some(root);
41        root
42    }
43
44    /// Recursively compute root from leaves
45    fn compute_root(&self, hashes: &[[u8; 32]]) -> [u8; 32] {
46        if hashes.is_empty() {
47            return [0u8; 32];
48        }
49        if hashes.len() == 1 {
50            return hashes[0];
51        }
52
53        let mut next_level = Vec::with_capacity((hashes.len() + 1) / 2);
54
55        for chunk in hashes.chunks(2) {
56            let mut hasher = blake3::Hasher::new();
57            hasher.update(&chunk[0]);
58            if chunk.len() > 1 {
59                hasher.update(&chunk[1]);
60            } else {
61                // Duplicate last hash if odd number
62                hasher.update(&chunk[0]);
63            }
64            next_level.push(*hasher.finalize().as_bytes());
65        }
66
67        self.compute_root(&next_level)
68    }
69
70    /// Get number of leaves
71    pub fn len(&self) -> usize {
72        self.leaves.len()
73    }
74
75    /// Check if tree is empty
76    pub fn is_empty(&self) -> bool {
77        self.leaves.is_empty()
78    }
79
80    /// Clear all leaves
81    pub fn clear(&mut self) {
82        self.leaves.clear();
83        self.root = None;
84    }
85
86    /// Generate a Merkle inclusion proof for the leaf at the given index.
87    ///
88    /// Returns a vector of sibling hashes from leaf to root. Each entry is
89    /// (sibling_hash, is_left) where is_left indicates the sibling is on the
90    /// left side of the pair.
91    pub fn generate_proof(&mut self, leaf_index: usize) -> Option<Vec<([u8; 32], bool)>> {
92        if leaf_index >= self.leaves.len() {
93            return None;
94        }
95
96        // Ensure root is computed (builds the tree)
97        let _ = self.root();
98
99        let mut proof = Vec::new();
100        let mut level = self.leaves.clone();
101        let mut index = leaf_index;
102
103        while level.len() > 1 {
104            let sibling_index = if index % 2 == 0 {
105                // Even index: sibling is to the right
106                if index + 1 < level.len() {
107                    index + 1
108                } else {
109                    index // odd number of nodes, duplicate self
110                }
111            } else {
112                // Odd index: sibling is to the left
113                index - 1
114            };
115
116            let is_left = sibling_index < index;
117            proof.push((level[sibling_index], is_left));
118
119            // Build next level
120            let mut next_level = Vec::with_capacity((level.len() + 1) / 2);
121            for chunk in level.chunks(2) {
122                let mut hasher = blake3::Hasher::new();
123                hasher.update(&chunk[0]);
124                if chunk.len() > 1 {
125                    hasher.update(&chunk[1]);
126                } else {
127                    hasher.update(&chunk[0]);
128                }
129                next_level.push(*hasher.finalize().as_bytes());
130            }
131
132            index /= 2;
133            level = next_level;
134        }
135
136        Some(proof)
137    }
138
139    /// Verify a Merkle inclusion proof for a given leaf hash against a root.
140    ///
141    /// `proof` is a sequence of (sibling_hash, is_left) from leaf to root.
142    pub fn verify_proof(
143        leaf_hash: &[u8; 32],
144        proof: &[([u8; 32], bool)],
145        expected_root: &[u8; 32],
146    ) -> bool {
147        let mut current = *leaf_hash;
148
149        for (sibling, is_left) in proof {
150            let mut hasher = blake3::Hasher::new();
151            if *is_left {
152                hasher.update(sibling);
153                hasher.update(&current);
154            } else {
155                hasher.update(&current);
156                hasher.update(sibling);
157            }
158            current = *hasher.finalize().as_bytes();
159        }
160
161        current == *expected_root
162    }
163}
164
165impl Default for MerkleTree {
166    fn default() -> Self {
167        Self::new()
168    }
169}
170
171/// Compute a Merkle root from a set of items
172#[allow(dead_code)]
173pub fn compute_merkle_root<T, F>(items: &[T], hash_fn: F) -> Result<[u8; 32]>
174where
175    F: Fn(&T) -> [u8; 32],
176{
177    let mut tree = MerkleTree::new();
178    for item in items {
179        let hash = hash_fn(item);
180        tree.leaves.push(hash);
181    }
182    Ok(tree.root())
183}
184
185#[cfg(test)]
186mod tests {
187    use super::*;
188    use cp_core::{Chunk, CognitiveDiff, Document, Edge, EdgeKind, Embedding, Hlc};
189    use uuid::Uuid;
190
191    // Helper to create a simple embedding from f32 vector
192    fn create_test_embedding(chunk_id: Uuid) -> Embedding {
193        // Create a simple f32 vector for testing
194        let vector: Vec<f32> = vec![0.0; 1536];
195        Embedding::new(chunk_id, &vector, [0u8; 32], 0)
196    }
197
198    #[test]
199    fn test_empty_tree() {
200        let mut tree = MerkleTree::new();
201        assert_eq!(tree.root(), [0u8; 32]);
202    }
203
204    #[test]
205    fn test_single_leaf() {
206        let mut tree = MerkleTree::new();
207        tree.add_leaf(b"hello");
208
209        let expected = blake3::hash(b"hello");
210        assert_eq!(tree.root(), *expected.as_bytes());
211    }
212
213    #[test]
214    fn test_two_leaves() {
215        let mut tree = MerkleTree::new();
216        tree.add_leaf(b"hello");
217        tree.add_leaf(b"world");
218
219        let root = tree.root();
220        assert_ne!(root, [0u8; 32]);
221    }
222
223    #[test]
224    fn test_deterministic() {
225        let mut tree1 = MerkleTree::new();
226        tree1.add_leaf(b"a");
227        tree1.add_leaf(b"b");
228        tree1.add_leaf(b"c");
229
230        let mut tree2 = MerkleTree::new();
231        tree2.add_leaf(b"a");
232        tree2.add_leaf(b"b");
233        tree2.add_leaf(b"c");
234
235        assert_eq!(tree1.root(), tree2.root());
236    }
237
238    // Additional comprehensive tests for MerkleTree
239
240    #[test]
241    fn test_merkle_three_leaves() {
242        let mut tree = MerkleTree::new();
243        tree.add_leaf(b"first");
244        tree.add_leaf(b"second");
245        tree.add_leaf(b"third");
246
247        let root = tree.root();
248        assert_ne!(root, [0u8; 32]);
249        assert_eq!(tree.len(), 3);
250    }
251
252    #[test]
253    fn test_merkle_four_leaves() {
254        let mut tree = MerkleTree::new();
255        tree.add_leaf(b"one");
256        tree.add_leaf(b"two");
257        tree.add_leaf(b"three");
258        tree.add_leaf(b"four");
259
260        let root = tree.root();
261        assert_ne!(root, [0u8; 32]);
262        assert_eq!(tree.len(), 4);
263    }
264
265    #[test]
266    fn test_merkle_many_leaves() {
267        let mut tree = MerkleTree::new();
268        for i in 0..100 {
269            tree.add_leaf(format!("item{}", i).as_bytes());
270        }
271
272        let root = tree.root();
273        assert_ne!(root, [0u8; 32]);
274        assert_eq!(tree.len(), 100);
275        assert!(!tree.is_empty());
276    }
277
278    #[test]
279    fn test_merkle_clear() {
280        let mut tree = MerkleTree::new();
281        tree.add_leaf(b"test");
282        assert_eq!(tree.len(), 1);
283
284        tree.clear();
285        assert_eq!(tree.len(), 0);
286        assert!(tree.is_empty());
287        assert_eq!(tree.root(), [0u8; 32]);
288    }
289
290    #[test]
291    fn test_merkle_root_caching() {
292        let mut tree = MerkleTree::new();
293        tree.add_leaf(b"data");
294
295        let root1 = tree.root();
296        let root2 = tree.root();
297
298        // Should return cached root
299        assert_eq!(root1, root2);
300    }
301
302    #[test]
303    fn test_merkle_root_invalidated_on_new_leaf() {
304        let mut tree = MerkleTree::new();
305        tree.add_leaf(b"first");
306
307        let root1 = tree.root();
308
309        // Adding new leaf should invalidate cache
310        tree.add_leaf(b"second");
311
312        let root2 = tree.root();
313
314        assert_ne!(root1, root2);
315    }
316
317    #[test]
318    fn test_merkle_different_data_different_root() {
319        let mut tree1 = MerkleTree::new();
320        tree1.add_leaf(b"hello");
321        tree1.add_leaf(b"world");
322
323        let mut tree2 = MerkleTree::new();
324        tree2.add_leaf(b"foo");
325        tree2.add_leaf(b"bar");
326
327        assert_ne!(tree1.root(), tree2.root());
328    }
329
330    #[test]
331    fn test_merkle_order_matters() {
332        let mut tree1 = MerkleTree::new();
333        tree1.add_leaf(b"a");
334        tree1.add_leaf(b"b");
335
336        let mut tree2 = MerkleTree::new();
337        tree2.add_leaf(b"b");
338        tree2.add_leaf(b"a");
339
340        // Different order should produce different root
341        assert_ne!(tree1.root(), tree2.root());
342    }
343
344    #[test]
345    fn test_compute_merkle_root_function() {
346        let items = vec![
347            [1u8; 32],
348            [2u8; 32],
349            [3u8; 32],
350        ];
351
352        let root = compute_merkle_root(&items, |item| *item).unwrap();
353        assert_ne!(root, [0u8; 32]);
354    }
355
356    #[test]
357    fn test_compute_merkle_root_empty() {
358        let items: Vec<[u8; 32]> = vec![];
359        let root = compute_merkle_root(&items, |item| *item).unwrap();
360        assert_eq!(root, [0u8; 32]);
361    }
362
363    #[test]
364    fn test_compute_merkle_root_single_item() {
365        let items = vec![[42u8; 32]];
366        let root = compute_merkle_root(&items, |item| *item).unwrap();
367        assert_eq!(root, [42u8; 32]);
368    }
369
370    // Tests for diff-related functionality using CognitiveDiff
371
372    #[test]
373    fn test_merkle_diff_empty() {
374        // Test that an empty diff produces expected results
375        let diff = CognitiveDiff::empty(
376            [0u8; 32],
377            Uuid::nil(),
378            0,
379            Hlc::new(0, [0u8; 16]),
380        );
381
382        assert!(diff.is_empty());
383        assert_eq!(diff.change_count(), 0);
384    }
385
386    #[test]
387    fn test_merkle_diff_document_added() {
388        let mut diff = CognitiveDiff::empty(
389            [0u8; 32],
390            Uuid::nil(),
391            0,
392            Hlc::new(0, [0u8; 16]),
393        );
394
395        let doc = Document::new(
396            std::path::PathBuf::from("test.md"),
397            b"test content",
398            0,
399        );
400        diff.added_docs.push(doc);
401
402        assert!(!diff.is_empty());
403        assert_eq!(diff.change_count(), 1);
404        assert_eq!(diff.added_docs.len(), 1);
405    }
406
407    #[test]
408    fn test_merkle_diff_document_modified() {
409        // Document modification is modeled as remove + add
410        let mut diff = CognitiveDiff::empty(
411            [0u8; 32],
412            Uuid::nil(),
413            0,
414            Hlc::new(0, [0u8; 16]),
415        );
416
417        let old_doc_id = Uuid::new_v4();
418        diff.removed_doc_ids.push(old_doc_id);
419
420        let new_doc = Document::new(
421            std::path::PathBuf::from("test.md"),
422            b"updated content",
423            0,
424        );
425        diff.added_docs.push(new_doc);
426
427        assert!(!diff.is_empty());
428        assert_eq!(diff.change_count(), 2);
429    }
430
431    #[test]
432    fn test_merkle_diff_document_deleted() {
433        let mut diff = CognitiveDiff::empty(
434            [0u8; 32],
435            Uuid::nil(),
436            0,
437            Hlc::new(0, [0u8; 16]),
438        );
439
440        let doc_id = Uuid::new_v4();
441        diff.removed_doc_ids.push(doc_id);
442
443        assert!(!diff.is_empty());
444        assert_eq!(diff.change_count(), 1);
445        assert_eq!(diff.removed_doc_ids.len(), 1);
446    }
447
448    #[test]
449    fn test_merkle_diff_chunk_changes() {
450        let mut diff = CognitiveDiff::empty(
451            [0u8; 32],
452            Uuid::nil(),
453            0,
454            Hlc::new(0, [0u8; 16]),
455        );
456
457        let doc_id = Uuid::new_v4();
458        let chunk = Chunk::new(
459            doc_id,
460            "test chunk content".to_string(),
461            0,
462            0,
463        );
464        diff.added_chunks.push(chunk);
465
466        let removed_chunk_id = Uuid::new_v4();
467        diff.removed_chunk_ids.push(removed_chunk_id);
468
469        assert_eq!(diff.change_count(), 2);
470        assert_eq!(diff.added_chunks.len(), 1);
471        assert_eq!(diff.removed_chunk_ids.len(), 1);
472    }
473
474    #[test]
475    fn test_merkle_diff_embedding_changes() {
476        let mut diff = CognitiveDiff::empty(
477            [0u8; 32],
478            Uuid::nil(),
479            0,
480            Hlc::new(0, [0u8; 16]),
481        );
482
483        let chunk_id = Uuid::new_v4();
484        let embedding = create_test_embedding(chunk_id);
485        diff.added_embeddings.push(embedding);
486
487        let removed_embedding_id = Uuid::new_v4();
488        diff.removed_embedding_ids.push(removed_embedding_id);
489
490        assert_eq!(diff.change_count(), 2);
491    }
492
493    #[test]
494    fn test_merkle_diff_edge_changes() {
495        let mut diff = CognitiveDiff::empty(
496            [0u8; 32],
497            Uuid::nil(),
498            0,
499            Hlc::new(0, [0u8; 16]),
500        );
501
502        let edge = Edge::new(
503            Uuid::new_v4(),
504            Uuid::new_v4(),
505            EdgeKind::DocToChunk,
506        );
507        diff.added_edges.push(edge);
508
509        let removed_edge = (
510            Uuid::new_v4(),
511            Uuid::new_v4(),
512            EdgeKind::ChunkToEmbedding,
513        );
514        diff.removed_edges.push(removed_edge);
515
516        assert_eq!(diff.change_count(), 2);
517    }
518
519    #[test]
520    fn test_merkle_diff_multiple_changes() {
521        let mut diff = CognitiveDiff::empty(
522            [0u8; 32],
523            Uuid::nil(),
524            0,
525            Hlc::new(0, [0u8; 16]),
526        );
527
528        // Add document
529        let doc = Document::new(
530            std::path::PathBuf::from("test.md"),
531            b"content",
532            0,
533        );
534        diff.added_docs.push(doc);
535
536        // Add chunk
537        let chunk = Chunk::new(
538            Uuid::new_v4(),
539            "content".to_string(),
540            0,
541            0,
542        );
543        diff.added_chunks.push(chunk);
544
545        // Add embedding
546        let embedding = create_test_embedding(Uuid::new_v4());
547        diff.added_embeddings.push(embedding);
548
549        // Add edge
550        let edge = Edge::new(
551            Uuid::new_v4(),
552            Uuid::new_v4(),
553            EdgeKind::DocToChunk,
554        );
555        diff.added_edges.push(edge);
556
557        assert_eq!(diff.change_count(), 4);
558    }
559
560    #[test]
561    fn test_merkle_diff_serialization() {
562        use crate::{serialize_diff, deserialize_diff};
563
564        let diff = CognitiveDiff::empty(
565            [0u8; 32],
566            Uuid::nil(),
567            0,
568            Hlc::new(0, [0u8; 16]),
569        );
570
571        let serialized = serialize_diff(&diff).unwrap();
572        assert!(!serialized.is_empty());
573
574        let deserialized = deserialize_diff(&serialized).unwrap();
575        assert_eq!(diff.metadata, deserialized.metadata);
576    }
577
578    #[test]
579    fn test_merkle_diff_serialization_with_content() {
580        use crate::{serialize_diff, deserialize_diff};
581
582        let mut diff = CognitiveDiff::empty(
583            [0u8; 32],
584            Uuid::nil(),
585            0,
586            Hlc::new(0, [0u8; 16]),
587        );
588
589        let doc = Document::new(
590            std::path::PathBuf::from("test.md"),
591            b"Test content",
592            0,
593        );
594        diff.added_docs.push(doc);
595
596        let serialized = serialize_diff(&diff).unwrap();
597        let deserialized = deserialize_diff(&serialized).unwrap();
598
599        assert_eq!(diff.added_docs.len(), deserialized.added_docs.len());
600        assert_eq!(diff.added_docs[0].path, deserialized.added_docs[0].path);
601    }
602
603    #[test]
604    fn test_merkle_diff_estimated_size() {
605        let diff = CognitiveDiff::empty(
606            [0u8; 32],
607            Uuid::nil(),
608            0,
609            Hlc::new(0, [0u8; 16]),
610        );
611
612        let size = diff.estimated_size();
613        assert!(size > 0);
614    }
615
616    #[test]
617    fn test_merkle_diff_apply_order() {
618        // Tests that changes are tracked in the correct order
619        let mut diff = CognitiveDiff::empty(
620            [0u8; 32],
621            Uuid::nil(),
622            0,
623            Hlc::new(0, [0u8; 16]),
624        );
625
626        // Add in specific order
627        for i in 0..5 {
628            let doc = Document::new(
629                std::path::PathBuf::from(format!("doc{}.md", i)),
630                format!("content{}", i).as_bytes(),
631                0,
632            );
633            diff.added_docs.push(doc);
634        }
635
636        assert_eq!(diff.added_docs.len(), 5);
637        // Verify order is preserved
638        for (i, doc) in diff.added_docs.iter().enumerate() {
639            assert!(doc.path.to_string_lossy().contains(&format!("{}", i)));
640        }
641    }
642
643    #[test]
644    fn test_merkle_diff_idempotent() {
645        // Test that adding the same item twice creates a diff that would
646        // contain both additions (idempotency is handled at application time)
647        let mut diff = CognitiveDiff::empty(
648            [0u8; 32],
649            Uuid::nil(),
650            0,
651            Hlc::new(0, [0u8; 16]),
652        );
653
654        let doc = Document::new(
655            std::path::PathBuf::from("test.md"),
656            b"content",
657            0,
658        );
659
660        // Add same document type twice (in real use, this would be deduplicated at apply time)
661        diff.added_docs.push(doc.clone());
662        diff.added_docs.push(doc);
663
664        assert_eq!(diff.change_count(), 2);
665    }
666
667    #[test]
668    fn test_merkle_comparison_using_blake3() {
669        // Test using Merkle tree to compare two states
670        let mut old_tree = MerkleTree::new();
671        old_tree.add_leaf(b"document1");
672        old_tree.add_leaf(b"document2");
673
674        let mut new_tree = MerkleTree::new();
675        new_tree.add_leaf(b"document1");
676        new_tree.add_leaf(b"document2");
677        new_tree.add_leaf(b"document3"); // New document
678
679        let old_root = old_tree.root();
680        let new_root = new_tree.root();
681
682        // Roots should be different because states are different
683        assert_ne!(old_root, new_root);
684    }
685
686    #[test]
687    fn test_merkle_proof_single_leaf() {
688        let mut tree = MerkleTree::new();
689        tree.add_leaf(b"only");
690
691        let root = tree.root();
692        let proof = tree.generate_proof(0).unwrap();
693        assert!(proof.is_empty(), "Single leaf proof should be empty");
694
695        let leaf_hash = blake3::hash(b"only");
696        assert!(MerkleTree::verify_proof(leaf_hash.as_bytes(), &proof, &root));
697    }
698
699    #[test]
700    fn test_merkle_proof_two_leaves() {
701        let mut tree = MerkleTree::new();
702        tree.add_leaf(b"left");
703        tree.add_leaf(b"right");
704
705        let root = tree.root();
706
707        // Proof for leaf 0
708        let proof0 = tree.generate_proof(0).unwrap();
709        let leaf0 = blake3::hash(b"left");
710        assert!(MerkleTree::verify_proof(leaf0.as_bytes(), &proof0, &root));
711
712        // Proof for leaf 1
713        let proof1 = tree.generate_proof(1).unwrap();
714        let leaf1 = blake3::hash(b"right");
715        assert!(MerkleTree::verify_proof(leaf1.as_bytes(), &proof1, &root));
716    }
717
718    #[test]
719    fn test_merkle_proof_many_leaves() {
720        let mut tree = MerkleTree::new();
721        let data: Vec<Vec<u8>> = (0..17).map(|i| format!("leaf_{}", i).into_bytes()).collect();
722        for d in &data {
723            tree.add_leaf(d);
724        }
725
726        let root = tree.root();
727
728        for (i, d) in data.iter().enumerate() {
729            let proof = tree.generate_proof(i).unwrap();
730            let leaf_hash = blake3::hash(d);
731            assert!(
732                MerkleTree::verify_proof(leaf_hash.as_bytes(), &proof, &root),
733                "Proof failed for leaf {}",
734                i
735            );
736        }
737    }
738
739    #[test]
740    fn test_merkle_proof_wrong_leaf_fails() {
741        let mut tree = MerkleTree::new();
742        tree.add_leaf(b"a");
743        tree.add_leaf(b"b");
744        tree.add_leaf(b"c");
745
746        let root = tree.root();
747
748        let proof = tree.generate_proof(0).unwrap();
749        let wrong_leaf = blake3::hash(b"wrong");
750        assert!(!MerkleTree::verify_proof(wrong_leaf.as_bytes(), &proof, &root));
751    }
752
753    #[test]
754    fn test_merkle_proof_out_of_bounds() {
755        let mut tree = MerkleTree::new();
756        tree.add_leaf(b"a");
757        assert!(tree.generate_proof(1).is_none());
758        assert!(tree.generate_proof(100).is_none());
759    }
760
761    #[test]
762    fn test_merkle_prove_change_detection() {
763        // Test that Merkle tree can detect changes
764        let mut tree1 = MerkleTree::new();
765        tree1.add_leaf(b"a");
766        tree1.add_leaf(b"b");
767
768        let mut tree2 = MerkleTree::new();
769        tree2.add_leaf(b"a");
770        tree2.add_leaf(b"c"); // Changed from b to c
771
772        assert_ne!(tree1.root(), tree2.root());
773    }
774}