Skip to main content

cp_sync/
merkle.rs

1//! Merkle tree for cognitive state
2
3use cp_core::Result;
4
5/// Merkle tree for computing state roots
6pub struct MerkleTree {
7    /// Root hash of the tree
8    root: Option<[u8; 32]>,
9    /// Leaf hashes
10    leaves: Vec<[u8; 32]>,
11}
12
13impl MerkleTree {
14    /// Create an empty Merkle tree
15    pub fn new() -> Self {
16        Self {
17            root: None,
18            leaves: Vec::new(),
19        }
20    }
21
22    /// Add a leaf to the tree
23    pub fn add_leaf(&mut self, data: &[u8]) {
24        let hash = blake3::hash(data);
25        self.leaves.push(*hash.as_bytes());
26        self.root = None; // Invalidate cached root
27    }
28
29    /// Compute the root hash
30    pub fn root(&mut self) -> [u8; 32] {
31        if let Some(root) = self.root {
32            return root;
33        }
34
35        if self.leaves.is_empty() {
36            return [0u8; 32];
37        }
38
39        let root = self.compute_root(&self.leaves);
40        self.root = Some(root);
41        root
42    }
43
44    /// Recursively compute root from leaves
45    fn compute_root(&self, hashes: &[[u8; 32]]) -> [u8; 32] {
46        if hashes.is_empty() {
47            return [0u8; 32];
48        }
49        if hashes.len() == 1 {
50            return hashes[0];
51        }
52
53        let mut next_level = Vec::with_capacity((hashes.len() + 1) / 2);
54
55        for chunk in hashes.chunks(2) {
56            let mut hasher = blake3::Hasher::new();
57            hasher.update(&chunk[0]);
58            if chunk.len() > 1 {
59                hasher.update(&chunk[1]);
60            } else {
61                // Duplicate last hash if odd number
62                hasher.update(&chunk[0]);
63            }
64            next_level.push(*hasher.finalize().as_bytes());
65        }
66
67        self.compute_root(&next_level)
68    }
69
70    /// Get number of leaves
71    pub fn len(&self) -> usize {
72        self.leaves.len()
73    }
74
75    /// Check if tree is empty
76    pub fn is_empty(&self) -> bool {
77        self.leaves.is_empty()
78    }
79
80    /// Clear all leaves
81    pub fn clear(&mut self) {
82        self.leaves.clear();
83        self.root = None;
84    }
85}
86
87impl Default for MerkleTree {
88    fn default() -> Self {
89        Self::new()
90    }
91}
92
93/// Compute a Merkle root from a set of items
94#[allow(dead_code)]
95pub fn compute_merkle_root<T, F>(items: &[T], hash_fn: F) -> Result<[u8; 32]>
96where
97    F: Fn(&T) -> [u8; 32],
98{
99    let mut tree = MerkleTree::new();
100    for item in items {
101        let hash = hash_fn(item);
102        tree.leaves.push(hash);
103    }
104    Ok(tree.root())
105}
106
107#[cfg(test)]
108mod tests {
109    use super::*;
110    use cp_core::{Chunk, CognitiveDiff, Document, Edge, EdgeKind, Embedding, Hlc};
111    use uuid::Uuid;
112
113    // Helper to create a simple embedding from f32 vector
114    fn create_test_embedding(chunk_id: Uuid) -> Embedding {
115        // Create a simple f32 vector for testing
116        let vector: Vec<f32> = vec![0.0; 384];
117        Embedding::new(chunk_id, &vector, [0u8; 32], 0)
118    }
119
120    #[test]
121    fn test_empty_tree() {
122        let mut tree = MerkleTree::new();
123        assert_eq!(tree.root(), [0u8; 32]);
124    }
125
126    #[test]
127    fn test_single_leaf() {
128        let mut tree = MerkleTree::new();
129        tree.add_leaf(b"hello");
130
131        let expected = blake3::hash(b"hello");
132        assert_eq!(tree.root(), *expected.as_bytes());
133    }
134
135    #[test]
136    fn test_two_leaves() {
137        let mut tree = MerkleTree::new();
138        tree.add_leaf(b"hello");
139        tree.add_leaf(b"world");
140
141        let root = tree.root();
142        assert_ne!(root, [0u8; 32]);
143    }
144
145    #[test]
146    fn test_deterministic() {
147        let mut tree1 = MerkleTree::new();
148        tree1.add_leaf(b"a");
149        tree1.add_leaf(b"b");
150        tree1.add_leaf(b"c");
151
152        let mut tree2 = MerkleTree::new();
153        tree2.add_leaf(b"a");
154        tree2.add_leaf(b"b");
155        tree2.add_leaf(b"c");
156
157        assert_eq!(tree1.root(), tree2.root());
158    }
159
160    // Additional comprehensive tests for MerkleTree
161
162    #[test]
163    fn test_merkle_three_leaves() {
164        let mut tree = MerkleTree::new();
165        tree.add_leaf(b"first");
166        tree.add_leaf(b"second");
167        tree.add_leaf(b"third");
168
169        let root = tree.root();
170        assert_ne!(root, [0u8; 32]);
171        assert_eq!(tree.len(), 3);
172    }
173
174    #[test]
175    fn test_merkle_four_leaves() {
176        let mut tree = MerkleTree::new();
177        tree.add_leaf(b"one");
178        tree.add_leaf(b"two");
179        tree.add_leaf(b"three");
180        tree.add_leaf(b"four");
181
182        let root = tree.root();
183        assert_ne!(root, [0u8; 32]);
184        assert_eq!(tree.len(), 4);
185    }
186
187    #[test]
188    fn test_merkle_many_leaves() {
189        let mut tree = MerkleTree::new();
190        for i in 0..100 {
191            tree.add_leaf(format!("item{}", i).as_bytes());
192        }
193
194        let root = tree.root();
195        assert_ne!(root, [0u8; 32]);
196        assert_eq!(tree.len(), 100);
197        assert!(!tree.is_empty());
198    }
199
200    #[test]
201    fn test_merkle_clear() {
202        let mut tree = MerkleTree::new();
203        tree.add_leaf(b"test");
204        assert_eq!(tree.len(), 1);
205
206        tree.clear();
207        assert_eq!(tree.len(), 0);
208        assert!(tree.is_empty());
209        assert_eq!(tree.root(), [0u8; 32]);
210    }
211
212    #[test]
213    fn test_merkle_root_caching() {
214        let mut tree = MerkleTree::new();
215        tree.add_leaf(b"data");
216
217        let root1 = tree.root();
218        let root2 = tree.root();
219
220        // Should return cached root
221        assert_eq!(root1, root2);
222    }
223
224    #[test]
225    fn test_merkle_root_invalidated_on_new_leaf() {
226        let mut tree = MerkleTree::new();
227        tree.add_leaf(b"first");
228
229        let root1 = tree.root();
230
231        // Adding new leaf should invalidate cache
232        tree.add_leaf(b"second");
233
234        let root2 = tree.root();
235
236        assert_ne!(root1, root2);
237    }
238
239    #[test]
240    fn test_merkle_different_data_different_root() {
241        let mut tree1 = MerkleTree::new();
242        tree1.add_leaf(b"hello");
243        tree1.add_leaf(b"world");
244
245        let mut tree2 = MerkleTree::new();
246        tree2.add_leaf(b"foo");
247        tree2.add_leaf(b"bar");
248
249        assert_ne!(tree1.root(), tree2.root());
250    }
251
252    #[test]
253    fn test_merkle_order_matters() {
254        let mut tree1 = MerkleTree::new();
255        tree1.add_leaf(b"a");
256        tree1.add_leaf(b"b");
257
258        let mut tree2 = MerkleTree::new();
259        tree2.add_leaf(b"b");
260        tree2.add_leaf(b"a");
261
262        // Different order should produce different root
263        assert_ne!(tree1.root(), tree2.root());
264    }
265
266    #[test]
267    fn test_compute_merkle_root_function() {
268        let items = vec![
269            [1u8; 32],
270            [2u8; 32],
271            [3u8; 32],
272        ];
273
274        let root = compute_merkle_root(&items, |item| *item).unwrap();
275        assert_ne!(root, [0u8; 32]);
276    }
277
278    #[test]
279    fn test_compute_merkle_root_empty() {
280        let items: Vec<[u8; 32]> = vec![];
281        let root = compute_merkle_root(&items, |item| *item).unwrap();
282        assert_eq!(root, [0u8; 32]);
283    }
284
285    #[test]
286    fn test_compute_merkle_root_single_item() {
287        let items = vec![[42u8; 32]];
288        let root = compute_merkle_root(&items, |item| *item).unwrap();
289        assert_eq!(root, [42u8; 32]);
290    }
291
292    // Tests for diff-related functionality using CognitiveDiff
293
294    #[test]
295    fn test_merkle_diff_empty() {
296        // Test that an empty diff produces expected results
297        let diff = CognitiveDiff::empty(
298            [0u8; 32],
299            Uuid::nil(),
300            0,
301            Hlc::new(0, [0u8; 16]),
302        );
303
304        assert!(diff.is_empty());
305        assert_eq!(diff.change_count(), 0);
306    }
307
308    #[test]
309    fn test_merkle_diff_document_added() {
310        let mut diff = CognitiveDiff::empty(
311            [0u8; 32],
312            Uuid::nil(),
313            0,
314            Hlc::new(0, [0u8; 16]),
315        );
316
317        let doc = Document::new(
318            std::path::PathBuf::from("test.md"),
319            b"test content",
320            0,
321        );
322        diff.added_docs.push(doc);
323
324        assert!(!diff.is_empty());
325        assert_eq!(diff.change_count(), 1);
326        assert_eq!(diff.added_docs.len(), 1);
327    }
328
329    #[test]
330    fn test_merkle_diff_document_modified() {
331        // Document modification is modeled as remove + add
332        let mut diff = CognitiveDiff::empty(
333            [0u8; 32],
334            Uuid::nil(),
335            0,
336            Hlc::new(0, [0u8; 16]),
337        );
338
339        let old_doc_id = Uuid::new_v4();
340        diff.removed_doc_ids.push(old_doc_id);
341
342        let new_doc = Document::new(
343            std::path::PathBuf::from("test.md"),
344            b"updated content",
345            0,
346        );
347        diff.added_docs.push(new_doc);
348
349        assert!(!diff.is_empty());
350        assert_eq!(diff.change_count(), 2);
351    }
352
353    #[test]
354    fn test_merkle_diff_document_deleted() {
355        let mut diff = CognitiveDiff::empty(
356            [0u8; 32],
357            Uuid::nil(),
358            0,
359            Hlc::new(0, [0u8; 16]),
360        );
361
362        let doc_id = Uuid::new_v4();
363        diff.removed_doc_ids.push(doc_id);
364
365        assert!(!diff.is_empty());
366        assert_eq!(diff.change_count(), 1);
367        assert_eq!(diff.removed_doc_ids.len(), 1);
368    }
369
370    #[test]
371    fn test_merkle_diff_chunk_changes() {
372        let mut diff = CognitiveDiff::empty(
373            [0u8; 32],
374            Uuid::nil(),
375            0,
376            Hlc::new(0, [0u8; 16]),
377        );
378
379        let doc_id = Uuid::new_v4();
380        let chunk = Chunk::new(
381            doc_id,
382            "test chunk content".to_string(),
383            0,
384            0,
385        );
386        diff.added_chunks.push(chunk);
387
388        let removed_chunk_id = Uuid::new_v4();
389        diff.removed_chunk_ids.push(removed_chunk_id);
390
391        assert_eq!(diff.change_count(), 2);
392        assert_eq!(diff.added_chunks.len(), 1);
393        assert_eq!(diff.removed_chunk_ids.len(), 1);
394    }
395
396    #[test]
397    fn test_merkle_diff_embedding_changes() {
398        let mut diff = CognitiveDiff::empty(
399            [0u8; 32],
400            Uuid::nil(),
401            0,
402            Hlc::new(0, [0u8; 16]),
403        );
404
405        let chunk_id = Uuid::new_v4();
406        let embedding = create_test_embedding(chunk_id);
407        diff.added_embeddings.push(embedding);
408
409        let removed_embedding_id = Uuid::new_v4();
410        diff.removed_embedding_ids.push(removed_embedding_id);
411
412        assert_eq!(diff.change_count(), 2);
413    }
414
415    #[test]
416    fn test_merkle_diff_edge_changes() {
417        let mut diff = CognitiveDiff::empty(
418            [0u8; 32],
419            Uuid::nil(),
420            0,
421            Hlc::new(0, [0u8; 16]),
422        );
423
424        let edge = Edge::new(
425            Uuid::new_v4(),
426            Uuid::new_v4(),
427            EdgeKind::DocToChunk,
428        );
429        diff.added_edges.push(edge);
430
431        let removed_edge = (
432            Uuid::new_v4(),
433            Uuid::new_v4(),
434            EdgeKind::ChunkToEmbedding,
435        );
436        diff.removed_edges.push(removed_edge);
437
438        assert_eq!(diff.change_count(), 2);
439    }
440
441    #[test]
442    fn test_merkle_diff_multiple_changes() {
443        let mut diff = CognitiveDiff::empty(
444            [0u8; 32],
445            Uuid::nil(),
446            0,
447            Hlc::new(0, [0u8; 16]),
448        );
449
450        // Add document
451        let doc = Document::new(
452            std::path::PathBuf::from("test.md"),
453            b"content",
454            0,
455        );
456        diff.added_docs.push(doc);
457
458        // Add chunk
459        let chunk = Chunk::new(
460            Uuid::new_v4(),
461            "content".to_string(),
462            0,
463            0,
464        );
465        diff.added_chunks.push(chunk);
466
467        // Add embedding
468        let embedding = create_test_embedding(Uuid::new_v4());
469        diff.added_embeddings.push(embedding);
470
471        // Add edge
472        let edge = Edge::new(
473            Uuid::new_v4(),
474            Uuid::new_v4(),
475            EdgeKind::DocToChunk,
476        );
477        diff.added_edges.push(edge);
478
479        assert_eq!(diff.change_count(), 4);
480    }
481
482    #[test]
483    fn test_merkle_diff_serialization() {
484        use crate::{serialize_diff, deserialize_diff};
485
486        let diff = CognitiveDiff::empty(
487            [0u8; 32],
488            Uuid::nil(),
489            0,
490            Hlc::new(0, [0u8; 16]),
491        );
492
493        let serialized = serialize_diff(&diff).unwrap();
494        assert!(!serialized.is_empty());
495
496        let deserialized = deserialize_diff(&serialized).unwrap();
497        assert_eq!(diff.metadata, deserialized.metadata);
498    }
499
500    #[test]
501    fn test_merkle_diff_serialization_with_content() {
502        use crate::{serialize_diff, deserialize_diff};
503
504        let mut diff = CognitiveDiff::empty(
505            [0u8; 32],
506            Uuid::nil(),
507            0,
508            Hlc::new(0, [0u8; 16]),
509        );
510
511        let doc = Document::new(
512            std::path::PathBuf::from("test.md"),
513            b"Test content",
514            0,
515        );
516        diff.added_docs.push(doc);
517
518        let serialized = serialize_diff(&diff).unwrap();
519        let deserialized = deserialize_diff(&serialized).unwrap();
520
521        assert_eq!(diff.added_docs.len(), deserialized.added_docs.len());
522        assert_eq!(diff.added_docs[0].path, deserialized.added_docs[0].path);
523    }
524
525    #[test]
526    fn test_merkle_diff_estimated_size() {
527        let diff = CognitiveDiff::empty(
528            [0u8; 32],
529            Uuid::nil(),
530            0,
531            Hlc::new(0, [0u8; 16]),
532        );
533
534        let size = diff.estimated_size();
535        assert!(size > 0);
536    }
537
538    #[test]
539    fn test_merkle_diff_apply_order() {
540        // Tests that changes are tracked in the correct order
541        let mut diff = CognitiveDiff::empty(
542            [0u8; 32],
543            Uuid::nil(),
544            0,
545            Hlc::new(0, [0u8; 16]),
546        );
547
548        // Add in specific order
549        for i in 0..5 {
550            let doc = Document::new(
551                std::path::PathBuf::from(format!("doc{}.md", i)),
552                format!("content{}", i).as_bytes(),
553                0,
554            );
555            diff.added_docs.push(doc);
556        }
557
558        assert_eq!(diff.added_docs.len(), 5);
559        // Verify order is preserved
560        for (i, doc) in diff.added_docs.iter().enumerate() {
561            assert!(doc.path.to_string_lossy().contains(&format!("{}", i)));
562        }
563    }
564
565    #[test]
566    fn test_merkle_diff_idempotent() {
567        // Test that adding the same item twice creates a diff that would
568        // contain both additions (idempotency is handled at application time)
569        let mut diff = CognitiveDiff::empty(
570            [0u8; 32],
571            Uuid::nil(),
572            0,
573            Hlc::new(0, [0u8; 16]),
574        );
575
576        let doc = Document::new(
577            std::path::PathBuf::from("test.md"),
578            b"content",
579            0,
580        );
581
582        // Add same document type twice (in real use, this would be deduplicated at apply time)
583        diff.added_docs.push(doc.clone());
584        diff.added_docs.push(doc);
585
586        assert_eq!(diff.change_count(), 2);
587    }
588
589    #[test]
590    fn test_merkle_comparison_using_blake3() {
591        // Test using Merkle tree to compare two states
592        let mut old_tree = MerkleTree::new();
593        old_tree.add_leaf(b"document1");
594        old_tree.add_leaf(b"document2");
595
596        let mut new_tree = MerkleTree::new();
597        new_tree.add_leaf(b"document1");
598        new_tree.add_leaf(b"document2");
599        new_tree.add_leaf(b"document3"); // New document
600
601        let old_root = old_tree.root();
602        let new_root = new_tree.root();
603
604        // Roots should be different because states are different
605        assert_ne!(old_root, new_root);
606    }
607
608    #[test]
609    fn test_merkle_prove_change_detection() {
610        // Test that Merkle tree can detect changes
611        let mut tree1 = MerkleTree::new();
612        tree1.add_leaf(b"a");
613        tree1.add_leaf(b"b");
614
615        let mut tree2 = MerkleTree::new();
616        tree2.add_leaf(b"a");
617        tree2.add_leaf(b"c"); // Changed from b to c
618
619        assert_ne!(tree1.root(), tree2.root());
620    }
621}