1use cp_core::Result;
4
5pub struct MerkleTree {
7 root: Option<[u8; 32]>,
9 leaves: Vec<[u8; 32]>,
11}
12
13impl MerkleTree {
14 pub fn new() -> Self {
16 Self {
17 root: None,
18 leaves: Vec::new(),
19 }
20 }
21
22 pub fn add_leaf(&mut self, data: &[u8]) {
24 let hash = blake3::hash(data);
25 self.leaves.push(*hash.as_bytes());
26 self.root = None; }
28
29 pub fn root(&mut self) -> [u8; 32] {
31 if let Some(root) = self.root {
32 return root;
33 }
34
35 if self.leaves.is_empty() {
36 return [0u8; 32];
37 }
38
39 let root = self.compute_root(&self.leaves);
40 self.root = Some(root);
41 root
42 }
43
44 fn compute_root(&self, hashes: &[[u8; 32]]) -> [u8; 32] {
46 if hashes.is_empty() {
47 return [0u8; 32];
48 }
49 if hashes.len() == 1 {
50 return hashes[0];
51 }
52
53 let mut next_level = Vec::with_capacity((hashes.len() + 1) / 2);
54
55 for chunk in hashes.chunks(2) {
56 let mut hasher = blake3::Hasher::new();
57 hasher.update(&chunk[0]);
58 if chunk.len() > 1 {
59 hasher.update(&chunk[1]);
60 } else {
61 hasher.update(&chunk[0]);
63 }
64 next_level.push(*hasher.finalize().as_bytes());
65 }
66
67 self.compute_root(&next_level)
68 }
69
70 pub fn len(&self) -> usize {
72 self.leaves.len()
73 }
74
75 pub fn is_empty(&self) -> bool {
77 self.leaves.is_empty()
78 }
79
80 pub fn clear(&mut self) {
82 self.leaves.clear();
83 self.root = None;
84 }
85
86 pub fn generate_proof(&mut self, leaf_index: usize) -> Option<Vec<([u8; 32], bool)>> {
92 if leaf_index >= self.leaves.len() {
93 return None;
94 }
95
96 let _ = self.root();
98
99 let mut proof = Vec::new();
100 let mut level = self.leaves.clone();
101 let mut index = leaf_index;
102
103 while level.len() > 1 {
104 let sibling_index = if index % 2 == 0 {
105 if index + 1 < level.len() {
107 index + 1
108 } else {
109 index }
111 } else {
112 index - 1
114 };
115
116 let is_left = sibling_index < index;
117 proof.push((level[sibling_index], is_left));
118
119 let mut next_level = Vec::with_capacity((level.len() + 1) / 2);
121 for chunk in level.chunks(2) {
122 let mut hasher = blake3::Hasher::new();
123 hasher.update(&chunk[0]);
124 if chunk.len() > 1 {
125 hasher.update(&chunk[1]);
126 } else {
127 hasher.update(&chunk[0]);
128 }
129 next_level.push(*hasher.finalize().as_bytes());
130 }
131
132 index /= 2;
133 level = next_level;
134 }
135
136 Some(proof)
137 }
138
139 pub fn verify_proof(
143 leaf_hash: &[u8; 32],
144 proof: &[([u8; 32], bool)],
145 expected_root: &[u8; 32],
146 ) -> bool {
147 let mut current = *leaf_hash;
148
149 for (sibling, is_left) in proof {
150 let mut hasher = blake3::Hasher::new();
151 if *is_left {
152 hasher.update(sibling);
153 hasher.update(¤t);
154 } else {
155 hasher.update(¤t);
156 hasher.update(sibling);
157 }
158 current = *hasher.finalize().as_bytes();
159 }
160
161 current == *expected_root
162 }
163}
164
165impl Default for MerkleTree {
166 fn default() -> Self {
167 Self::new()
168 }
169}
170
171#[allow(dead_code)]
173pub fn compute_merkle_root<T, F>(items: &[T], hash_fn: F) -> Result<[u8; 32]>
174where
175 F: Fn(&T) -> [u8; 32],
176{
177 let mut tree = MerkleTree::new();
178 for item in items {
179 let hash = hash_fn(item);
180 tree.leaves.push(hash);
181 }
182 Ok(tree.root())
183}
184
185#[cfg(test)]
186mod tests {
187 use super::*;
188 use cp_core::{Chunk, CognitiveDiff, Document, Edge, EdgeKind, Embedding, Hlc};
189 use uuid::Uuid;
190
191 fn create_test_embedding(chunk_id: Uuid) -> Embedding {
193 let vector: Vec<f32> = vec![0.0; 1536];
195 Embedding::new(chunk_id, &vector, [0u8; 32], 0)
196 }
197
198 #[test]
199 fn test_empty_tree() {
200 let mut tree = MerkleTree::new();
201 assert_eq!(tree.root(), [0u8; 32]);
202 }
203
204 #[test]
205 fn test_single_leaf() {
206 let mut tree = MerkleTree::new();
207 tree.add_leaf(b"hello");
208
209 let expected = blake3::hash(b"hello");
210 assert_eq!(tree.root(), *expected.as_bytes());
211 }
212
213 #[test]
214 fn test_two_leaves() {
215 let mut tree = MerkleTree::new();
216 tree.add_leaf(b"hello");
217 tree.add_leaf(b"world");
218
219 let root = tree.root();
220 assert_ne!(root, [0u8; 32]);
221 }
222
223 #[test]
224 fn test_deterministic() {
225 let mut tree1 = MerkleTree::new();
226 tree1.add_leaf(b"a");
227 tree1.add_leaf(b"b");
228 tree1.add_leaf(b"c");
229
230 let mut tree2 = MerkleTree::new();
231 tree2.add_leaf(b"a");
232 tree2.add_leaf(b"b");
233 tree2.add_leaf(b"c");
234
235 assert_eq!(tree1.root(), tree2.root());
236 }
237
238 #[test]
241 fn test_merkle_three_leaves() {
242 let mut tree = MerkleTree::new();
243 tree.add_leaf(b"first");
244 tree.add_leaf(b"second");
245 tree.add_leaf(b"third");
246
247 let root = tree.root();
248 assert_ne!(root, [0u8; 32]);
249 assert_eq!(tree.len(), 3);
250 }
251
252 #[test]
253 fn test_merkle_four_leaves() {
254 let mut tree = MerkleTree::new();
255 tree.add_leaf(b"one");
256 tree.add_leaf(b"two");
257 tree.add_leaf(b"three");
258 tree.add_leaf(b"four");
259
260 let root = tree.root();
261 assert_ne!(root, [0u8; 32]);
262 assert_eq!(tree.len(), 4);
263 }
264
265 #[test]
266 fn test_merkle_many_leaves() {
267 let mut tree = MerkleTree::new();
268 for i in 0..100 {
269 tree.add_leaf(format!("item{}", i).as_bytes());
270 }
271
272 let root = tree.root();
273 assert_ne!(root, [0u8; 32]);
274 assert_eq!(tree.len(), 100);
275 assert!(!tree.is_empty());
276 }
277
278 #[test]
279 fn test_merkle_clear() {
280 let mut tree = MerkleTree::new();
281 tree.add_leaf(b"test");
282 assert_eq!(tree.len(), 1);
283
284 tree.clear();
285 assert_eq!(tree.len(), 0);
286 assert!(tree.is_empty());
287 assert_eq!(tree.root(), [0u8; 32]);
288 }
289
290 #[test]
291 fn test_merkle_root_caching() {
292 let mut tree = MerkleTree::new();
293 tree.add_leaf(b"data");
294
295 let root1 = tree.root();
296 let root2 = tree.root();
297
298 assert_eq!(root1, root2);
300 }
301
302 #[test]
303 fn test_merkle_root_invalidated_on_new_leaf() {
304 let mut tree = MerkleTree::new();
305 tree.add_leaf(b"first");
306
307 let root1 = tree.root();
308
309 tree.add_leaf(b"second");
311
312 let root2 = tree.root();
313
314 assert_ne!(root1, root2);
315 }
316
317 #[test]
318 fn test_merkle_different_data_different_root() {
319 let mut tree1 = MerkleTree::new();
320 tree1.add_leaf(b"hello");
321 tree1.add_leaf(b"world");
322
323 let mut tree2 = MerkleTree::new();
324 tree2.add_leaf(b"foo");
325 tree2.add_leaf(b"bar");
326
327 assert_ne!(tree1.root(), tree2.root());
328 }
329
330 #[test]
331 fn test_merkle_order_matters() {
332 let mut tree1 = MerkleTree::new();
333 tree1.add_leaf(b"a");
334 tree1.add_leaf(b"b");
335
336 let mut tree2 = MerkleTree::new();
337 tree2.add_leaf(b"b");
338 tree2.add_leaf(b"a");
339
340 assert_ne!(tree1.root(), tree2.root());
342 }
343
344 #[test]
345 fn test_compute_merkle_root_function() {
346 let items = vec![
347 [1u8; 32],
348 [2u8; 32],
349 [3u8; 32],
350 ];
351
352 let root = compute_merkle_root(&items, |item| *item).unwrap();
353 assert_ne!(root, [0u8; 32]);
354 }
355
356 #[test]
357 fn test_compute_merkle_root_empty() {
358 let items: Vec<[u8; 32]> = vec![];
359 let root = compute_merkle_root(&items, |item| *item).unwrap();
360 assert_eq!(root, [0u8; 32]);
361 }
362
363 #[test]
364 fn test_compute_merkle_root_single_item() {
365 let items = vec![[42u8; 32]];
366 let root = compute_merkle_root(&items, |item| *item).unwrap();
367 assert_eq!(root, [42u8; 32]);
368 }
369
370 #[test]
373 fn test_merkle_diff_empty() {
374 let diff = CognitiveDiff::empty(
376 [0u8; 32],
377 Uuid::nil(),
378 0,
379 Hlc::new(0, [0u8; 16]),
380 );
381
382 assert!(diff.is_empty());
383 assert_eq!(diff.change_count(), 0);
384 }
385
386 #[test]
387 fn test_merkle_diff_document_added() {
388 let mut diff = CognitiveDiff::empty(
389 [0u8; 32],
390 Uuid::nil(),
391 0,
392 Hlc::new(0, [0u8; 16]),
393 );
394
395 let doc = Document::new(
396 std::path::PathBuf::from("test.md"),
397 b"test content",
398 0,
399 );
400 diff.added_docs.push(doc);
401
402 assert!(!diff.is_empty());
403 assert_eq!(diff.change_count(), 1);
404 assert_eq!(diff.added_docs.len(), 1);
405 }
406
407 #[test]
408 fn test_merkle_diff_document_modified() {
409 let mut diff = CognitiveDiff::empty(
411 [0u8; 32],
412 Uuid::nil(),
413 0,
414 Hlc::new(0, [0u8; 16]),
415 );
416
417 let old_doc_id = Uuid::new_v4();
418 diff.removed_doc_ids.push(old_doc_id);
419
420 let new_doc = Document::new(
421 std::path::PathBuf::from("test.md"),
422 b"updated content",
423 0,
424 );
425 diff.added_docs.push(new_doc);
426
427 assert!(!diff.is_empty());
428 assert_eq!(diff.change_count(), 2);
429 }
430
431 #[test]
432 fn test_merkle_diff_document_deleted() {
433 let mut diff = CognitiveDiff::empty(
434 [0u8; 32],
435 Uuid::nil(),
436 0,
437 Hlc::new(0, [0u8; 16]),
438 );
439
440 let doc_id = Uuid::new_v4();
441 diff.removed_doc_ids.push(doc_id);
442
443 assert!(!diff.is_empty());
444 assert_eq!(diff.change_count(), 1);
445 assert_eq!(diff.removed_doc_ids.len(), 1);
446 }
447
448 #[test]
449 fn test_merkle_diff_chunk_changes() {
450 let mut diff = CognitiveDiff::empty(
451 [0u8; 32],
452 Uuid::nil(),
453 0,
454 Hlc::new(0, [0u8; 16]),
455 );
456
457 let doc_id = Uuid::new_v4();
458 let chunk = Chunk::new(
459 doc_id,
460 "test chunk content".to_string(),
461 0,
462 0,
463 );
464 diff.added_chunks.push(chunk);
465
466 let removed_chunk_id = Uuid::new_v4();
467 diff.removed_chunk_ids.push(removed_chunk_id);
468
469 assert_eq!(diff.change_count(), 2);
470 assert_eq!(diff.added_chunks.len(), 1);
471 assert_eq!(diff.removed_chunk_ids.len(), 1);
472 }
473
474 #[test]
475 fn test_merkle_diff_embedding_changes() {
476 let mut diff = CognitiveDiff::empty(
477 [0u8; 32],
478 Uuid::nil(),
479 0,
480 Hlc::new(0, [0u8; 16]),
481 );
482
483 let chunk_id = Uuid::new_v4();
484 let embedding = create_test_embedding(chunk_id);
485 diff.added_embeddings.push(embedding);
486
487 let removed_embedding_id = Uuid::new_v4();
488 diff.removed_embedding_ids.push(removed_embedding_id);
489
490 assert_eq!(diff.change_count(), 2);
491 }
492
493 #[test]
494 fn test_merkle_diff_edge_changes() {
495 let mut diff = CognitiveDiff::empty(
496 [0u8; 32],
497 Uuid::nil(),
498 0,
499 Hlc::new(0, [0u8; 16]),
500 );
501
502 let edge = Edge::new(
503 Uuid::new_v4(),
504 Uuid::new_v4(),
505 EdgeKind::DocToChunk,
506 );
507 diff.added_edges.push(edge);
508
509 let removed_edge = (
510 Uuid::new_v4(),
511 Uuid::new_v4(),
512 EdgeKind::ChunkToEmbedding,
513 );
514 diff.removed_edges.push(removed_edge);
515
516 assert_eq!(diff.change_count(), 2);
517 }
518
519 #[test]
520 fn test_merkle_diff_multiple_changes() {
521 let mut diff = CognitiveDiff::empty(
522 [0u8; 32],
523 Uuid::nil(),
524 0,
525 Hlc::new(0, [0u8; 16]),
526 );
527
528 let doc = Document::new(
530 std::path::PathBuf::from("test.md"),
531 b"content",
532 0,
533 );
534 diff.added_docs.push(doc);
535
536 let chunk = Chunk::new(
538 Uuid::new_v4(),
539 "content".to_string(),
540 0,
541 0,
542 );
543 diff.added_chunks.push(chunk);
544
545 let embedding = create_test_embedding(Uuid::new_v4());
547 diff.added_embeddings.push(embedding);
548
549 let edge = Edge::new(
551 Uuid::new_v4(),
552 Uuid::new_v4(),
553 EdgeKind::DocToChunk,
554 );
555 diff.added_edges.push(edge);
556
557 assert_eq!(diff.change_count(), 4);
558 }
559
560 #[test]
561 fn test_merkle_diff_serialization() {
562 use crate::{serialize_diff, deserialize_diff};
563
564 let diff = CognitiveDiff::empty(
565 [0u8; 32],
566 Uuid::nil(),
567 0,
568 Hlc::new(0, [0u8; 16]),
569 );
570
571 let serialized = serialize_diff(&diff).unwrap();
572 assert!(!serialized.is_empty());
573
574 let deserialized = deserialize_diff(&serialized).unwrap();
575 assert_eq!(diff.metadata, deserialized.metadata);
576 }
577
578 #[test]
579 fn test_merkle_diff_serialization_with_content() {
580 use crate::{serialize_diff, deserialize_diff};
581
582 let mut diff = CognitiveDiff::empty(
583 [0u8; 32],
584 Uuid::nil(),
585 0,
586 Hlc::new(0, [0u8; 16]),
587 );
588
589 let doc = Document::new(
590 std::path::PathBuf::from("test.md"),
591 b"Test content",
592 0,
593 );
594 diff.added_docs.push(doc);
595
596 let serialized = serialize_diff(&diff).unwrap();
597 let deserialized = deserialize_diff(&serialized).unwrap();
598
599 assert_eq!(diff.added_docs.len(), deserialized.added_docs.len());
600 assert_eq!(diff.added_docs[0].path, deserialized.added_docs[0].path);
601 }
602
603 #[test]
604 fn test_merkle_diff_estimated_size() {
605 let diff = CognitiveDiff::empty(
606 [0u8; 32],
607 Uuid::nil(),
608 0,
609 Hlc::new(0, [0u8; 16]),
610 );
611
612 let size = diff.estimated_size();
613 assert!(size > 0);
614 }
615
616 #[test]
617 fn test_merkle_diff_apply_order() {
618 let mut diff = CognitiveDiff::empty(
620 [0u8; 32],
621 Uuid::nil(),
622 0,
623 Hlc::new(0, [0u8; 16]),
624 );
625
626 for i in 0..5 {
628 let doc = Document::new(
629 std::path::PathBuf::from(format!("doc{}.md", i)),
630 format!("content{}", i).as_bytes(),
631 0,
632 );
633 diff.added_docs.push(doc);
634 }
635
636 assert_eq!(diff.added_docs.len(), 5);
637 for (i, doc) in diff.added_docs.iter().enumerate() {
639 assert!(doc.path.to_string_lossy().contains(&format!("{}", i)));
640 }
641 }
642
643 #[test]
644 fn test_merkle_diff_idempotent() {
645 let mut diff = CognitiveDiff::empty(
648 [0u8; 32],
649 Uuid::nil(),
650 0,
651 Hlc::new(0, [0u8; 16]),
652 );
653
654 let doc = Document::new(
655 std::path::PathBuf::from("test.md"),
656 b"content",
657 0,
658 );
659
660 diff.added_docs.push(doc.clone());
662 diff.added_docs.push(doc);
663
664 assert_eq!(diff.change_count(), 2);
665 }
666
667 #[test]
668 fn test_merkle_comparison_using_blake3() {
669 let mut old_tree = MerkleTree::new();
671 old_tree.add_leaf(b"document1");
672 old_tree.add_leaf(b"document2");
673
674 let mut new_tree = MerkleTree::new();
675 new_tree.add_leaf(b"document1");
676 new_tree.add_leaf(b"document2");
677 new_tree.add_leaf(b"document3"); let old_root = old_tree.root();
680 let new_root = new_tree.root();
681
682 assert_ne!(old_root, new_root);
684 }
685
686 #[test]
687 fn test_merkle_proof_single_leaf() {
688 let mut tree = MerkleTree::new();
689 tree.add_leaf(b"only");
690
691 let root = tree.root();
692 let proof = tree.generate_proof(0).unwrap();
693 assert!(proof.is_empty(), "Single leaf proof should be empty");
694
695 let leaf_hash = blake3::hash(b"only");
696 assert!(MerkleTree::verify_proof(leaf_hash.as_bytes(), &proof, &root));
697 }
698
699 #[test]
700 fn test_merkle_proof_two_leaves() {
701 let mut tree = MerkleTree::new();
702 tree.add_leaf(b"left");
703 tree.add_leaf(b"right");
704
705 let root = tree.root();
706
707 let proof0 = tree.generate_proof(0).unwrap();
709 let leaf0 = blake3::hash(b"left");
710 assert!(MerkleTree::verify_proof(leaf0.as_bytes(), &proof0, &root));
711
712 let proof1 = tree.generate_proof(1).unwrap();
714 let leaf1 = blake3::hash(b"right");
715 assert!(MerkleTree::verify_proof(leaf1.as_bytes(), &proof1, &root));
716 }
717
718 #[test]
719 fn test_merkle_proof_many_leaves() {
720 let mut tree = MerkleTree::new();
721 let data: Vec<Vec<u8>> = (0..17).map(|i| format!("leaf_{}", i).into_bytes()).collect();
722 for d in &data {
723 tree.add_leaf(d);
724 }
725
726 let root = tree.root();
727
728 for (i, d) in data.iter().enumerate() {
729 let proof = tree.generate_proof(i).unwrap();
730 let leaf_hash = blake3::hash(d);
731 assert!(
732 MerkleTree::verify_proof(leaf_hash.as_bytes(), &proof, &root),
733 "Proof failed for leaf {}",
734 i
735 );
736 }
737 }
738
739 #[test]
740 fn test_merkle_proof_wrong_leaf_fails() {
741 let mut tree = MerkleTree::new();
742 tree.add_leaf(b"a");
743 tree.add_leaf(b"b");
744 tree.add_leaf(b"c");
745
746 let root = tree.root();
747
748 let proof = tree.generate_proof(0).unwrap();
749 let wrong_leaf = blake3::hash(b"wrong");
750 assert!(!MerkleTree::verify_proof(wrong_leaf.as_bytes(), &proof, &root));
751 }
752
753 #[test]
754 fn test_merkle_proof_out_of_bounds() {
755 let mut tree = MerkleTree::new();
756 tree.add_leaf(b"a");
757 assert!(tree.generate_proof(1).is_none());
758 assert!(tree.generate_proof(100).is_none());
759 }
760
761 #[test]
762 fn test_merkle_prove_change_detection() {
763 let mut tree1 = MerkleTree::new();
765 tree1.add_leaf(b"a");
766 tree1.add_leaf(b"b");
767
768 let mut tree2 = MerkleTree::new();
769 tree2.add_leaf(b"a");
770 tree2.add_leaf(b"c"); assert_ne!(tree1.root(), tree2.root());
773 }
774}