1pub struct MerkleTree {
5 root: Option<[u8; 32]>,
7 leaves: Vec<[u8; 32]>,
9}
10
11impl MerkleTree {
12 pub fn new() -> Self {
14 Self {
15 root: None,
16 leaves: Vec::new(),
17 }
18 }
19
20 pub fn add_leaf(&mut self, data: &[u8]) {
22 let hash = blake3::hash(data);
23 self.leaves.push(*hash.as_bytes());
24 self.root = None; }
26
27 pub fn root(&mut self) -> [u8; 32] {
29 if let Some(root) = self.root {
30 return root;
31 }
32
33 if self.leaves.is_empty() {
34 return [0u8; 32];
35 }
36
37 let root = Self::compute_root(&self.leaves);
38 self.root = Some(root);
39 root
40 }
41
42 fn compute_root(hashes: &[[u8; 32]]) -> [u8; 32] {
44 if hashes.is_empty() {
45 return [0u8; 32];
46 }
47 if hashes.len() == 1 {
48 return hashes[0];
49 }
50
51 let mut next_level = Vec::with_capacity(hashes.len().div_ceil(2));
52
53 for chunk in hashes.chunks(2) {
54 let mut hasher = blake3::Hasher::new();
55 hasher.update(&chunk[0]);
56 if chunk.len() > 1 {
57 hasher.update(&chunk[1]);
58 } else {
59 hasher.update(&chunk[0]);
61 }
62 next_level.push(*hasher.finalize().as_bytes());
63 }
64
65 Self::compute_root(&next_level)
66 }
67
68 pub fn len(&self) -> usize {
70 self.leaves.len()
71 }
72
73 pub fn is_empty(&self) -> bool {
75 self.leaves.is_empty()
76 }
77
78 pub fn clear(&mut self) {
80 self.leaves.clear();
81 self.root = None;
82 }
83
84 pub fn generate_proof(&mut self, leaf_index: usize) -> Option<Vec<([u8; 32], bool)>> {
90 if leaf_index >= self.leaves.len() {
91 return None;
92 }
93
94 let _ = self.root();
96
97 let mut proof = Vec::new();
98 let mut level = self.leaves.clone();
99 let mut index = leaf_index;
100
101 while level.len() > 1 {
102 let sibling_index = if index.is_multiple_of(2) {
103 if index + 1 < level.len() {
105 index + 1
106 } else {
107 index }
109 } else {
110 index - 1
112 };
113
114 let is_left = sibling_index < index;
115 proof.push((level[sibling_index], is_left));
116
117 let mut next_level = Vec::with_capacity(level.len().div_ceil(2));
119 for chunk in level.chunks(2) {
120 let mut hasher = blake3::Hasher::new();
121 hasher.update(&chunk[0]);
122 if chunk.len() > 1 {
123 hasher.update(&chunk[1]);
124 } else {
125 hasher.update(&chunk[0]);
126 }
127 next_level.push(*hasher.finalize().as_bytes());
128 }
129
130 index /= 2;
131 level = next_level;
132 }
133
134 Some(proof)
135 }
136
137 pub fn verify_proof(
141 leaf_hash: &[u8; 32],
142 proof: &[([u8; 32], bool)],
143 expected_root: &[u8; 32],
144 ) -> bool {
145 let mut current = *leaf_hash;
146
147 for (sibling, is_left) in proof {
148 let mut hasher = blake3::Hasher::new();
149 if *is_left {
150 hasher.update(sibling);
151 hasher.update(¤t);
152 } else {
153 hasher.update(¤t);
154 hasher.update(sibling);
155 }
156 current = *hasher.finalize().as_bytes();
157 }
158
159 current == *expected_root
160 }
161}
162
163impl Default for MerkleTree {
164 fn default() -> Self {
165 Self::new()
166 }
167}
168
169#[cfg(test)]
171pub fn compute_merkle_root<T, F>(items: &[T], hash_fn: F) -> [u8; 32]
172where
173 F: Fn(&T) -> [u8; 32],
174{
175 let mut tree = MerkleTree::new();
176 for item in items {
177 let hash = hash_fn(item);
178 tree.leaves.push(hash);
179 }
180 tree.root()
181}
182
183#[cfg(test)]
184mod tests {
185 use super::*;
186 use cp_core::{Chunk, CognitiveDiff, Document, Edge, EdgeKind, Embedding, Hlc};
187 use uuid::Uuid;
188
189 fn create_test_embedding(chunk_id: Uuid) -> Embedding {
191 let vector: Vec<f32> = vec![0.0; 1536];
193 Embedding::new(chunk_id, &vector, [0u8; 32], 0)
194 }
195
196 #[test]
197 fn test_empty_tree() {
198 let mut tree = MerkleTree::new();
199 assert_eq!(tree.root(), [0u8; 32]);
200 }
201
202 #[test]
203 fn test_single_leaf() {
204 let mut tree = MerkleTree::new();
205 tree.add_leaf(b"hello");
206
207 let expected = blake3::hash(b"hello");
208 assert_eq!(tree.root(), *expected.as_bytes());
209 }
210
211 #[test]
212 fn test_two_leaves() {
213 let mut tree = MerkleTree::new();
214 tree.add_leaf(b"hello");
215 tree.add_leaf(b"world");
216
217 let root = tree.root();
218 assert_ne!(root, [0u8; 32]);
219 }
220
221 #[test]
222 fn test_deterministic() {
223 let mut tree1 = MerkleTree::new();
224 tree1.add_leaf(b"a");
225 tree1.add_leaf(b"b");
226 tree1.add_leaf(b"c");
227
228 let mut tree2 = MerkleTree::new();
229 tree2.add_leaf(b"a");
230 tree2.add_leaf(b"b");
231 tree2.add_leaf(b"c");
232
233 assert_eq!(tree1.root(), tree2.root());
234 }
235
236 #[test]
239 fn test_merkle_three_leaves() {
240 let mut tree = MerkleTree::new();
241 tree.add_leaf(b"first");
242 tree.add_leaf(b"second");
243 tree.add_leaf(b"third");
244
245 let root = tree.root();
246 assert_ne!(root, [0u8; 32]);
247 assert_eq!(tree.len(), 3);
248 }
249
250 #[test]
251 fn test_merkle_four_leaves() {
252 let mut tree = MerkleTree::new();
253 tree.add_leaf(b"one");
254 tree.add_leaf(b"two");
255 tree.add_leaf(b"three");
256 tree.add_leaf(b"four");
257
258 let root = tree.root();
259 assert_ne!(root, [0u8; 32]);
260 assert_eq!(tree.len(), 4);
261 }
262
263 #[test]
264 fn test_merkle_many_leaves() {
265 let mut tree = MerkleTree::new();
266 for i in 0..100 {
267 tree.add_leaf(format!("item{i}").as_bytes());
268 }
269
270 let root = tree.root();
271 assert_ne!(root, [0u8; 32]);
272 assert_eq!(tree.len(), 100);
273 assert!(!tree.is_empty());
274 }
275
276 #[test]
277 fn test_merkle_clear() {
278 let mut tree = MerkleTree::new();
279 tree.add_leaf(b"test");
280 assert_eq!(tree.len(), 1);
281
282 tree.clear();
283 assert_eq!(tree.len(), 0);
284 assert!(tree.is_empty());
285 assert_eq!(tree.root(), [0u8; 32]);
286 }
287
288 #[test]
289 fn test_merkle_root_caching() {
290 let mut tree = MerkleTree::new();
291 tree.add_leaf(b"data");
292
293 let root1 = tree.root();
294 let root2 = tree.root();
295
296 assert_eq!(root1, root2);
298 }
299
300 #[test]
301 fn test_merkle_root_invalidated_on_new_leaf() {
302 let mut tree = MerkleTree::new();
303 tree.add_leaf(b"first");
304
305 let root1 = tree.root();
306
307 tree.add_leaf(b"second");
309
310 let root2 = tree.root();
311
312 assert_ne!(root1, root2);
313 }
314
315 #[test]
316 fn test_merkle_different_data_different_root() {
317 let mut tree1 = MerkleTree::new();
318 tree1.add_leaf(b"hello");
319 tree1.add_leaf(b"world");
320
321 let mut tree2 = MerkleTree::new();
322 tree2.add_leaf(b"foo");
323 tree2.add_leaf(b"bar");
324
325 assert_ne!(tree1.root(), tree2.root());
326 }
327
328 #[test]
329 fn test_merkle_order_matters() {
330 let mut tree1 = MerkleTree::new();
331 tree1.add_leaf(b"a");
332 tree1.add_leaf(b"b");
333
334 let mut tree2 = MerkleTree::new();
335 tree2.add_leaf(b"b");
336 tree2.add_leaf(b"a");
337
338 assert_ne!(tree1.root(), tree2.root());
340 }
341
342 #[test]
343 fn test_compute_merkle_root_function() {
344 let items = vec![[1u8; 32], [2u8; 32], [3u8; 32]];
345
346 let root = compute_merkle_root(&items, |item| *item);
347 assert_ne!(root, [0u8; 32]);
348 }
349
350 #[test]
351 fn test_compute_merkle_root_empty() {
352 let items: Vec<[u8; 32]> = vec![];
353 let root = compute_merkle_root(&items, |item| *item);
354 assert_eq!(root, [0u8; 32]);
355 }
356
357 #[test]
358 fn test_compute_merkle_root_single_item() {
359 let items = vec![[42u8; 32]];
360 let root = compute_merkle_root(&items, |item| *item);
361 assert_eq!(root, [42u8; 32]);
362 }
363
364 #[test]
367 fn test_merkle_diff_empty() {
368 let diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
370
371 assert!(diff.is_empty());
372 assert_eq!(diff.change_count(), 0);
373 }
374
375 #[test]
376 fn test_merkle_diff_document_added() {
377 let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
378
379 let doc = Document::new(std::path::PathBuf::from("test.md"), b"test content", 0);
380 diff.added_docs.push(doc);
381
382 assert!(!diff.is_empty());
383 assert_eq!(diff.change_count(), 1);
384 assert_eq!(diff.added_docs.len(), 1);
385 }
386
387 #[test]
388 fn test_merkle_diff_document_modified() {
389 let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
391
392 let old_doc_id = Uuid::new_v4();
393 diff.removed_doc_ids.push(old_doc_id);
394
395 let new_doc = Document::new(std::path::PathBuf::from("test.md"), b"updated content", 0);
396 diff.added_docs.push(new_doc);
397
398 assert!(!diff.is_empty());
399 assert_eq!(diff.change_count(), 2);
400 }
401
402 #[test]
403 fn test_merkle_diff_document_deleted() {
404 let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
405
406 let doc_id = Uuid::new_v4();
407 diff.removed_doc_ids.push(doc_id);
408
409 assert!(!diff.is_empty());
410 assert_eq!(diff.change_count(), 1);
411 assert_eq!(diff.removed_doc_ids.len(), 1);
412 }
413
414 #[test]
415 fn test_merkle_diff_chunk_changes() {
416 let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
417
418 let doc_id = Uuid::new_v4();
419 let chunk = Chunk::new(doc_id, "test chunk content", 0, 0);
420 diff.added_chunks.push(chunk);
421
422 let removed_chunk_id = Uuid::new_v4();
423 diff.removed_chunk_ids.push(removed_chunk_id);
424
425 assert_eq!(diff.change_count(), 2);
426 assert_eq!(diff.added_chunks.len(), 1);
427 assert_eq!(diff.removed_chunk_ids.len(), 1);
428 }
429
430 #[test]
431 fn test_merkle_diff_embedding_changes() {
432 let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
433
434 let chunk_id = Uuid::new_v4();
435 let embedding = create_test_embedding(chunk_id);
436 diff.added_embeddings.push(embedding);
437
438 let removed_embedding_id = Uuid::new_v4();
439 diff.removed_embedding_ids.push(removed_embedding_id);
440
441 assert_eq!(diff.change_count(), 2);
442 }
443
444 #[test]
445 fn test_merkle_diff_edge_changes() {
446 let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
447
448 let edge = Edge::new(Uuid::new_v4(), Uuid::new_v4(), EdgeKind::DocToChunk);
449 diff.added_edges.push(edge);
450
451 let removed_edge = (Uuid::new_v4(), Uuid::new_v4(), EdgeKind::ChunkToEmbedding);
452 diff.removed_edges.push(removed_edge);
453
454 assert_eq!(diff.change_count(), 2);
455 }
456
457 #[test]
458 fn test_merkle_diff_multiple_changes() {
459 let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
460
461 let doc = Document::new(std::path::PathBuf::from("test.md"), b"content", 0);
463 diff.added_docs.push(doc);
464
465 let chunk = Chunk::new(Uuid::new_v4(), "content", 0, 0);
467 diff.added_chunks.push(chunk);
468
469 let embedding = create_test_embedding(Uuid::new_v4());
471 diff.added_embeddings.push(embedding);
472
473 let edge = Edge::new(Uuid::new_v4(), Uuid::new_v4(), EdgeKind::DocToChunk);
475 diff.added_edges.push(edge);
476
477 assert_eq!(diff.change_count(), 4);
478 }
479
480 #[test]
481 fn test_merkle_diff_serialization() {
482 use crate::{deserialize_diff, serialize_diff};
483
484 let diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
485
486 let serialized = serialize_diff(&diff).unwrap();
487 assert!(!serialized.is_empty());
488
489 let deserialized = deserialize_diff(&serialized).unwrap();
490 assert_eq!(diff.metadata, deserialized.metadata);
491 }
492
493 #[test]
494 fn test_merkle_diff_serialization_with_content() {
495 use crate::{deserialize_diff, serialize_diff};
496
497 let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
498
499 let doc = Document::new(std::path::PathBuf::from("test.md"), b"Test content", 0);
500 diff.added_docs.push(doc);
501
502 let serialized = serialize_diff(&diff).unwrap();
503 let deserialized = deserialize_diff(&serialized).unwrap();
504
505 assert_eq!(diff.added_docs.len(), deserialized.added_docs.len());
506 assert_eq!(diff.added_docs[0].path, deserialized.added_docs[0].path);
507 }
508
509 #[test]
510 fn test_merkle_diff_estimated_size() {
511 let diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
512
513 let size = diff.estimated_size();
514 assert!(size > 0);
515 }
516
517 #[test]
518 fn test_merkle_diff_apply_order() {
519 let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
521
522 for i in 0..5 {
524 let doc = Document::new(
525 std::path::PathBuf::from(format!("doc{i}.md")),
526 format!("content{i}").as_bytes(),
527 0,
528 );
529 diff.added_docs.push(doc);
530 }
531
532 assert_eq!(diff.added_docs.len(), 5);
533 for (i, doc) in diff.added_docs.iter().enumerate() {
535 assert!(doc.path.to_string_lossy().contains(&format!("{i}")));
536 }
537 }
538
539 #[test]
540 fn test_merkle_diff_idempotent() {
541 let mut diff = CognitiveDiff::empty([0u8; 32], Uuid::nil(), 0, Hlc::new(0, [0u8; 16]));
544
545 let doc = Document::new(std::path::PathBuf::from("test.md"), b"content", 0);
546
547 diff.added_docs.push(doc.clone());
549 diff.added_docs.push(doc);
550
551 assert_eq!(diff.change_count(), 2);
552 }
553
554 #[test]
555 fn test_merkle_comparison_using_blake3() {
556 let mut old_tree = MerkleTree::new();
558 old_tree.add_leaf(b"document1");
559 old_tree.add_leaf(b"document2");
560
561 let mut new_tree = MerkleTree::new();
562 new_tree.add_leaf(b"document1");
563 new_tree.add_leaf(b"document2");
564 new_tree.add_leaf(b"document3"); let old_root = old_tree.root();
567 let new_root = new_tree.root();
568
569 assert_ne!(old_root, new_root);
571 }
572
573 #[test]
574 fn test_merkle_proof_single_leaf() {
575 let mut tree = MerkleTree::new();
576 tree.add_leaf(b"only");
577
578 let root = tree.root();
579 let proof = tree.generate_proof(0).unwrap();
580 assert!(proof.is_empty(), "Single leaf proof should be empty");
581
582 let leaf_hash = blake3::hash(b"only");
583 assert!(MerkleTree::verify_proof(
584 leaf_hash.as_bytes(),
585 &proof,
586 &root
587 ));
588 }
589
590 #[test]
591 fn test_merkle_proof_two_leaves() {
592 let mut tree = MerkleTree::new();
593 tree.add_leaf(b"left");
594 tree.add_leaf(b"right");
595
596 let root = tree.root();
597
598 let proof0 = tree.generate_proof(0).unwrap();
600 let leaf0 = blake3::hash(b"left");
601 assert!(MerkleTree::verify_proof(leaf0.as_bytes(), &proof0, &root));
602
603 let proof1 = tree.generate_proof(1).unwrap();
605 let leaf1 = blake3::hash(b"right");
606 assert!(MerkleTree::verify_proof(leaf1.as_bytes(), &proof1, &root));
607 }
608
609 #[test]
610 fn test_merkle_proof_many_leaves() {
611 let mut tree = MerkleTree::new();
612 let data: Vec<Vec<u8>> = (0..17).map(|i| format!("leaf_{i}").into_bytes()).collect();
613 for d in &data {
614 tree.add_leaf(d);
615 }
616
617 let root = tree.root();
618
619 for (i, d) in data.iter().enumerate() {
620 let proof = tree.generate_proof(i).unwrap();
621 let leaf_hash = blake3::hash(d);
622 assert!(
623 MerkleTree::verify_proof(leaf_hash.as_bytes(), &proof, &root),
624 "Proof failed for leaf {i}"
625 );
626 }
627 }
628
629 #[test]
630 fn test_merkle_proof_wrong_leaf_fails() {
631 let mut tree = MerkleTree::new();
632 tree.add_leaf(b"a");
633 tree.add_leaf(b"b");
634 tree.add_leaf(b"c");
635
636 let root = tree.root();
637
638 let proof = tree.generate_proof(0).unwrap();
639 let wrong_leaf = blake3::hash(b"wrong");
640 assert!(!MerkleTree::verify_proof(
641 wrong_leaf.as_bytes(),
642 &proof,
643 &root
644 ));
645 }
646
647 #[test]
648 fn test_merkle_proof_out_of_bounds() {
649 let mut tree = MerkleTree::new();
650 tree.add_leaf(b"a");
651 assert!(tree.generate_proof(1).is_none());
652 assert!(tree.generate_proof(100).is_none());
653 }
654
655 #[test]
656 fn test_merkle_prove_change_detection() {
657 let mut tree1 = MerkleTree::new();
659 tree1.add_leaf(b"a");
660 tree1.add_leaf(b"b");
661
662 let mut tree2 = MerkleTree::new();
663 tree2.add_leaf(b"a");
664 tree2.add_leaf(b"c"); assert_ne!(tree1.root(), tree2.root());
667 }
668}