1use blake3::Hasher as Blake3;
38use serde::{Deserialize, Serialize};
39use std::fmt;
40
41use crate::event::Event;
42
43const BOUNDARY_BITS: u32 = 6;
50const BOUNDARY_MASK: u64 = (1u64 << BOUNDARY_BITS) - 1;
51
52const MIN_CHUNK_SIZE: usize = 8;
54
55const MAX_CHUNK_SIZE: usize = 256;
57
58const INTERIOR_BOUNDARY_BITS: u32 = 3; const INTERIOR_BOUNDARY_MASK: u64 = (1u64 << INTERIOR_BOUNDARY_BITS) - 1;
62const MIN_INTERIOR_SIZE: usize = 2;
63const MAX_INTERIOR_SIZE: usize = 32;
64
65fn gear_table() -> [u64; 256] {
72 let mut table = [0u64; 256];
73 for i in 0u16..256 {
74 let h = blake3::hash(&i.to_le_bytes());
75 let bytes: [u8; 8] = h.as_bytes()[0..8]
76 .try_into()
77 .expect("BLAKE3 output is 32 bytes; first 8 always valid");
78 table[i as usize] = u64::from_le_bytes(bytes);
79 }
80 table
81}
82
83#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
89pub struct Hash(pub [u8; 32]);
90
91impl fmt::Debug for Hash {
92 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
93 write!(f, "Hash({})", hex::encode(&self.0[..8]))
94 }
95}
96
97impl fmt::Display for Hash {
98 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
99 write!(f, "{}", hex::encode(&self.0))
100 }
101}
102
103fn hash_bytes(data: &[u8]) -> Hash {
104 Hash(*blake3::hash(data).as_bytes())
105}
106
107fn sort_key(e: &Event) -> (String, i64, String) {
115 (
116 e.item_id.as_str().to_string(),
117 e.wall_ts_us,
118 e.event_hash.clone(),
119 )
120}
121
122#[derive(Debug, Clone, Serialize, Deserialize)]
129pub enum ProllyNode {
130 Leaf {
131 hash: Hash,
132 event_hashes: Vec<String>,
136 },
137 Interior {
138 hash: Hash,
139 children: Vec<Self>,
140 },
141}
142
143impl ProllyNode {
144 #[must_use]
145 pub const fn hash(&self) -> Hash {
146 let (Self::Leaf { hash, .. } | Self::Interior { hash, .. }) = self;
147 *hash
148 }
149
150 pub fn collect_event_hashes(&self, out: &mut Vec<String>) {
152 match self {
153 Self::Leaf { event_hashes, .. } => {
154 out.extend(event_hashes.iter().cloned());
155 }
156 Self::Interior { children, .. } => {
157 for child in children {
158 child.collect_event_hashes(out);
159 }
160 }
161 }
162 }
163}
164
165#[derive(Debug, Clone, Serialize, Deserialize)]
171pub struct ProllyTree {
172 pub root: ProllyNode,
173 pub event_count: usize,
174}
175
176impl ProllyTree {
177 #[must_use]
186 pub fn build(events: &[Event]) -> Self {
187 if events.is_empty() {
188 let empty_hash = hash_bytes(b"prolly:empty");
189 return Self {
190 root: ProllyNode::Leaf {
191 hash: empty_hash,
192 event_hashes: vec![],
193 },
194 event_count: 0,
195 };
196 }
197
198 let mut sorted: Vec<&Event> = events.iter().collect();
200 sorted.sort_by_key(|a| sort_key(a));
201
202 let event_hashes: Vec<String> = sorted.iter().map(|e| e.event_hash.clone()).collect();
204 let leaves = chunk_leaf(&event_hashes);
205
206 let root = build_interior(leaves);
208
209 Self {
210 root,
211 event_count: events.len(),
212 }
213 }
214
215 #[must_use]
220 pub fn diff(&self, other: &Self) -> Vec<String> {
221 let mut missing = Vec::new();
222 diff_nodes(&self.root, &other.root, &mut missing);
223 missing
224 }
225
226 #[must_use]
228 pub fn event_hashes(&self) -> Vec<String> {
229 let mut out = Vec::with_capacity(self.event_count);
230 self.root.collect_event_hashes(&mut out);
231 out
232 }
233
234 #[must_use]
241 pub fn to_bytes(&self) -> Vec<u8> {
242 serde_json::to_vec(self).expect("ProllyTree serialization should not fail")
245 }
246
247 pub fn from_bytes(data: &[u8]) -> Result<Self, serde_json::Error> {
254 serde_json::from_slice(data)
255 }
256}
257
258fn chunk_leaf(event_hashes: &[String]) -> Vec<ProllyNode> {
264 let table = gear_table();
265 let mut chunks = Vec::new();
266 let mut chunk_start = 0;
267 let mut gear: u64 = 0;
268
269 for (i, eh) in event_hashes.iter().enumerate() {
270 for &b in eh.as_bytes() {
272 gear = (gear << 1).wrapping_add(table[b as usize]);
273 }
274
275 let chunk_len = i - chunk_start + 1;
276
277 let at_boundary = chunk_len >= MIN_CHUNK_SIZE && (gear & BOUNDARY_MASK) == 0;
278 let at_max = chunk_len >= MAX_CHUNK_SIZE;
279 let at_end = i == event_hashes.len() - 1;
280
281 if at_boundary || at_max || at_end {
282 let slice = &event_hashes[chunk_start..=i];
283 let hash = hash_leaf_chunk(slice);
284 chunks.push(ProllyNode::Leaf {
285 hash,
286 event_hashes: slice.to_vec(),
287 });
288 chunk_start = i + 1;
289 gear = 0;
290 }
291 }
292
293 chunks
294}
295
296fn hash_leaf_chunk(event_hashes: &[String]) -> Hash {
297 let mut hasher = Blake3::new();
298 hasher.update(b"prolly:leaf:");
299 for eh in event_hashes {
300 hasher.update(eh.as_bytes());
301 hasher.update(b"\n");
302 }
303 Hash(*hasher.finalize().as_bytes())
304}
305
306fn build_interior(mut nodes: Vec<ProllyNode>) -> ProllyNode {
312 if nodes.len() == 1 {
313 return nodes.remove(0);
314 }
315
316 let table = gear_table();
318 let mut groups: Vec<Vec<ProllyNode>> = Vec::new();
319 let mut current_group: Vec<ProllyNode> = Vec::new();
320 let mut gear: u64 = 0;
321
322 for node in nodes {
323 let h = node.hash();
324 for &b in &h.0[..8] {
325 gear = (gear << 1).wrapping_add(table[b as usize]);
326 }
327 current_group.push(node);
328
329 let group_len = current_group.len();
330 let at_boundary = group_len >= MIN_INTERIOR_SIZE && (gear & INTERIOR_BOUNDARY_MASK) == 0;
331 let at_max = group_len >= MAX_INTERIOR_SIZE;
332
333 if at_boundary || at_max {
334 groups.push(std::mem::take(&mut current_group));
335 gear = 0;
336 }
337 }
338 if !current_group.is_empty() {
339 groups.push(current_group);
340 }
341
342 let interior_nodes: Vec<ProllyNode> = groups
344 .into_iter()
345 .map(|children| {
346 let hash = hash_interior(&children);
347 ProllyNode::Interior { hash, children }
348 })
349 .collect();
350
351 build_interior(interior_nodes)
353}
354
355fn hash_interior(children: &[ProllyNode]) -> Hash {
356 let mut hasher = Blake3::new();
357 hasher.update(b"prolly:interior:");
358 for child in children {
359 hasher.update(&child.hash().0);
360 }
361 Hash(*hasher.finalize().as_bytes())
362}
363
364fn diff_nodes(local: &ProllyNode, other: &ProllyNode, missing: &mut Vec<String>) {
371 if local.hash() == other.hash() {
373 return;
374 }
375
376 if let (
377 ProllyNode::Interior {
378 children: local_children,
379 ..
380 },
381 ProllyNode::Interior {
382 children: other_children,
383 ..
384 },
385 ) = (local, other)
386 {
387 let local_set: std::collections::HashSet<Hash> =
390 local_children.iter().map(ProllyNode::hash).collect();
391
392 for other_child in other_children {
393 if local_set.contains(&other_child.hash()) {
394 continue;
396 }
397 let local_match = local_children.iter().find(|lc| {
400 matches!(
402 (lc, other_child),
403 (ProllyNode::Interior { .. }, ProllyNode::Interior { .. })
404 | (ProllyNode::Leaf { .. }, ProllyNode::Leaf { .. })
405 )
406 });
407
408 match local_match {
409 Some(lm) => diff_nodes(lm, other_child, missing),
410 None => other_child.collect_event_hashes(missing),
411 }
412 }
413 } else {
414 let mut local_hashes = Vec::new();
417 local.collect_event_hashes(&mut local_hashes);
418 let local_set: std::collections::HashSet<&str> = local_hashes
419 .iter()
420 .map(std::string::String::as_str)
421 .collect();
422
423 let mut other_hashes = Vec::new();
424 other.collect_event_hashes(&mut other_hashes);
425
426 for h in other_hashes {
427 if !local_set.contains(h.as_str()) {
428 missing.push(h);
429 }
430 }
431 }
432}
433
434mod hex {
438 const HEX_CHARS: &[u8; 16] = b"0123456789abcdef";
439
440 pub fn encode(bytes: &[u8]) -> String {
441 let mut s = String::with_capacity(bytes.len() * 2);
442 for &b in bytes {
443 s.push(HEX_CHARS[(b >> 4) as usize] as char);
444 s.push(HEX_CHARS[(b & 0x0f) as usize] as char);
445 }
446 s
447 }
448}
449
450#[cfg(test)]
455mod tests {
456 use super::*;
457 use crate::event::data::{CreateData, EventData};
458 use crate::event::{Event, EventType};
459 use crate::model::item::{Kind, Urgency};
460 use crate::model::item_id::ItemId;
461 use std::collections::BTreeMap;
462
463 fn make_event(item_id: &str, ts: i64, hash_suffix: &str) -> Event {
464 Event {
465 wall_ts_us: ts,
466 agent: "test-agent".to_string(),
467 itc: "0:0".to_string(),
468 parents: vec![],
469 event_type: EventType::Create,
470 item_id: ItemId::new_unchecked(item_id),
471 data: EventData::Create(CreateData {
472 title: format!("Item {item_id}"),
473 kind: Kind::Task,
474 size: None,
475 urgency: Urgency::Default,
476 labels: vec![],
477 parent: None,
478 causation: None,
479 description: None,
480 extra: BTreeMap::new(),
481 }),
482 event_hash: format!("blake3:{item_id}_{ts}_{hash_suffix}"),
483 }
484 }
485
486 #[test]
487 fn empty_tree() {
488 let tree = ProllyTree::build(&[]);
489 assert_eq!(tree.event_count, 0);
490 assert!(tree.event_hashes().is_empty());
491 }
492
493 #[test]
494 fn single_event() {
495 let events = vec![make_event("item-1", 1000, "a")];
496 let tree = ProllyTree::build(&events);
497 assert_eq!(tree.event_count, 1);
498 assert_eq!(tree.event_hashes().len(), 1);
499 }
500
501 #[test]
502 fn deterministic_root_hash_same_order() {
503 let events = vec![
504 make_event("a", 1, "x"),
505 make_event("b", 2, "y"),
506 make_event("c", 3, "z"),
507 ];
508 let t1 = ProllyTree::build(&events);
509 let t2 = ProllyTree::build(&events);
510 assert_eq!(t1.root.hash(), t2.root.hash());
511 }
512
513 #[test]
514 fn deterministic_root_hash_different_insertion_order() {
515 let e1 = make_event("a", 1, "x");
516 let e2 = make_event("b", 2, "y");
517 let e3 = make_event("c", 3, "z");
518
519 let t1 = ProllyTree::build(&[e1.clone(), e2.clone(), e3.clone()]);
520 let t2 = ProllyTree::build(&[e3.clone(), e1.clone(), e2.clone()]);
521 let t3 = ProllyTree::build(&[e2, e3, e1]);
522
523 assert_eq!(t1.root.hash(), t2.root.hash());
524 assert_eq!(t2.root.hash(), t3.root.hash());
525 }
526
527 #[test]
528 fn diff_identical_trees_is_empty() {
529 let events = vec![make_event("a", 1, "x"), make_event("b", 2, "y")];
530 let t1 = ProllyTree::build(&events);
531 let t2 = ProllyTree::build(&events);
532 assert!(t1.diff(&t2).is_empty());
533 }
534
535 #[test]
536 fn diff_finds_new_events() {
537 let shared = vec![make_event("a", 1, "x"), make_event("b", 2, "y")];
538 let mut extended = shared.clone();
539 extended.push(make_event("c", 3, "z"));
540
541 let t_local = ProllyTree::build(&shared);
542 let t_remote = ProllyTree::build(&extended);
543
544 let missing = t_local.diff(&t_remote);
545 assert!(missing.contains(&"blake3:c_3_z".to_string()));
546 }
547
548 #[test]
549 fn diff_empty_vs_populated() {
550 let empty = ProllyTree::build(&[]);
551 let populated = ProllyTree::build(&[make_event("a", 1, "x"), make_event("b", 2, "y")]);
552
553 let missing = empty.diff(&populated);
554 assert_eq!(missing.len(), 2);
555 }
556
557 #[test]
558 fn serialization_roundtrip() {
559 let events = vec![
560 make_event("a", 1, "x"),
561 make_event("b", 2, "y"),
562 make_event("c", 3, "z"),
563 ];
564 let tree = ProllyTree::build(&events);
565 let bytes = tree.to_bytes();
566 let restored = ProllyTree::from_bytes(&bytes).expect("deserialize");
567 assert_eq!(tree.root.hash(), restored.root.hash());
568 assert_eq!(tree.event_count, restored.event_count);
569 }
570
571 #[test]
572 fn many_events_produce_multiple_chunks() {
573 let events: Vec<Event> = (0..200)
575 .map(|i| make_event(&format!("item-{i:04}"), i as i64, &format!("h{i}")))
576 .collect();
577 let tree = ProllyTree::build(&events);
578 assert_eq!(tree.event_count, 200);
579 assert_eq!(tree.event_hashes().len(), 200);
580
581 let tree2 = ProllyTree::build(&events);
583 assert_eq!(tree.root.hash(), tree2.root.hash());
584 }
585
586 #[test]
587 fn diff_with_overlapping_events() {
588 let all_events: Vec<Event> = (0..150)
590 .map(|i| make_event(&format!("item-{i:04}"), i as i64, &format!("h{i}")))
591 .collect();
592
593 let a_events = &all_events[0..100];
594 let b_events = &all_events[50..150];
595
596 let tree_a = ProllyTree::build(a_events);
597 let tree_b = ProllyTree::build(b_events);
598
599 let missing = tree_a.diff(&tree_b);
601 let expected_missing: std::collections::HashSet<String> = (100..150)
602 .map(|i| format!("blake3:item-{i:04}_{i}_h{i}"))
603 .collect();
604
605 let missing_set: std::collections::HashSet<String> = missing.into_iter().collect();
606 for expected in &expected_missing {
607 assert!(
608 missing_set.contains(expected),
609 "Expected {expected} in diff but not found"
610 );
611 }
612 }
613
614 #[test]
615 fn diff_symmetric_finds_both_sides() {
616 let shared = vec![make_event("shared", 1, "s")];
617 let mut a = shared.clone();
618 a.push(make_event("only-a", 2, "a"));
619 let mut b = shared;
620 b.push(make_event("only-b", 3, "b"));
621
622 let tree_a = ProllyTree::build(&a);
623 let tree_b = ProllyTree::build(&b);
624
625 let a_missing = tree_a.diff(&tree_b); let b_missing = tree_b.diff(&tree_a); assert!(a_missing.contains(&"blake3:only-b_3_b".to_string()));
629 assert!(b_missing.contains(&"blake3:only-a_2_a".to_string()));
630 }
631
632 #[test]
633 fn same_item_different_timestamps() {
634 let events = vec![
635 make_event("item-1", 100, "v1"),
636 make_event("item-1", 200, "v2"),
637 make_event("item-1", 300, "v3"),
638 ];
639 let tree = ProllyTree::build(&events);
640 assert_eq!(tree.event_count, 3);
641 let hashes = tree.event_hashes();
642 assert_eq!(hashes[0], "blake3:item-1_100_v1");
644 assert_eq!(hashes[1], "blake3:item-1_200_v2");
645 assert_eq!(hashes[2], "blake3:item-1_300_v3");
646 }
647
648 #[test]
649 fn hash_display() {
650 let h = hash_bytes(b"test");
651 let s = format!("{h}");
652 assert_eq!(s.len(), 64); }
654
655 #[test]
656 fn large_diff_performance() {
657 let shared: Vec<Event> = (0..900)
659 .map(|i| make_event(&format!("s{i:04}"), i, &format!("s{i}")))
660 .collect();
661
662 let mut a = shared.clone();
663 for i in 900..1000 {
664 a.push(make_event(&format!("a{i:04}"), i, &format!("a{i}")));
665 }
666
667 let mut b = shared;
668 for i in 900..1000 {
669 b.push(make_event(&format!("b{i:04}"), i, &format!("b{i}")));
670 }
671
672 let tree_a = ProllyTree::build(&a);
673 let tree_b = ProllyTree::build(&b);
674
675 let missing_from_b = tree_a.diff(&tree_b);
676 assert!(
678 missing_from_b.len() >= 90,
679 "Expected ~100 missing, got {}",
680 missing_from_b.len()
681 );
682 }
683
684 #[test]
685 fn build_and_diff_stress() {
686 let all: Vec<Event> = (0..500)
688 .map(|i| make_event(&format!("x{i:04}"), i, &format!("x{i}")))
689 .collect();
690
691 let subset = &all[0..400];
692
693 let tree_all = ProllyTree::build(&all);
694 let tree_sub = ProllyTree::build(subset);
695
696 let missing = tree_sub.diff(&tree_all);
697 assert!(
699 missing.len() >= 90,
700 "Expected ~100 missing, got {}",
701 missing.len()
702 );
703 }
704
705 #[test]
706 fn gear_table_is_deterministic() {
707 let t1 = gear_table();
708 let t2 = gear_table();
709 assert_eq!(t1, t2);
710 }
711
712 #[test]
713 fn chunk_boundaries_are_content_defined() {
714 let base: Vec<Event> = (0..100)
716 .map(|i| make_event(&format!("e{i:04}"), i * 10, &format!("h{i}")))
717 .collect();
718
719 let tree_base = ProllyTree::build(&base);
720
721 let mut modified = base;
723 modified.push(make_event("e0050x", 505, "hx"));
724
725 let tree_mod = ProllyTree::build(&modified);
726
727 assert_ne!(tree_base.root.hash(), tree_mod.root.hash());
729
730 let missing = tree_base.diff(&tree_mod);
732 assert!(missing.contains(&"blake3:e0050x_505_hx".to_string()));
733 }
734
735 #[test]
736 fn empty_diff_with_empty() {
737 let e1 = ProllyTree::build(&[]);
738 let e2 = ProllyTree::build(&[]);
739 assert!(e1.diff(&e2).is_empty());
740 }
741
742 #[test]
743 fn serialization_preserves_event_count() {
744 let events: Vec<Event> = (0..50)
745 .map(|i| make_event(&format!("item-{i}"), i, &format!("h{i}")))
746 .collect();
747 let tree = ProllyTree::build(&events);
748 let bytes = tree.to_bytes();
749 let restored = ProllyTree::from_bytes(&bytes).unwrap();
750 assert_eq!(restored.event_count, 50);
751 assert_eq!(restored.event_hashes().len(), 50);
752 }
753}