1use blake3::Hasher as Blake3;
9use serde::{Deserialize, Serialize};
10use std::fmt;
11
12use crate::event::Event;
13
14const BOUNDARY_BITS: u32 = 6;
21const BOUNDARY_MASK: u64 = (1u64 << BOUNDARY_BITS) - 1;
22
23const MIN_CHUNK_SIZE: usize = 8;
25
26const MAX_CHUNK_SIZE: usize = 256;
28
29const INTERIOR_BOUNDARY_BITS: u32 = 3; const INTERIOR_BOUNDARY_MASK: u64 = (1u64 << INTERIOR_BOUNDARY_BITS) - 1;
33const MIN_INTERIOR_SIZE: usize = 2;
34const MAX_INTERIOR_SIZE: usize = 32;
35
36fn gear_table() -> [u64; 256] {
43 let mut table = [0u64; 256];
44 for i in 0u16..256 {
45 let h = blake3::hash(&i.to_le_bytes());
46 let bytes: [u8; 8] = h.as_bytes()[0..8]
47 .try_into()
48 .expect("BLAKE3 output is 32 bytes; first 8 always valid");
49 table[i as usize] = u64::from_le_bytes(bytes);
50 }
51 table
52}
53
54#[derive(Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
60pub struct Hash(pub [u8; 32]);
61
62impl fmt::Debug for Hash {
63 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
64 write!(f, "Hash({})", hex::encode(&self.0[..8]))
65 }
66}
67
68impl fmt::Display for Hash {
69 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
70 write!(f, "{}", hex::encode(&self.0))
71 }
72}
73
74fn hash_bytes(data: &[u8]) -> Hash {
75 Hash(*blake3::hash(data).as_bytes())
76}
77
78fn sort_key(e: &Event) -> (String, i64, String) {
86 (
87 e.item_id.as_str().to_string(),
88 e.wall_ts_us,
89 e.event_hash.clone(),
90 )
91}
92
93#[derive(Debug, Clone, Serialize, Deserialize)]
100pub enum ProllyNode {
101 Leaf {
102 hash: Hash,
103 event_hashes: Vec<String>,
107 },
108 Interior {
109 hash: Hash,
110 children: Vec<Self>,
111 },
112}
113
114impl ProllyNode {
115 #[must_use]
116 pub const fn hash(&self) -> Hash {
117 let (Self::Leaf { hash, .. } | Self::Interior { hash, .. }) = self;
118 *hash
119 }
120
121 pub fn collect_event_hashes(&self, out: &mut Vec<String>) {
123 match self {
124 Self::Leaf { event_hashes, .. } => {
125 out.extend(event_hashes.iter().cloned());
126 }
127 Self::Interior { children, .. } => {
128 for child in children {
129 child.collect_event_hashes(out);
130 }
131 }
132 }
133 }
134}
135
136#[derive(Debug, Clone, Serialize, Deserialize)]
142pub struct ProllyTree {
143 pub root: ProllyNode,
144 pub event_count: usize,
145}
146
147impl ProllyTree {
148 #[must_use]
157 pub fn build(events: &[Event]) -> Self {
158 if events.is_empty() {
159 let empty_hash = hash_bytes(b"prolly:empty");
160 return Self {
161 root: ProllyNode::Leaf {
162 hash: empty_hash,
163 event_hashes: vec![],
164 },
165 event_count: 0,
166 };
167 }
168
169 let mut sorted: Vec<&Event> = events.iter().collect();
171 sorted.sort_by_key(|a| sort_key(a));
172
173 let event_hashes: Vec<String> = sorted.iter().map(|e| e.event_hash.clone()).collect();
175 let leaves = chunk_leaf(&event_hashes);
176
177 let root = build_interior(leaves);
179
180 Self {
181 root,
182 event_count: events.len(),
183 }
184 }
185
186 #[must_use]
191 pub fn diff(&self, other: &Self) -> Vec<String> {
192 let mut missing = Vec::new();
193 diff_nodes(&self.root, &other.root, &mut missing);
194 missing
195 }
196
197 #[must_use]
199 pub fn event_hashes(&self) -> Vec<String> {
200 let mut out = Vec::with_capacity(self.event_count);
201 self.root.collect_event_hashes(&mut out);
202 out
203 }
204
205 #[must_use]
212 pub fn to_bytes(&self) -> Vec<u8> {
213 serde_json::to_vec(self).expect("ProllyTree serialization should not fail")
216 }
217
218 pub fn from_bytes(data: &[u8]) -> Result<Self, serde_json::Error> {
225 serde_json::from_slice(data)
226 }
227}
228
229fn chunk_leaf(event_hashes: &[String]) -> Vec<ProllyNode> {
235 let table = gear_table();
236 let mut chunks = Vec::new();
237 let mut chunk_start = 0;
238 let mut gear: u64 = 0;
239
240 for (i, eh) in event_hashes.iter().enumerate() {
241 for &b in eh.as_bytes() {
243 gear = (gear << 1).wrapping_add(table[b as usize]);
244 }
245
246 let chunk_len = i - chunk_start + 1;
247
248 let at_boundary = chunk_len >= MIN_CHUNK_SIZE && (gear & BOUNDARY_MASK) == 0;
249 let at_max = chunk_len >= MAX_CHUNK_SIZE;
250 let at_end = i == event_hashes.len() - 1;
251
252 if at_boundary || at_max || at_end {
253 let slice = &event_hashes[chunk_start..=i];
254 let hash = hash_leaf_chunk(slice);
255 chunks.push(ProllyNode::Leaf {
256 hash,
257 event_hashes: slice.to_vec(),
258 });
259 chunk_start = i + 1;
260 gear = 0;
261 }
262 }
263
264 chunks
265}
266
267fn hash_leaf_chunk(event_hashes: &[String]) -> Hash {
268 let mut hasher = Blake3::new();
269 hasher.update(b"prolly:leaf:");
270 for eh in event_hashes {
271 hasher.update(eh.as_bytes());
272 hasher.update(b"\n");
273 }
274 Hash(*hasher.finalize().as_bytes())
275}
276
277fn build_interior(mut nodes: Vec<ProllyNode>) -> ProllyNode {
283 if nodes.len() == 1 {
284 return nodes.remove(0);
285 }
286
287 let table = gear_table();
289 let mut groups: Vec<Vec<ProllyNode>> = Vec::new();
290 let mut current_group: Vec<ProllyNode> = Vec::new();
291 let mut gear: u64 = 0;
292
293 for node in nodes {
294 let h = node.hash();
295 for &b in &h.0[..8] {
296 gear = (gear << 1).wrapping_add(table[b as usize]);
297 }
298 current_group.push(node);
299
300 let group_len = current_group.len();
301 let at_boundary = group_len >= MIN_INTERIOR_SIZE && (gear & INTERIOR_BOUNDARY_MASK) == 0;
302 let at_max = group_len >= MAX_INTERIOR_SIZE;
303
304 if at_boundary || at_max {
305 groups.push(std::mem::take(&mut current_group));
306 gear = 0;
307 }
308 }
309 if !current_group.is_empty() {
310 groups.push(current_group);
311 }
312
313 let interior_nodes: Vec<ProllyNode> = groups
315 .into_iter()
316 .map(|children| {
317 let hash = hash_interior(&children);
318 ProllyNode::Interior { hash, children }
319 })
320 .collect();
321
322 build_interior(interior_nodes)
324}
325
326fn hash_interior(children: &[ProllyNode]) -> Hash {
327 let mut hasher = Blake3::new();
328 hasher.update(b"prolly:interior:");
329 for child in children {
330 hasher.update(&child.hash().0);
331 }
332 Hash(*hasher.finalize().as_bytes())
333}
334
335fn diff_nodes(local: &ProllyNode, other: &ProllyNode, missing: &mut Vec<String>) {
342 if local.hash() == other.hash() {
344 return;
345 }
346
347 if let (
348 ProllyNode::Interior {
349 children: local_children,
350 ..
351 },
352 ProllyNode::Interior {
353 children: other_children,
354 ..
355 },
356 ) = (local, other)
357 {
358 let local_set: std::collections::HashSet<Hash> =
361 local_children.iter().map(ProllyNode::hash).collect();
362
363 for other_child in other_children {
364 if local_set.contains(&other_child.hash()) {
365 continue;
367 }
368 let local_match = local_children.iter().find(|lc| {
371 matches!(
373 (lc, other_child),
374 (ProllyNode::Interior { .. }, ProllyNode::Interior { .. })
375 | (ProllyNode::Leaf { .. }, ProllyNode::Leaf { .. })
376 )
377 });
378
379 match local_match {
380 Some(lm) => diff_nodes(lm, other_child, missing),
381 None => other_child.collect_event_hashes(missing),
382 }
383 }
384 } else {
385 let mut local_hashes = Vec::new();
388 local.collect_event_hashes(&mut local_hashes);
389 let local_set: std::collections::HashSet<&str> = local_hashes
390 .iter()
391 .map(std::string::String::as_str)
392 .collect();
393
394 let mut other_hashes = Vec::new();
395 other.collect_event_hashes(&mut other_hashes);
396
397 for h in other_hashes {
398 if !local_set.contains(h.as_str()) {
399 missing.push(h);
400 }
401 }
402 }
403}
404
405mod hex {
409 const HEX_CHARS: &[u8; 16] = b"0123456789abcdef";
410
411 pub fn encode(bytes: &[u8]) -> String {
412 let mut s = String::with_capacity(bytes.len() * 2);
413 for &b in bytes {
414 s.push(HEX_CHARS[(b >> 4) as usize] as char);
415 s.push(HEX_CHARS[(b & 0x0f) as usize] as char);
416 }
417 s
418 }
419}
420
421#[cfg(test)]
426mod tests {
427 use super::*;
428 use crate::event::data::{CreateData, EventData};
429 use crate::event::{Event, EventType};
430 use crate::model::item::{Kind, Urgency};
431 use crate::model::item_id::ItemId;
432 use std::collections::BTreeMap;
433
434 fn make_event(item_id: &str, ts: i64, hash_suffix: &str) -> Event {
435 Event {
436 wall_ts_us: ts,
437 agent: "test-agent".to_string(),
438 itc: "0:0".to_string(),
439 parents: vec![],
440 event_type: EventType::Create,
441 item_id: ItemId::new_unchecked(item_id),
442 data: EventData::Create(CreateData {
443 title: format!("Item {item_id}"),
444 kind: Kind::Task,
445 size: None,
446 urgency: Urgency::Default,
447 labels: vec![],
448 parent: None,
449 causation: None,
450 description: None,
451 extra: BTreeMap::new(),
452 }),
453 event_hash: format!("blake3:{item_id}_{ts}_{hash_suffix}"),
454 }
455 }
456
457 #[test]
458 fn empty_tree() {
459 let tree = ProllyTree::build(&[]);
460 assert_eq!(tree.event_count, 0);
461 assert!(tree.event_hashes().is_empty());
462 }
463
464 #[test]
465 fn single_event() {
466 let events = vec![make_event("item-1", 1000, "a")];
467 let tree = ProllyTree::build(&events);
468 assert_eq!(tree.event_count, 1);
469 assert_eq!(tree.event_hashes().len(), 1);
470 }
471
472 #[test]
473 fn deterministic_root_hash_same_order() {
474 let events = vec![
475 make_event("a", 1, "x"),
476 make_event("b", 2, "y"),
477 make_event("c", 3, "z"),
478 ];
479 let t1 = ProllyTree::build(&events);
480 let t2 = ProllyTree::build(&events);
481 assert_eq!(t1.root.hash(), t2.root.hash());
482 }
483
484 #[test]
485 fn deterministic_root_hash_different_insertion_order() {
486 let e1 = make_event("a", 1, "x");
487 let e2 = make_event("b", 2, "y");
488 let e3 = make_event("c", 3, "z");
489
490 let t1 = ProllyTree::build(&[e1.clone(), e2.clone(), e3.clone()]);
491 let t2 = ProllyTree::build(&[e3.clone(), e1.clone(), e2.clone()]);
492 let t3 = ProllyTree::build(&[e2, e3, e1]);
493
494 assert_eq!(t1.root.hash(), t2.root.hash());
495 assert_eq!(t2.root.hash(), t3.root.hash());
496 }
497
498 #[test]
499 fn diff_identical_trees_is_empty() {
500 let events = vec![make_event("a", 1, "x"), make_event("b", 2, "y")];
501 let t1 = ProllyTree::build(&events);
502 let t2 = ProllyTree::build(&events);
503 assert!(t1.diff(&t2).is_empty());
504 }
505
506 #[test]
507 fn diff_finds_new_events() {
508 let shared = vec![make_event("a", 1, "x"), make_event("b", 2, "y")];
509 let mut extended = shared.clone();
510 extended.push(make_event("c", 3, "z"));
511
512 let t_local = ProllyTree::build(&shared);
513 let t_remote = ProllyTree::build(&extended);
514
515 let missing = t_local.diff(&t_remote);
516 assert!(missing.contains(&"blake3:c_3_z".to_string()));
517 }
518
519 #[test]
520 fn diff_empty_vs_populated() {
521 let empty = ProllyTree::build(&[]);
522 let populated = ProllyTree::build(&[make_event("a", 1, "x"), make_event("b", 2, "y")]);
523
524 let missing = empty.diff(&populated);
525 assert_eq!(missing.len(), 2);
526 }
527
528 #[test]
529 fn serialization_roundtrip() {
530 let events = vec![
531 make_event("a", 1, "x"),
532 make_event("b", 2, "y"),
533 make_event("c", 3, "z"),
534 ];
535 let tree = ProllyTree::build(&events);
536 let bytes = tree.to_bytes();
537 let restored = ProllyTree::from_bytes(&bytes).expect("deserialize");
538 assert_eq!(tree.root.hash(), restored.root.hash());
539 assert_eq!(tree.event_count, restored.event_count);
540 }
541
542 #[test]
543 fn many_events_produce_multiple_chunks() {
544 let events: Vec<Event> = (0..200)
546 .map(|i| make_event(&format!("item-{i:04}"), i as i64, &format!("h{i}")))
547 .collect();
548 let tree = ProllyTree::build(&events);
549 assert_eq!(tree.event_count, 200);
550 assert_eq!(tree.event_hashes().len(), 200);
551
552 let tree2 = ProllyTree::build(&events);
554 assert_eq!(tree.root.hash(), tree2.root.hash());
555 }
556
557 #[test]
558 fn diff_with_overlapping_events() {
559 let all_events: Vec<Event> = (0..150)
561 .map(|i| make_event(&format!("item-{i:04}"), i as i64, &format!("h{i}")))
562 .collect();
563
564 let a_events = &all_events[0..100];
565 let b_events = &all_events[50..150];
566
567 let tree_a = ProllyTree::build(a_events);
568 let tree_b = ProllyTree::build(b_events);
569
570 let missing = tree_a.diff(&tree_b);
572 let expected_missing: std::collections::HashSet<String> = (100..150)
573 .map(|i| format!("blake3:item-{i:04}_{i}_h{i}"))
574 .collect();
575
576 let missing_set: std::collections::HashSet<String> = missing.into_iter().collect();
577 for expected in &expected_missing {
578 assert!(
579 missing_set.contains(expected),
580 "Expected {expected} in diff but not found"
581 );
582 }
583 }
584
585 #[test]
586 fn diff_symmetric_finds_both_sides() {
587 let shared = vec![make_event("shared", 1, "s")];
588 let mut a = shared.clone();
589 a.push(make_event("only-a", 2, "a"));
590 let mut b = shared;
591 b.push(make_event("only-b", 3, "b"));
592
593 let tree_a = ProllyTree::build(&a);
594 let tree_b = ProllyTree::build(&b);
595
596 let a_missing = tree_a.diff(&tree_b); let b_missing = tree_b.diff(&tree_a); assert!(a_missing.contains(&"blake3:only-b_3_b".to_string()));
600 assert!(b_missing.contains(&"blake3:only-a_2_a".to_string()));
601 }
602
603 #[test]
604 fn same_item_different_timestamps() {
605 let events = vec![
606 make_event("item-1", 100, "v1"),
607 make_event("item-1", 200, "v2"),
608 make_event("item-1", 300, "v3"),
609 ];
610 let tree = ProllyTree::build(&events);
611 assert_eq!(tree.event_count, 3);
612 let hashes = tree.event_hashes();
613 assert_eq!(hashes[0], "blake3:item-1_100_v1");
615 assert_eq!(hashes[1], "blake3:item-1_200_v2");
616 assert_eq!(hashes[2], "blake3:item-1_300_v3");
617 }
618
619 #[test]
620 fn hash_display() {
621 let h = hash_bytes(b"test");
622 let s = format!("{h}");
623 assert_eq!(s.len(), 64); }
625
626 #[test]
627 fn large_diff_performance() {
628 let shared: Vec<Event> = (0..900)
630 .map(|i| make_event(&format!("s{i:04}"), i, &format!("s{i}")))
631 .collect();
632
633 let mut a = shared.clone();
634 for i in 900..1000 {
635 a.push(make_event(&format!("a{i:04}"), i, &format!("a{i}")));
636 }
637
638 let mut b = shared;
639 for i in 900..1000 {
640 b.push(make_event(&format!("b{i:04}"), i, &format!("b{i}")));
641 }
642
643 let tree_a = ProllyTree::build(&a);
644 let tree_b = ProllyTree::build(&b);
645
646 let missing_from_b = tree_a.diff(&tree_b);
647 assert!(
649 missing_from_b.len() >= 90,
650 "Expected ~100 missing, got {}",
651 missing_from_b.len()
652 );
653 }
654
655 #[test]
656 fn build_and_diff_stress() {
657 let all: Vec<Event> = (0..500)
659 .map(|i| make_event(&format!("x{i:04}"), i, &format!("x{i}")))
660 .collect();
661
662 let subset = &all[0..400];
663
664 let tree_all = ProllyTree::build(&all);
665 let tree_sub = ProllyTree::build(subset);
666
667 let missing = tree_sub.diff(&tree_all);
668 assert!(
670 missing.len() >= 90,
671 "Expected ~100 missing, got {}",
672 missing.len()
673 );
674 }
675
676 #[test]
677 fn gear_table_is_deterministic() {
678 let t1 = gear_table();
679 let t2 = gear_table();
680 assert_eq!(t1, t2);
681 }
682
683 #[test]
684 fn chunk_boundaries_are_content_defined() {
685 let base: Vec<Event> = (0..100)
687 .map(|i| make_event(&format!("e{i:04}"), i * 10, &format!("h{i}")))
688 .collect();
689
690 let tree_base = ProllyTree::build(&base);
691
692 let mut modified = base;
694 modified.push(make_event("e0050x", 505, "hx"));
695
696 let tree_mod = ProllyTree::build(&modified);
697
698 assert_ne!(tree_base.root.hash(), tree_mod.root.hash());
700
701 let missing = tree_base.diff(&tree_mod);
703 assert!(missing.contains(&"blake3:e0050x_505_hx".to_string()));
704 }
705
706 #[test]
707 fn empty_diff_with_empty() {
708 let e1 = ProllyTree::build(&[]);
709 let e2 = ProllyTree::build(&[]);
710 assert!(e1.diff(&e2).is_empty());
711 }
712
713 #[test]
714 fn serialization_preserves_event_count() {
715 let events: Vec<Event> = (0..50)
716 .map(|i| make_event(&format!("item-{i}"), i, &format!("h{i}")))
717 .collect();
718 let tree = ProllyTree::build(&events);
719 let bytes = tree.to_bytes();
720 let restored = ProllyTree::from_bytes(&bytes).unwrap();
721 assert_eq!(restored.event_count, 50);
722 assert_eq!(restored.event_hashes().len(), 50);
723 }
724}