Skip to main content

converge_core/
integrity.rs

1// Copyright 2024-2026 Reflective Labs
2// SPDX-License-Identifier: MIT
3
4//! Integrity primitives for Converge.
5//!
6//! This module provides cryptographic and causal ordering primitives that can
7//! be used alongside the core Context and Fact types to add integrity features:
8//!
9//! - **Lamport Clock**: Provides causal ordering of events without wall clocks
10//! - **Content Hash**: SHA-256 hash of content for tamper detection
11//! - **Merkle Root**: Cryptographic commitment to a set of facts for audit verification
12//!
13//! # Example
14//!
15//! ```
16//! use converge_core::integrity::{LamportClock, ContentHash, MerkleRoot};
17//!
18//! // Lamport clock for causal ordering
19//! let mut clock = LamportClock::new();
20//! let t1 = clock.tick(); // Event 1
21//! let t2 = clock.tick(); // Event 2 (causally after Event 1)
22//!
23//! // Content hash for integrity
24//! let hash = ContentHash::compute("important data");
25//! assert!(hash.verify("important data"));
26//!
27//! // Merkle root for audit trail
28//! let hashes = vec![
29//!     ContentHash::compute("fact 1"),
30//!     ContentHash::compute("fact 2"),
31//! ];
32//! let root = MerkleRoot::compute(&hashes);
33//! ```
34
35use serde::{Deserialize, Serialize};
36
37use crate::context::{ContextKey, ContextState, Fact};
38
39// ============================================================================
40// Lamport Clock
41// ============================================================================
42
43/// A Lamport logical clock for causal ordering of events.
44///
45/// Lamport clocks provide a partial ordering of events in a distributed system
46/// without requiring synchronized wall clocks. The key property is:
47/// if event A happened-before event B, then `clock(A) < clock(B)`.
48///
49/// # Example
50///
51/// ```
52/// use converge_core::integrity::LamportClock;
53///
54/// let mut clock = LamportClock::new();
55/// assert_eq!(clock.time(), 0);
56///
57/// clock.tick();
58/// assert_eq!(clock.time(), 1);
59///
60/// // When receiving a message with clock=5, update to max+1
61/// clock.update(5);
62/// assert_eq!(clock.time(), 6);
63/// ```
64#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default, Serialize, Deserialize)]
65pub struct LamportClock {
66    time: u64,
67}
68
69impl LamportClock {
70    /// Creates a new Lamport clock initialized to 0.
71    #[must_use]
72    pub const fn new() -> Self {
73        Self { time: 0 }
74    }
75
76    /// Returns the current logical time.
77    #[must_use]
78    pub const fn time(&self) -> u64 {
79        self.time
80    }
81
82    /// Increments the clock and returns the new time.
83    /// Called before any local event (e.g., creating a fact).
84    pub fn tick(&mut self) -> u64 {
85        self.time += 1;
86        self.time
87    }
88
89    /// Updates the clock based on a received timestamp.
90    /// Sets clock to `max(local, received) + 1`.
91    /// Called when receiving facts from another source.
92    pub fn update(&mut self, received: u64) -> u64 {
93        self.time = self.time.max(received) + 1;
94        self.time
95    }
96}
97
98// ============================================================================
99// Content Hash
100// ============================================================================
101
102/// Error type for ContentHash operations.
103#[derive(Debug, Clone, PartialEq, Eq)]
104pub enum ContentHashError {
105    /// Invalid hexadecimal character.
106    InvalidHex,
107    /// Invalid string length.
108    InvalidLength,
109}
110
111impl std::fmt::Display for ContentHashError {
112    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
113        match self {
114            Self::InvalidHex => write!(f, "invalid hexadecimal character"),
115            Self::InvalidLength => write!(f, "invalid string length (expected 64 hex chars)"),
116        }
117    }
118}
119
120impl std::error::Error for ContentHashError {}
121
122/// A SHA-256 hash of content for integrity verification.
123///
124/// Content hashes enable:
125/// - Tamper detection: any change to content changes the hash
126/// - Efficient comparison: compare 32 bytes instead of full content
127/// - Merkle tree construction: hashes combine into a tree
128#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
129pub struct ContentHash(pub [u8; 32]);
130
131impl ContentHash {
132    /// Computes a SHA-256 hash of the given content.
133    #[must_use]
134    pub fn compute(content: &str) -> Self {
135        use sha2::{Digest, Sha256};
136        let mut hasher = Sha256::new();
137        hasher.update(content.as_bytes());
138        Self(hasher.finalize().into())
139    }
140
141    /// Computes the hash of a Fact (combines key, id, and content).
142    #[must_use]
143    pub fn compute_fact(fact: &Fact) -> Self {
144        let combined = format!("{:?}|{}|{}", fact.key(), fact.id, fact.content);
145        Self::compute(&combined)
146    }
147
148    /// Combines two hashes (for Merkle tree internal nodes).
149    #[must_use]
150    pub fn combine(left: &Self, right: &Self) -> Self {
151        use sha2::{Digest, Sha256};
152        let mut hasher = Sha256::new();
153        hasher.update(left.0);
154        hasher.update(right.0);
155        Self(hasher.finalize().into())
156    }
157
158    /// Verifies that this hash matches the given content.
159    #[must_use]
160    pub fn verify(&self, content: &str) -> bool {
161        *self == Self::compute(content)
162    }
163
164    /// Returns the hash as a hex string.
165    #[must_use]
166    pub fn to_hex(&self) -> String {
167        self.0.iter().map(|b| format!("{b:02x}")).collect()
168    }
169
170    /// Creates a `ContentHash` from a hex string.
171    ///
172    /// # Errors
173    ///
174    /// Returns an error if the string is not valid hex or wrong length.
175    pub fn from_hex(s: &str) -> Result<Self, ContentHashError> {
176        if s.len() != 64 {
177            return Err(ContentHashError::InvalidLength);
178        }
179        let mut result = [0u8; 32];
180        for (i, chunk) in s.as_bytes().chunks(2).enumerate() {
181            let high = Self::hex_char_to_nibble(chunk[0])?;
182            let low = Self::hex_char_to_nibble(chunk[1])?;
183            result[i] = (high << 4) | low;
184        }
185        Ok(Self(result))
186    }
187
188    fn hex_char_to_nibble(c: u8) -> Result<u8, ContentHashError> {
189        match c {
190            b'0'..=b'9' => Ok(c - b'0'),
191            b'a'..=b'f' => Ok(c - b'a' + 10),
192            b'A'..=b'F' => Ok(c - b'A' + 10),
193            _ => Err(ContentHashError::InvalidHex),
194        }
195    }
196}
197
198impl std::fmt::Display for ContentHash {
199    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
200        write!(f, "{}", &self.to_hex()[..16]) // Short form for display
201    }
202}
203
204// ============================================================================
205// Merkle Tree
206// ============================================================================
207
208/// A Merkle tree root hash representing the integrity of a set of facts.
209///
210/// The Merkle root changes if any fact is added, modified, or reordered.
211/// This enables:
212/// - Tamper detection: compare roots to verify integrity
213/// - Efficient sync: different roots mean different states
214/// - Audit proofs: prove a fact exists without revealing all facts
215#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
216pub struct MerkleRoot(pub ContentHash);
217
218impl MerkleRoot {
219    /// Computes the Merkle root from a list of content hashes.
220    ///
221    /// Uses a standard binary Merkle tree construction.
222    /// Empty list returns a hash of empty string.
223    /// Single element is combined with itself (Bitcoin-style).
224    #[must_use]
225    pub fn compute(hashes: &[ContentHash]) -> Self {
226        if hashes.is_empty() {
227            return Self(ContentHash::compute(""));
228        }
229
230        let mut current_level: Vec<ContentHash> = hashes.to_vec();
231
232        // Keep combining until we reach a single root
233        // For a single element, we still iterate once to combine it with itself
234        loop {
235            if current_level.len() == 1 {
236                // If we started with 1 element, combine with itself
237                // If we reduced to 1 element through combinations, we're done
238                if hashes.len() == 1 {
239                    return Self(ContentHash::combine(&current_level[0], &current_level[0]));
240                }
241                return Self(current_level.into_iter().next().unwrap());
242            }
243
244            let mut next_level = Vec::new();
245
246            for chunk in current_level.chunks(2) {
247                let combined = if chunk.len() == 2 {
248                    ContentHash::combine(&chunk[0], &chunk[1])
249                } else {
250                    // Odd number: duplicate the last hash
251                    ContentHash::combine(&chunk[0], &chunk[0])
252                };
253                next_level.push(combined);
254            }
255
256            current_level = next_level;
257        }
258    }
259
260    /// Computes the Merkle root from a Context's facts.
261    ///
262    /// Facts are hashed in deterministic order (by key, then by position).
263    #[must_use]
264    pub fn from_context(ctx: &ContextState) -> Self {
265        let mut all_hashes: Vec<ContentHash> = Vec::new();
266        let mut keys: Vec<_> = ctx.all_keys();
267        keys.sort();
268
269        for key in keys {
270            for fact in ctx.get(key) {
271                all_hashes.push(ContentHash::compute_fact(fact));
272            }
273        }
274
275        Self::compute(&all_hashes)
276    }
277
278    /// Returns the root hash as a hex string.
279    #[must_use]
280    pub fn to_hex(&self) -> String {
281        self.0.to_hex()
282    }
283}
284
285// ============================================================================
286// Tracked Context
287// ============================================================================
288
289/// A wrapper around Context that tracks integrity metadata.
290///
291/// This provides optional integrity tracking without modifying the core types.
292#[derive(Debug, Clone, Serialize)]
293pub struct TrackedContext {
294    /// The underlying context.
295    pub context: ContextState,
296    /// Lamport clock for causal ordering.
297    pub clock: LamportClock,
298    /// Cached Merkle root (invalidated on changes).
299    merkle_root: Option<MerkleRoot>,
300    /// Hash of each fact (by key and index).
301    fact_hashes: Vec<(ContextKey, String, ContentHash)>,
302}
303
304impl TrackedContext {
305    /// Creates a new tracked context wrapping an existing context.
306    #[must_use]
307    pub fn new(context: ContextState) -> Self {
308        let mut tracked = Self {
309            context,
310            clock: LamportClock::new(),
311            merkle_root: None,
312            fact_hashes: Vec::new(),
313        };
314        tracked.recompute_hashes();
315        tracked
316    }
317
318    /// Creates an empty tracked context.
319    #[must_use]
320    pub fn empty() -> Self {
321        Self::new(ContextState::new())
322    }
323
324    /// Returns the current Lamport clock time.
325    #[must_use]
326    pub fn clock_time(&self) -> u64 {
327        self.clock.time()
328    }
329
330    /// Ticks the clock and returns the new time.
331    pub fn tick(&mut self) -> u64 {
332        self.clock.tick()
333    }
334
335    /// Computes and returns the Merkle root.
336    #[must_use]
337    pub fn merkle_root(&mut self) -> &MerkleRoot {
338        if self.merkle_root.is_none() {
339            self.merkle_root = Some(MerkleRoot::from_context(&self.context));
340        }
341        self.merkle_root.as_ref().unwrap()
342    }
343
344    /// Verifies the integrity of all facts.
345    ///
346    /// Returns `true` if all recorded hashes match current fact content.
347    #[must_use]
348    pub fn verify_integrity(&self) -> bool {
349        for (key, id, expected_hash) in &self.fact_hashes {
350            if let Some(fact) = self.context.get(*key).iter().find(|f| &f.id == id) {
351                if ContentHash::compute_fact(fact) != *expected_hash {
352                    return false;
353                }
354            } else {
355                return false; // Fact missing
356            }
357        }
358        true
359    }
360
361    /// Recomputes all fact hashes.
362    fn recompute_hashes(&mut self) {
363        self.fact_hashes.clear();
364        for key in self.context.all_keys() {
365            for fact in self.context.get(key) {
366                let hash = ContentHash::compute_fact(fact);
367                self.fact_hashes.push((key, fact.id.to_string(), hash));
368            }
369        }
370        self.merkle_root = None; // Invalidate cached root
371    }
372
373    /// Adds a fact to the underlying context and updates tracking.
374    ///
375    /// # Errors
376    ///
377    /// Returns an error if the fact conflicts with an existing fact.
378    pub fn add_fact(&mut self, fact: Fact) -> Result<bool, crate::error::ConvergeError> {
379        let key = fact.key();
380        let id = fact.id.to_string();
381        let hash = ContentHash::compute_fact(&fact);
382
383        let changed = self.context.add_fact(fact)?;
384
385        if changed {
386            self.fact_hashes.push((key, id, hash));
387            self.merkle_root = None; // Invalidate cached root
388            self.clock.tick();
389        }
390
391        Ok(changed)
392    }
393
394    /// Extracts an integrity proof from the current state.
395    pub fn extract_proof(&mut self) -> IntegrityProof {
396        let merkle_root = self.merkle_root().clone();
397        IntegrityProof {
398            merkle_root,
399            clock_time: self.clock_time(),
400            fact_count: self.fact_hashes.len(),
401        }
402    }
403}
404
405impl Default for TrackedContext {
406    fn default() -> Self {
407        Self::empty()
408    }
409}
410
411// ============================================================================
412// Integrity Proof
413// ============================================================================
414
415/// Cryptographic integrity proof for a converged context.
416///
417/// Produced by the engine at the end of convergence. Provides:
418/// - Merkle root: tamper-evident commitment to all facts
419/// - Clock time: causal ordering count (number of fact-addition events)
420/// - Fact count: total facts in the context
421#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
422pub struct IntegrityProof {
423    /// Merkle root over all facts in deterministic order.
424    pub merkle_root: MerkleRoot,
425    /// Final Lamport clock time (number of tracked events).
426    pub clock_time: u64,
427    /// Number of facts in the context.
428    pub fact_count: usize,
429}
430
431// ============================================================================
432// Tests
433// ============================================================================
434
435#[cfg(test)]
436mod tests {
437    use super::*;
438
439    // =========================================================================
440    // Lamport Clock Tests
441    // =========================================================================
442
443    #[test]
444    fn lamport_clock_starts_at_zero() {
445        let clock = LamportClock::new();
446        assert_eq!(clock.time(), 0);
447    }
448
449    #[test]
450    fn lamport_clock_tick_increments() {
451        let mut clock = LamportClock::new();
452        assert_eq!(clock.tick(), 1);
453        assert_eq!(clock.tick(), 2);
454        assert_eq!(clock.tick(), 3);
455    }
456
457    #[test]
458    fn lamport_clock_update_takes_max_plus_one() {
459        let mut clock = LamportClock::new();
460        clock.tick(); // 1
461        clock.tick(); // 2
462
463        // Received message with clock=5, should become 6
464        assert_eq!(clock.update(5), 6);
465
466        // Received message with clock=3, should become 7 (max(6,3)+1)
467        assert_eq!(clock.update(3), 7);
468    }
469
470    // =========================================================================
471    // Content Hash Tests
472    // =========================================================================
473
474    #[test]
475
476    fn content_hash_is_deterministic() {
477        let h1 = ContentHash::compute("hello world");
478        let h2 = ContentHash::compute("hello world");
479        assert_eq!(h1, h2);
480    }
481
482    #[test]
483
484    fn content_hash_changes_with_content() {
485        let h1 = ContentHash::compute("hello");
486        let h2 = ContentHash::compute("world");
487        assert_ne!(h1, h2);
488    }
489
490    #[test]
491
492    fn content_hash_verify_works() {
493        let hash = ContentHash::compute("test");
494        assert!(hash.verify("test"));
495        assert!(!hash.verify("modified"));
496    }
497
498    #[test]
499
500    fn content_hash_hex_roundtrip() {
501        let original = ContentHash::compute("test content");
502        let hex = original.to_hex();
503        let restored = ContentHash::from_hex(&hex).unwrap();
504        assert_eq!(original, restored);
505    }
506
507    // =========================================================================
508    // Merkle Tree Tests
509    // =========================================================================
510
511    #[test]
512
513    fn merkle_root_empty_list() {
514        let root = MerkleRoot::compute(&[]);
515        let empty_hash = ContentHash::compute("");
516        assert_eq!(root.0, empty_hash);
517    }
518
519    #[test]
520
521    fn merkle_root_single_element() {
522        let h = ContentHash::compute("only element");
523        let root = MerkleRoot::compute(std::slice::from_ref(&h));
524        let expected = ContentHash::combine(&h, &h);
525        assert_eq!(root.0, expected);
526    }
527
528    #[test]
529
530    fn merkle_root_two_elements() {
531        let h1 = ContentHash::compute("first");
532        let h2 = ContentHash::compute("second");
533        let root = MerkleRoot::compute(&[h1.clone(), h2.clone()]);
534        let expected = ContentHash::combine(&h1, &h2);
535        assert_eq!(root.0, expected);
536    }
537
538    #[test]
539
540    fn merkle_root_is_deterministic() {
541        let hashes = vec![
542            ContentHash::compute("a"),
543            ContentHash::compute("b"),
544            ContentHash::compute("c"),
545        ];
546        let root1 = MerkleRoot::compute(&hashes);
547        let root2 = MerkleRoot::compute(&hashes);
548        assert_eq!(root1, root2);
549    }
550
551    #[test]
552
553    fn merkle_root_changes_with_different_content() {
554        let hashes1 = vec![ContentHash::compute("a"), ContentHash::compute("b")];
555        let hashes2 = vec![ContentHash::compute("a"), ContentHash::compute("c")];
556        let root1 = MerkleRoot::compute(&hashes1);
557        let root2 = MerkleRoot::compute(&hashes2);
558        assert_ne!(root1, root2);
559    }
560
561    // =========================================================================
562    // Tracked Context Tests
563    // =========================================================================
564
565    #[test]
566    fn tracked_context_starts_empty() {
567        let tracked = TrackedContext::empty();
568        assert_eq!(tracked.clock_time(), 0);
569        assert!(!tracked.context.has(ContextKey::Seeds));
570    }
571
572    #[test]
573    fn tracked_context_ticks_on_add() {
574        let mut tracked = TrackedContext::empty();
575        assert_eq!(tracked.clock_time(), 0);
576
577        tracked
578            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "seed"))
579            .unwrap();
580        assert_eq!(tracked.clock_time(), 1);
581
582        tracked
583            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s2", "seed2"))
584            .unwrap();
585        assert_eq!(tracked.clock_time(), 2);
586    }
587
588    #[test]
589    fn tracked_context_computes_merkle_root() {
590        let mut tracked = TrackedContext::empty();
591        tracked
592            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "first"))
593            .unwrap();
594        tracked
595            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s2", "second"))
596            .unwrap();
597
598        let root1 = tracked.merkle_root().clone();
599
600        // Adding another fact changes the root
601        tracked
602            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s3", "third"))
603            .unwrap();
604        let root2 = tracked.merkle_root().clone();
605
606        assert_ne!(root1, root2);
607    }
608
609    #[test]
610    fn tracked_context_verifies_integrity() {
611        let mut tracked = TrackedContext::empty();
612        tracked
613            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "test"))
614            .unwrap();
615
616        assert!(tracked.verify_integrity());
617    }
618
619    #[test]
620    fn integrity_proof_serializes() {
621        let mut tracked = TrackedContext::empty();
622        tracked
623            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "test"))
624            .unwrap();
625        let proof = tracked.extract_proof();
626        assert_eq!(proof.clock_time, 1);
627        assert_eq!(proof.fact_count, 1);
628
629        let json = serde_json::to_string(&proof).unwrap();
630        let deser: IntegrityProof = serde_json::from_str(&json).unwrap();
631        assert_eq!(proof, deser);
632    }
633
634    #[test]
635    fn integrity_proof_changes_with_different_facts() {
636        let mut t1 = TrackedContext::empty();
637        t1.add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "alpha"))
638            .unwrap();
639        let proof1 = t1.extract_proof();
640
641        let mut t2 = TrackedContext::empty();
642        t2.add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "beta"))
643            .unwrap();
644        let proof2 = t2.extract_proof();
645
646        assert_ne!(proof1.merkle_root, proof2.merkle_root);
647        assert_eq!(proof1.clock_time, proof2.clock_time);
648        assert_eq!(proof1.fact_count, proof2.fact_count);
649    }
650}