Skip to main content

converge_core/
integrity.rs

1// Copyright 2024-2026 Reflective Labs
2// SPDX-License-Identifier: MIT
3
4//! Integrity primitives for Converge.
5//!
6//! This module provides cryptographic and causal ordering primitives that can
7//! be used alongside the core Context and Fact types to add integrity features:
8//!
9//! - **Lamport Clock**: Provides causal ordering of events without wall clocks
10//! - **Content Hash**: SHA-256 hash of content for tamper detection
11//! - **Merkle Root**: Cryptographic commitment to a set of facts for audit verification
12//!
13//! # Example
14//!
15//! ```
16//! use converge_core::integrity::{LamportClock, ContentHash, MerkleRoot};
17//!
18//! // Lamport clock for causal ordering
19//! let mut clock = LamportClock::new();
20//! let t1 = clock.tick(); // Event 1
21//! let t2 = clock.tick(); // Event 2 (causally after Event 1)
22//!
23//! // Content hash for integrity
24//! let hash = ContentHash::compute("important data");
25//! assert!(hash.verify("important data"));
26//!
27//! // Merkle root for audit trail
28//! let hashes = vec![
29//!     ContentHash::compute("fact 1"),
30//!     ContentHash::compute("fact 2"),
31//! ];
32//! let root = MerkleRoot::compute(&hashes);
33//! ```
34
35use serde::{Deserialize, Serialize};
36
37use crate::context::{ContextFact, ContextKey, ContextState};
38
39// ============================================================================
40// Lamport Clock
41// ============================================================================
42
43/// A Lamport logical clock for causal ordering of events.
44///
45/// Lamport clocks provide a partial ordering of events in a distributed system
46/// without requiring synchronized wall clocks. The key property is:
47/// if event A happened-before event B, then `clock(A) < clock(B)`.
48///
49/// # Example
50///
51/// ```
52/// use converge_core::integrity::LamportClock;
53///
54/// let mut clock = LamportClock::new();
55/// assert_eq!(clock.time(), 0);
56///
57/// clock.tick();
58/// assert_eq!(clock.time(), 1);
59///
60/// // When receiving a message with clock=5, update to max+1
61/// clock.update(5);
62/// assert_eq!(clock.time(), 6);
63/// ```
64#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default, Serialize, Deserialize)]
65pub struct LamportClock {
66    time: u64,
67}
68
69impl LamportClock {
70    /// Creates a new Lamport clock initialized to 0.
71    #[must_use]
72    pub const fn new() -> Self {
73        Self { time: 0 }
74    }
75
76    /// Returns the current logical time.
77    #[must_use]
78    pub const fn time(&self) -> u64 {
79        self.time
80    }
81
82    /// Increments the clock and returns the new time.
83    /// Called before any local event (e.g., creating a fact).
84    pub fn tick(&mut self) -> u64 {
85        self.time += 1;
86        self.time
87    }
88
89    /// Updates the clock based on a received timestamp.
90    /// Sets clock to `max(local, received) + 1`.
91    /// Called when receiving facts from another source.
92    pub fn update(&mut self, received: u64) -> u64 {
93        self.time = self.time.max(received) + 1;
94        self.time
95    }
96}
97
98// ============================================================================
99// Content Hash
100// ============================================================================
101
102/// Error type for ContentHash operations.
103#[derive(Debug, Clone, PartialEq, Eq)]
104pub enum ContentHashError {
105    /// Invalid hexadecimal character.
106    InvalidHex,
107    /// Invalid string length.
108    InvalidLength,
109}
110
111impl std::fmt::Display for ContentHashError {
112    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
113        match self {
114            Self::InvalidHex => write!(f, "invalid hexadecimal character"),
115            Self::InvalidLength => write!(f, "invalid string length (expected 64 hex chars)"),
116        }
117    }
118}
119
120impl std::error::Error for ContentHashError {}
121
122/// A SHA-256 hash of content for integrity verification.
123///
124/// Content hashes enable:
125/// - Tamper detection: any change to content changes the hash
126/// - Efficient comparison: compare 32 bytes instead of full content
127/// - Merkle tree construction: hashes combine into a tree
128#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
129pub struct ContentHash(pub [u8; 32]);
130
131impl ContentHash {
132    /// Computes a SHA-256 hash of the given content.
133    #[must_use]
134    pub fn compute(content: &str) -> Self {
135        use sha2::{Digest, Sha256};
136        let mut hasher = Sha256::new();
137        hasher.update(content.as_bytes());
138        Self(hasher.finalize().into())
139    }
140
141    /// Computes the hash of a Fact (combines key, id, and content).
142    #[must_use]
143    pub fn compute_fact(fact: &ContextFact) -> Self {
144        let combined = format!("{:?}|{}|{}", fact.key(), fact.id(), fact.content());
145        Self::compute(&combined)
146    }
147
148    /// Combines two hashes (for Merkle tree internal nodes).
149    #[must_use]
150    pub fn combine(left: &Self, right: &Self) -> Self {
151        use sha2::{Digest, Sha256};
152        let mut hasher = Sha256::new();
153        hasher.update(left.0);
154        hasher.update(right.0);
155        Self(hasher.finalize().into())
156    }
157
158    /// Verifies that this hash matches the given content.
159    #[must_use]
160    pub fn verify(&self, content: &str) -> bool {
161        *self == Self::compute(content)
162    }
163
164    /// Returns the hash as a hex string.
165    #[must_use]
166    pub fn to_hex(&self) -> String {
167        self.0.iter().map(|b| format!("{b:02x}")).collect()
168    }
169
170    /// Creates a `ContentHash` from a hex string.
171    ///
172    /// # Errors
173    ///
174    /// Returns an error if the string is not valid hex or wrong length.
175    pub fn from_hex(s: &str) -> Result<Self, ContentHashError> {
176        if s.len() != 64 {
177            return Err(ContentHashError::InvalidLength);
178        }
179        let mut result = [0u8; 32];
180        for (i, chunk) in s.as_bytes().chunks(2).enumerate() {
181            let high = Self::hex_char_to_nibble(chunk[0])?;
182            let low = Self::hex_char_to_nibble(chunk[1])?;
183            result[i] = (high << 4) | low;
184        }
185        Ok(Self(result))
186    }
187
188    fn hex_char_to_nibble(c: u8) -> Result<u8, ContentHashError> {
189        match c {
190            b'0'..=b'9' => Ok(c - b'0'),
191            b'a'..=b'f' => Ok(c - b'a' + 10),
192            b'A'..=b'F' => Ok(c - b'A' + 10),
193            _ => Err(ContentHashError::InvalidHex),
194        }
195    }
196}
197
198impl std::fmt::Display for ContentHash {
199    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
200        write!(f, "{}", &self.to_hex()[..16]) // Short form for display
201    }
202}
203
204// ============================================================================
205// Merkle Tree
206// ============================================================================
207
208/// A Merkle tree root hash representing the integrity of a set of facts.
209///
210/// The Merkle root changes if any fact is added, modified, or reordered.
211/// This enables:
212/// - Tamper detection: compare roots to verify integrity
213/// - Efficient sync: different roots mean different states
214/// - Audit proofs: prove a fact exists without revealing all facts
215#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
216pub struct MerkleRoot(pub ContentHash);
217
218impl MerkleRoot {
219    /// Computes the Merkle root from a list of content hashes.
220    ///
221    /// Uses a standard binary Merkle tree construction.
222    /// Empty list returns a hash of empty string.
223    /// Single element is combined with itself (Bitcoin-style).
224    #[must_use]
225    pub fn compute(hashes: &[ContentHash]) -> Self {
226        if hashes.is_empty() {
227            return Self(ContentHash::compute(""));
228        }
229
230        let mut current_level: Vec<ContentHash> = hashes.to_vec();
231
232        // Keep combining until we reach a single root
233        // For a single element, we still iterate once to combine it with itself
234        loop {
235            if current_level.len() == 1 {
236                // If we started with 1 element, combine with itself
237                // If we reduced to 1 element through combinations, we're done
238                if hashes.len() == 1 {
239                    return Self(ContentHash::combine(&current_level[0], &current_level[0]));
240                }
241                return Self(current_level.into_iter().next().unwrap());
242            }
243
244            let mut next_level = Vec::new();
245
246            for chunk in current_level.chunks(2) {
247                let combined = if chunk.len() == 2 {
248                    ContentHash::combine(&chunk[0], &chunk[1])
249                } else {
250                    // Odd number: duplicate the last hash
251                    ContentHash::combine(&chunk[0], &chunk[0])
252                };
253                next_level.push(combined);
254            }
255
256            current_level = next_level;
257        }
258    }
259
260    /// Computes the Merkle root from a Context's facts.
261    ///
262    /// Facts are hashed in deterministic order (by key, then by position).
263    #[must_use]
264    pub fn from_context(ctx: &ContextState) -> Self {
265        let mut all_hashes: Vec<ContentHash> = Vec::new();
266        let mut keys: Vec<_> = ctx.all_keys();
267        keys.sort();
268
269        for key in keys {
270            for fact in ctx.get(key) {
271                all_hashes.push(ContentHash::compute_fact(fact));
272            }
273        }
274
275        Self::compute(&all_hashes)
276    }
277
278    /// Returns the root hash as a hex string.
279    #[must_use]
280    pub fn to_hex(&self) -> String {
281        self.0.to_hex()
282    }
283}
284
285// ============================================================================
286// Tracked Context
287// ============================================================================
288
289/// A wrapper around Context that tracks integrity metadata.
290///
291/// This provides optional integrity tracking without modifying the core types.
292#[derive(Debug, Clone, Serialize)]
293pub struct TrackedContext {
294    /// The underlying context.
295    pub context: ContextState,
296    /// Lamport clock for causal ordering.
297    pub clock: LamportClock,
298    /// Cached Merkle root (invalidated on changes).
299    merkle_root: Option<MerkleRoot>,
300    /// Hash of each fact (by key and index).
301    fact_hashes: Vec<(ContextKey, String, ContentHash)>,
302}
303
304impl TrackedContext {
305    /// Creates a new tracked context wrapping an existing context.
306    #[must_use]
307    pub fn new(context: ContextState) -> Self {
308        let mut tracked = Self {
309            context,
310            clock: LamportClock::new(),
311            merkle_root: None,
312            fact_hashes: Vec::new(),
313        };
314        tracked.recompute_hashes();
315        tracked
316    }
317
318    /// Creates an empty tracked context.
319    #[must_use]
320    pub fn empty() -> Self {
321        Self::new(ContextState::new())
322    }
323
324    /// Returns the current Lamport clock time.
325    #[must_use]
326    pub fn clock_time(&self) -> u64 {
327        self.clock.time()
328    }
329
330    /// Ticks the clock and returns the new time.
331    pub fn tick(&mut self) -> u64 {
332        self.clock.tick()
333    }
334
335    /// Computes and returns the Merkle root.
336    #[must_use]
337    pub fn merkle_root(&mut self) -> &MerkleRoot {
338        if self.merkle_root.is_none() {
339            self.merkle_root = Some(MerkleRoot::from_context(&self.context));
340        }
341        self.merkle_root.as_ref().unwrap()
342    }
343
344    /// Verifies the integrity of all facts.
345    ///
346    /// Returns `true` if all recorded hashes match current fact content.
347    #[must_use]
348    pub fn verify_integrity(&self) -> bool {
349        for (key, id, expected_hash) in &self.fact_hashes {
350            if let Some(fact) = self
351                .context
352                .get(*key)
353                .iter()
354                .find(|f| f.id().as_str() == id)
355            {
356                if ContentHash::compute_fact(fact) != *expected_hash {
357                    return false;
358                }
359            } else {
360                return false; // Fact missing
361            }
362        }
363        true
364    }
365
366    /// Recomputes all fact hashes.
367    fn recompute_hashes(&mut self) {
368        self.fact_hashes.clear();
369        for key in self.context.all_keys() {
370            for fact in self.context.get(key) {
371                let hash = ContentHash::compute_fact(fact);
372                self.fact_hashes.push((key, fact.id().to_string(), hash));
373            }
374        }
375        self.merkle_root = None; // Invalidate cached root
376    }
377
378    /// Adds a fact to the underlying context and updates tracking.
379    ///
380    /// # Errors
381    ///
382    /// Returns an error if the fact conflicts with an existing fact.
383    pub(crate) fn add_fact(
384        &mut self,
385        fact: ContextFact,
386    ) -> Result<bool, crate::error::ConvergeError> {
387        let key = fact.key();
388        let id = fact.id().to_string();
389        let hash = ContentHash::compute_fact(&fact);
390
391        let changed = self.context.add_fact(fact)?;
392
393        if changed {
394            self.fact_hashes.push((key, id, hash));
395            self.merkle_root = None; // Invalidate cached root
396            self.clock.tick();
397        }
398
399        Ok(changed)
400    }
401
402    /// Extracts an integrity proof from the current state.
403    pub fn extract_proof(&mut self) -> IntegrityProof {
404        let merkle_root = self.merkle_root().clone();
405        IntegrityProof {
406            merkle_root,
407            clock_time: self.clock_time(),
408            fact_count: self.fact_hashes.len(),
409        }
410    }
411}
412
413impl Default for TrackedContext {
414    fn default() -> Self {
415        Self::empty()
416    }
417}
418
419// ============================================================================
420// Integrity Proof
421// ============================================================================
422
423/// Cryptographic integrity proof for a converged context.
424///
425/// Produced by the engine at the end of convergence. Provides:
426/// - Merkle root: tamper-evident commitment to all facts
427/// - Clock time: causal ordering count (number of fact-addition events)
428/// - Fact count: total facts in the context
429#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
430pub struct IntegrityProof {
431    /// Merkle root over all facts in deterministic order.
432    pub merkle_root: MerkleRoot,
433    /// Final Lamport clock time (number of tracked events).
434    pub clock_time: u64,
435    /// Number of facts in the context.
436    pub fact_count: usize,
437}
438
439// ============================================================================
440// Tests
441// ============================================================================
442
443#[cfg(test)]
444mod tests {
445    use super::*;
446
447    // =========================================================================
448    // Lamport Clock Tests
449    // =========================================================================
450
451    #[test]
452    fn lamport_clock_starts_at_zero() {
453        let clock = LamportClock::new();
454        assert_eq!(clock.time(), 0);
455    }
456
457    #[test]
458    fn lamport_clock_tick_increments() {
459        let mut clock = LamportClock::new();
460        assert_eq!(clock.tick(), 1);
461        assert_eq!(clock.tick(), 2);
462        assert_eq!(clock.tick(), 3);
463    }
464
465    #[test]
466    fn lamport_clock_update_takes_max_plus_one() {
467        let mut clock = LamportClock::new();
468        clock.tick(); // 1
469        clock.tick(); // 2
470
471        // Received message with clock=5, should become 6
472        assert_eq!(clock.update(5), 6);
473
474        // Received message with clock=3, should become 7 (max(6,3)+1)
475        assert_eq!(clock.update(3), 7);
476    }
477
478    // =========================================================================
479    // Content Hash Tests
480    // =========================================================================
481
482    #[test]
483
484    fn content_hash_is_deterministic() {
485        let h1 = ContentHash::compute("hello world");
486        let h2 = ContentHash::compute("hello world");
487        assert_eq!(h1, h2);
488    }
489
490    #[test]
491
492    fn content_hash_changes_with_content() {
493        let h1 = ContentHash::compute("hello");
494        let h2 = ContentHash::compute("world");
495        assert_ne!(h1, h2);
496    }
497
498    #[test]
499
500    fn content_hash_verify_works() {
501        let hash = ContentHash::compute("test");
502        assert!(hash.verify("test"));
503        assert!(!hash.verify("modified"));
504    }
505
506    #[test]
507
508    fn content_hash_hex_roundtrip() {
509        let original = ContentHash::compute("test content");
510        let hex = original.to_hex();
511        let restored = ContentHash::from_hex(&hex).unwrap();
512        assert_eq!(original, restored);
513    }
514
515    // =========================================================================
516    // Merkle Tree Tests
517    // =========================================================================
518
519    #[test]
520
521    fn merkle_root_empty_list() {
522        let root = MerkleRoot::compute(&[]);
523        let empty_hash = ContentHash::compute("");
524        assert_eq!(root.0, empty_hash);
525    }
526
527    #[test]
528
529    fn merkle_root_single_element() {
530        let h = ContentHash::compute("only element");
531        let root = MerkleRoot::compute(std::slice::from_ref(&h));
532        let expected = ContentHash::combine(&h, &h);
533        assert_eq!(root.0, expected);
534    }
535
536    #[test]
537
538    fn merkle_root_two_elements() {
539        let h1 = ContentHash::compute("first");
540        let h2 = ContentHash::compute("second");
541        let root = MerkleRoot::compute(&[h1.clone(), h2.clone()]);
542        let expected = ContentHash::combine(&h1, &h2);
543        assert_eq!(root.0, expected);
544    }
545
546    #[test]
547
548    fn merkle_root_is_deterministic() {
549        let hashes = vec![
550            ContentHash::compute("a"),
551            ContentHash::compute("b"),
552            ContentHash::compute("c"),
553        ];
554        let root1 = MerkleRoot::compute(&hashes);
555        let root2 = MerkleRoot::compute(&hashes);
556        assert_eq!(root1, root2);
557    }
558
559    #[test]
560
561    fn merkle_root_changes_with_different_content() {
562        let hashes1 = vec![ContentHash::compute("a"), ContentHash::compute("b")];
563        let hashes2 = vec![ContentHash::compute("a"), ContentHash::compute("c")];
564        let root1 = MerkleRoot::compute(&hashes1);
565        let root2 = MerkleRoot::compute(&hashes2);
566        assert_ne!(root1, root2);
567    }
568
569    // =========================================================================
570    // Tracked Context Tests
571    // =========================================================================
572
573    #[test]
574    fn tracked_context_starts_empty() {
575        let tracked = TrackedContext::empty();
576        assert_eq!(tracked.clock_time(), 0);
577        assert!(!tracked.context.has(ContextKey::Seeds));
578    }
579
580    #[test]
581    fn tracked_context_ticks_on_add() {
582        let mut tracked = TrackedContext::empty();
583        assert_eq!(tracked.clock_time(), 0);
584
585        tracked
586            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "seed"))
587            .unwrap();
588        assert_eq!(tracked.clock_time(), 1);
589
590        tracked
591            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s2", "seed2"))
592            .unwrap();
593        assert_eq!(tracked.clock_time(), 2);
594    }
595
596    #[test]
597    fn tracked_context_computes_merkle_root() {
598        let mut tracked = TrackedContext::empty();
599        tracked
600            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "first"))
601            .unwrap();
602        tracked
603            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s2", "second"))
604            .unwrap();
605
606        let root1 = tracked.merkle_root().clone();
607
608        // Adding another fact changes the root
609        tracked
610            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s3", "third"))
611            .unwrap();
612        let root2 = tracked.merkle_root().clone();
613
614        assert_ne!(root1, root2);
615    }
616
617    #[test]
618    fn tracked_context_verifies_integrity() {
619        let mut tracked = TrackedContext::empty();
620        tracked
621            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "test"))
622            .unwrap();
623
624        assert!(tracked.verify_integrity());
625    }
626
627    #[test]
628    fn integrity_proof_serializes() {
629        let mut tracked = TrackedContext::empty();
630        tracked
631            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "test"))
632            .unwrap();
633        let proof = tracked.extract_proof();
634        assert_eq!(proof.clock_time, 1);
635        assert_eq!(proof.fact_count, 1);
636
637        let json = serde_json::to_string(&proof).unwrap();
638        let deser: IntegrityProof = serde_json::from_str(&json).unwrap();
639        assert_eq!(proof, deser);
640    }
641
642    #[test]
643    fn integrity_proof_changes_with_different_facts() {
644        let mut t1 = TrackedContext::empty();
645        t1.add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "alpha"))
646            .unwrap();
647        let proof1 = t1.extract_proof();
648
649        let mut t2 = TrackedContext::empty();
650        t2.add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "beta"))
651            .unwrap();
652        let proof2 = t2.extract_proof();
653
654        assert_ne!(proof1.merkle_root, proof2.merkle_root);
655        assert_eq!(proof1.clock_time, proof2.clock_time);
656        assert_eq!(proof1.fact_count, proof2.fact_count);
657    }
658}