Skip to main content

converge_core/
integrity.rs

1// Copyright 2024-2026 Reflective Labs
2// SPDX-License-Identifier: MIT
3
4//! Integrity primitives for Converge.
5//!
6//! This module provides cryptographic and causal ordering primitives that can
7//! be used alongside the core Context and Fact types to add integrity features:
8//!
9//! - **Lamport Clock**: Provides causal ordering of events without wall clocks
10//! - **Content Hash**: SHA-256 hash of content for tamper detection
11//! - **Merkle Root**: Cryptographic commitment to a set of facts for audit verification
12//!
13//! # Example
14//!
15//! ```
16//! use converge_core::integrity::{LamportClock, ContentHash, MerkleRoot};
17//!
18//! // Lamport clock for causal ordering
19//! let mut clock = LamportClock::new();
20//! let t1 = clock.tick(); // Event 1
21//! let t2 = clock.tick(); // Event 2 (causally after Event 1)
22//!
23//! // Content hash for integrity
24//! let hash = ContentHash::compute("important data");
25//! assert!(hash.verify("important data"));
26//!
27//! // Merkle root for audit trail
28//! let hashes = vec![
29//!     ContentHash::compute("fact 1"),
30//!     ContentHash::compute("fact 2"),
31//! ];
32//! let root = MerkleRoot::compute(&hashes);
33//! ```
34
35use serde::{Deserialize, Serialize};
36
37use crate::context::{ContextFact, ContextKey, ContextState};
38
39// ============================================================================
40// Lamport Clock
41// ============================================================================
42
43/// A Lamport logical clock for causal ordering of events.
44///
45/// Lamport clocks provide a partial ordering of events in a distributed system
46/// without requiring synchronized wall clocks. The key property is:
47/// if event A happened-before event B, then `clock(A) < clock(B)`.
48///
49/// # Example
50///
51/// ```
52/// use converge_core::integrity::LamportClock;
53///
54/// let mut clock = LamportClock::new();
55/// assert_eq!(clock.time(), 0);
56///
57/// clock.tick();
58/// assert_eq!(clock.time(), 1);
59///
60/// // When receiving a message with clock=5, update to max+1
61/// clock.update(5);
62/// assert_eq!(clock.time(), 6);
63/// ```
64#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default, Serialize, Deserialize)]
65pub struct LamportClock {
66    time: u64,
67}
68
69impl LamportClock {
70    /// Creates a new Lamport clock initialized to 0.
71    #[must_use]
72    pub const fn new() -> Self {
73        Self { time: 0 }
74    }
75
76    /// Creates a Lamport clock at an already observed logical time.
77    #[must_use]
78    pub const fn at(time: u64) -> Self {
79        Self { time }
80    }
81
82    /// Returns the current logical time.
83    #[must_use]
84    pub const fn time(&self) -> u64 {
85        self.time
86    }
87
88    /// Increments the clock and returns the new time.
89    /// Called before any local event (e.g., creating a fact).
90    pub fn tick(&mut self) -> u64 {
91        self.time += 1;
92        self.time
93    }
94
95    /// Updates the clock based on a received timestamp.
96    /// Sets clock to `max(local, received) + 1`.
97    /// Called when receiving facts from another source.
98    pub fn update(&mut self, received: u64) -> u64 {
99        self.time = self.time.max(received) + 1;
100        self.time
101    }
102}
103
104// ============================================================================
105// Content Hash
106// ============================================================================
107
108/// Error type for ContentHash operations.
109#[derive(Debug, Clone, PartialEq, Eq)]
110pub enum ContentHashError {
111    /// Invalid hexadecimal character.
112    InvalidHex,
113    /// Invalid string length.
114    InvalidLength,
115}
116
117impl std::fmt::Display for ContentHashError {
118    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
119        match self {
120            Self::InvalidHex => write!(f, "invalid hexadecimal character"),
121            Self::InvalidLength => write!(f, "invalid string length (expected 64 hex chars)"),
122        }
123    }
124}
125
126impl std::error::Error for ContentHashError {}
127
128/// A SHA-256 hash of content for integrity verification.
129///
130/// Content hashes enable:
131/// - Tamper detection: any change to content changes the hash
132/// - Efficient comparison: compare 32 bytes instead of full content
133/// - Merkle tree construction: hashes combine into a tree
134#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
135pub struct ContentHash(pub [u8; 32]);
136
137impl ContentHash {
138    /// Computes a SHA-256 hash of the given content.
139    #[must_use]
140    pub fn compute(content: &str) -> Self {
141        use sha2::{Digest, Sha256};
142        let mut hasher = Sha256::new();
143        hasher.update(content.as_bytes());
144        Self(hasher.finalize().into())
145    }
146
147    /// Computes the hash of a Fact (combines key, id, and content).
148    #[must_use]
149    pub fn compute_fact(fact: &ContextFact) -> Self {
150        let payload = fact
151            .to_wire()
152            .map(|wire| wire.payload.payload.to_string())
153            .unwrap_or_else(|error| error.to_string());
154        let combined = format!("{:?}|{}|{}", fact.key(), fact.id(), payload);
155        Self::compute(&combined)
156    }
157
158    /// Combines two hashes (for Merkle tree internal nodes).
159    #[must_use]
160    pub fn combine(left: &Self, right: &Self) -> Self {
161        use sha2::{Digest, Sha256};
162        let mut hasher = Sha256::new();
163        hasher.update(left.0);
164        hasher.update(right.0);
165        Self(hasher.finalize().into())
166    }
167
168    /// Verifies that this hash matches the given content.
169    #[must_use]
170    pub fn verify(&self, content: &str) -> bool {
171        *self == Self::compute(content)
172    }
173
174    /// Returns the hash as a hex string.
175    #[must_use]
176    pub fn to_hex(&self) -> String {
177        self.0.iter().map(|b| format!("{b:02x}")).collect()
178    }
179
180    /// Creates a `ContentHash` from a hex string.
181    ///
182    /// # Errors
183    ///
184    /// Returns an error if the string is not valid hex or wrong length.
185    pub fn from_hex(s: &str) -> Result<Self, ContentHashError> {
186        if s.len() != 64 {
187            return Err(ContentHashError::InvalidLength);
188        }
189        let mut result = [0u8; 32];
190        for (i, chunk) in s.as_bytes().chunks(2).enumerate() {
191            let high = Self::hex_char_to_nibble(chunk[0])?;
192            let low = Self::hex_char_to_nibble(chunk[1])?;
193            result[i] = (high << 4) | low;
194        }
195        Ok(Self(result))
196    }
197
198    fn hex_char_to_nibble(c: u8) -> Result<u8, ContentHashError> {
199        match c {
200            b'0'..=b'9' => Ok(c - b'0'),
201            b'a'..=b'f' => Ok(c - b'a' + 10),
202            b'A'..=b'F' => Ok(c - b'A' + 10),
203            _ => Err(ContentHashError::InvalidHex),
204        }
205    }
206}
207
208impl std::fmt::Display for ContentHash {
209    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
210        write!(f, "{}", &self.to_hex()[..16]) // Short form for display
211    }
212}
213
214// ============================================================================
215// Merkle Tree
216// ============================================================================
217
218/// A Merkle tree root hash representing the integrity of a set of facts.
219///
220/// The Merkle root changes if any fact is added, modified, or reordered.
221/// This enables:
222/// - Tamper detection: compare roots to verify integrity
223/// - Efficient sync: different roots mean different states
224/// - Audit proofs: prove a fact exists without revealing all facts
225#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
226pub struct MerkleRoot(pub ContentHash);
227
228impl MerkleRoot {
229    /// Computes the Merkle root from a list of content hashes.
230    ///
231    /// Uses a standard binary Merkle tree construction.
232    /// Empty list returns a hash of empty string.
233    /// Single element is combined with itself (Bitcoin-style).
234    #[must_use]
235    pub fn compute(hashes: &[ContentHash]) -> Self {
236        if hashes.is_empty() {
237            return Self(ContentHash::compute(""));
238        }
239
240        let mut current_level: Vec<ContentHash> = hashes.to_vec();
241
242        // Keep combining until we reach a single root
243        // For a single element, we still iterate once to combine it with itself
244        loop {
245            if current_level.len() == 1 {
246                // If we started with 1 element, combine with itself
247                // If we reduced to 1 element through combinations, we're done
248                if hashes.len() == 1 {
249                    return Self(ContentHash::combine(&current_level[0], &current_level[0]));
250                }
251                return Self(current_level.into_iter().next().unwrap());
252            }
253
254            let mut next_level = Vec::new();
255
256            for chunk in current_level.chunks(2) {
257                let combined = if chunk.len() == 2 {
258                    ContentHash::combine(&chunk[0], &chunk[1])
259                } else {
260                    // Odd number: duplicate the last hash
261                    ContentHash::combine(&chunk[0], &chunk[0])
262                };
263                next_level.push(combined);
264            }
265
266            current_level = next_level;
267        }
268    }
269
270    /// Computes the Merkle root from a Context's facts.
271    ///
272    /// Facts are hashed in deterministic order (by key, then by position).
273    #[must_use]
274    pub fn from_context(ctx: &ContextState) -> Self {
275        let mut all_hashes: Vec<ContentHash> = Vec::new();
276        let mut keys: Vec<_> = ctx.all_keys();
277        keys.sort();
278
279        for key in keys {
280            for fact in ctx.get(key) {
281                all_hashes.push(ContentHash::compute_fact(fact));
282            }
283        }
284
285        Self::compute(&all_hashes)
286    }
287
288    /// Returns the root hash as a hex string.
289    #[must_use]
290    pub fn to_hex(&self) -> String {
291        self.0.to_hex()
292    }
293}
294
295// ============================================================================
296// Tracked Context
297// ============================================================================
298
299/// A wrapper around Context that tracks integrity metadata.
300///
301/// This provides optional integrity tracking without modifying the core types.
302#[derive(Debug, Clone, Serialize)]
303pub struct TrackedContext {
304    /// The underlying context.
305    pub context: ContextState,
306    /// Lamport clock for causal ordering.
307    pub clock: LamportClock,
308    /// Cached Merkle root (invalidated on changes).
309    merkle_root: Option<MerkleRoot>,
310    /// Hash of each fact (by key and index).
311    fact_hashes: Vec<(ContextKey, String, ContentHash)>,
312}
313
314impl TrackedContext {
315    /// Creates a new tracked context wrapping an existing context.
316    #[must_use]
317    pub fn new(context: ContextState) -> Self {
318        let mut tracked = Self {
319            context,
320            clock: LamportClock::new(),
321            merkle_root: None,
322            fact_hashes: Vec::new(),
323        };
324        tracked.recompute_hashes();
325        tracked
326    }
327
328    /// Creates an empty tracked context.
329    #[must_use]
330    pub fn empty() -> Self {
331        Self::new(ContextState::new())
332    }
333
334    /// Returns the current Lamport clock time.
335    #[must_use]
336    pub fn clock_time(&self) -> u64 {
337        self.clock.time()
338    }
339
340    /// Ticks the clock and returns the new time.
341    pub fn tick(&mut self) -> u64 {
342        self.clock.tick()
343    }
344
345    /// Returns the Lamport time assigned to the next local context event.
346    #[must_use]
347    pub fn next_logical_time(&self) -> u64 {
348        self.clock_time() + 1
349    }
350
351    /// Restores the tracked logical clock to an already observed time.
352    pub(crate) fn set_clock_time(&mut self, time: u64) {
353        self.clock = LamportClock::at(time);
354    }
355
356    /// Computes and returns the Merkle root.
357    #[must_use]
358    pub fn merkle_root(&mut self) -> &MerkleRoot {
359        if self.merkle_root.is_none() {
360            self.merkle_root = Some(MerkleRoot::from_context(&self.context));
361        }
362        self.merkle_root.as_ref().unwrap()
363    }
364
365    /// Verifies the integrity of all facts.
366    ///
367    /// Returns `true` if all recorded hashes match current fact content.
368    #[must_use]
369    pub fn verify_integrity(&self) -> bool {
370        for (key, id, expected_hash) in &self.fact_hashes {
371            if let Some(fact) = self
372                .context
373                .get(*key)
374                .iter()
375                .find(|f| f.id().as_str() == id)
376            {
377                if ContentHash::compute_fact(fact) != *expected_hash {
378                    return false;
379                }
380            } else {
381                return false; // Fact missing
382            }
383        }
384        true
385    }
386
387    /// Recomputes all fact hashes.
388    fn recompute_hashes(&mut self) {
389        self.fact_hashes.clear();
390        for key in self.context.all_keys() {
391            for fact in self.context.get(key) {
392                let hash = ContentHash::compute_fact(fact);
393                self.fact_hashes.push((key, fact.id().to_string(), hash));
394            }
395        }
396        self.merkle_root = None; // Invalidate cached root
397    }
398
399    /// Adds a fact to the underlying context and updates tracking.
400    ///
401    /// # Errors
402    ///
403    /// Returns an error if the fact conflicts with an existing fact.
404    pub(crate) fn add_fact(
405        &mut self,
406        fact: ContextFact,
407    ) -> Result<bool, crate::error::ConvergeError> {
408        let key = fact.key();
409        let id = fact.id().to_string();
410        let hash = ContentHash::compute_fact(&fact);
411
412        let changed = self.context.add_fact(fact)?;
413
414        if changed {
415            self.fact_hashes.push((key, id, hash));
416            self.merkle_root = None; // Invalidate cached root
417            self.clock.tick();
418        }
419
420        Ok(changed)
421    }
422
423    /// Extracts an integrity proof from the current state.
424    pub fn extract_proof(&mut self) -> IntegrityProof {
425        let merkle_root = self.merkle_root().clone();
426        IntegrityProof {
427            merkle_root,
428            clock_time: self.clock_time(),
429            fact_count: self.fact_hashes.len(),
430        }
431    }
432}
433
434impl Default for TrackedContext {
435    fn default() -> Self {
436        Self::empty()
437    }
438}
439
440// ============================================================================
441// Integrity Proof
442// ============================================================================
443
444/// Cryptographic integrity proof for a converged context.
445///
446/// Produced by the engine at the end of convergence. Provides:
447/// - Merkle root: tamper-evident commitment to all facts
448/// - Clock time: causal ordering count (number of fact-addition events)
449/// - Fact count: total facts in the context
450#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
451pub struct IntegrityProof {
452    /// Merkle root over all facts in deterministic order.
453    pub merkle_root: MerkleRoot,
454    /// Final Lamport clock time (number of tracked events).
455    pub clock_time: u64,
456    /// Number of facts in the context.
457    pub fact_count: usize,
458}
459
460// ============================================================================
461// Tests
462// ============================================================================
463
464#[cfg(test)]
465mod tests {
466    use super::*;
467
468    // =========================================================================
469    // Lamport Clock Tests
470    // =========================================================================
471
472    #[test]
473    fn lamport_clock_starts_at_zero() {
474        let clock = LamportClock::new();
475        assert_eq!(clock.time(), 0);
476    }
477
478    #[test]
479    fn lamport_clock_tick_increments() {
480        let mut clock = LamportClock::new();
481        assert_eq!(clock.tick(), 1);
482        assert_eq!(clock.tick(), 2);
483        assert_eq!(clock.tick(), 3);
484    }
485
486    #[test]
487    fn lamport_clock_update_takes_max_plus_one() {
488        let mut clock = LamportClock::new();
489        clock.tick(); // 1
490        clock.tick(); // 2
491
492        // Received message with clock=5, should become 6
493        assert_eq!(clock.update(5), 6);
494
495        // Received message with clock=3, should become 7 (max(6,3)+1)
496        assert_eq!(clock.update(3), 7);
497    }
498
499    // =========================================================================
500    // Content Hash Tests
501    // =========================================================================
502
503    #[test]
504
505    fn content_hash_is_deterministic() {
506        let h1 = ContentHash::compute("hello world");
507        let h2 = ContentHash::compute("hello world");
508        assert_eq!(h1, h2);
509    }
510
511    #[test]
512
513    fn content_hash_changes_with_content() {
514        let h1 = ContentHash::compute("hello");
515        let h2 = ContentHash::compute("world");
516        assert_ne!(h1, h2);
517    }
518
519    #[test]
520
521    fn content_hash_verify_works() {
522        let hash = ContentHash::compute("test");
523        assert!(hash.verify("test"));
524        assert!(!hash.verify("modified"));
525    }
526
527    #[test]
528
529    fn content_hash_hex_roundtrip() {
530        let original = ContentHash::compute("test content");
531        let hex = original.to_hex();
532        let restored = ContentHash::from_hex(&hex).unwrap();
533        assert_eq!(original, restored);
534    }
535
536    // =========================================================================
537    // Merkle Tree Tests
538    // =========================================================================
539
540    #[test]
541
542    fn merkle_root_empty_list() {
543        let root = MerkleRoot::compute(&[]);
544        let empty_hash = ContentHash::compute("");
545        assert_eq!(root.0, empty_hash);
546    }
547
548    #[test]
549
550    fn merkle_root_single_element() {
551        let h = ContentHash::compute("only element");
552        let root = MerkleRoot::compute(std::slice::from_ref(&h));
553        let expected = ContentHash::combine(&h, &h);
554        assert_eq!(root.0, expected);
555    }
556
557    #[test]
558
559    fn merkle_root_two_elements() {
560        let h1 = ContentHash::compute("first");
561        let h2 = ContentHash::compute("second");
562        let root = MerkleRoot::compute(&[h1.clone(), h2.clone()]);
563        let expected = ContentHash::combine(&h1, &h2);
564        assert_eq!(root.0, expected);
565    }
566
567    #[test]
568
569    fn merkle_root_is_deterministic() {
570        let hashes = vec![
571            ContentHash::compute("a"),
572            ContentHash::compute("b"),
573            ContentHash::compute("c"),
574        ];
575        let root1 = MerkleRoot::compute(&hashes);
576        let root2 = MerkleRoot::compute(&hashes);
577        assert_eq!(root1, root2);
578    }
579
580    #[test]
581
582    fn merkle_root_changes_with_different_content() {
583        let hashes1 = vec![ContentHash::compute("a"), ContentHash::compute("b")];
584        let hashes2 = vec![ContentHash::compute("a"), ContentHash::compute("c")];
585        let root1 = MerkleRoot::compute(&hashes1);
586        let root2 = MerkleRoot::compute(&hashes2);
587        assert_ne!(root1, root2);
588    }
589
590    // =========================================================================
591    // Tracked Context Tests
592    // =========================================================================
593
594    #[test]
595    fn tracked_context_starts_empty() {
596        let tracked = TrackedContext::empty();
597        assert_eq!(tracked.clock_time(), 0);
598        assert!(!tracked.context.has(ContextKey::Seeds));
599    }
600
601    #[test]
602    fn tracked_context_ticks_on_add() {
603        let mut tracked = TrackedContext::empty();
604        assert_eq!(tracked.clock_time(), 0);
605
606        tracked
607            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "seed"))
608            .unwrap();
609        assert_eq!(tracked.clock_time(), 1);
610
611        tracked
612            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s2", "seed2"))
613            .unwrap();
614        assert_eq!(tracked.clock_time(), 2);
615    }
616
617    #[test]
618    fn tracked_context_computes_merkle_root() {
619        let mut tracked = TrackedContext::empty();
620        tracked
621            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "first"))
622            .unwrap();
623        tracked
624            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s2", "second"))
625            .unwrap();
626
627        let root1 = tracked.merkle_root().clone();
628
629        // Adding another fact changes the root
630        tracked
631            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s3", "third"))
632            .unwrap();
633        let root2 = tracked.merkle_root().clone();
634
635        assert_ne!(root1, root2);
636    }
637
638    #[test]
639    fn tracked_context_verifies_integrity() {
640        let mut tracked = TrackedContext::empty();
641        tracked
642            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "test"))
643            .unwrap();
644
645        assert!(tracked.verify_integrity());
646    }
647
648    #[test]
649    fn integrity_proof_serializes() {
650        let mut tracked = TrackedContext::empty();
651        tracked
652            .add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "test"))
653            .unwrap();
654        let proof = tracked.extract_proof();
655        assert_eq!(proof.clock_time, 1);
656        assert_eq!(proof.fact_count, 1);
657
658        let json = serde_json::to_string(&proof).unwrap();
659        let deser: IntegrityProof = serde_json::from_str(&json).unwrap();
660        assert_eq!(proof, deser);
661    }
662
663    #[test]
664    fn integrity_proof_changes_with_different_facts() {
665        let mut t1 = TrackedContext::empty();
666        t1.add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "alpha"))
667            .unwrap();
668        let proof1 = t1.extract_proof();
669
670        let mut t2 = TrackedContext::empty();
671        t2.add_fact(crate::context::new_fact(ContextKey::Seeds, "s1", "beta"))
672            .unwrap();
673        let proof2 = t2.extract_proof();
674
675        assert_ne!(proof1.merkle_root, proof2.merkle_root);
676        assert_eq!(proof1.clock_time, proof2.clock_time);
677        assert_eq!(proof1.fact_count, proof2.fact_count);
678    }
679}