Skip to main content

canon_core/
diff.rs

1//! Cognitive diff representing changes between state roots
2//!
3//! Per CP-001 §2.7: A diff captures all changes between two state roots
4//! and is the fundamental unit of synchronization.
5
6use serde::{Deserialize, Serialize};
7use uuid::Uuid;
8
9use crate::hlc::Hlc;
10use crate::{Chunk, Document, Edge, EdgeKind, Embedding};
11
/// Metadata about a cognitive diff.
///
/// Records which state transition a diff describes (`prev_root` to
/// `new_root`) along with the timestamp, originating device and sequence
/// number attached to it. `CognitiveDiff`'s `PartialEq` impl compares
/// this struct and nothing else.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct DiffMetadata {
    /// Hash of the previous state root
    pub prev_root: [u8; 32],

    /// Hash of the new state root after applying this diff.
    ///
    /// `CognitiveDiff::empty` fills this with zeroes as a placeholder.
    pub new_root: [u8; 32],

    /// HLC timestamp when this diff was created
    pub hlc: Hlc,

    /// Device ID that produced this diff
    pub device_id: Uuid,

    /// Sequence number for ordering
    // NOTE(review): the scope of the sequence (per device vs. global) is not
    // visible in this file; confirm against the spec before relying on it.
    pub seq: u64,
}
30
/// A cognitive diff containing all changes between two state roots.
///
/// Per CP-001 §2.7: devices exchange diffs rather than full state
/// to minimize bandwidth and computation.
///
/// Each entity kind (documents, chunks, embeddings, edges) has one list of
/// additions and one list of removals.
///
/// Note: there is no `updated_docs` — updates are modeled as
/// remove + add (per spec pattern).
///
/// Equality is implemented by hand further down in this file and compares
/// only `metadata`; the add/remove lists do not take part in `==`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CognitiveDiff {
    /// Documents added in this diff
    pub added_docs: Vec<Document>,

    /// IDs of documents removed
    pub removed_doc_ids: Vec<Uuid>,

    /// Chunks added
    pub added_chunks: Vec<Chunk>,

    /// IDs of chunks removed
    pub removed_chunk_ids: Vec<Uuid>,

    /// Embeddings added
    pub added_embeddings: Vec<Embedding>,

    /// IDs of embeddings removed
    pub removed_embedding_ids: Vec<Uuid>,

    /// Edges added
    pub added_edges: Vec<Edge>,

    /// Edges removed — identified by (source, target, kind) triple
    pub removed_edges: Vec<(Uuid, Uuid, EdgeKind)>,

    /// Diff metadata (roots, timestamp, device and sequence number)
    pub metadata: DiffMetadata,
}
67
68impl CognitiveDiff {
69    /// Create an empty diff (no changes)
70    pub fn empty(prev_root: [u8; 32], device_id: Uuid, seq: u64, hlc: Hlc) -> Self {
71        Self {
72            added_docs: Vec::new(),
73            removed_doc_ids: Vec::new(),
74            added_chunks: Vec::new(),
75            removed_chunk_ids: Vec::new(),
76            added_embeddings: Vec::new(),
77            removed_embedding_ids: Vec::new(),
78            added_edges: Vec::new(),
79            removed_edges: Vec::new(),
80            metadata: DiffMetadata {
81                prev_root,
82                new_root: [0u8; 32], // Computed after application
83                hlc,
84                device_id,
85                seq,
86            },
87        }
88    }
89
90    /// Check if the diff is empty (no changes)
91    pub fn is_empty(&self) -> bool {
92        self.added_docs.is_empty()
93            && self.removed_doc_ids.is_empty()
94            && self.added_chunks.is_empty()
95            && self.removed_chunk_ids.is_empty()
96            && self.added_embeddings.is_empty()
97            && self.removed_embedding_ids.is_empty()
98            && self.added_edges.is_empty()
99            && self.removed_edges.is_empty()
100    }
101
102    /// Count total number of changes in this diff
103    pub fn change_count(&self) -> usize {
104        self.added_docs.len()
105            + self.removed_doc_ids.len()
106            + self.added_chunks.len()
107            + self.removed_chunk_ids.len()
108            + self.added_embeddings.len()
109            + self.removed_embedding_ids.len()
110            + self.added_edges.len()
111            + self.removed_edges.len()
112    }
113
114    /// Estimate serialized size in bytes
115    pub fn estimated_size(&self) -> usize {
116        const DOC_SIZE: usize = 200;
117        const CHUNK_SIZE: usize = 1000;
118        const EMBEDDING_SIZE: usize = 3072; // 1536 dims * 2 bytes
119        const EDGE_SIZE: usize = 50;
120        const ID_SIZE: usize = 16;
121
122        self.added_docs.len() * DOC_SIZE
123            + self.removed_doc_ids.len() * ID_SIZE
124            + self.added_chunks.len() * CHUNK_SIZE
125            + self.removed_chunk_ids.len() * ID_SIZE
126            + self.added_embeddings.len() * EMBEDDING_SIZE
127            + self.removed_embedding_ids.len() * ID_SIZE
128            + self.added_edges.len() * EDGE_SIZE
129            + self.removed_edges.len() * (ID_SIZE * 2 + 1)
130            + 200 // metadata overhead
131    }
132}
133
134impl PartialEq for CognitiveDiff {
135    fn eq(&self, other: &Self) -> bool {
136        self.metadata == other.metadata
137    }
138}
139
140impl Eq for CognitiveDiff {}
141
/// Input for embedding computation (for trace verification).
///
/// Carries only 32-byte hashes, not the embedded text or the model itself.
// NOTE(review): the hash function producing these digests is not visible in
// this file; confirm it matches the one used for state roots.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbeddingInput {
    /// Hash of the text that was embedded
    pub text_hash: [u8; 32],
    /// Hash of the model manifest used
    pub model_hash: [u8; 32],
}
150
/// Execution trace for verified execution per CP-003 §10.
///
/// This structure enables devices to verify each other's operations
/// by providing a complete record of the state transition.
///
/// Note that `ExecutionTrace::with_signature` leaves the four subtree roots
/// zeroed and both embedding lists empty; callers needing those must set
/// the fields themselves.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExecutionTrace {
    /// The operation that was executed
    pub operation: Operation,
    /// Hash of the state before this operation
    pub prev_state_root: [u8; 32],
    /// Hash of the state after this operation
    pub new_state_root: [u8; 32],

    /// Subtree root for documents
    pub document_subtree_root: [u8; 32],
    /// Subtree root for chunks
    pub chunk_subtree_root: [u8; 32],
    /// Subtree root for embeddings
    pub embedding_subtree_root: [u8; 32],
    /// Subtree root for edges
    pub edge_subtree_root: [u8; 32],

    /// Embedding computation inputs (for verification)
    pub embedding_inputs: Vec<EmbeddingInput>,
    /// Embedding computation outputs (hashes of embeddings)
    // NOTE(review): presumably index-aligned with `embedding_inputs`; nothing
    // in this file enforces equal lengths — confirm with the verifier code.
    pub embedding_outputs: Vec<[u8; 32]>,

    /// Timestamp when operation was executed
    pub timestamp: Hlc,
    /// Device that executed the operation
    pub device_id: Uuid,
    /// Signature of this trace (for non-repudiation).
    ///
    /// Stored as a byte vector; `signature_array` expects it to hold exactly
    /// 64 bytes and panics otherwise.
    // NOTE(review): presumably a `Vec<u8>` rather than `[u8; 64]` because of
    // serde's limited built-in support for large arrays — confirm.
    pub signature: Vec<u8>,
}
185
186impl ExecutionTrace {
187    /// Get signature as a fixed array (panics if not 64 bytes)
188    #[allow(dead_code)]
189    pub fn signature_array(&self) -> [u8; 64] {
190        let mut arr = [0u8; 64];
191        arr.copy_from_slice(&self.signature);
192        arr
193    }
194
195    /// Create a trace from a pre-computed signature and full state transition context.
196    #[allow(dead_code)]
197    pub fn with_signature(
198        operation: Operation,
199        prev_state_root: [u8; 32],
200        new_state_root: [u8; 32],
201        timestamp: Hlc,
202        device_id: Uuid,
203        signature: [u8; 64],
204    ) -> Self {
205        Self {
206            operation,
207            prev_state_root,
208            new_state_root,
209            document_subtree_root: [0u8; 32],
210            chunk_subtree_root: [0u8; 32],
211            embedding_subtree_root: [0u8; 32],
212            edge_subtree_root: [0u8; 32],
213            embedding_inputs: Vec::new(),
214            embedding_outputs: Vec::new(),
215            timestamp,
216            device_id,
217            signature: signature.to_vec(),
218        }
219    }
220}
221
/// Operation types for deterministic state transitions.
///
/// Per CP-003 §9.1. Every variant carries its own `timestamp` for ordering.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Operation {
    /// Add a document with its chunks and embeddings
    AddDocument {
        /// The document to add
        document: Document,
        /// Associated chunks
        chunks: Vec<Chunk>,
        /// Associated embeddings
        embeddings: Vec<Embedding>,
        /// Timestamp for ordering
        timestamp: Hlc,
    },
    /// Remove a document (cascades to chunks/embeddings)
    RemoveDocument {
        /// ID of document to remove
        document_id: Uuid,
        /// Timestamp for ordering
        timestamp: Hlc,
    },
    /// Update a document (remove then add atomically).
    ///
    /// Carries only the document ID plus replacement chunks and embeddings;
    /// no new `Document` value is part of this variant.
    UpdateDocument {
        /// ID of document to update
        document_id: Uuid,
        /// New chunks
        new_chunks: Vec<Chunk>,
        /// New embeddings
        new_embeddings: Vec<Embedding>,
        /// Timestamp for ordering
        timestamp: Hlc,
    },
}
256
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixed HLC for tests that do not care about the timestamp value.
    fn test_hlc() -> Hlc {
        Hlc::new(1000, [1u8; 16])
    }

    /// Empty diff on a zero root from device `[1; 16]` with sequence 0,
    /// stamped with the given HLC.
    fn blank_diff(hlc: Hlc) -> CognitiveDiff {
        let device = Uuid::from_bytes([1u8; 16]);
        CognitiveDiff::empty([0u8; 32], device, 0, hlc)
    }

    /// A freshly created diff reports no changes.
    #[test]
    fn test_empty_diff() {
        let diff = blank_diff(test_hlc());
        assert!(diff.is_empty());
        assert_eq!(diff.change_count(), 0);
    }

    /// Adding one document makes the diff non-empty with a count of one.
    #[test]
    fn test_diff_with_changes() {
        let mut diff = blank_diff(test_hlc());

        let doc = Document::new(std::path::PathBuf::from("test.md"), b"content", 0);
        diff.added_docs.push(doc);

        assert!(!diff.is_empty());
        assert_eq!(diff.change_count(), 1);
    }

    /// The HLC passed to `empty` is stored in the metadata unchanged.
    #[test]
    fn test_diff_has_hlc() {
        let hlc = Hlc::new(12345, [7u8; 16]);
        let diff = blank_diff(hlc.clone());
        assert_eq!(diff.metadata.hlc, hlc);
    }

    /// Removed edges keep their kind and count as a change.
    #[test]
    fn test_diff_removed_edges_has_kind() {
        let mut diff = blank_diff(test_hlc());

        let source = Uuid::from_bytes([2u8; 16]);
        let target = Uuid::from_bytes([3u8; 16]);
        diff.removed_edges.push((source, target, EdgeKind::DocToChunk));

        assert_eq!(diff.change_count(), 1);
        assert_eq!(diff.removed_edges[0].2, EdgeKind::DocToChunk);
    }
}