Skip to main content

canon_core/
diff.rs

//! Cognitive diff representing the changes between two state roots.
//!
//! Per CP-001 §2.7: A diff captures all changes between two state roots
//! and is the fundamental unit of synchronization.
5
6use serde::{Deserialize, Serialize};
7use uuid::Uuid;
8
9use crate::hlc::Hlc;
10use crate::{Chunk, Document, Edge, EdgeKind, Embedding};
11
12/// Metadata about a cognitive diff
13#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
14pub struct DiffMetadata {
15    /// Hash of the previous state root
16    pub prev_root: [u8; 32],
17
18    /// Hash of the new state root after applying this diff
19    pub new_root: [u8; 32],
20
21    /// HLC timestamp when this diff was created
22    pub hlc: Hlc,
23
24    /// Device ID that produced this diff
25    pub device_id: Uuid,
26
27    /// Sequence number for ordering
28    pub seq: u64,
29}
30
31/// A cognitive diff containing all changes between two state roots.
32///
33/// Per CP-001 §2.7: devices exchange diffs rather than full state
34/// to minimize bandwidth and computation.
35///
36/// Note: there is no `updated_docs` — updates are modeled as
37/// remove + add (per spec pattern).
38#[derive(Debug, Clone, Serialize, Deserialize)]
39pub struct CognitiveDiff {
40    /// Documents added in this diff
41    pub added_docs: Vec<Document>,
42
43    /// IDs of documents removed
44    pub removed_doc_ids: Vec<Uuid>,
45
46    /// Chunks added
47    pub added_chunks: Vec<Chunk>,
48
49    /// IDs of chunks removed
50    pub removed_chunk_ids: Vec<Uuid>,
51
52    /// Embeddings added
53    pub added_embeddings: Vec<Embedding>,
54
55    /// IDs of embeddings removed
56    pub removed_embedding_ids: Vec<Uuid>,
57
58    /// Edges added
59    pub added_edges: Vec<Edge>,
60
61    /// Edges removed — identified by (source, target, kind) triple
62    pub removed_edges: Vec<(Uuid, Uuid, EdgeKind)>,
63
64    /// Diff metadata
65    pub metadata: DiffMetadata,
66}
67
68impl CognitiveDiff {
69    /// Create an empty diff (no changes)
70    pub fn empty(prev_root: [u8; 32], device_id: Uuid, seq: u64, hlc: Hlc) -> Self {
71        Self {
72            added_docs: Vec::new(),
73            removed_doc_ids: Vec::new(),
74            added_chunks: Vec::new(),
75            removed_chunk_ids: Vec::new(),
76            added_embeddings: Vec::new(),
77            removed_embedding_ids: Vec::new(),
78            added_edges: Vec::new(),
79            removed_edges: Vec::new(),
80            metadata: DiffMetadata {
81                prev_root,
82                new_root: [0u8; 32], // Computed after application
83                hlc,
84                device_id,
85                seq,
86            },
87        }
88    }
89
90    /// Check if the diff is empty (no changes)
91    pub fn is_empty(&self) -> bool {
92        self.added_docs.is_empty()
93            && self.removed_doc_ids.is_empty()
94            && self.added_chunks.is_empty()
95            && self.removed_chunk_ids.is_empty()
96            && self.added_embeddings.is_empty()
97            && self.removed_embedding_ids.is_empty()
98            && self.added_edges.is_empty()
99            && self.removed_edges.is_empty()
100    }
101
102    /// Count total number of changes in this diff
103    pub fn change_count(&self) -> usize {
104        self.added_docs.len()
105            + self.removed_doc_ids.len()
106            + self.added_chunks.len()
107            + self.removed_chunk_ids.len()
108            + self.added_embeddings.len()
109            + self.removed_embedding_ids.len()
110            + self.added_edges.len()
111            + self.removed_edges.len()
112    }
113
114    /// Estimate serialized size in bytes
115    pub fn estimated_size(&self) -> usize {
116        const DOC_SIZE: usize = 200;
117        const CHUNK_SIZE: usize = 1000;
118        const EMBEDDING_SIZE: usize = 3072; // 1536 dims * 2 bytes
119        const EDGE_SIZE: usize = 50;
120        const ID_SIZE: usize = 16;
121
122        self.added_docs.len() * DOC_SIZE
123            + self.removed_doc_ids.len() * ID_SIZE
124            + self.added_chunks.len() * CHUNK_SIZE
125            + self.removed_chunk_ids.len() * ID_SIZE
126            + self.added_embeddings.len() * EMBEDDING_SIZE
127            + self.removed_embedding_ids.len() * ID_SIZE
128            + self.added_edges.len() * EDGE_SIZE
129            + self.removed_edges.len() * (ID_SIZE * 2 + 1)
130            + 200 // metadata overhead
131    }
132}
133
134impl PartialEq for CognitiveDiff {
135    fn eq(&self, other: &Self) -> bool {
136        self.metadata == other.metadata
137    }
138}
139
140impl Eq for CognitiveDiff {}
141
142/// Input for embedding computation (for trace verification)
143#[derive(Debug, Clone, Serialize, Deserialize)]
144pub struct EmbeddingInput {
145    /// Hash of the text that was embedded
146    pub text_hash: [u8; 32],
147    /// Hash of the model manifest used
148    pub model_hash: [u8; 32],
149}
150
151/// Execution trace for verified execution per CP-003 §10.
152///
153/// This structure enables devices to verify each other's operations
154/// by providing a complete record of the state transition.
155#[derive(Debug, Clone, Serialize, Deserialize)]
156pub struct ExecutionTrace {
157    /// The operation that was executed
158    pub operation: Operation,
159    /// Hash of the state before this operation
160    pub prev_state_root: [u8; 32],
161    /// Hash of the state after this operation
162    pub new_state_root: [u8; 32],
163
164    /// Subtree root for documents
165    pub document_subtree_root: [u8; 32],
166    /// Subtree root for chunks
167    pub chunk_subtree_root: [u8; 32],
168    /// Subtree root for embeddings
169    pub embedding_subtree_root: [u8; 32],
170    /// Subtree root for edges
171    pub edge_subtree_root: [u8; 32],
172
173    /// Embedding computation inputs (for verification)
174    pub embedding_inputs: Vec<EmbeddingInput>,
175    /// Embedding computation outputs (hashes of embeddings)
176    pub embedding_outputs: Vec<[u8; 32]>,
177
178    /// Timestamp when operation was executed
179    pub timestamp: Hlc,
180    /// Device that executed the operation
181    pub device_id: Uuid,
182    /// Signature of this trace (for non-repudiation)
183    pub signature: Vec<u8>,
184}
185
186/// Operation types for deterministic state transitions
187/// Per CP-003 §9.1
188#[derive(Debug, Clone, Serialize, Deserialize)]
189pub enum Operation {
190    /// Add a document with its chunks and embeddings
191    AddDocument {
192        /// The document to add
193        document: Document,
194        /// Associated chunks
195        chunks: Vec<Chunk>,
196        /// Associated embeddings
197        embeddings: Vec<Embedding>,
198        /// Timestamp for ordering
199        timestamp: Hlc,
200    },
201    /// Remove a document (cascades to chunks/embeddings)
202    RemoveDocument {
203        /// ID of document to remove
204        document_id: Uuid,
205        /// Timestamp for ordering
206        timestamp: Hlc,
207    },
208    /// Update a document (remove then add atomically)
209    UpdateDocument {
210        /// ID of document to update
211        document_id: Uuid,
212        /// New chunks
213        new_chunks: Vec<Chunk>,
214        /// New embeddings
215        new_embeddings: Vec<Embedding>,
216        /// Timestamp for ordering
217        timestamp: Hlc,
218    },
219}
220
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixed HLC value for tests that do not care about the exact timestamp.
    fn test_hlc() -> Hlc {
        Hlc::new(1000, [1u8; 16])
    }

    /// Empty diff on an all-zero previous root, from device `[1; 16]`, with
    /// sequence number 0 and the given timestamp.
    fn blank_diff(hlc: Hlc) -> CognitiveDiff {
        let device = Uuid::from_bytes([1u8; 16]);
        CognitiveDiff::empty([0u8; 32], device, 0, hlc)
    }

    /// A freshly built empty diff reports no changes.
    #[test]
    fn test_empty_diff() {
        let diff = blank_diff(test_hlc());

        assert!(diff.is_empty());
        assert_eq!(diff.change_count(), 0);
    }

    /// Adding one document makes the diff non-empty with exactly one change.
    #[test]
    fn test_diff_with_changes() {
        let mut diff = blank_diff(test_hlc());

        let path = std::path::PathBuf::from("test.md");
        let doc = Document::new(path, b"content", 0);
        diff.added_docs.push(doc);

        assert!(!diff.is_empty());
        assert_eq!(diff.change_count(), 1);
    }

    /// The timestamp handed to `empty` ends up in the metadata unchanged.
    #[test]
    fn test_diff_has_hlc() {
        let hlc = Hlc::new(12345, [7u8; 16]);
        let diff = blank_diff(hlc.clone());

        assert_eq!(diff.metadata.hlc, hlc);
    }

    /// A removed edge counts as one change and keeps its edge kind.
    #[test]
    fn test_diff_removed_edges_has_kind() {
        let mut diff = blank_diff(test_hlc());

        let source = Uuid::from_bytes([2u8; 16]);
        let target = Uuid::from_bytes([3u8; 16]);
        diff.removed_edges.push((source, target, EdgeKind::DocToChunk));

        assert_eq!(diff.change_count(), 1);
        let (_, _, kind) = &diff.removed_edges[0];
        assert_eq!(*kind, EdgeKind::DocToChunk);
    }
}