Skip to main content

canon_core/
proof.rs

1//! Proof receipts for Canon Protocol
2//!
3//! Generates and verifies cryptographic proof receipts that demonstrate
4//! exactly what context an AI had access to at a given point in time.
5
6use ed25519_dalek::{Signature, Verifier, VerifyingKey};
7use serde::{Deserialize, Serialize};
8
9use crate::state::{compute_merkle_root, generate_merkle_proof, verify_merkle_proof};
10
11// ============================================================================
12// Hex serde helpers for byte arrays
13// ============================================================================
14
15mod hex_bytes {
16    use serde::{Deserialize, Deserializer, Serializer};
17
18    pub mod b16 {
19        use super::{Deserialize, Deserializer, Serializer};
20        pub fn serialize<S: Serializer>(bytes: &[u8; 16], s: S) -> Result<S::Ok, S::Error> {
21            use std::fmt::Write;
22            let mut hex = String::with_capacity(32);
23            for b in bytes {
24                let _ = write!(hex, "{b:02x}");
25            }
26            s.serialize_str(&hex)
27        }
28        pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<[u8; 16], D::Error> {
29            let s = String::deserialize(d)?;
30            let bytes = (0..s.len())
31                .step_by(2)
32                .map(|i| u8::from_str_radix(&s[i..i + 2], 16).map_err(serde::de::Error::custom))
33                .collect::<Result<Vec<u8>, _>>()?;
34            bytes
35                .try_into()
36                .map_err(|_| serde::de::Error::custom("expected 16 bytes"))
37        }
38    }
39
40    pub mod b32 {
41        use super::{Deserialize, Deserializer, Serializer};
42        pub fn serialize<S: Serializer>(bytes: &[u8; 32], s: S) -> Result<S::Ok, S::Error> {
43            use std::fmt::Write;
44            let mut hex = String::with_capacity(64);
45            for b in bytes {
46                let _ = write!(hex, "{b:02x}");
47            }
48            s.serialize_str(&hex)
49        }
50        pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<[u8; 32], D::Error> {
51            let s = String::deserialize(d)?;
52            let bytes = (0..s.len())
53                .step_by(2)
54                .map(|i| u8::from_str_radix(&s[i..i + 2], 16).map_err(serde::de::Error::custom))
55                .collect::<Result<Vec<u8>, _>>()?;
56            bytes
57                .try_into()
58                .map_err(|_| serde::de::Error::custom("expected 32 bytes"))
59        }
60    }
61
62    pub mod b64 {
63        use super::{Deserialize, Deserializer, Serializer};
64        pub fn serialize<S: Serializer>(bytes: &[u8; 64], s: S) -> Result<S::Ok, S::Error> {
65            use std::fmt::Write;
66            let mut hex = String::with_capacity(128);
67            for b in bytes {
68                let _ = write!(hex, "{b:02x}");
69            }
70            s.serialize_str(&hex)
71        }
72        pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<[u8; 64], D::Error> {
73            let s = String::deserialize(d)?;
74            let bytes = (0..s.len())
75                .step_by(2)
76                .map(|i| u8::from_str_radix(&s[i..i + 2], 16).map_err(serde::de::Error::custom))
77                .collect::<Result<Vec<u8>, _>>()?;
78            bytes
79                .try_into()
80                .map_err(|_| serde::de::Error::custom("expected 64 bytes"))
81        }
82    }
83}
84
/// Render `bytes` as lowercase hexadecimal, two digits per input byte.
fn hex_encode(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        out.push(char::from(DIGITS[usize::from(byte >> 4)]));
        out.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    out
}
93
94// ============================================================================
95// Types
96// ============================================================================
97
98/// A single step in a Merkle proof path.
99#[derive(Debug, Clone, Serialize, Deserialize)]
100pub struct MerkleSibling {
101    #[serde(with = "hex_bytes::b32")]
102    pub hash: [u8; 32],
103    pub is_left: bool,
104}
105
106/// Merkle inclusion proof for a single chunk.
107#[derive(Debug, Clone, Serialize, Deserialize)]
108pub struct ChunkProof {
109    #[serde(with = "hex_bytes::b16")]
110    pub chunk_id: [u8; 16],
111    #[serde(with = "hex_bytes::b32")]
112    pub chunk_text_hash: [u8; 32],
113    pub index: usize,
114    pub siblings: Vec<MerkleSibling>,
115}
116
117/// Human-readable source information for a proof receipt.
118#[derive(Debug, Clone, Serialize, Deserialize)]
119pub struct SourceRef {
120    pub document_path: String,
121    #[serde(with = "hex_bytes::b16")]
122    pub chunk_id: [u8; 16],
123    pub chunk_text: String,
124    pub chunk_sequence: u32,
125    pub relevance_score: f32,
126}
127
128/// A complete cryptographic proof receipt.
129///
130/// Contains everything needed to independently verify what context
131/// an AI had access to when it made a decision or generated code.
132#[derive(Debug, Clone, Serialize, Deserialize)]
133pub struct ProofReceipt {
134    /// Schema version (starts at 1)
135    pub version: u32,
136    /// The original query text
137    pub query: String,
138    /// BLAKE3 hash of the query
139    #[serde(with = "hex_bytes::b32")]
140    pub query_hash: [u8; 32],
141    /// ISO 8601 timestamp
142    pub timestamp: String,
143    /// BLAKE3 hash of the assembled context string
144    #[serde(with = "hex_bytes::b32")]
145    pub context_hash: [u8; 32],
146    /// Merkle root of the entire graph state
147    #[serde(with = "hex_bytes::b32")]
148    pub state_root: [u8; 32],
149    /// Merkle root of all chunk text hashes (binary tree)
150    #[serde(with = "hex_bytes::b32")]
151    pub chunk_tree_root: [u8; 32],
152    /// Per-chunk Merkle inclusion proofs
153    pub chunk_proofs: Vec<ChunkProof>,
154    /// Human-readable source references
155    pub sources: Vec<SourceRef>,
156    /// Ed25519 signature over `signing_bytes()`
157    #[serde(with = "hex_bytes::b64")]
158    pub signature: [u8; 64],
159    /// Signer's Ed25519 public key
160    #[serde(with = "hex_bytes::b32")]
161    pub signer_public_key: [u8; 32],
162    /// Signer's device ID
163    #[serde(with = "hex_bytes::b16")]
164    pub device_id: [u8; 16],
165}
166
167impl ProofReceipt {
168    /// Compute deterministic bytes for signing.
169    ///
170    /// Layout: version (4) || `query_hash` (32) || `context_hash` (32) || `state_root` (32) || `chunk_tree_root` (32)
171    /// Total: 132 bytes
172    pub fn signing_bytes(&self) -> Vec<u8> {
173        let mut bytes = Vec::with_capacity(132);
174        bytes.extend_from_slice(&self.version.to_le_bytes());
175        bytes.extend_from_slice(&self.query_hash);
176        bytes.extend_from_slice(&self.context_hash);
177        bytes.extend_from_slice(&self.state_root);
178        bytes.extend_from_slice(&self.chunk_tree_root);
179        bytes
180    }
181
182    /// Verify the Ed25519 signature over `signing_bytes()`.
183    pub fn verify_signature(&self) -> crate::Result<()> {
184        let verifying_key = VerifyingKey::from_bytes(&self.signer_public_key)
185            .map_err(|e| crate::CPError::Verification(format!("Invalid public key: {e}")))?;
186        let signature = Signature::from_bytes(&self.signature);
187        verifying_key
188            .verify(&self.signing_bytes(), &signature)
189            .map_err(|e| {
190                crate::CPError::Verification(format!("Signature verification failed: {e}"))
191            })
192    }
193
194    /// Verify all chunk Merkle proofs against `chunk_tree_root`.
195    pub fn verify_chunk_proofs(&self) -> crate::Result<()> {
196        for proof in &self.chunk_proofs {
197            let siblings: Vec<([u8; 32], bool)> =
198                proof.siblings.iter().map(|s| (s.hash, s.is_left)).collect();
199            if !verify_merkle_proof(
200                &proof.chunk_text_hash,
201                proof.index,
202                &siblings,
203                &self.chunk_tree_root,
204            ) {
205                return Err(crate::CPError::Verification(format!(
206                    "Chunk proof failed for chunk {}",
207                    hex_encode(&proof.chunk_id)
208                )));
209            }
210        }
211        Ok(())
212    }
213
214    /// Verify that `context_hash` matches the BLAKE3 hash of the given context string.
215    pub fn verify_context_hash(&self, context: &str) -> bool {
216        let computed = *blake3::hash(context.as_bytes()).as_bytes();
217        computed == self.context_hash
218    }
219
220    /// Run all verification checks: signature + chunk proofs.
221    pub fn verify_all(&self) -> crate::Result<()> {
222        self.verify_signature()?;
223        self.verify_chunk_proofs()?;
224        Ok(())
225    }
226
227    /// Get a short summary string for display.
228    pub fn summary(&self) -> String {
229        format!(
230            "ProofReceipt v{}: {} chunks from {} sources, state_root={}, signed by {}",
231            self.version,
232            self.chunk_proofs.len(),
233            self.sources.len(),
234            &hex_encode(&self.state_root)[..8],
235            &hex_encode(&self.device_id)[..8],
236        )
237    }
238}
239
240/// Helper to build a `ChunkProof` from sorted chunk hashes.
241///
242/// Given all chunk hashes (sorted by chunk ID) and the target chunk's index,
243/// generates the Merkle inclusion proof.
244pub fn build_chunk_proof(
245    chunk_id: [u8; 16],
246    chunk_text_hash: [u8; 32],
247    index: usize,
248    all_chunk_hashes: &[[u8; 32]],
249) -> ChunkProof {
250    let raw_siblings = generate_merkle_proof(all_chunk_hashes, index);
251    let siblings = raw_siblings
252        .into_iter()
253        .map(|(hash, is_left)| MerkleSibling { hash, is_left })
254        .collect();
255
256    ChunkProof {
257        chunk_id,
258        chunk_text_hash,
259        index,
260        siblings,
261    }
262}
263
264/// Compute the Merkle root of chunk text hashes (binary tree).
265pub fn compute_chunk_tree_root(chunk_hashes: &[[u8; 32]]) -> [u8; 32] {
266    compute_merkle_root(chunk_hashes)
267}
268
269// ============================================================================
270// Session Proof — captures what the AI touched during a turn
271// ============================================================================
272
273/// A user prompt captured from the `UserPromptSubmit` hook.
274#[derive(Debug, Clone, Serialize, Deserialize)]
275pub struct UserPrompt {
276    pub timestamp: String,
277    pub prompt: String,
278}
279
280/// A single tool call event in a session.
281#[derive(Debug, Clone, Serialize, Deserialize)]
282pub struct SessionEvent {
283    pub timestamp: String,
284    pub tool: String,
285    /// "read", "write", "edit", "search", "bash", etc.
286    pub action: String,
287    #[serde(skip_serializing_if = "Option::is_none")]
288    pub file_path: Option<String>,
289    /// What was sent to the tool (command, content written, edit diff, search pattern)
290    #[serde(default, skip_serializing_if = "Option::is_none")]
291    pub input_preview: Option<String>,
292    /// What the tool returned (file content read, command output, search results)
293    #[serde(default, skip_serializing_if = "Option::is_none")]
294    pub output_preview: Option<String>,
295}
296
297/// A snapshot of a file the AI touched, with content hash.
298#[derive(Debug, Clone, Serialize, Deserialize)]
299pub struct FileSnapshot {
300    pub path: String,
301    /// BLAKE3 hash of file content at the time
302    #[serde(with = "hex_bytes::b32")]
303    pub content_hash: [u8; 32],
304    /// "read" or "write"
305    pub action: String,
306    pub size_bytes: u64,
307    /// First ~500 chars of the file for human readability
308    pub snippet: String,
309}
310
311/// Semantic search result captured in a proof — what the substrate returned for a query.
312#[derive(Debug, Clone, Serialize, Deserialize)]
313pub struct SemanticSearchResult {
314    /// The query that was run against the substrate
315    pub query: String,
316    /// Hash of the query
317    #[serde(with = "hex_bytes::b32")]
318    pub query_hash: [u8; 32],
319    /// State root of the substrate at search time
320    #[serde(with = "hex_bytes::b32")]
321    pub state_root: [u8; 32],
322    /// Substrate stats at search time
323    pub substrate_docs: usize,
324    pub substrate_chunks: usize,
325    pub substrate_embeddings: usize,
326    /// Top-k results with scores and content
327    pub results: Vec<SemanticHit>,
328}
329
330/// A single hit from semantic search — a ranked chunk with its score and content.
331#[derive(Debug, Clone, Serialize, Deserialize)]
332pub struct SemanticHit {
333    /// Source document path
334    pub doc_path: String,
335    /// Chunk sequence number within the document
336    pub chunk_sequence: u32,
337    /// Hybrid relevance score (0.0-1.0) — how relevant the substrate ranked this chunk
338    pub relevance_score: f32,
339    /// Full chunk text — exactly what the AI would see as context
340    pub chunk_text: String,
341    /// BLAKE3 hash of the chunk text
342    #[serde(with = "hex_bytes::b32")]
343    pub chunk_hash: [u8; 32],
344}
345
346/// A session proof captures everything the AI touched in a single turn.
347///
348/// Unlike a search `ProofReceipt` (which proves what search results the AI got),
349/// a `SessionProof` proves which files the AI read and wrote during a response,
350/// what the user asked, and what the semantic substrate contained.
351///
352/// This is the thing git CAN'T do — git tracks diffs, Canon tracks cognition:
353/// what the AI's understanding of the codebase looked like at decision time.
354#[derive(Debug, Clone, Serialize, Deserialize)]
355pub struct SessionProof {
356    pub version: u32,
357    pub session_id: String,
358    pub timestamp: String,
359    /// What the user asked the AI
360    #[serde(default, skip_serializing_if = "Vec::is_empty")]
361    pub user_prompts: Vec<UserPrompt>,
362    /// Semantic search results for each user prompt — the AI's "knowledge state"
363    #[serde(default, skip_serializing_if = "Vec::is_empty")]
364    pub semantic_context: Vec<SemanticSearchResult>,
365    /// Tool call events in chronological order (with input/output previews)
366    pub events: Vec<SessionEvent>,
367    /// Files the AI read (with content hashes)
368    pub files_read: Vec<FileSnapshot>,
369    /// Files the AI wrote or edited (with content hashes)
370    pub files_written: Vec<FileSnapshot>,
371    /// Merkle root of all file hashes (reads + writes, sorted by path)
372    #[serde(with = "hex_bytes::b32")]
373    pub files_root: [u8; 32],
374    /// Ed25519 signature
375    #[serde(with = "hex_bytes::b64")]
376    pub signature: [u8; 64],
377    /// Signer's public key
378    #[serde(with = "hex_bytes::b32")]
379    pub signer_public_key: [u8; 32],
380    /// Signer's device ID
381    #[serde(with = "hex_bytes::b16")]
382    pub device_id: [u8; 16],
383}
384
385impl SessionProof {
386    /// Compute deterministic bytes for signing.
387    ///
388    /// Layout: version (4) || `session_id_hash` (32) || `prompts_hash` (32) || `semantic_hash` (32) || `files_root` (32)
389    /// Total: 164 bytes
390    pub fn signing_bytes(&self) -> Vec<u8> {
391        let mut bytes = Vec::with_capacity(164);
392        bytes.extend_from_slice(&self.version.to_le_bytes());
393        bytes.extend_from_slice(blake3::hash(self.session_id.as_bytes()).as_bytes());
394        // Hash all user prompts so they can't be tampered
395        let mut prompts_hasher = blake3::Hasher::new();
396        for p in &self.user_prompts {
397            prompts_hasher.update(p.prompt.as_bytes());
398            prompts_hasher.update(&[0xFF]);
399        }
400        bytes.extend_from_slice(prompts_hasher.finalize().as_bytes());
401        // Hash all semantic search results — the AI's knowledge state can't be tampered
402        let mut semantic_hasher = blake3::Hasher::new();
403        for sc in &self.semantic_context {
404            semantic_hasher.update(&sc.query_hash);
405            semantic_hasher.update(&sc.state_root);
406            for hit in &sc.results {
407                semantic_hasher.update(&hit.chunk_hash);
408                semantic_hasher.update(&hit.relevance_score.to_le_bytes());
409            }
410            semantic_hasher.update(&[0xFF]);
411        }
412        bytes.extend_from_slice(semantic_hasher.finalize().as_bytes());
413        bytes.extend_from_slice(&self.files_root);
414        bytes
415    }
416
417    /// Verify the Ed25519 signature.
418    pub fn verify_signature(&self) -> crate::Result<()> {
419        let verifying_key = VerifyingKey::from_bytes(&self.signer_public_key)
420            .map_err(|e| crate::CPError::Verification(format!("Invalid public key: {e}")))?;
421        let signature = Signature::from_bytes(&self.signature);
422        verifying_key
423            .verify(&self.signing_bytes(), &signature)
424            .map_err(|e| {
425                crate::CPError::Verification(format!("Signature verification failed: {e}"))
426            })
427    }
428
429    /// Verify the files Merkle root matches the file snapshots.
430    pub fn verify_files_root(&self) -> crate::Result<()> {
431        let mut all_hashes: Vec<[u8; 32]> = self
432            .files_read
433            .iter()
434            .chain(self.files_written.iter())
435            .map(|f| f.content_hash)
436            .collect();
437        all_hashes.sort_unstable();
438
439        let computed = if all_hashes.is_empty() {
440            *blake3::hash(b"empty").as_bytes()
441        } else {
442            compute_merkle_root(&all_hashes)
443        };
444
445        if computed != self.files_root {
446            return Err(crate::CPError::Verification(
447                "Files root mismatch".to_string(),
448            ));
449        }
450        Ok(())
451    }
452
453    /// Verify everything: signature + files root.
454    pub fn verify_all(&self) -> crate::Result<()> {
455        self.verify_signature()?;
456        self.verify_files_root()?;
457        Ok(())
458    }
459}
460
461/// Compute the files Merkle root from file snapshots.
462pub fn compute_files_root(files: &[FileSnapshot]) -> [u8; 32] {
463    let mut hashes: Vec<[u8; 32]> = files.iter().map(|f| f.content_hash).collect();
464    hashes.sort_unstable();
465    if hashes.is_empty() {
466        *blake3::hash(b"empty").as_bytes()
467    } else {
468        compute_merkle_root(&hashes)
469    }
470}
471
#[cfg(test)]
mod tests {
    use super::*;
    use ed25519_dalek::{Signer, SigningKey};

    /// Leaf hashes shared by the fixtures: arrays filled with 1, 2, 3 and 4.
    const LEAVES: [[u8; 32]; 4] = [[1u8; 32], [2u8; 32], [3u8; 32], [4u8; 32]];

    /// Build a fully signed receipt from fixed, known values.
    fn make_test_receipt() -> ProofReceipt {
        let query = "test query";
        let context = "test context";

        // Deterministic Ed25519 key pair derived from a constant seed.
        let signing_key = SigningKey::from_bytes(&[42u8; 32]);
        let signer_public_key = signing_key.verifying_key().to_bytes();

        // Device ID: first 16 bytes of the BLAKE3 hash of the public key.
        let mut device_id = [0u8; 16];
        device_id.copy_from_slice(&blake3::hash(&signer_public_key).as_bytes()[..16]);

        // Inclusion proofs for the first two leaves only.
        let chunk_proofs = vec![
            build_chunk_proof([0u8; 16], LEAVES[0], 0, &LEAVES),
            build_chunk_proof([1u8; 16], LEAVES[1], 1, &LEAVES),
        ];

        let unsigned = ProofReceipt {
            version: 1,
            query: query.to_string(),
            query_hash: *blake3::hash(query.as_bytes()).as_bytes(),
            timestamp: "2026-02-19T12:00:00Z".to_string(),
            context_hash: *blake3::hash(context.as_bytes()).as_bytes(),
            state_root: [99u8; 32],
            chunk_tree_root: compute_chunk_tree_root(&LEAVES),
            chunk_proofs,
            sources: vec![SourceRef {
                document_path: "test.rs".to_string(),
                chunk_id: [0u8; 16],
                chunk_text: "test chunk text".to_string(),
                chunk_sequence: 0,
                relevance_score: 0.95,
            }],
            // Placeholder; the signature is not part of the signed bytes.
            signature: [0u8; 64],
            signer_public_key,
            device_id,
        };

        let signature = signing_key.sign(&unsigned.signing_bytes()).to_bytes();
        ProofReceipt {
            signature,
            ..unsigned
        }
    }

    #[test]
    fn test_signing_bytes_deterministic() {
        let receipt = make_test_receipt();
        let first = receipt.signing_bytes();
        let second = receipt.signing_bytes();
        assert_eq!(first, second);
        // version(4) + query_hash(32) + context_hash(32) + state_root(32) + chunk_tree_root(32)
        assert_eq!(first.len(), 132);
    }

    #[test]
    fn test_verify_signature_valid() {
        assert!(make_test_receipt().verify_signature().is_ok());
    }

    #[test]
    fn test_verify_signature_tampered() {
        let mut receipt = make_test_receipt();
        // Flipping bits in a signed field must invalidate the signature.
        receipt.state_root[0] ^= 0xFF;
        assert!(receipt.verify_signature().is_err());
    }

    #[test]
    fn test_verify_chunk_proofs_valid() {
        assert!(make_test_receipt().verify_chunk_proofs().is_ok());
    }

    #[test]
    fn test_verify_chunk_proofs_tampered() {
        let mut receipt = make_test_receipt();
        // A modified leaf hash no longer hashes up to the stored root.
        receipt.chunk_proofs[0].chunk_text_hash[0] ^= 0xFF;
        assert!(receipt.verify_chunk_proofs().is_err());
    }

    #[test]
    fn test_verify_context_hash() {
        let receipt = make_test_receipt();
        assert!(receipt.verify_context_hash("test context"));
        assert!(!receipt.verify_context_hash("wrong context"));
    }

    #[test]
    fn test_verify_all() {
        assert!(make_test_receipt().verify_all().is_ok());
    }

    #[test]
    fn test_serde_roundtrip() {
        let original = make_test_receipt();
        let json = serde_json::to_string_pretty(&original).unwrap();
        let restored: ProofReceipt = serde_json::from_str(&json).unwrap();

        assert_eq!(original.version, restored.version);
        assert_eq!(original.query, restored.query);
        assert_eq!(original.query_hash, restored.query_hash);
        assert_eq!(original.state_root, restored.state_root);
        assert_eq!(original.chunk_tree_root, restored.chunk_tree_root);
        assert_eq!(original.signature, restored.signature);
        assert_eq!(original.signer_public_key, restored.signer_public_key);
        assert_eq!(original.device_id, restored.device_id);

        // The deserialized receipt must still pass every check.
        assert!(restored.verify_all().is_ok());
    }

    #[test]
    fn test_build_chunk_proof() {
        let root = compute_chunk_tree_root(&LEAVES);

        for (i, leaf) in LEAVES.iter().enumerate() {
            let proof = build_chunk_proof([i as u8; 16], *leaf, i, &LEAVES);
            let path: Vec<([u8; 32], bool)> = proof
                .siblings
                .iter()
                .map(|step| (step.hash, step.is_left))
                .collect();
            assert!(verify_merkle_proof(leaf, i, &path, &root));
        }
    }

    #[test]
    fn test_summary() {
        let text = make_test_receipt().summary();
        assert!(text.contains("v1"));
        assert!(text.contains("2 chunks"));
    }
}