// mnemo_core/provenance.rs

1//! Per-read memory-provenance signing (v0.4.0-rc3 Task B1).
2//!
3//! v0.3.x signs **writes** — every `MemoryRecord` carries a SHA-256
4//! `content_hash` chained via `prev_hash`, and the audit log export
5//! signs the chain with Ed25519. v0.4.0-rc3 adds the equivalent for
6//! **reads**: every `engine.recall(..., with_provenance=true)` returns
7//! a [`ReadProvenance`] HMAC that proves which writes the recall
8//! derives from. A clinician auditing an LLM response can verify
9//! offline that the cited memories really were the ones the model saw.
10//!
11//! # Threat model
12//!
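//! 1. **Source-record tamper.** An attacker mutates a `MemoryRecord`
//!    in storage between the recall and the audit. Detected because
//!    `verify_read_provenance` compares each record's stored
//!    `content_hash` to the [`RecordRef`] in the provenance. The hash
//!    is compared, not recomputed from the record body, so a change
//!    that leaves the stored `content_hash` untouched must be caught
//!    by a separate content-hash check.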
//! 2. **HMAC tamper.** An attacker fabricates a provenance receipt
//!    pointing at innocuous records. Detected because the HMAC binds
//!    the receipt's `read_id || query_hash || derived_from` to a
//!    server-side secret the attacker doesn't have. The receipt's
//!    `agent_id` and `ts` are not part of the HMAC input.
21//! 3. **Key rotation.** The receipt's `hmac_key_id` lets the verifier
22//!    look up the historical key for a past read, so rotating the
23//!    signing key doesn't break old audits.
24//!
25//! Out of scope: full non-repudiation (would need Ed25519 — HMAC is
26//! cheaper but only verifiable by parties with the key). For
27//! externally-auditable receipts, pair the provenance with the
28//! existing `mnemo-compliance` Ed25519-signed audit log export.
29
30use chrono::{DateTime, Utc};
31use hmac::{Hmac, KeyInit, Mac};
32use serde::{Deserialize, Serialize};
33use sha2::{Digest, Sha256};
34use thiserror::Error;
35use uuid::Uuid;
36
37use crate::model::memory::MemoryRecord;
38
39type HmacSha256 = Hmac<Sha256>;
40
41/// One source record cited by a [`ReadProvenance`].
42///
43/// The `content_hash` and `prev_hash` mirror what's on the record
44/// itself, so the verifier can re-walk the per-record chain without
45/// needing to fetch from storage.
46#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
47pub struct RecordRef {
48    pub id: Uuid,
49    /// SHA-256 of the record's `content_hash` field. Stored as 32 raw
50    /// bytes; serialise as base64 / hex at the wire boundary (we
51    /// serialise as a regular `Vec<u8>` here to avoid pulling
52    /// `serde_bytes` — the wire format isn't pinned to a specific
53    /// encoding).
54    pub content_hash: Vec<u8>,
55    /// `prev_hash` from the same record. `None` for the first record
56    /// in an agent's chain.
57    pub prev_hash: Option<Vec<u8>>,
58}
59
60/// Cryptographic receipt that an `engine.recall` call returned the
61/// listed memories.
62///
63/// Carry this alongside any audit-log export. Verifiers call
64/// [`verify_read_provenance`] with the records they have access to.
65#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
66pub struct ReadProvenance {
67    pub read_id: Uuid,
68    pub agent_id: String,
69    /// SHA-256 of the recall query string. Hashing rather than storing
70    /// the raw query keeps PII off the wire when a downstream system
71    /// only needs to verify the chain.
72    pub query_hash: Vec<u8>,
73    /// Records the recall derived from, in retrieval-rank order.
74    pub derived_from: Vec<RecordRef>,
75    /// HMAC-SHA256 over `read_id || query_hash || concat(derived_from)`.
76    pub hmac: Vec<u8>,
77    /// Identifier of the HMAC key used. Lets verifiers look up the
78    /// right historical key after rotation.
79    pub hmac_key_id: String,
80    pub ts: DateTime<Utc>,
81}
82
83#[derive(Debug, Error)]
84pub enum ProvenanceError {
85    #[error("HMAC mismatch — receipt was tampered or wrong key")]
86    HmacMismatch,
87    #[error(
88        "record {id} content_hash mismatch — source record was modified after provenance was signed"
89    )]
90    RecordContentHashMismatch { id: Uuid },
91    #[error("missing record {id} — verifier wasn't given the source record cited by the receipt")]
92    MissingRecord { id: Uuid },
93    #[error("unknown HMAC key id {key_id} — verifier doesn't have this key in its keystore")]
94    UnknownKey { key_id: String },
95    #[error("HMAC engine init failed: {0}")]
96    HmacInit(String),
97    #[error("query hash mismatch")]
98    QueryHashMismatch,
99}
100
/// In-process HMAC-SHA256 signer for the recall hot path.
///
/// Holds exactly one `(key_id, key)` pair. Rotation is the caller's
/// responsibility: rotate on whatever cadence your security posture
/// demands and keep the historical pairs reachable by the verifier. To
/// serve several keys (active + historical), put several
/// `ProvenanceSigner`s behind a `ProvenanceKeystore` implementation (see
/// [`crate::encryption::ContentEncryption`] for the equivalent pattern on
/// the at-rest side).
///
/// `Debug` is implemented by hand so the key bytes are never rendered:
/// a derived impl would print the raw secret into any log line or panic
/// message that formats the signer with `{:?}`.
#[derive(Clone)]
pub struct ProvenanceSigner {
    /// Stable, loggable identifier of the key.
    key_id: String,
    /// Raw HMAC key material. Secret — must never be logged.
    key: Vec<u8>,
}

impl std::fmt::Debug for ProvenanceSigner {
    /// Render the signer with the key id visible and the key redacted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ProvenanceSigner")
            .field("key_id", &self.key_id)
            // Only the length is shown; the bytes themselves stay out of
            // the output.
            .field(
                "key",
                &format_args!("<redacted; {} bytes>", self.key.len()),
            )
            .finish()
    }
}
115
116impl ProvenanceSigner {
117    /// Construct from a 32-byte key + a stable identifier.
118    ///
119    /// Operators should set the key from secure storage (Vault,
120    /// AWS KMS, etc.) and choose an id like `"mnemo-prov-2026-04"`
121    /// that survives logging.
122    pub fn new(key_id: impl Into<String>, key: &[u8]) -> Self {
123        Self {
124            key_id: key_id.into(),
125            key: key.to_vec(),
126        }
127    }
128
129    pub fn key_id(&self) -> &str {
130        &self.key_id
131    }
132
133    /// Build a signed [`ReadProvenance`] for one recall.
134    pub fn sign(
135        &self,
136        agent_id: impl Into<String>,
137        query: &str,
138        records: &[MemoryRecord],
139    ) -> Result<ReadProvenance, ProvenanceError> {
140        let read_id = Uuid::now_v7();
141        let query_hash = sha256(query.as_bytes());
142        let derived_from: Vec<RecordRef> = records
143            .iter()
144            .map(|r| RecordRef {
145                id: r.id,
146                content_hash: r.content_hash.clone(),
147                prev_hash: r.prev_hash.clone(),
148            })
149            .collect();
150        let hmac = self.compute_hmac(&read_id, &query_hash, &derived_from)?;
151        Ok(ReadProvenance {
152            read_id,
153            agent_id: agent_id.into(),
154            query_hash,
155            derived_from,
156            hmac,
157            hmac_key_id: self.key_id.clone(),
158            ts: Utc::now(),
159        })
160    }
161
162    fn compute_hmac(
163        &self,
164        read_id: &Uuid,
165        query_hash: &[u8],
166        derived_from: &[RecordRef],
167    ) -> Result<Vec<u8>, ProvenanceError> {
168        let mut mac = <HmacSha256 as KeyInit>::new_from_slice(&self.key)
169            .map_err(|e: hmac::digest::InvalidLength| ProvenanceError::HmacInit(e.to_string()))?;
170        mac.update(read_id.as_bytes());
171        mac.update(query_hash);
172        for r in derived_from {
173            mac.update(r.id.as_bytes());
174            mac.update(&r.content_hash);
175            if let Some(prev) = &r.prev_hash {
176                mac.update(prev);
177            }
178        }
179        Ok(mac.finalize().into_bytes().to_vec())
180    }
181}
182
183/// Verify a [`ReadProvenance`] receipt against the source records.
184///
185/// `records` must contain every record cited by `provenance.derived_from`
186/// (the verifier looks them up by id). Order is irrelevant — they're
187/// matched by id.
188pub fn verify_read_provenance(
189    provenance: &ReadProvenance,
190    records: &[MemoryRecord],
191    keystore: &dyn ProvenanceKeystore,
192) -> Result<(), ProvenanceError> {
193    // Look up the historical key by id so rotated keys don't invalidate
194    // old audits.
195    let signer =
196        keystore
197            .lookup(&provenance.hmac_key_id)
198            .ok_or_else(|| ProvenanceError::UnknownKey {
199                key_id: provenance.hmac_key_id.clone(),
200            })?;
201
202    // Walk derived_from against actual records; recompute each
203    // content_hash to detect post-recall tampering.
204    for r in &provenance.derived_from {
205        let actual = records
206            .iter()
207            .find(|m| m.id == r.id)
208            .ok_or(ProvenanceError::MissingRecord { id: r.id })?;
209        if actual.content_hash != r.content_hash {
210            return Err(ProvenanceError::RecordContentHashMismatch { id: r.id });
211        }
212    }
213
214    // Recompute HMAC and compare in constant time.
215    let expected = signer.compute_hmac(
216        &provenance.read_id,
217        &provenance.query_hash,
218        &provenance.derived_from,
219    )?;
220    if !constant_time_eq(&expected, &provenance.hmac) {
221        return Err(ProvenanceError::HmacMismatch);
222    }
223    Ok(())
224}
225
226/// Pluggable keystore for verifiers — supports at-least one historical key.
227pub trait ProvenanceKeystore: Send + Sync {
228    fn lookup(&self, key_id: &str) -> Option<&ProvenanceSigner>;
229}
230
231/// Single-key implementation for the common case.
232impl ProvenanceKeystore for ProvenanceSigner {
233    fn lookup(&self, key_id: &str) -> Option<&ProvenanceSigner> {
234        if key_id == self.key_id {
235            Some(self)
236        } else {
237            None
238        }
239    }
240}
241
242fn sha256(bytes: &[u8]) -> Vec<u8> {
243    let mut h = Sha256::new();
244    h.update(bytes);
245    h.finalize().to_vec()
246}
247
/// Compare two byte slices without stopping at the first differing byte.
///
/// Slices of different length compare unequal immediately. For
/// equal-length input every byte pair is XOR-ed and the results OR-ed
/// together, so the loop always visits the full length.
fn constant_time_eq(a: &[u8], b: &[u8]) -> bool {
    if a.len() != b.len() {
        return false;
    }
    let diff = a
        .iter()
        .zip(b)
        .fold(0u8, |seen, (lhs, rhs)| seen | (lhs ^ rhs));
    diff == 0
}
258
#[cfg(test)]
mod tests {
    use super::*;
    use crate::hash::compute_content_hash;
    use crate::model::memory::MemoryRecord;

    /// Fixture: a record with the given id whose `content_hash` is
    /// computed from its content, agent and creation time.
    fn record(id: Uuid, agent: &str, content: &str) -> MemoryRecord {
        let mut rec = MemoryRecord::new(agent.to_string(), content.to_string());
        rec.id = id;
        rec.content_hash = compute_content_hash(content, agent, &rec.created_at);
        rec
    }

    /// Fixture: a signer with a fixed test key.
    fn signer() -> ProvenanceSigner {
        let key = [7u8; 32];
        ProvenanceSigner::new("mnemo-prov-test", &key)
    }

    #[test]
    fn sign_then_verify_round_trips() {
        let key = signer();
        let cited = [
            record(Uuid::now_v7(), "a", "hello"),
            record(Uuid::now_v7(), "a", "world"),
        ];
        let receipt = key.sign("a", "greeting query", &cited).unwrap();
        verify_read_provenance(&receipt, &cited, &key).expect("should verify");
    }

    #[test]
    fn tampering_a_source_record_fails_verification() {
        let key = signer();
        let original = record(Uuid::now_v7(), "a", "original content");
        let receipt = key
            .sign("a", "q", std::slice::from_ref(&original))
            .unwrap();
        // Overwrite the stored content_hash after signing, as an attacker
        // editing the row in storage would.
        let mut tampered = original;
        tampered.content_hash = vec![0xFF; 32];
        let outcome = verify_read_provenance(&receipt, &[tampered], &key);
        assert!(matches!(
            outcome,
            Err(ProvenanceError::RecordContentHashMismatch { .. })
        ));
    }

    #[test]
    fn tampering_the_hmac_fails_verification() {
        let key = signer();
        let source = record(Uuid::now_v7(), "a", "x");
        let mut receipt = key.sign("a", "q", std::slice::from_ref(&source)).unwrap();
        receipt.hmac[0] ^= 0xFF;
        let outcome = verify_read_provenance(&receipt, &[source], &key);
        assert!(matches!(outcome, Err(ProvenanceError::HmacMismatch)));
    }

    #[test]
    fn missing_source_record_fails_verification() {
        let key = signer();
        let source = record(Uuid::now_v7(), "a", "x");
        let receipt = key.sign("a", "q", &[source]).unwrap();
        let outcome = verify_read_provenance(&receipt, &[], &key);
        assert!(matches!(
            outcome,
            Err(ProvenanceError::MissingRecord { .. })
        ));
    }

    #[test]
    fn unknown_key_id_fails_verification() {
        let key = signer();
        let source = record(Uuid::now_v7(), "a", "x");
        let mut receipt = key.sign("a", "q", std::slice::from_ref(&source)).unwrap();
        receipt.hmac_key_id = String::from("rotated-out");
        let outcome = verify_read_provenance(&receipt, &[source], &key);
        assert!(matches!(outcome, Err(ProvenanceError::UnknownKey { .. })));
    }

    #[test]
    fn rotated_key_still_verifies_via_keystore_lookup() {
        /// Keystore over a slice of signers, selected by key id.
        struct Ring<'a>(&'a [ProvenanceSigner]);
        impl ProvenanceKeystore for Ring<'_> {
            fn lookup(&self, id: &str) -> Option<&ProvenanceSigner> {
                self.0.iter().find(|candidate| candidate.key_id() == id)
            }
        }

        // The receipt is signed with the archived key; the verifier holds
        // both keys and must pick the archived one by id.
        let active = ProvenanceSigner::new("mnemo-prov-2026-05", &[1u8; 32]);
        let archived = ProvenanceSigner::new("mnemo-prov-2026-04", &[2u8; 32]);

        let source = record(Uuid::now_v7(), "a", "old read");
        let receipt = archived
            .sign("a", "q", std::slice::from_ref(&source))
            .unwrap();

        let keys = [active, archived];
        verify_read_provenance(&receipt, &[source], &Ring(&keys)).unwrap();
    }
}