ai-memory 0.7.0

AI-agnostic persistent memory system — MCP server, HTTP API, and CLI for any AI platform
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
// Copyright 2026 AlphaOne LLC
// SPDX-License-Identifier: Apache-2.0

//! Pluggable inference backend trait — issue #651 (RFC pulled forward
//! from v0.8 per operator directive `28860423-d12c-4959-bc8b-8fa9a94a33d9`,
//! 2026-05-18).
//!
//! ## Goal
//!
//! Provide a single trait surface that unifies the substrate's two
//! inference paths today (`embeddings::Embedder` for vector embedding,
//! `llm::OllamaClient` for chat / auto-tag / detect-contradiction)
//! AND provides a forward-compatible hook for the v0.8 GPU / MTP
//! distilled hot-path backend (issues #651 / #654 / Gap #10 of #846).
//!
//! ## Surface
//!
//! ```ignore
//! pub trait InferenceBackend: Send + Sync {
//!     fn embed(&self, text: &str) -> anyhow::Result<Vec<f32>>;
//!     fn chat(&self, prompt: &str) -> anyhow::Result<String>;
//!     fn attested_weights(&self) -> Option<AttestedWeights>;
//! }
//! ```
//!
//! ## Backends shipped at v0.7.0
//!
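//! - [`CpuBackend`] — a thin adapter over the existing CPU pipeline
//!   (`embeddings::Embedder` + `llm::OllamaClient`). That pipeline is
//!   what v0.7.0 actually uses on the recall hot-path; the hot-path
//!   still calls the legacy types directly rather than this adapter.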
//! - [`GpuBackend`] — stub returning `not implemented`. Lands as a
//!   trait-conformant placeholder so the v0.8 work (issue #651 Phase 1
//!   — mistralrs or candle in-process GPU backend) can drop in without
//!   any caller-side refactor.
//!
//! ## Attested weights (issue #654)
//!
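//! `attested_weights()` returns the loaded model's SHA-256 + an
//! optional Ed25519 signature over that SHA-256 hex string's bytes.
//! The CPU backend does not hash anything itself: it returns the
//! record the caller pinned via `CpuBackend::with_attested_weights`
//! (typically built with `compute_attested_weights` at model-load
//! time), or `None` if nothing was pinned. The GPU backend stub
//! returns `None`.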
//! Documentation for the full v0.8 attested weight chain lives at
//! `docs/v0.7.0/inference-attestation.md`.
//!
//! ## Regression test
//!
//! `cpu_backend_round_trips_embed` (in this module) and
//! `gpu_backend_returns_not_implemented` pin the contract.

use anyhow::{Result, anyhow};
use std::sync::Arc;

/// Provenance record for a set of model weights, as handed out by
/// [`InferenceBackend::attested_weights`].
///
/// This is the issue #654 MVP supply-chain attestation shape: a
/// SHA-256 digest of the on-disk weight file, an optional Ed25519
/// signature made with the operator key, and a human-readable label.
///
/// v0.8 is expected to grow this into a full Sigstore-style chain
/// (cosign bundle, transparency log entry, key-rotation reference).
/// The MVP shape is already sufficient to refuse to serve from a
/// weight file that was tampered with before load.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct AttestedWeights {
    /// SHA-256 of the model weight bytes, hex-encoded.
    pub sha256: String,
    /// Base64-encoded Ed25519 signature, if the record has been signed.
    /// The signed message is the `sha256` hex string's bytes (not the
    /// raw weight bytes). `None` means the record is unsigned.
    pub signature: Option<String>,
    /// Operator-readable model identifier, for example
    /// `"all-MiniLM-L6-v2"` or `"distilled-hot-path-v0.8"`.
    pub label: String,
}

/// The unified inference surface. v0.8 callers will hold an
/// `Arc<dyn InferenceBackend>` instead of separate embedder + llm
/// handles. At v0.7.0 the recall hot-path still uses the legacy
/// types directly (no callsite churn during the v0.7.0 ship window);
/// the trait is the seam through which the v0.8 GPU/MTP backend will
/// be threaded.
///
/// The `Send + Sync` supertraits are what allow a single
/// `Arc<dyn InferenceBackend>` to be shared across threads.
pub trait InferenceBackend: Send + Sync {
    /// Produce a single embedding vector for `text`.
    ///
    /// # Errors
    ///
    /// Implementor-specific (model load failure, tokenisation error,
    /// device OOM, etc.). The GPU stub backend returns a
    /// `not implemented` error.
    fn embed(&self, text: &str) -> Result<Vec<f32>>;

    /// Generate a chat completion for `prompt`. Default system prompt
    /// is `None` (implementor decides); use a concrete backend's API
    /// for system-prompt support.
    ///
    /// # Errors
    ///
    /// Implementor-specific (transport error, model unavailable,
    /// safety refusal, etc.). A backend may also fail because no chat
    /// model is configured at all, as [`CpuBackend`] does when it was
    /// built without an LLM handle.
    fn chat(&self, prompt: &str) -> Result<String>;

    /// Return the loaded model's SHA-256 + optional signature for
    /// issue #654 supply-chain attestation. `None` if the backend
    /// has no on-disk weights to attest (e.g. a network-only client).
    ///
    /// The provided default returns `None`; backends that carry an
    /// attestation record override it.
    fn attested_weights(&self) -> Option<AttestedWeights> {
        None
    }
}

/// CPU backend — wraps the existing v0.7.0 inference path
/// (`embeddings::Embedder` + `llm::OllamaClient`). This is a thin
/// adapter; the underlying types are unchanged.
///
/// All fields are private; build one with [`CpuBackend::new`] and
/// optionally attach an attestation record with
/// [`CpuBackend::with_attested_weights`].
pub struct CpuBackend {
    /// Embedding handle, held as an `Embed` trait object. `embed`
    /// calls are forwarded to it unchanged.
    embedder: Arc<dyn crate::embeddings::Embed>,
    /// Optional chat client. When `None`, `chat` returns a
    /// "chat unavailable" error instead of panicking.
    llm: Option<Arc<crate::llm::OllamaClient>>,
    /// Optional pre-computed attested-weights record. Construct via
    /// [`CpuBackend::with_attested_weights`] when the operator has
    /// pinned the model file's SHA-256.
    attested: Option<AttestedWeights>,
}

impl CpuBackend {
    /// Build a CPU backend around already-constructed handles.
    ///
    /// `llm` may be `None`, in which case the backend can embed but
    /// not chat. The new backend carries no attestation record; add
    /// one with [`CpuBackend::with_attested_weights`].
    #[must_use]
    pub fn new(
        embedder: Arc<dyn crate::embeddings::Embed>,
        llm: Option<Arc<crate::llm::OllamaClient>>,
    ) -> Self {
        let attested = None;
        Self {
            embedder,
            llm,
            attested,
        }
    }

    /// Attach an attested-weights record (issue #654), consuming the
    /// backend and returning it with the record set. The embedder and
    /// LLM handles are carried over untouched.
    ///
    /// No hashing happens here: the caller is expected to have built
    /// the record with [`compute_attested_weights`] at model-load time
    /// so that the `verify_attested_weights` gate can refuse to serve
    /// from a tampered file.
    #[must_use]
    pub fn with_attested_weights(self, attested: AttestedWeights) -> Self {
        Self {
            attested: Some(attested),
            ..self
        }
    }
}

impl InferenceBackend for CpuBackend {
    /// Forward to the wrapped embedder.
    fn embed(&self, text: &str) -> Result<Vec<f32>> {
        self.embedder.embed(text)
    }

    /// Forward to the wrapped LLM client with no system prompt, or fail
    /// if the backend was built without one.
    fn chat(&self, prompt: &str) -> Result<String> {
        let Some(llm) = &self.llm else {
            return Err(anyhow!(
                "CpuBackend: chat unavailable (no OllamaClient configured)"
            ));
        };
        llm.generate(prompt, None)
    }

    /// Hand out a copy of the pinned attestation record, if any.
    fn attested_weights(&self) -> Option<AttestedWeights> {
        self.attested.clone()
    }
}

/// Placeholder GPU backend for issue #651 Phase 1.
///
/// Its `embed` and `chat` implementations always fail with a
/// `not implemented` error. The type exists so that the v0.8 GPU/MTP
/// backend (mistralrs or candle in-process) can later be dropped in
/// without any caller-side refactor.
#[derive(Default)]
pub struct GpuBackend {
    /// Human-readable label for the backend, for example
    /// `"distilled-hot-path-v0.8"`. Kept on the stub as well so the
    /// attestation plumbing can be exercised end-to-end during the
    /// v0.8 work.
    pub label: String,
}

impl GpuBackend {
    /// Create a GPU backend stub carrying the given operator-readable
    /// label. Accepts anything convertible into a `String`.
    #[must_use]
    pub fn new(label: impl Into<String>) -> Self {
        let label = label.into();
        Self { label }
    }
}

impl InferenceBackend for GpuBackend {
    fn embed(&self, _text: &str) -> Result<Vec<f32>> {
        Err(anyhow!(
            "GpuBackend::embed not implemented (v0.8 work — issue #651 Phase 1; \
             see docs/v0.7.0/inference-attestation.md for the rollout plan)"
        ))
    }

    fn chat(&self, _prompt: &str) -> Result<String> {
        Err(anyhow!(
            "GpuBackend::chat not implemented (v0.8 work — issue #651 Phase 1)"
        ))
    }
}

/// Compute the SHA-256 of a model-weight file on disk and assemble an
/// [`AttestedWeights`] record. Issue #654 MVP supply-chain attestation.
///
/// # Errors
///
/// Returns an error if the file cannot be read.
pub fn compute_attested_weights(
    path: &std::path::Path,
    label: impl Into<String>,
    signature: Option<String>,
) -> Result<AttestedWeights> {
    use sha2::{Digest, Sha256};
    let bytes = std::fs::read(path)
        .map_err(|e| anyhow!("compute_attested_weights: read {}: {e}", path.display()))?;
    let mut hasher = Sha256::new();
    hasher.update(&bytes);
    let digest = hasher.finalize();
    Ok(AttestedWeights {
        sha256: hex::encode(digest),
        signature,
        label: label.into(),
    })
}

/// Check an [`AttestedWeights`] record against the weight file at
/// `path`, resolving the operator public key automatically. This is
/// the issue #654 MVP gate: call it before binding a backend whenever
/// the operator has pinned a known-good hash.
///
/// Both checks fail CLOSED:
///
/// 1. **Hash** — the SHA-256 recomputed from the on-disk file MUST be
///    equal to `expected.sha256`.
/// 2. **Signature** — if `expected.signature` is `Some`, the Ed25519
///    signature MUST verify, against the operator key returned by
///    [`crate::governance::rules_store::resolve_operator_pubkey`], over
///    the bytes of the recomputed SHA-256 hex string. Any signature
///    that is present but cannot be verified is a hard refusal; that
///    covers malformed base64, a wrong length, a bad signature, and
///    the case where no operator key can be resolved. Before this gate
///    existed the signature field was stored but never checked, so a
///    forged or stale signature passed on the hash alone (issue #654).
///
/// The actual work is done by [`verify_attested_weights_with_key`].
///
/// # Errors
///
/// Returns an error if the file cannot be read, if the recomputed hash
/// differs from `expected.sha256`, or if a present signature does not
/// verify.
pub fn verify_attested_weights(path: &std::path::Path, expected: &AttestedWeights) -> Result<()> {
    verify_attested_weights_with_key(
        path,
        expected,
        crate::governance::rules_store::resolve_operator_pubkey().as_ref(),
    )
}

/// Core of [`verify_attested_weights`] with the operator key passed in
/// explicitly. Production code goes through the wrapper, which
/// resolves the key from disk/env; tests supply `operator_pubkey`
/// themselves so the signature gate can be exercised hermetically,
/// without touching the operator key directory.
///
/// The hash gate always runs first. The signature gate runs only when
/// the record carries a signature, and then requires a key.
///
/// # Errors
///
/// See [`verify_attested_weights`].
pub fn verify_attested_weights_with_key(
    path: &std::path::Path,
    expected: &AttestedWeights,
    operator_pubkey: Option<&ed25519_dalek::VerifyingKey>,
) -> Result<()> {
    // Hash gate: re-hash the file and compare with the pinned digest.
    let actual_sha256 = compute_attested_weights(path, &expected.label, None)?.sha256;
    if actual_sha256 != expected.sha256 {
        return Err(anyhow!(
            "verify_attested_weights: hash mismatch for {} (expected {}, got {}) — \
             refusing to serve from a tampered weight file (issue #654)",
            path.display(),
            expected.sha256,
            actual_sha256,
        ));
    }

    // Signature gate (issue #654). The signed message is the ASCII
    // bytes of the SHA-256 hex string, i.e. the value the operator
    // signs when pinning the weights. A signature that is present but
    // cannot be verified is refused.
    match (expected.signature.as_deref(), operator_pubkey) {
        // Unsigned record: the hash gate alone decides.
        (None, _) => Ok(()),
        // Signed record but nothing to verify it against.
        (Some(_), None) => Err(anyhow!(
            "verify_attested_weights: record for {} carries a signature but no operator \
             public key could be resolved — refusing to serve (fail-CLOSED, issue #654)",
            path.display(),
        )),
        (Some(sig_b64), Some(verifying_key)) => {
            verify_attested_weights_signature(&actual_sha256, sig_b64, verifying_key).map_err(
                |e| {
                    anyhow!(
                        "verify_attested_weights: signature verification failed for {} ({e}) — \
                         refusing to serve (issue #654)",
                        path.display(),
                    )
                },
            )
        }
    }
}

/// Check that `signature` is a valid Ed25519 signature by
/// `verifying_key` over the bytes of the `sha256` hex string.
///
/// `signature` is base64 text; surrounding whitespace is ignored.
/// URL-safe-no-pad is tried first and standard base64 second (mirrors
/// [`crate::governance::rules_store::resolve_operator_pubkey`]).
/// Undecodable input and decoded input of the wrong length are both
/// reported as a `SignatureError`, the same as a bad signature.
fn verify_attested_weights_signature(
    sha256: &str,
    signature: &str,
    verifying_key: &ed25519_dalek::VerifyingKey,
) -> Result<(), ed25519_dalek::SignatureError> {
    use base64::Engine;
    use base64::engine::general_purpose::{STANDARD, URL_SAFE_NO_PAD};
    use ed25519_dalek::{SIGNATURE_LENGTH, Signature, SignatureError, Verifier};

    let encoded = signature.trim();
    let decoded = match URL_SAFE_NO_PAD.decode(encoded) {
        Ok(bytes) => bytes,
        Err(_) => STANDARD
            .decode(encoded)
            .map_err(|_| SignatureError::new())?,
    };

    // The conversion fails unless exactly SIGNATURE_LENGTH bytes decoded.
    let raw: [u8; SIGNATURE_LENGTH] = decoded
        .as_slice()
        .try_into()
        .map_err(|_| SignatureError::new())?;

    verifying_key.verify(sha256.as_bytes(), &Signature::from_bytes(&raw))
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use std::path::{Path, PathBuf};

    /// Deterministic embedder: every input maps to a 4-element vector
    /// filled with the input's byte length.
    struct MockEmbedder;
    impl crate::embeddings::Embed for MockEmbedder {
        fn embed(&self, text: &str) -> Result<Vec<f32>> {
            Ok(vec![text.len() as f32; 4])
        }
    }

    /// Scratch weight file that is deleted when the guard is dropped.
    ///
    /// Cleanup lives in `Drop` so the file is removed even when an
    /// assertion in the test panics; a trailing `remove_file` call
    /// would be skipped in that case and leave the fixture behind.
    ///
    /// Files are written to `.local-runs/` under the crate root so we
    /// honor the no-/tmp HARD RULE in CLAUDE.md.
    struct Fixture {
        path: PathBuf,
    }

    impl Fixture {
        /// Create `<prefix>-<uuid>.bin` under `.local-runs/` holding
        /// `content`, flushed to disk before returning.
        fn create(prefix: &str, content: &[u8]) -> Self {
            let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(".local-runs");
            std::fs::create_dir_all(&dir).expect("mkdir .local-runs");
            let path = dir.join(format!("{prefix}-{}.bin", uuid::Uuid::new_v4()));
            // Build the guard first so a failed write is still cleaned up.
            let fixture = Self { path };
            let mut f = std::fs::File::create(&fixture.path).expect("create fixture");
            f.write_all(content).expect("write fixture");
            f.sync_all().expect("sync fixture");
            fixture
        }

        /// Location of the fixture file.
        fn path(&self) -> &Path {
            &self.path
        }

        /// Append `extra` to the file, simulating post-pin tampering.
        fn append(&self, extra: &[u8]) {
            let mut f = std::fs::OpenOptions::new()
                .append(true)
                .open(&self.path)
                .expect("open append");
            f.write_all(extra).expect("tamper write");
            f.sync_all().expect("sync tamper");
        }
    }

    impl Drop for Fixture {
        fn drop(&mut self) {
            // Best-effort: a missing file is not a test failure.
            let _ = std::fs::remove_file(&self.path);
        }
    }

    #[test]
    fn cpu_backend_round_trips_embed() {
        let be: Arc<dyn InferenceBackend> = Arc::new(CpuBackend::new(Arc::new(MockEmbedder), None));
        let v = be.embed("hello").expect("embed ok");
        assert_eq!(v, vec![5.0_f32; 4]);
    }

    #[test]
    fn cpu_backend_chat_without_llm_errors() {
        let be = CpuBackend::new(Arc::new(MockEmbedder), None);
        let err = be.chat("anything").expect_err("must err");
        assert!(err.to_string().contains("chat unavailable"));
    }

    #[test]
    fn gpu_backend_returns_not_implemented() {
        let be: Arc<dyn InferenceBackend> = Arc::new(GpuBackend::new("test-gpu"));
        let err = be.embed("x").expect_err("gpu embed must err");
        assert!(err.to_string().contains("not implemented"));
        let err = be.chat("x").expect_err("gpu chat must err");
        assert!(err.to_string().contains("not implemented"));
        assert!(be.attested_weights().is_none());
    }

    #[test]
    fn compute_and_verify_attested_weights_round_trip() {
        let fixture = Fixture::create(
            "inference-attest-fixture",
            b"a tiny attested model weight blob",
        );

        let attested = compute_attested_weights(fixture.path(), "fixture", None)
            .expect("compute_attested_weights ok");
        assert_eq!(attested.sha256.len(), 64, "sha256 hex must be 64 chars");

        verify_attested_weights(fixture.path(), &attested).expect("verify ok");

        // Tamper the file; verify must now refuse.
        fixture.append(b"--tampered--");
        let err = verify_attested_weights(fixture.path(), &attested)
            .expect_err("verify must refuse tampered file");
        assert!(err.to_string().contains("hash mismatch"));
    }

    #[test]
    fn cpu_backend_with_attested_weights_round_trip() {
        let attested = AttestedWeights {
            sha256: "0".repeat(64),
            signature: None,
            label: "test".into(),
        };
        let be =
            CpuBackend::new(Arc::new(MockEmbedder), None).with_attested_weights(attested.clone());
        assert_eq!(be.attested_weights(), Some(attested));
    }

    // ---- issue #654 — Ed25519 signature gate on attested weights ----
    //
    // Pre-fix `verify_attested_weights` stored the `signature` field but
    // never checked it, so a record could pass the gate on the hash
    // alone while carrying a forged / stale / absent-key signature. These
    // tests pin the fail-CLOSED signature semantics via the key-injecting
    // core (`verify_attested_weights_with_key`) so the gate is exercised
    // hermetically, independent of the operator key directory.

    /// Sign `message` with `signing_key` and return the signature as
    /// standard base64.
    fn sign_b64(signing_key: &ed25519_dalek::SigningKey, message: &[u8]) -> String {
        use base64::Engine;
        use ed25519_dalek::Signer;
        base64::engine::general_purpose::STANDARD.encode(signing_key.sign(message).to_bytes())
    }

    /// Build a record for the file at `path` whose signature is made by
    /// `signer` over the sha256 hex string's bytes — the canonical
    /// signed message.
    fn signed_record(path: &Path, signer: &ed25519_dalek::SigningKey) -> AttestedWeights {
        let unsigned = compute_attested_weights(path, "fixture", None).expect("compute ok");
        let signature = sign_b64(signer, unsigned.sha256.as_bytes());
        AttestedWeights {
            signature: Some(signature),
            ..unsigned
        }
    }

    #[test]
    fn verify_attested_weights_accepts_valid_operator_signature() {
        let mut csprng = rand_core::OsRng;
        let signing_key = ed25519_dalek::SigningKey::generate(&mut csprng);
        let verifying_key = signing_key.verifying_key();

        let fixture = Fixture::create("inference-attest-sig", b"signed weight blob");
        let attested = signed_record(fixture.path(), &signing_key);

        verify_attested_weights_with_key(fixture.path(), &attested, Some(&verifying_key))
            .expect("valid signature must verify");
    }

    #[test]
    fn verify_attested_weights_rejects_forged_signature() {
        let mut csprng = rand_core::OsRng;
        let operator_key = ed25519_dalek::SigningKey::generate(&mut csprng);
        let attacker_key = ed25519_dalek::SigningKey::generate(&mut csprng);

        let fixture = Fixture::create("inference-attest-sig", b"forged weight blob");
        // Signed by the attacker, verified against the operator key → fail.
        let attested = signed_record(fixture.path(), &attacker_key);

        let err = verify_attested_weights_with_key(
            fixture.path(),
            &attested,
            Some(&operator_key.verifying_key()),
        )
        .expect_err("forged signature must be refused");
        assert!(err.to_string().contains("signature verification failed"));
    }

    #[test]
    fn verify_attested_weights_fails_closed_when_signed_but_no_key() {
        let mut csprng = rand_core::OsRng;
        let signing_key = ed25519_dalek::SigningKey::generate(&mut csprng);

        let fixture = Fixture::create("inference-attest-sig", b"orphan-signature weight blob");
        let attested = signed_record(fixture.path(), &signing_key);

        // Signature present but NO operator key resolvable → fail CLOSED.
        let err = verify_attested_weights_with_key(fixture.path(), &attested, None)
            .expect_err("present signature with no key must fail closed");
        assert!(err.to_string().contains("no operator public key"));
    }

    #[test]
    fn verify_attested_weights_unsigned_record_skips_signature_gate() {
        // No signature → only the hash gate runs; no operator key needed.
        let fixture = Fixture::create("inference-attest-sig", b"unsigned weight blob");
        let attested =
            compute_attested_weights(fixture.path(), "fixture", None).expect("compute ok");
        assert!(attested.signature.is_none());
        verify_attested_weights_with_key(fixture.path(), &attested, None)
            .expect("unsigned record must verify on hash alone");
    }
}