Skip to main content

cortex_llm/
summary.rs

1//! Pluggable LLM summary backend used by the Phase 4.D decay path.
2//!
3//! Where [`crate::adapter::LlmAdapter`] is the general-purpose async LLM call
4//! surface for reflection / proof / extraction, [`SummaryBackend`] is the
5//! **narrow, synchronous** trait the decay-job pipeline talks to when it
6//! needs to compress N candidate memories or episodes down to one summary
7//! memory under operator attestation. The two surfaces are kept disjoint on
8//! purpose:
9//!
10//! - The decay path is operator-gated and pins both `model_name` and
11//!   `prompt_template_blake3` in the attestation envelope; the trait's
12//!   request shape carries those pins verbatim so the implementation can
13//!   refuse on mismatch BEFORE making any backend call.
14//! - The decay path is synchronous (called from inside a transactional
15//!   runner step); we do not need the async ceremony.
16//! - The decay path's failure modes (`BackendNotConfigured`,
17//!   `ModelNotInAllowlist`, `PromptTemplateMismatch`, ...) are different
18//!   in kind from the generic adapter's transport / parse / timeout
19//!   alphabet — keeping them in a typed enum here means
20//!   `cortex_memory::decay` can match on stable variants for grep-friendly
21//!   refusal envelopes.
22//!
23//! ## Implementations shipped today
24//!
25//! - [`NoopSummaryBackend`] — fail-closed default. Every CLI surface that
26//!   wires the decay runner today injects this so an LLM-summary job
27//!   surfaces a typed `BackendNotConfigured` rather than panicking.
28//! - [`ReplaySummaryBackend`] — deterministic CI fixture backend. Mirrors
29//!   [`crate::replay::ReplayAdapter`] but with a much simpler key shape
30//!   (BLAKE3 of the canonicalised summary request fields).
31//!
32//! Hosted backends (Claude, Ollama) plug into the same trait by validating
33//! their own `model_name` allowlist and `prompt_template_blake3` before
34//! making the actual call.
35
36use std::collections::HashMap;
37use std::fmt;
38use std::fs;
39use std::path::Path;
40
41use serde::{Deserialize, Serialize};
42use thiserror::Error;
43
44use crate::adapter::{blake3_hex, TokenUsage};
45
/// One claim or episode summary fed in as a source for compression.
///
/// Kept as a plain owned string so callers do not have to learn the cortex-
/// store row shape to drive the backend; the decay-path translation layer
/// is responsible for picking the right text (memory `claim`, episode
/// `summary`) and forwarding it here.
///
/// NOTE: this is a transparent alias, not a newtype — any `String` is
/// accepted wherever a `SourceClaim` is expected; the name exists purely
/// for documentation value.
pub type SourceClaim = String;
53
/// Input to a [`SummaryBackend::summarize`] call.
///
/// Field shape is deliberately small. The two pins (`model_name`,
/// `prompt_template_blake3`) come from the operator-signed attestation
/// envelope at `cortex_memory::decay::summary::LlmSummaryOperatorAttestationEnvelope`
/// so the backend's allowlist check is structurally bound to the same
/// signed authority surface.
///
/// Every field participates in [`SummaryRequest::request_hash`], so a
/// change to any field (including the optional ones) yields a distinct
/// replay-fixture key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummaryRequest {
    /// Pinned model name (e.g. `claude-sonnet-4-7@1`). Backends MUST refuse
    /// if their resolved model is not on the local allowlist; the
    /// `ReplaySummaryBackend` refuses on exact-string mismatch with the
    /// fixture key.
    pub model_name: String,
    /// Pinned BLAKE3 digest (prefixed `blake3:`) of the prompt template
    /// that will be sent. Backends MUST refuse on mismatch with their
    /// resolved template before any call.
    pub prompt_template_blake3: String,
    /// Source claims in the order the decay-job manifest lists them. The
    /// fixture and any hosted backend MAY reorder internally but MUST NOT
    /// drop entries. Order is hash-significant: reordering the claims
    /// changes `request_hash()`.
    pub source_claims: Vec<SourceClaim>,
    /// Optional byte budget on the produced summary. Backends MAY truncate
    /// or refuse on exceedance; the decay path will always re-check the
    /// produced length downstream.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_output_bytes: Option<usize>,
    /// Optional decay job id stamped onto the request for correlation /
    /// fixture key derivation. The fixture backend keys on the canonical
    /// request bytes, so two calls with the same model, prompt, and
    /// sources collide unless this field disambiguates them.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub decay_job_id: Option<String>,
}
88
89impl SummaryRequest {
90    /// Stable BLAKE3 hash over the canonicalised request fields. The hash
91    /// is the lookup key for the [`ReplaySummaryBackend`] fixture map and
92    /// is also a useful audit-correlation hint.
93    ///
94    /// Hash domain is `cortex.llm.summary.request.v1` so a captured hash
95    /// cannot collide with the [`crate::LlmRequest::prompt_hash`] domain.
96    #[must_use]
97    pub fn request_hash(&self) -> String {
98        let canonical = CanonicalSummaryRequest {
99            domain: "cortex.llm.summary.request.v1",
100            model_name: &self.model_name,
101            prompt_template_blake3: &self.prompt_template_blake3,
102            source_claims: &self.source_claims,
103            max_output_bytes: self.max_output_bytes,
104            decay_job_id: self.decay_job_id.as_deref(),
105        };
106        let bytes =
107            serde_json::to_vec(&canonical).expect("CanonicalSummaryRequest is always serializable");
108        blake3_hex(&bytes)
109    }
110}
111
/// Internal helper: the subset of [`SummaryRequest`] that participates in
/// `request_hash()`. Borrowed-references-only so we never copy the payload.
///
/// Field declaration order is hash-significant (serde_json serializes
/// struct fields in declaration order), so fields must not be reordered
/// without bumping the `domain` version tag.
#[derive(Serialize)]
struct CanonicalSummaryRequest<'a> {
    /// Constant hash-domain tag (`cortex.llm.summary.request.v1`),
    /// serialized first to domain-separate the preimage.
    domain: &'static str,
    /// Borrow of [`SummaryRequest::model_name`].
    model_name: &'a str,
    /// Borrow of [`SummaryRequest::prompt_template_blake3`].
    prompt_template_blake3: &'a str,
    /// Borrow of [`SummaryRequest::source_claims`]; order preserved.
    source_claims: &'a [SourceClaim],
    /// `Copy` value taken by value; omitted from the preimage when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    max_output_bytes: Option<usize>,
    /// Borrow of [`SummaryRequest::decay_job_id`]; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    decay_job_id: Option<&'a str>,
}
125
/// Result of a successful [`SummaryBackend::summarize`] call.
///
/// Serializable so it can be pre-baked into replay fixtures
/// ([`ReplaySummaryFixtureEntry::response`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummaryResponse {
    /// The produced summary text. Must be non-empty; the decay path
    /// re-validates this and refuses if the backend echoes an empty
    /// claim.
    pub claim: String,
    /// Token-usage echo from the provider, if any. `None` when the
    /// provider reports no usage.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub token_usage: Option<TokenUsage>,
    /// The model name the backend says actually produced the response.
    /// The decay path verifies this byte-equals
    /// [`SummaryRequest::model_name`] and refuses on mismatch (so a
    /// silently-routed provider cannot launder an attestation pin).
    pub model_name_echoed: String,
}
142
/// Errors raised by any [`SummaryBackend`] implementation.
///
/// Each variant displays in a grep-friendly shape (the discriminator
/// appears verbatim in the message) so operator scripts can match on a
/// stable contract. The enum also derives `Serialize`/`Deserialize` and
/// `PartialEq`/`Eq`, so refusals can be embedded in envelopes and matched
/// exactly in tests.
#[derive(Debug, Error, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum SummaryError {
    /// No backend is wired (or the wired backend has no configured
    /// upstream). This is the [`NoopSummaryBackend`]'s only return shape.
    /// Unit variant: carries no payload.
    #[error("summary_backend_not_configured")]
    BackendNotConfigured,
    /// The request's `model_name` pin is not on the backend's local
    /// allowlist. Payload is the offending model name.
    #[error("summary_model_not_in_allowlist: {0}")]
    ModelNotInAllowlist(String),
    /// The request's `prompt_template_blake3` pin does not match the
    /// backend's resolved template. Payload describes the mismatch.
    #[error("summary_prompt_template_mismatch: {0}")]
    PromptTemplateMismatch(String),
    /// The backend's upstream call failed (transport, upstream non-2xx,
    /// timeout, etc.). The message is opaque to the trait but must be
    /// non-empty.
    #[error("summary_call_failed: {0}")]
    CallFailed(String),
    /// The backend's upstream returned a payload that failed local
    /// structural validation (empty claim, byte budget exceeded, ...).
    #[error("summary_output_validation_failed: {0}")]
    OutputValidationFailed(String),
}
172
/// Pluggable LLM summary backend used by the Phase 4.D decay path.
///
/// Contract:
///
/// - `summarize(request)` takes a deterministic input (model_name pin,
///   prompt_template_blake3 pin, source claims, byte budget) and returns
///   either a summary string + token usage or a typed error.
/// - Implementations MUST validate the `model_name` pin against their own
///   allowlist BEFORE making any backend call.
/// - Implementations MUST be deterministic given the same input when used
///   in CI / fixture mode (see [`ReplaySummaryBackend`]).
/// - Implementations MUST NOT panic on invalid inputs; the decay runner
///   relies on every refusal returning a typed [`SummaryError`].
///
/// `Send + Sync + Debug` is required so the runtime can hold a
/// `&dyn SummaryBackend` across thread boundaries.
pub trait SummaryBackend: fmt::Debug + Send + Sync {
    /// Issue a summary call. The implementation owns its own timeout
    /// budget and MUST surface timeouts as
    /// [`SummaryError::CallFailed`].
    ///
    /// # Errors
    ///
    /// Every refusal is a typed [`SummaryError`] variant; implementations
    /// never panic on invalid input.
    fn summarize(&self, request: &SummaryRequest) -> Result<SummaryResponse, SummaryError>;
}
195
/// Fail-closed default backend.
///
/// Every CLI surface that wires the decay runner today injects this so
/// an LLM-summary job surfaces a typed `BackendNotConfigured` refusal
/// rather than panicking. Pattern mirrors `NoopCalendarClient` in the
/// OTS external sink.
///
/// Zero-sized and `Copy`, so it can be constructed and passed around
/// freely (`NoopSummaryBackend` is itself a value).
#[derive(Debug, Default, Clone, Copy)]
pub struct NoopSummaryBackend;
204
205impl SummaryBackend for NoopSummaryBackend {
206    fn summarize(&self, _request: &SummaryRequest) -> Result<SummaryResponse, SummaryError> {
207        Err(SummaryError::BackendNotConfigured)
208    }
209}
210
/// Deterministic fixture-backed summary backend for CI and tests.
///
/// Constructed from a `HashMap<RequestKey, SummaryResponse>` keyed by the
/// canonical BLAKE3 of the request fields ([`SummaryRequest::request_hash`]).
/// A lookup miss returns the unit [`SummaryError::BackendNotConfigured`]
/// variant. NOTE: the variant carries no payload, so a test that needs
/// context on a miss should log [`SummaryRequest::request_hash`] itself.
///
/// Use [`ReplaySummaryBackend::from_fixture_file`] to load a JSON fixture
/// keyed by `(model_name, prompt_template_blake3, ordered source claims,
/// max_output_bytes, decay_job_id)`.
#[derive(Debug, Clone)]
pub struct ReplaySummaryBackend {
    /// Pre-baked responses keyed by [`SummaryRequest::request_hash`].
    by_hash: HashMap<String, SummaryResponse>,
}
225
/// On-disk JSON shape consumed by
/// [`ReplaySummaryBackend::from_fixture_file`]. The fixture is an array
/// of `{ request, response }` entries; the hash key is computed at load
/// time from the request fields, never stored in the file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReplaySummaryFixture {
    /// Entry rows in declaration order. Order is not significant —
    /// entries whose requests hash to the same key are refused at load
    /// time (see [`ReplaySummaryBackend::from_entries`]).
    pub entries: Vec<ReplaySummaryFixtureEntry>,
}
236
/// One row of a [`ReplaySummaryFixture`]: a request pattern paired with
/// the canned response the backend should return for it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReplaySummaryFixtureEntry {
    /// Request the fixture matches against. Hashed at load time via
    /// [`SummaryRequest::request_hash`].
    pub request: SummaryRequest,
    /// Response the fixture returns verbatim (cloned per hit).
    pub response: SummaryResponse,
}
245
246impl ReplaySummaryBackend {
247    /// Construct a new replay backend from an in-memory request map.
248    ///
249    /// Duplicate request hashes are refused: the caller has either
250    /// supplied two distinct fixtures that compute to the same request
251    /// hash (cryptographic collision — implausible) or repeated the same
252    /// fixture row by mistake (the more common case).
253    pub fn from_entries(entries: Vec<ReplaySummaryFixtureEntry>) -> Result<Self, SummaryError> {
254        let mut by_hash: HashMap<String, SummaryResponse> = HashMap::new();
255        for entry in entries {
256            let key = entry.request.request_hash();
257            if by_hash.insert(key.clone(), entry.response).is_some() {
258                return Err(SummaryError::CallFailed(format!(
259                    "duplicate replay summary fixture for request_hash={key}"
260                )));
261            }
262        }
263        Ok(Self { by_hash })
264    }
265
266    /// Load a fixture file from disk and construct the backend.
267    ///
268    /// File is a JSON document matching the [`ReplaySummaryFixture`]
269    /// shape. Errors map to [`SummaryError::CallFailed`] with a
270    /// descriptive message so CI logs are actionable.
271    pub fn from_fixture_file(path: &Path) -> Result<Self, SummaryError> {
272        let raw = fs::read_to_string(path).map_err(|err| {
273            SummaryError::CallFailed(format!("fixture `{}` not readable: {err}", path.display()))
274        })?;
275        let fixture: ReplaySummaryFixture = serde_json::from_str(&raw).map_err(|err| {
276            SummaryError::CallFailed(format!("fixture `{}` did not parse: {err}", path.display()))
277        })?;
278        Self::from_entries(fixture.entries)
279    }
280
281    /// Number of fixture entries currently held.
282    #[must_use]
283    pub fn fixture_count(&self) -> usize {
284        self.by_hash.len()
285    }
286}
287
288impl SummaryBackend for ReplaySummaryBackend {
289    fn summarize(&self, request: &SummaryRequest) -> Result<SummaryResponse, SummaryError> {
290        let key = request.request_hash();
291        match self.by_hash.get(&key) {
292            Some(response) => Ok(response.clone()),
293            None => Err(SummaryError::BackendNotConfigured),
294        }
295    }
296}
297
298#[cfg(test)]
299mod tests {
300    use super::*;
301    use std::io::Write;
302    use tempfile::TempDir;
303
304    fn sample_request(model: &str, prompt: &str, claims: &[&str]) -> SummaryRequest {
305        SummaryRequest {
306            model_name: model.into(),
307            prompt_template_blake3: prompt.into(),
308            source_claims: claims.iter().map(|s| (*s).to_string()).collect(),
309            max_output_bytes: Some(2048),
310            decay_job_id: Some("dcy_01ARZ3NDEKTSV4RRFFQ69G5FAV".into()),
311        }
312    }
313
314    fn sample_response(claim: &str, model: &str) -> SummaryResponse {
315        SummaryResponse {
316            claim: claim.into(),
317            token_usage: Some(TokenUsage {
318                prompt_tokens: 10,
319                completion_tokens: 20,
320            }),
321            model_name_echoed: model.into(),
322        }
323    }
324
325    #[test]
326    fn request_hash_is_stable_across_calls() {
327        let req = sample_request(
328            "claude-sonnet-4-7@1",
329            "blake3:00000000000000000000000000000000",
330            &["alpha", "beta"],
331        );
332        assert_eq!(req.request_hash(), req.request_hash());
333    }
334
335    #[test]
336    fn request_hash_changes_with_any_field() {
337        let base = sample_request("claude-sonnet-4-7@1", "blake3:0000", &["alpha", "beta"]);
338        let mut model_changed = base.clone();
339        model_changed.model_name = "other".into();
340        assert_ne!(base.request_hash(), model_changed.request_hash());
341
342        let mut claims_reordered = base.clone();
343        claims_reordered.source_claims = vec!["beta".into(), "alpha".into()];
344        assert_ne!(base.request_hash(), claims_reordered.request_hash());
345
346        let mut prompt_changed = base.clone();
347        prompt_changed.prompt_template_blake3 = "blake3:1111".into();
348        assert_ne!(base.request_hash(), prompt_changed.request_hash());
349
350        let mut job_changed = base.clone();
351        job_changed.decay_job_id = Some("dcy_other".into());
352        assert_ne!(base.request_hash(), job_changed.request_hash());
353    }
354
355    #[test]
356    fn noop_backend_fails_closed() {
357        let backend = NoopSummaryBackend;
358        let req = sample_request("any-model", "blake3:00", &["one"]);
359        let err = backend.summarize(&req).unwrap_err();
360        assert_eq!(err, SummaryError::BackendNotConfigured);
361        // Display must contain the discriminator token for grep tooling.
362        assert!(err.to_string().contains("summary_backend_not_configured"));
363    }
364
365    #[test]
366    fn replay_backend_round_trips_a_pre_baked_response() {
367        let req = sample_request("claude-sonnet-4-7@1", "blake3:abcd", &["alpha", "beta"]);
368        let resp = sample_response("alpha and beta", "claude-sonnet-4-7@1");
369        let backend = ReplaySummaryBackend::from_entries(vec![ReplaySummaryFixtureEntry {
370            request: req.clone(),
371            response: resp.clone(),
372        }])
373        .expect("build replay backend");
374        assert_eq!(backend.fixture_count(), 1);
375
376        let got = backend.summarize(&req).expect("hit");
377        assert_eq!(got.claim, resp.claim);
378        assert_eq!(got.model_name_echoed, resp.model_name_echoed);
379    }
380
381    #[test]
382    fn replay_backend_miss_returns_backend_not_configured() {
383        let req = sample_request("m1", "blake3:aaaa", &["alpha"]);
384        let resp = sample_response("summary", "m1");
385        let backend = ReplaySummaryBackend::from_entries(vec![ReplaySummaryFixtureEntry {
386            request: req.clone(),
387            response: resp,
388        }])
389        .expect("build replay backend");
390
391        let other = sample_request("m1", "blake3:aaaa", &["never seen"]);
392        let err = backend.summarize(&other).unwrap_err();
393        assert_eq!(err, SummaryError::BackendNotConfigured);
394    }
395
396    #[test]
397    fn replay_backend_refuses_duplicate_fixture_keys() {
398        let req = sample_request("m1", "blake3:aaaa", &["alpha"]);
399        let resp = sample_response("s1", "m1");
400        let err = ReplaySummaryBackend::from_entries(vec![
401            ReplaySummaryFixtureEntry {
402                request: req.clone(),
403                response: resp.clone(),
404            },
405            ReplaySummaryFixtureEntry {
406                request: req,
407                response: resp,
408            },
409        ])
410        .unwrap_err();
411        match err {
412            SummaryError::CallFailed(msg) => {
413                assert!(msg.contains("duplicate"), "got {msg}");
414            }
415            other => panic!("expected CallFailed, got {other:?}"),
416        }
417    }
418
419    #[test]
420    fn replay_backend_loads_from_disk_fixture_file() {
421        let tmp = TempDir::new().unwrap();
422        let path = tmp.path().join("fixture.json");
423
424        let fixture = ReplaySummaryFixture {
425            entries: vec![ReplaySummaryFixtureEntry {
426                request: sample_request("m1", "blake3:abcd", &["alpha", "beta"]),
427                response: sample_response("summary text", "m1"),
428            }],
429        };
430        let mut f = fs::File::create(&path).unwrap();
431        f.write_all(&serde_json::to_vec_pretty(&fixture).unwrap())
432            .unwrap();
433        drop(f);
434
435        let backend = ReplaySummaryBackend::from_fixture_file(&path).expect("load fixture");
436        assert_eq!(backend.fixture_count(), 1);
437        let got = backend
438            .summarize(&sample_request("m1", "blake3:abcd", &["alpha", "beta"]))
439            .expect("hit");
440        assert_eq!(got.claim, "summary text");
441    }
442
443    #[test]
444    fn replay_backend_load_missing_file_returns_call_failed() {
445        let err = ReplaySummaryBackend::from_fixture_file(Path::new("this/does/not/exist.json"))
446            .unwrap_err();
447        match err {
448            SummaryError::CallFailed(msg) => {
449                assert!(msg.contains("not readable"), "got {msg}");
450            }
451            other => panic!("expected CallFailed, got {other:?}"),
452        }
453    }
454
455    #[test]
456    fn summary_error_display_carries_discriminator_tokens() {
457        assert!(SummaryError::BackendNotConfigured
458            .to_string()
459            .contains("summary_backend_not_configured"));
460        assert!(SummaryError::ModelNotInAllowlist("m".into())
461            .to_string()
462            .contains("summary_model_not_in_allowlist"));
463        assert!(SummaryError::PromptTemplateMismatch("p".into())
464            .to_string()
465            .contains("summary_prompt_template_mismatch"));
466        assert!(SummaryError::CallFailed("c".into())
467            .to_string()
468            .contains("summary_call_failed"));
469        assert!(SummaryError::OutputValidationFailed("o".into())
470            .to_string()
471            .contains("summary_output_validation_failed"));
472    }
473}