cortex_llm/summary.rs
//! Pluggable LLM summary backend used by the Phase 4.D decay path.
//!
//! Where [`crate::adapter::LlmAdapter`] is the general-purpose async LLM call
//! surface for reflection / proof / extraction, [`SummaryBackend`] is the
//! **narrow, synchronous** trait the decay-job pipeline talks to when it
//! needs to compress N candidate memories or episodes down to one summary
//! memory under operator attestation. The two surfaces are kept disjoint on
//! purpose:
//!
//! - The decay path is operator-gated and pins both `model_name` and
//!   `prompt_template_blake3` in the attestation envelope; the trait's
//!   request shape carries those pins verbatim so the implementation can
//!   refuse on mismatch BEFORE making any backend call.
//! - The decay path is synchronous (called from inside a transactional
//!   runner step); we do not need the async ceremony.
//! - The decay path's failure modes (`BackendNotConfigured`,
//!   `ModelNotInAllowlist`, `PromptTemplateMismatch`, ...) are different
//!   in kind from the generic adapter's transport / parse / timeout
//!   alphabet; keeping them in a typed enum here means
//!   `cortex_memory::decay` can match on stable variants for grep-friendly
//!   refusal envelopes.
//!
//! ## Implementations shipped today
//!
//! - [`NoopSummaryBackend`]: fail-closed default. Every CLI surface that
//!   wires the decay runner today injects this so an LLM-summary job
//!   surfaces a typed `BackendNotConfigured` rather than panicking.
//! - [`ReplaySummaryBackend`]: deterministic CI fixture backend. Mirrors
//!   [`crate::replay::ReplayAdapter`] but with a much simpler key shape
//!   (BLAKE3 of the canonicalised summary request fields).
//!
//! Hosted backends (Claude, Ollama) plug into the same trait by validating
//! their own `model_name` allowlist and `prompt_template_blake3` before
//! making the actual call.
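//!
//! ## Example
//!
//! A minimal fail-closed wiring sketch, assuming the crate/module path
//! `cortex_llm::summary` and illustrative field values; the decay runner
//! injects its backend the same way:
//!
//! ```ignore
//! use cortex_llm::summary::{
//!     NoopSummaryBackend, SummaryBackend, SummaryError, SummaryRequest,
//! };
//!
//! let backend: &dyn SummaryBackend = &NoopSummaryBackend;
//! let request = SummaryRequest {
//!     model_name: "claude-sonnet-4-7@1".into(),
//!     prompt_template_blake3: "blake3:0000".into(),
//!     source_claims: vec!["alpha".into(), "beta".into()],
//!     max_output_bytes: Some(2048),
//!     decay_job_id: None,
//! };
//! // No backend is configured, so the job refuses with a typed error
//! // instead of panicking.
//! assert!(matches!(
//!     backend.summarize(&request),
//!     Err(SummaryError::BackendNotConfigured)
//! ));
//! ```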

use std::collections::HashMap;
use std::fmt;
use std::fs;
use std::path::Path;

use serde::{Deserialize, Serialize};
use thiserror::Error;

use crate::adapter::{blake3_hex, TokenUsage};

/// One claim or episode summary fed in as a source for compression.
///
/// Kept as a plain owned string so callers do not have to learn the
/// cortex-store row shape to drive the backend; the decay-path translation
/// layer is responsible for picking the right text (memory `claim`, episode
/// `summary`) and forwarding it here.
pub type SourceClaim = String;

/// Input to a [`SummaryBackend::summarize`] call.
///
/// Field shape is deliberately small. The two pins (`model_name`,
/// `prompt_template_blake3`) come from the operator-signed attestation
/// envelope at `cortex_memory::decay::summary::LlmSummaryOperatorAttestationEnvelope`
/// so the backend's allowlist check is structurally bound to the same
/// signed authority surface.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummaryRequest {
    /// Pinned model name (e.g. `claude-sonnet-4-7@1`). Backends MUST refuse
    /// if their resolved model is not on the local allowlist; the
    /// [`ReplaySummaryBackend`] refuses on exact-string mismatch with the
    /// fixture key.
    pub model_name: String,
    /// Pinned BLAKE3 digest (prefixed `blake3:`) of the prompt template
    /// that will be sent. Backends MUST refuse on mismatch with their
    /// resolved template before any call.
    pub prompt_template_blake3: String,
    /// Source claims in the order the decay-job manifest lists them. The
    /// fixture and any hosted backend MAY reorder internally but MUST NOT
    /// drop entries.
    pub source_claims: Vec<SourceClaim>,
    /// Optional byte budget on the produced summary. Backends MAY truncate
    /// or refuse on exceedance; the decay path will always re-check the
    /// produced length downstream.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_output_bytes: Option<usize>,
    /// Optional decay job id stamped onto the request for correlation and
    /// fixture-key derivation. The fixture backend keys on the canonical
    /// request bytes, so two calls with the same model, prompt, and
    /// sources collide unless this field disambiguates them.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub decay_job_id: Option<String>,
}

impl SummaryRequest {
    /// Stable BLAKE3 hash over the canonicalised request fields. The hash
    /// is the lookup key for the [`ReplaySummaryBackend`] fixture map and
    /// is also a useful audit-correlation hint.
    ///
    /// Hash domain is `cortex.llm.summary.request.v1` so a captured hash
    /// cannot collide with the [`crate::LlmRequest::prompt_hash`] domain.
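    ///
    /// Illustrative sketch (the `request` binding is assumed, with at
    /// least two distinct source claims): equal requests hash identically,
    /// and source order participates in the key.
    ///
    /// ```ignore
    /// let first = request.request_hash();
    /// let mut reordered = request.clone();
    /// reordered.source_claims.reverse();
    /// // Pure function of the fields: stable on repeat, new key on change.
    /// assert_eq!(first, request.request_hash());
    /// assert_ne!(first, reordered.request_hash());
    /// ```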
    #[must_use]
    pub fn request_hash(&self) -> String {
        let canonical = CanonicalSummaryRequest {
            domain: "cortex.llm.summary.request.v1",
            model_name: &self.model_name,
            prompt_template_blake3: &self.prompt_template_blake3,
            source_claims: &self.source_claims,
            max_output_bytes: self.max_output_bytes,
            decay_job_id: self.decay_job_id.as_deref(),
        };
        let bytes =
            serde_json::to_vec(&canonical).expect("CanonicalSummaryRequest is always serializable");
        blake3_hex(&bytes)
    }
}

/// Internal helper: the subset of [`SummaryRequest`] that participates in
/// `request_hash()`. Borrowed references only, so hashing never copies the
/// payload.
#[derive(Serialize)]
struct CanonicalSummaryRequest<'a> {
    domain: &'static str,
    model_name: &'a str,
    prompt_template_blake3: &'a str,
    source_claims: &'a [SourceClaim],
    #[serde(skip_serializing_if = "Option::is_none")]
    max_output_bytes: Option<usize>,
    #[serde(skip_serializing_if = "Option::is_none")]
    decay_job_id: Option<&'a str>,
}

/// Result of a successful [`SummaryBackend::summarize`] call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SummaryResponse {
    /// The produced summary text. Must be non-empty; the decay path
    /// re-validates this and refuses if the backend echoes an empty
    /// claim.
    pub claim: String,
    /// Token-usage echo from the provider, if any.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub token_usage: Option<TokenUsage>,
    /// The model name the backend says actually produced the response.
    /// The decay path verifies this byte-equals
    /// [`SummaryRequest::model_name`] and refuses on mismatch (so a
    /// silently-routed provider cannot launder an attestation pin).
    pub model_name_echoed: String,
}

/// Errors raised by any [`SummaryBackend`] implementation.
///
/// Each variant displays in a grep-friendly shape (the discriminator
/// appears verbatim in the message) so operator scripts can match on a
/// stable contract.
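///
/// A caller-side sketch of that contract (the `backend` and `request`
/// bindings are assumed), in the shape `cortex_memory::decay` might match:
///
/// ```ignore
/// match backend.summarize(&request) {
///     // Fail-closed default: no upstream was ever contacted.
///     Err(SummaryError::BackendNotConfigured) => { /* refuse the job */ }
///     // Pin refusals carry the offending value for the refusal envelope.
///     Err(SummaryError::ModelNotInAllowlist(model)) => { /* log `model` */ }
///     Err(other) => { /* `other.to_string()` carries a stable discriminator */ }
///     Ok(response) => { /* decay path re-validates `response.claim` */ }
/// }
/// ```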
#[derive(Debug, Error, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum SummaryError {
    /// No backend is wired (or the wired backend has no configured
    /// upstream). This is the [`NoopSummaryBackend`]'s only return shape.
    #[error("summary_backend_not_configured")]
    BackendNotConfigured,
    /// The request's `model_name` pin is not on the backend's local
    /// allowlist.
    #[error("summary_model_not_in_allowlist: {0}")]
    ModelNotInAllowlist(String),
    /// The request's `prompt_template_blake3` pin does not match the
    /// backend's resolved template.
    #[error("summary_prompt_template_mismatch: {0}")]
    PromptTemplateMismatch(String),
    /// The backend's upstream call failed (transport, upstream non-2xx,
    /// timeout, etc.). The message is opaque to the trait but must be
    /// non-empty.
    #[error("summary_call_failed: {0}")]
    CallFailed(String),
    /// The backend's upstream returned a payload that failed local
    /// structural validation (empty claim, byte budget exceeded, ...).
    #[error("summary_output_validation_failed: {0}")]
    OutputValidationFailed(String),
}

/// Pluggable LLM summary backend used by the Phase 4.D decay path.
///
/// Contract:
///
/// - `summarize(request)` takes a deterministic input (`model_name` pin,
///   `prompt_template_blake3` pin, source claims, byte budget) and returns
///   either a summary string plus token usage or a typed error.
/// - Implementations MUST validate the `model_name` pin against their own
///   allowlist BEFORE making any backend call.
/// - Implementations MUST be deterministic given the same input when used
///   in CI / fixture mode (see [`ReplaySummaryBackend`]).
/// - Implementations MUST NOT panic on invalid inputs; the decay runner
///   relies on every refusal returning a typed [`SummaryError`].
///
/// `Send + Sync + Debug` is required so the runtime can hold a
/// `&dyn SummaryBackend` across thread boundaries.
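///
/// A hosted implementation would honour the pin checks like this minimal
/// sketch (`HostedSummaryBackend` and its fields are hypothetical; the
/// upstream call itself is elided):
///
/// ```ignore
/// #[derive(Debug)]
/// struct HostedSummaryBackend {
///     /// Local model allowlist the operator controls.
///     allowed_models: Vec<String>,
///     /// BLAKE3 pin of the template this backend will actually send.
///     resolved_template_blake3: String,
/// }
///
/// impl SummaryBackend for HostedSummaryBackend {
///     fn summarize(&self, request: &SummaryRequest) -> Result<SummaryResponse, SummaryError> {
///         // Pin checks come first: refuse BEFORE any network traffic.
///         if !self.allowed_models.contains(&request.model_name) {
///             return Err(SummaryError::ModelNotInAllowlist(request.model_name.clone()));
///         }
///         if self.resolved_template_blake3 != request.prompt_template_blake3 {
///             return Err(SummaryError::PromptTemplateMismatch(
///                 request.prompt_template_blake3.clone(),
///             ));
///         }
///         // Only now would the provider call happen; transport and timeout
///         // failures surface as `SummaryError::CallFailed`.
///         Err(SummaryError::CallFailed("upstream call elided in this sketch".into()))
///     }
/// }
/// ```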
pub trait SummaryBackend: fmt::Debug + Send + Sync {
    /// Issue a summary call. The implementation owns its own timeout
    /// budget and MUST surface timeouts as
    /// [`SummaryError::CallFailed`].
    fn summarize(&self, request: &SummaryRequest) -> Result<SummaryResponse, SummaryError>;
}

/// Fail-closed default backend.
///
/// Every CLI surface that wires the decay runner today injects this so
/// an LLM-summary job surfaces a typed `BackendNotConfigured` refusal
/// rather than panicking. The pattern mirrors `NoopCalendarClient` in the
/// OTS external sink.
#[derive(Debug, Default, Clone, Copy)]
pub struct NoopSummaryBackend;

impl SummaryBackend for NoopSummaryBackend {
    fn summarize(&self, _request: &SummaryRequest) -> Result<SummaryResponse, SummaryError> {
        Err(SummaryError::BackendNotConfigured)
    }
}

/// Deterministic fixture-backed summary backend for CI and tests.
///
/// Constructed from a `HashMap<String, SummaryResponse>` keyed by the
/// canonical BLAKE3 of the request fields ([`SummaryRequest::request_hash`]).
/// A lookup miss fails closed with [`SummaryError::BackendNotConfigured`]
/// (the same shape as the noop backend), so an unprimed fixture behaves
/// exactly like no backend at all.
///
/// Use [`ReplaySummaryBackend::from_fixture_file`] to load a JSON fixture
/// keyed by `(model_name, prompt_template_blake3, ordered source claims,
/// max_output_bytes, decay_job_id)`.
#[derive(Debug, Clone)]
pub struct ReplaySummaryBackend {
    by_hash: HashMap<String, SummaryResponse>,
}

/// On-disk JSON shape consumed by
/// [`ReplaySummaryBackend::from_fixture_file`]. The fixture is an array
/// of `{ request, response }` entries; the hash key is computed at load
/// time from the request fields.
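///
/// A single-entry fixture document looks like this (values illustrative;
/// field names follow the serde derives on [`SummaryRequest`] and
/// [`SummaryResponse`]):
///
/// ```json
/// {
///   "entries": [
///     {
///       "request": {
///         "model_name": "claude-sonnet-4-7@1",
///         "prompt_template_blake3": "blake3:abcd",
///         "source_claims": ["alpha", "beta"],
///         "max_output_bytes": 2048,
///         "decay_job_id": "dcy_01ARZ3NDEKTSV4RRFFQ69G5FAV"
///       },
///       "response": {
///         "claim": "alpha and beta",
///         "token_usage": { "prompt_tokens": 10, "completion_tokens": 20 },
///         "model_name_echoed": "claude-sonnet-4-7@1"
///       }
///     }
///   ]
/// }
/// ```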
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReplaySummaryFixture {
    /// Entry rows in declaration order. Order is not significant;
    /// duplicates are refused at load time.
    pub entries: Vec<ReplaySummaryFixtureEntry>,
}

/// One row of a [`ReplaySummaryFixture`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReplaySummaryFixtureEntry {
    /// Request the fixture matches against. Hashed at load time.
    pub request: SummaryRequest,
    /// Response the fixture returns.
    pub response: SummaryResponse,
}

impl ReplaySummaryBackend {
    /// Construct a new replay backend from an in-memory request map.
    ///
    /// Duplicate request hashes are refused: the caller has either
    /// supplied two distinct fixtures that hash to the same request key
    /// (a cryptographic collision, implausible) or repeated the same
    /// fixture row by mistake (the far more common case).
    pub fn from_entries(entries: Vec<ReplaySummaryFixtureEntry>) -> Result<Self, SummaryError> {
        let mut by_hash: HashMap<String, SummaryResponse> = HashMap::new();
        for entry in entries {
            let key = entry.request.request_hash();
            if by_hash.insert(key.clone(), entry.response).is_some() {
                return Err(SummaryError::CallFailed(format!(
                    "duplicate replay summary fixture for request_hash={key}"
                )));
            }
        }
        Ok(Self { by_hash })
    }

    /// Load a fixture file from disk and construct the backend.
    ///
    /// The file is a JSON document matching the [`ReplaySummaryFixture`]
    /// shape. Errors map to [`SummaryError::CallFailed`] with a
    /// descriptive message so CI logs stay actionable.
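    ///
    /// Illustrative usage (the fixture path is hypothetical):
    ///
    /// ```ignore
    /// let backend = ReplaySummaryBackend::from_fixture_file(Path::new(
    ///     "fixtures/summary_replay.json",
    /// ))?;
    /// assert!(backend.fixture_count() > 0);
    /// ```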
    pub fn from_fixture_file(path: &Path) -> Result<Self, SummaryError> {
        let raw = fs::read_to_string(path).map_err(|err| {
            SummaryError::CallFailed(format!("fixture `{}` not readable: {err}", path.display()))
        })?;
        let fixture: ReplaySummaryFixture = serde_json::from_str(&raw).map_err(|err| {
            SummaryError::CallFailed(format!("fixture `{}` did not parse: {err}", path.display()))
        })?;
        Self::from_entries(fixture.entries)
    }

    /// Number of fixture entries currently held.
    #[must_use]
    pub fn fixture_count(&self) -> usize {
        self.by_hash.len()
    }
}

impl SummaryBackend for ReplaySummaryBackend {
    fn summarize(&self, request: &SummaryRequest) -> Result<SummaryResponse, SummaryError> {
        let key = request.request_hash();
        match self.by_hash.get(&key) {
            Some(response) => Ok(response.clone()),
            None => Err(SummaryError::BackendNotConfigured),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::TempDir;

    fn sample_request(model: &str, prompt: &str, claims: &[&str]) -> SummaryRequest {
        SummaryRequest {
            model_name: model.into(),
            prompt_template_blake3: prompt.into(),
            source_claims: claims.iter().map(|s| (*s).to_string()).collect(),
            max_output_bytes: Some(2048),
            decay_job_id: Some("dcy_01ARZ3NDEKTSV4RRFFQ69G5FAV".into()),
        }
    }

    fn sample_response(claim: &str, model: &str) -> SummaryResponse {
        SummaryResponse {
            claim: claim.into(),
            token_usage: Some(TokenUsage {
                prompt_tokens: 10,
                completion_tokens: 20,
            }),
            model_name_echoed: model.into(),
        }
    }

    #[test]
    fn request_hash_is_stable_across_calls() {
        let req = sample_request(
            "claude-sonnet-4-7@1",
            "blake3:00000000000000000000000000000000",
            &["alpha", "beta"],
        );
        assert_eq!(req.request_hash(), req.request_hash());
    }

    #[test]
    fn request_hash_changes_with_any_field() {
        let base = sample_request("claude-sonnet-4-7@1", "blake3:0000", &["alpha", "beta"]);
        let mut model_changed = base.clone();
        model_changed.model_name = "other".into();
        assert_ne!(base.request_hash(), model_changed.request_hash());

        let mut claims_reordered = base.clone();
        claims_reordered.source_claims = vec!["beta".into(), "alpha".into()];
        assert_ne!(base.request_hash(), claims_reordered.request_hash());

        let mut prompt_changed = base.clone();
        prompt_changed.prompt_template_blake3 = "blake3:1111".into();
        assert_ne!(base.request_hash(), prompt_changed.request_hash());

        let mut job_changed = base.clone();
        job_changed.decay_job_id = Some("dcy_other".into());
        assert_ne!(base.request_hash(), job_changed.request_hash());
    }

    #[test]
    fn noop_backend_fails_closed() {
        let backend = NoopSummaryBackend;
        let req = sample_request("any-model", "blake3:00", &["one"]);
        let err = backend.summarize(&req).unwrap_err();
        assert_eq!(err, SummaryError::BackendNotConfigured);
        // Display must contain the discriminator token for grep tooling.
        assert!(err.to_string().contains("summary_backend_not_configured"));
    }

    #[test]
    fn replay_backend_round_trips_a_pre_baked_response() {
        let req = sample_request("claude-sonnet-4-7@1", "blake3:abcd", &["alpha", "beta"]);
        let resp = sample_response("alpha and beta", "claude-sonnet-4-7@1");
        let backend = ReplaySummaryBackend::from_entries(vec![ReplaySummaryFixtureEntry {
            request: req.clone(),
            response: resp.clone(),
        }])
        .expect("build replay backend");
        assert_eq!(backend.fixture_count(), 1);

        let got = backend.summarize(&req).expect("hit");
        assert_eq!(got.claim, resp.claim);
        assert_eq!(got.model_name_echoed, resp.model_name_echoed);
    }

    #[test]
    fn replay_backend_miss_returns_backend_not_configured() {
        let req = sample_request("m1", "blake3:aaaa", &["alpha"]);
        let resp = sample_response("summary", "m1");
        let backend = ReplaySummaryBackend::from_entries(vec![ReplaySummaryFixtureEntry {
            request: req.clone(),
            response: resp,
        }])
        .expect("build replay backend");

        let other = sample_request("m1", "blake3:aaaa", &["never seen"]);
        let err = backend.summarize(&other).unwrap_err();
        assert_eq!(err, SummaryError::BackendNotConfigured);
    }

    #[test]
    fn replay_backend_refuses_duplicate_fixture_keys() {
        let req = sample_request("m1", "blake3:aaaa", &["alpha"]);
        let resp = sample_response("s1", "m1");
        let err = ReplaySummaryBackend::from_entries(vec![
            ReplaySummaryFixtureEntry {
                request: req.clone(),
                response: resp.clone(),
            },
            ReplaySummaryFixtureEntry {
                request: req,
                response: resp,
            },
        ])
        .unwrap_err();
        match err {
            SummaryError::CallFailed(msg) => {
                assert!(msg.contains("duplicate"), "got {msg}");
            }
            other => panic!("expected CallFailed, got {other:?}"),
        }
    }

    #[test]
    fn replay_backend_loads_from_disk_fixture_file() {
        let tmp = TempDir::new().unwrap();
        let path = tmp.path().join("fixture.json");

        let fixture = ReplaySummaryFixture {
            entries: vec![ReplaySummaryFixtureEntry {
                request: sample_request("m1", "blake3:abcd", &["alpha", "beta"]),
                response: sample_response("summary text", "m1"),
            }],
        };
        let mut f = fs::File::create(&path).unwrap();
        f.write_all(&serde_json::to_vec_pretty(&fixture).unwrap())
            .unwrap();
        drop(f);

        let backend = ReplaySummaryBackend::from_fixture_file(&path).expect("load fixture");
        assert_eq!(backend.fixture_count(), 1);
        let got = backend
            .summarize(&sample_request("m1", "blake3:abcd", &["alpha", "beta"]))
            .expect("hit");
        assert_eq!(got.claim, "summary text");
    }

    #[test]
    fn replay_backend_load_missing_file_returns_call_failed() {
        let err = ReplaySummaryBackend::from_fixture_file(Path::new("this/does/not/exist.json"))
            .unwrap_err();
        match err {
            SummaryError::CallFailed(msg) => {
                assert!(msg.contains("not readable"), "got {msg}");
            }
            other => panic!("expected CallFailed, got {other:?}"),
        }
    }

    #[test]
    fn summary_error_display_carries_discriminator_tokens() {
        assert!(SummaryError::BackendNotConfigured
            .to_string()
            .contains("summary_backend_not_configured"));
        assert!(SummaryError::ModelNotInAllowlist("m".into())
            .to_string()
            .contains("summary_model_not_in_allowlist"));
        assert!(SummaryError::PromptTemplateMismatch("p".into())
            .to_string()
            .contains("summary_prompt_template_mismatch"));
        assert!(SummaryError::CallFailed("c".into())
            .to_string()
            .contains("summary_call_failed"));
        assert!(SummaryError::OutputValidationFailed("o".into())
            .to_string()
            .contains("summary_output_validation_failed"));
    }
}
473}