Skip to main content

cortex_llm/
claude_summary.rs

1//! [`ClaudeSummaryBackend`] — Anthropic-backed implementation of
2//! [`SummaryBackend`] for the Phase 4.D decay LLM-summary path.
3//!
4//! Uses [`ClaudeHttpAdapter`] for the actual HTTP call (ureq inside
5//! `spawn_blocking`) and bridges the synchronous [`SummaryBackend`] contract
6//! by spinning up a `new_current_thread` Tokio runtime for each call. The
7//! decay runner is called from synchronous transactional context so the
8//! runtime-per-call pattern is correct here.
9//!
10//! ## Construction
11//!
12//! Construction requires `CORTEX_CLAUDE_API_KEY` to be set. Absence returns
13//! [`SummaryError::BackendNotConfigured`] immediately so the caller can
14//! decide whether to fall back to [`NoopSummaryBackend`] or surface the error.
15//!
16//! ## Prompt template
17//!
18//! The prompt template used for every call is:
19//!
20//! ```text
21//! Summarize the following memory claims into a single concise claim. Claims:
22//! - <claim 1>
23//! - <claim 2>
24//! ...
25//!
26//! Summary:
27//! ```
28//!
29//! The BLAKE3 of this template is checked against
30//! [`SummaryRequest::prompt_template_blake3`] before any call is made.
31//!
32//! ## Output validation
33//!
34//! After a successful call the backend checks:
35//! 1. The response text is non-empty.
36//! 2. The byte length does not exceed the effective byte budget (4096 by
37//!    default; `SummaryRequest::max_output_bytes` may lower it but never raise it).
38//! 3. The model name echoed by Anthropic byte-equals `request.model_name`.
39
40use crate::adapter::{blake3_hex, LlmAdapter, LlmMessage, LlmRequest, LlmRole};
41use crate::claude_http::ClaudeHttpAdapter;
42use crate::sensitivity::MaxSensitivity;
43use crate::summary::{SummaryBackend, SummaryError, SummaryRequest, SummaryResponse};
44use crate::TokenUsage;
45
/// Template the backend fills in and sends to Claude on every summary call.
///
/// `{claims}` is the placeholder that `summarize` replaces with the bulleted
/// source claims. [`canonical_prompt_template_blake3`] hashes these exact
/// UTF-8 bytes, so any edit to this string changes the
/// `prompt_template_blake3` pin that requests must carry.
const SUMMARY_PROMPT_TEMPLATE: &str =
    "Summarize the following memory claims into a single concise claim. Claims:\n{claims}\n\nSummary:";

/// Byte ceiling applied to the returned summary text when the request does
/// not ask for a tighter one.
const DEFAULT_MAX_OUTPUT_BYTES: usize = 4 * 1024;

/// `max_tokens` value placed on each summary request. Deliberately roomy:
/// the size that actually matters is enforced in bytes on the response.
const SUMMARY_MAX_TOKENS: u32 = 1_024;

/// Timeout, in milliseconds, placed on each summary request.
const SUMMARY_TIMEOUT_MS: u64 = 60 * 1_000;
64
65/// Returns the canonical BLAKE3 digest of [`SUMMARY_PROMPT_TEMPLATE`].
66///
67/// Use this to generate the correct `prompt_template_blake3` pin for operator
68/// attestation envelopes that target `ClaudeSummaryBackend`.
69#[must_use]
70pub fn canonical_prompt_template_blake3() -> String {
71    format!("blake3:{}", blake3_hex(SUMMARY_PROMPT_TEMPLATE.as_bytes()))
72}
73
74/// Anthropic-backed [`SummaryBackend`] for the Phase 4.D decay path.
75///
76/// See the module documentation for construction, prompt shape, and output
77/// validation rules.
78#[derive(Debug, Clone)]
79pub struct ClaudeSummaryBackend {
80    adapter: ClaudeHttpAdapter,
81    max_output_bytes: usize,
82}
83
84impl ClaudeSummaryBackend {
85    /// Construct a `ClaudeSummaryBackend` for `model` with `max_sensitivity`.
86    ///
87    /// Returns [`SummaryError::BackendNotConfigured`] when
88    /// `CORTEX_CLAUDE_API_KEY` is absent or empty. Returns
89    /// [`SummaryError::CallFailed`] for model validation errors (empty,
90    /// contains `"latest"`, etc.).
91    pub fn new(
92        model: String,
93        max_sensitivity: Option<MaxSensitivity>,
94    ) -> Result<Self, SummaryError> {
95        let adapter = ClaudeHttpAdapter::new(model, max_sensitivity).map_err(|e| {
96            let msg = e.to_string();
97            // Distinguish missing API key (BackendNotConfigured, recoverable
98            // by wiring the env var) from model-shape errors (CallFailed,
99            // operator must fix the model string).
100            if msg.contains(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV) {
101                SummaryError::BackendNotConfigured
102            } else {
103                SummaryError::CallFailed(msg)
104            }
105        })?;
106        Ok(Self {
107            adapter,
108            max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES,
109        })
110    }
111}
112
113impl SummaryBackend for ClaudeSummaryBackend {
114    fn summarize(&self, request: &SummaryRequest) -> Result<SummaryResponse, SummaryError> {
115        // --- Prompt template pin check ---
116        let expected_blake3 = canonical_prompt_template_blake3();
117        if request.prompt_template_blake3 != expected_blake3 {
118            return Err(SummaryError::PromptTemplateMismatch(format!(
119                "request pin `{}` != backend template `{}`",
120                request.prompt_template_blake3, expected_blake3,
121            )));
122        }
123
124        // --- Build the prompt from source claims ---
125        let claims_joined = request
126            .source_claims
127            .iter()
128            .map(|c| format!("- {c}"))
129            .collect::<Vec<_>>()
130            .join("\n");
131        let prompt_text = SUMMARY_PROMPT_TEMPLATE.replace("{claims}", &claims_joined);
132
133        // --- Determine effective byte budget ---
134        let byte_budget = request
135            .max_output_bytes
136            .unwrap_or(self.max_output_bytes)
137            .min(self.max_output_bytes);
138
139        // --- Construct the LlmRequest ---
140        let llm_req = LlmRequest {
141            model: request.model_name.clone(),
142            system: String::new(),
143            messages: vec![LlmMessage {
144                role: LlmRole::User,
145                content: prompt_text,
146            }],
147            temperature: 0.0,
148            max_tokens: SUMMARY_MAX_TOKENS,
149            json_schema: None,
150            timeout_ms: SUMMARY_TIMEOUT_MS,
151        };
152
153        // --- Drive the async adapter synchronously ---
154        let rt = tokio::runtime::Builder::new_current_thread()
155            .enable_all()
156            .build()
157            .map_err(|e| {
158                SummaryError::CallFailed(format!("tokio runtime construction failed: {e}"))
159            })?;
160
161        let llm_resp = rt
162            .block_on(self.adapter.complete(llm_req))
163            .map_err(|e| SummaryError::CallFailed(e.to_string()))?;
164
165        // --- Model name echo check ---
166        if llm_resp.model != request.model_name {
167            return Err(SummaryError::OutputValidationFailed(format!(
168                "provider echoed model `{}` but request pinned `{}`",
169                llm_resp.model, request.model_name,
170            )));
171        }
172
173        // --- Output validation ---
174        if llm_resp.text.is_empty() {
175            return Err(SummaryError::OutputValidationFailed(
176                "provider returned an empty summary".to_string(),
177            ));
178        }
179        if llm_resp.text.len() > byte_budget {
180            return Err(SummaryError::OutputValidationFailed(format!(
181                "summary byte length {} exceeds budget {}",
182                llm_resp.text.len(),
183                byte_budget,
184            )));
185        }
186
187        let token_usage = llm_resp.usage.map(|u| TokenUsage {
188            prompt_tokens: u.prompt_tokens,
189            completion_tokens: u.completion_tokens,
190        });
191
192        Ok(SummaryResponse {
193            claim: llm_resp.text,
194            token_usage,
195            model_name_echoed: llm_resp.model,
196        })
197    }
198}
199
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsString;
    use std::io::{BufRead, BufReader, Read, Write};
    use std::net::TcpListener;
    use std::sync::{Mutex, MutexGuard, PoisonError};

    /// Serializes the tests in this module that touch the API-key env var.
    ///
    /// Environment variables are process-global and the test harness runs
    /// tests on parallel threads, so unsynchronized set/remove calls race.
    /// NOTE(review): this only covers tests in this module; tests elsewhere
    /// in the crate that mutate the same variable would need to share a lock.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// RAII guard: takes [`ENV_LOCK`], overrides the API-key env var, and
    /// restores the previous state on drop (including when a test panics).
    struct ApiKeyEnvGuard {
        saved: Option<OsString>,
        // Declared after `saved` and released only once `Drop::drop` has
        // restored the variable, so other tests never see the override.
        _lock: MutexGuard<'static, ()>,
    }

    impl ApiKeyEnvGuard {
        /// Set the variable to `value`, or remove it when `value` is `None`.
        fn install(value: Option<&str>) -> Self {
            // A poisoned lock only means another test panicked while holding
            // it; the guard's own Drop already restored the env, so proceed.
            let lock = ENV_LOCK.lock().unwrap_or_else(PoisonError::into_inner);
            let saved = std::env::var_os(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV);
            match value {
                Some(v) => std::env::set_var(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV, v),
                None => std::env::remove_var(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV),
            }
            Self { saved, _lock: lock }
        }
    }

    impl Drop for ApiKeyEnvGuard {
        fn drop(&mut self) {
            match self.saved.take() {
                Some(previous) => {
                    std::env::set_var(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV, previous);
                }
                None => std::env::remove_var(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV),
            }
        }
    }

    /// Verify that construction fails closed with `BackendNotConfigured` when
    /// the API-key env var is absent.
    #[test]
    fn claude_summary_backend_fails_without_api_key() {
        // Unset the variable for the duration of this test; the guard puts
        // the previous value back when it goes out of scope.
        let _env = ApiKeyEnvGuard::install(None);

        let result = ClaudeSummaryBackend::new("claude-3-5-sonnet-20241022".into(), None);

        match result {
            Err(SummaryError::BackendNotConfigured) => {}
            other => panic!("expected BackendNotConfigured, got {other:?}"),
        }
    }

    /// Verify end-to-end summarise via a mock TCP server that returns a minimal
    /// Anthropic Messages API response.
    #[test]
    fn claude_summary_backend_summarizes_via_mock_adapter() {
        // Hold the env override for the whole test: it is not visible from
        // here whether the adapter reads the key at construction or per call.
        let _env = ApiKeyEnvGuard::install(Some("test-key-mock"));

        // Bind on loopback before spawning the server thread so the port is
        // known before the client starts.
        let listener = TcpListener::bind("127.0.0.1:0").expect("bind mock server");
        let addr = listener.local_addr().expect("local addr");

        // Minimal Anthropic response body.
        let summary_text = "Alpha and beta are combined into a single claim.";
        let response_body = format!(
            r#"{{"id":"msg_01","type":"message","role":"assistant","content":[{{"type":"text","text":"{summary_text}"}}],"model":"claude-3-5-sonnet-20241022","stop_reason":"end_turn","usage":{{"input_tokens":30,"output_tokens":12}}}}"#
        );
        let http_response = format!(
            "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{}",
            response_body.len(),
            response_body
        );

        // Spawn a single-request mock server.
        let server_thread = std::thread::spawn(move || {
            let (mut stream, _) = listener.accept().expect("accept");
            let mut reader = BufReader::new(stream.try_clone().expect("clone stream"));

            // Drain the request headers, remembering Content-Length if sent.
            let mut content_length: Option<usize> = None;
            let mut line = String::new();
            loop {
                line.clear();
                let read = reader.read_line(&mut line).expect("read line");
                if read == 0 || line == "\r\n" {
                    break;
                }
                if let Some((name, value)) = line.split_once(':') {
                    if name.trim().eq_ignore_ascii_case("content-length") {
                        content_length =
                            Some(value.trim().parse().expect("numeric Content-Length"));
                    }
                }
            }

            // Consume the request body before answering so no unread bytes
            // are left on the socket when it is closed.
            match content_length {
                Some(len) => {
                    let mut body = vec![0u8; len];
                    reader.read_exact(&mut body).expect("read request body");
                }
                None => {
                    // No Content-Length header: fall back to a single
                    // best-effort read of up to 4096 bytes.
                    let mut buf = vec![0u8; 4096];
                    let _ = reader.read(&mut buf);
                }
            }

            stream
                .write_all(http_response.as_bytes())
                .expect("write response");
        });

        // Construct the backend pointed at our mock server.
        let backend = ClaudeSummaryBackend {
            adapter: ClaudeHttpAdapter::new_with_base_url(
                "claude-3-5-sonnet-20241022".into(),
                format!("http://{addr}"),
                None,
            )
            .expect("build adapter"),
            max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES,
        };

        let request = SummaryRequest {
            model_name: "claude-3-5-sonnet-20241022".into(),
            prompt_template_blake3: canonical_prompt_template_blake3(),
            source_claims: vec!["alpha".to_string(), "beta".to_string()],
            max_output_bytes: None,
            decay_job_id: Some("dcy_test".into()),
        };

        let resp = backend.summarize(&request).expect("summarize");

        server_thread.join().expect("server thread joined");

        assert_eq!(resp.claim, summary_text);
        assert_eq!(resp.model_name_echoed, "claude-3-5-sonnet-20241022");
        assert!(resp.token_usage.is_some());
    }
}