cortex_llm/
claude_summary.rs1use crate::adapter::{blake3_hex, LlmAdapter, LlmMessage, LlmRequest, LlmRole};
41use crate::claude_http::ClaudeHttpAdapter;
42use crate::sensitivity::MaxSensitivity;
43use crate::summary::{SummaryBackend, SummaryError, SummaryRequest, SummaryResponse};
44use crate::TokenUsage;
45
/// Prompt sent to the model by `summarize`; the `{claims}` placeholder is
/// substituted with the bulleted source claims.
///
/// The exact bytes of this string are hashed by
/// `canonical_prompt_template_blake3`, so changing even one character changes
/// the pin that requests must present.
const SUMMARY_PROMPT_TEMPLATE: &str = "Summarize the following memory claims into a single concise claim. Claims:\n{claims}\n\nSummary:";

/// Byte budget a backend built by `ClaudeSummaryBackend::new` starts with.
/// `summarize` never lets a per-request budget exceed the backend's value.
const DEFAULT_MAX_OUTPUT_BYTES: usize = 4096;

/// Value placed in `LlmRequest::max_tokens` for every summary call.
const SUMMARY_MAX_TOKENS: u32 = 1024;

/// Value placed in `LlmRequest::timeout_ms` for every summary call
/// (milliseconds, going by the field name).
const SUMMARY_TIMEOUT_MS: u64 = 60_000;
64
65#[must_use]
70pub fn canonical_prompt_template_blake3() -> String {
71 format!("blake3:{}", blake3_hex(SUMMARY_PROMPT_TEMPLATE.as_bytes()))
72}
73
74#[derive(Debug, Clone)]
79pub struct ClaudeSummaryBackend {
80 adapter: ClaudeHttpAdapter,
81 max_output_bytes: usize,
82}
83
84impl ClaudeSummaryBackend {
85 pub fn new(
92 model: String,
93 max_sensitivity: Option<MaxSensitivity>,
94 ) -> Result<Self, SummaryError> {
95 let adapter = ClaudeHttpAdapter::new(model, max_sensitivity).map_err(|e| {
96 let msg = e.to_string();
97 if msg.contains(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV) {
101 SummaryError::BackendNotConfigured
102 } else {
103 SummaryError::CallFailed(msg)
104 }
105 })?;
106 Ok(Self {
107 adapter,
108 max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES,
109 })
110 }
111}
112
113impl SummaryBackend for ClaudeSummaryBackend {
114 fn summarize(&self, request: &SummaryRequest) -> Result<SummaryResponse, SummaryError> {
115 let expected_blake3 = canonical_prompt_template_blake3();
117 if request.prompt_template_blake3 != expected_blake3 {
118 return Err(SummaryError::PromptTemplateMismatch(format!(
119 "request pin `{}` != backend template `{}`",
120 request.prompt_template_blake3, expected_blake3,
121 )));
122 }
123
124 let claims_joined = request
126 .source_claims
127 .iter()
128 .map(|c| format!("- {c}"))
129 .collect::<Vec<_>>()
130 .join("\n");
131 let prompt_text = SUMMARY_PROMPT_TEMPLATE.replace("{claims}", &claims_joined);
132
133 let byte_budget = request
135 .max_output_bytes
136 .unwrap_or(self.max_output_bytes)
137 .min(self.max_output_bytes);
138
139 let llm_req = LlmRequest {
141 model: request.model_name.clone(),
142 system: String::new(),
143 messages: vec![LlmMessage {
144 role: LlmRole::User,
145 content: prompt_text,
146 }],
147 temperature: 0.0,
148 max_tokens: SUMMARY_MAX_TOKENS,
149 json_schema: None,
150 timeout_ms: SUMMARY_TIMEOUT_MS,
151 };
152
153 let rt = tokio::runtime::Builder::new_current_thread()
155 .enable_all()
156 .build()
157 .map_err(|e| {
158 SummaryError::CallFailed(format!("tokio runtime construction failed: {e}"))
159 })?;
160
161 let llm_resp = rt
162 .block_on(self.adapter.complete(llm_req))
163 .map_err(|e| SummaryError::CallFailed(e.to_string()))?;
164
165 if llm_resp.model != request.model_name {
167 return Err(SummaryError::OutputValidationFailed(format!(
168 "provider echoed model `{}` but request pinned `{}`",
169 llm_resp.model, request.model_name,
170 )));
171 }
172
173 if llm_resp.text.is_empty() {
175 return Err(SummaryError::OutputValidationFailed(
176 "provider returned an empty summary".to_string(),
177 ));
178 }
179 if llm_resp.text.len() > byte_budget {
180 return Err(SummaryError::OutputValidationFailed(format!(
181 "summary byte length {} exceeds budget {}",
182 llm_resp.text.len(),
183 byte_budget,
184 )));
185 }
186
187 let token_usage = llm_resp.usage.map(|u| TokenUsage {
188 prompt_tokens: u.prompt_tokens,
189 completion_tokens: u.completion_tokens,
190 });
191
192 Ok(SummaryResponse {
193 claim: llm_resp.text,
194 token_usage,
195 model_name_echoed: llm_resp.model,
196 })
197 }
198}
199
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsString;
    use std::io::{BufRead, BufReader, Read, Write};
    use std::net::TcpListener;
    use std::sync::{Mutex, MutexGuard, PoisonError};

    /// Serializes the tests in this module that touch the API-key variable.
    ///
    /// Environment variables are process-wide and the test harness runs tests
    /// on several threads, so unsynchronized `set_var` / `remove_var` calls in
    /// different tests can interleave and make the outcome order-dependent.
    static API_KEY_ENV_LOCK: Mutex<()> = Mutex::new(());

    /// RAII guard: while alive, the API-key variable holds the requested state
    /// and no other guarded test can change it; on drop the previous value is
    /// put back (also when the test panics).
    struct ScopedApiKey {
        previous: Option<OsString>,
        // Released only after `Drop::drop` has restored the variable.
        _serialized: MutexGuard<'static, ()>,
    }

    impl ScopedApiKey {
        /// Sets the variable to `value` for the lifetime of the guard.
        fn set(value: &str) -> Self {
            Self::install(Some(value))
        }

        /// Removes the variable for the lifetime of the guard.
        fn unset() -> Self {
            Self::install(None)
        }

        fn install(value: Option<&str>) -> Self {
            // A poisoned lock only means another test panicked; the guarded
            // state is `()`, so it is always safe to continue.
            let serialized = API_KEY_ENV_LOCK
                .lock()
                .unwrap_or_else(PoisonError::into_inner);
            let previous = std::env::var_os(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV);
            match value {
                Some(v) => std::env::set_var(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV, v),
                None => std::env::remove_var(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV),
            }
            Self {
                previous,
                _serialized: serialized,
            }
        }
    }

    impl Drop for ScopedApiKey {
        fn drop(&mut self) {
            match self.previous.take() {
                Some(v) => std::env::set_var(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV, v),
                None => std::env::remove_var(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV),
            }
        }
    }

    /// Without the API-key variable, construction must report
    /// `BackendNotConfigured`.
    #[test]
    fn claude_summary_backend_fails_without_api_key() {
        let result = {
            let _env = ScopedApiKey::unset();
            ClaudeSummaryBackend::new("claude-3-5-sonnet-20241022".into(), None)
        };

        match result {
            Err(SummaryError::BackendNotConfigured) => {}
            other => panic!("expected BackendNotConfigured, got {other:?}"),
        }
    }

    /// End-to-end `summarize` against a one-shot local HTTP server that
    /// returns a canned Messages-API-shaped JSON body.
    #[test]
    fn claude_summary_backend_summarizes_via_mock_adapter() {
        let listener = TcpListener::bind("127.0.0.1:0").expect("bind mock server");
        let addr = listener.local_addr().expect("local addr");

        let summary_text = "Alpha and beta are combined into a single claim.";
        let response_body = format!(
            r#"{{"id":"msg_01","type":"message","role":"assistant","content":[{{"type":"text","text":"{summary_text}"}}],"model":"claude-3-5-sonnet-20241022","stop_reason":"end_turn","usage":{{"input_tokens":30,"output_tokens":12}}}}"#
        );
        let http_response = format!(
            "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{}",
            response_body.len(),
            response_body
        );

        let server_thread = std::thread::spawn(move || {
            let (mut stream, _) = listener.accept().expect("accept");
            let mut reader = BufReader::new(stream.try_clone().expect("clone stream"));

            // Consume the request head, remembering the declared body length.
            let mut declared_len: Option<usize> = None;
            let mut line = String::new();
            loop {
                line.clear();
                reader.read_line(&mut line).expect("read line");
                if line == "\r\n" || line.is_empty() {
                    break;
                }
                if let Some((name, value)) = line.split_once(':') {
                    if name.trim().eq_ignore_ascii_case("content-length") {
                        declared_len = value.trim().parse().ok();
                    }
                }
            }

            // Drain the whole request body before answering so the client is
            // never interrupted mid-write.
            match declared_len {
                Some(len) => {
                    let mut body = vec![0u8; len];
                    reader.read_exact(&mut body).expect("read request body");
                }
                None => {
                    // No usable Content-Length: fall back to one best-effort read.
                    let mut buf = vec![0u8; 4096];
                    let _ = reader.read(&mut buf);
                }
            }

            stream
                .write_all(http_response.as_bytes())
                .expect("write response");
        });

        // Held until the end of the test: the adapter may consult the variable
        // at construction time or later, and the guard restores it afterwards.
        let _env = ScopedApiKey::set("test-key-mock");
        let backend = ClaudeSummaryBackend {
            adapter: ClaudeHttpAdapter::new_with_base_url(
                "claude-3-5-sonnet-20241022".into(),
                format!("http://{addr}"),
                None,
            )
            .expect("build adapter"),
            max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES,
        };

        let request = SummaryRequest {
            model_name: "claude-3-5-sonnet-20241022".into(),
            prompt_template_blake3: canonical_prompt_template_blake3(),
            source_claims: vec!["alpha".to_string(), "beta".to_string()],
            max_output_bytes: None,
            decay_job_id: Some("dcy_test".into()),
        };

        let resp = backend.summarize(&request).expect("summarize");

        server_thread.join().expect("server thread joined");

        assert_eq!(resp.claim, summary_text);
        assert_eq!(resp.model_name_echoed, "claude-3-5-sonnet-20241022");
        assert!(resp.token_usage.is_some());
    }
}