use crate::adapter::{blake3_hex, LlmAdapter, LlmMessage, LlmRequest, LlmRole};
use crate::claude_http::ClaudeHttpAdapter;
use crate::sensitivity::MaxSensitivity;
use crate::summary::{SummaryBackend, SummaryError, SummaryRequest, SummaryResponse};
use crate::TokenUsage;
/// Prompt sent to the model; the `{claims}` placeholder is replaced with a
/// bulleted list of source claims before dispatch.
const SUMMARY_PROMPT_TEMPLATE: &str =
"Summarize the following memory claims into a single concise claim. Claims:\n{claims}\n\nSummary:";
/// Hard ceiling (in bytes) on any summary this backend will accept.
const DEFAULT_MAX_OUTPUT_BYTES: usize = 4096;
/// Completion-token cap forwarded to the provider on every summary call.
const SUMMARY_MAX_TOKENS: u32 = 1024;
/// Per-call timeout (milliseconds) forwarded to the provider request.
const SUMMARY_TIMEOUT_MS: u64 = 60_000;
/// Returns the pinned digest of this backend's prompt template, formatted as
/// `blake3:<hex>`. Requests must carry this exact pin to be accepted.
#[must_use]
pub fn canonical_prompt_template_blake3() -> String {
    let digest = blake3_hex(SUMMARY_PROMPT_TEMPLATE.as_bytes());
    let mut pinned = String::with_capacity("blake3:".len() + digest.len());
    pinned.push_str("blake3:");
    pinned.push_str(&digest);
    pinned
}
/// `SummaryBackend` implementation that calls Anthropic's Claude over HTTP
/// via [`ClaudeHttpAdapter`].
#[derive(Debug, Clone)]
pub struct ClaudeSummaryBackend {
    // Configured HTTP adapter; owns model choice and sensitivity limits.
    adapter: ClaudeHttpAdapter,
    // Upper bound on accepted summary size; defaults to DEFAULT_MAX_OUTPUT_BYTES.
    max_output_bytes: usize,
}
impl ClaudeSummaryBackend {
    /// Builds a backend for `model`, honoring an optional sensitivity cap.
    ///
    /// # Errors
    /// Returns [`SummaryError::BackendNotConfigured`] when adapter
    /// construction fails because the Anthropic API key env var is missing
    /// (detected by inspecting the error text), and
    /// [`SummaryError::CallFailed`] for any other construction failure.
    pub fn new(
        model: String,
        max_sensitivity: Option<MaxSensitivity>,
    ) -> Result<Self, SummaryError> {
        match ClaudeHttpAdapter::new(model, max_sensitivity) {
            Ok(adapter) => Ok(Self {
                adapter,
                max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES,
            }),
            Err(e) => {
                let msg = e.to_string();
                // Missing-key errors mention the env var name; treat those as
                // "not configured" rather than a hard call failure.
                if msg.contains(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV) {
                    Err(SummaryError::BackendNotConfigured)
                } else {
                    Err(SummaryError::CallFailed(msg))
                }
            }
        }
    }
}
impl SummaryBackend for ClaudeSummaryBackend {
    /// Renders the pinned prompt, performs one blocking Claude call, and
    /// validates the provider output against the request's constraints.
    ///
    /// # Errors
    /// - [`SummaryError::PromptTemplateMismatch`] when the request's template
    ///   pin differs from this backend's canonical pin.
    /// - [`SummaryError::CallFailed`] on runtime construction or provider
    ///   failure.
    /// - [`SummaryError::OutputValidationFailed`] on model-echo mismatch,
    ///   empty text, or byte-budget overflow.
    fn summarize(&self, request: &SummaryRequest) -> Result<SummaryResponse, SummaryError> {
        // Refuse to run against any template other than the one compiled in.
        let expected_blake3 = canonical_prompt_template_blake3();
        if expected_blake3 != request.prompt_template_blake3 {
            return Err(SummaryError::PromptTemplateMismatch(format!(
                "request pin `{}` != backend template `{}`",
                request.prompt_template_blake3, expected_blake3,
            )));
        }

        // Render the claims as a bulleted list and splice into the template.
        let mut bullets: Vec<String> = Vec::with_capacity(request.source_claims.len());
        for claim in &request.source_claims {
            bullets.push(format!("- {claim}"));
        }
        let prompt_text = SUMMARY_PROMPT_TEMPLATE.replace("{claims}", &bullets.join("\n"));

        // The caller may tighten — but never exceed — the backend ceiling.
        let byte_budget = match request.max_output_bytes {
            Some(requested) => requested.min(self.max_output_bytes),
            None => self.max_output_bytes,
        };

        let llm_req = LlmRequest {
            model: request.model_name.clone(),
            system: String::new(),
            messages: vec![LlmMessage {
                role: LlmRole::User,
                content: prompt_text,
            }],
            temperature: 0.0,
            max_tokens: SUMMARY_MAX_TOKENS,
            json_schema: None,
            timeout_ms: SUMMARY_TIMEOUT_MS,
        };

        // The trait is synchronous, so drive the async adapter on a throwaway
        // single-threaded runtime. NOTE(review): `block_on` panics if invoked
        // from inside an existing tokio runtime — confirm callers are
        // sync-only contexts.
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .map_err(|e| {
                SummaryError::CallFailed(format!("tokio runtime construction failed: {e}"))
            })?;
        let llm_resp = match runtime.block_on(self.adapter.complete(llm_req)) {
            Ok(resp) => resp,
            Err(e) => return Err(SummaryError::CallFailed(e.to_string())),
        };

        // Validate the provider echo before trusting the payload.
        if request.model_name != llm_resp.model {
            return Err(SummaryError::OutputValidationFailed(format!(
                "provider echoed model `{}` but request pinned `{}`",
                llm_resp.model, request.model_name,
            )));
        }
        if llm_resp.text.is_empty() {
            return Err(SummaryError::OutputValidationFailed(
                "provider returned an empty summary".to_string(),
            ));
        }
        // Budget is enforced on the UTF-8 byte length, not character count.
        if llm_resp.text.len() > byte_budget {
            return Err(SummaryError::OutputValidationFailed(format!(
                "summary byte length {} exceeds budget {}",
                llm_resp.text.len(),
                byte_budget,
            )));
        }

        let token_usage = llm_resp.usage.map(|u| TokenUsage {
            prompt_tokens: u.prompt_tokens,
            completion_tokens: u.completion_tokens,
        });
        Ok(SummaryResponse {
            claim: llm_resp.text,
            token_usage,
            model_name_echoed: llm_resp.model,
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{BufRead, BufReader, Write};
    use std::net::TcpListener;

    /// A missing API key must surface as `BackendNotConfigured`, not a
    /// generic `CallFailed`.
    ///
    /// NOTE(review): this test and the mock-adapter test below both mutate
    /// the same process-wide API-key env var; under the default parallel test
    /// runner they can race. Confirm these are serialized (e.g. run with
    /// `--test-threads=1` or guard with a shared lock).
    #[test]
    fn claude_summary_backend_fails_without_api_key() {
        // Stash any real key so the developer's environment is restored.
        let saved = std::env::var(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV).ok();
        std::env::remove_var(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV);
        let result = ClaudeSummaryBackend::new("claude-3-5-sonnet-20241022".into(), None);
        if let Some(key) = saved {
            std::env::set_var(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV, key);
        }
        match result {
            Err(SummaryError::BackendNotConfigured) => {}
            other => panic!("expected BackendNotConfigured, got {other:?}"),
        }
    }

    /// Happy path against a one-shot mock HTTP server: the backend should
    /// return the mocked summary text, echo the pinned model, and carry the
    /// token usage through.
    #[test]
    fn claude_summary_backend_summarizes_via_mock_adapter() {
        // Port 0 → OS-assigned ephemeral port, so parallel runs don't collide.
        let listener = TcpListener::bind("127.0.0.1:0").expect("bind mock server");
        let addr = listener.local_addr().expect("local addr");
        let summary_text = "Alpha and beta are combined into a single claim.";
        // Minimal Anthropic-Messages-style success payload.
        let response_body = format!(
            r#"{{"id":"msg_01","type":"message","role":"assistant","content":[{{"type":"text","text":"{summary_text}"}}],"model":"claude-3-5-sonnet-20241022","stop_reason":"end_turn","usage":{{"input_tokens":30,"output_tokens":12}}}}"#
        );
        let http_response = format!(
            "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{}",
            response_body.len(),
            response_body
        );
        // One-shot server: accept a single connection, drain the request
        // headers, best-effort read the body, then write the canned response.
        let server_thread = std::thread::spawn(move || {
            let (mut stream, _) = listener.accept().expect("accept");
            let mut reader = BufReader::new(stream.try_clone().expect("clone stream"));
            let mut line = String::new();
            loop {
                line.clear();
                reader.read_line(&mut line).expect("read line");
                // Bare CRLF terminates the header section; empty read is EOF.
                if line == "\r\n" || line.is_empty() {
                    break;
                }
            }
            // Best-effort body drain; result deliberately ignored because the
            // mock never parses the request payload.
            let mut buf = vec![0u8; 4096];
            let _ = std::io::Read::read(&mut reader, &mut buf);
            stream
                .write_all(http_response.as_bytes())
                .expect("write response");
        });
        // NOTE(review): key is set but never restored, leaking into any later
        // test in this process — consider the save/restore pattern used above.
        std::env::set_var(ClaudeHttpAdapter::ANTHROPIC_API_KEY_ENV, "test-key-mock");
        let backend = ClaudeSummaryBackend {
            adapter: ClaudeHttpAdapter::new_with_base_url(
                "claude-3-5-sonnet-20241022".into(),
                format!("http://{addr}"),
                None,
            )
            .expect("build adapter"),
            max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES,
        };
        let request = SummaryRequest {
            model_name: "claude-3-5-sonnet-20241022".into(),
            prompt_template_blake3: canonical_prompt_template_blake3(),
            source_claims: vec!["alpha".to_string(), "beta".to_string()],
            max_output_bytes: None,
            decay_job_id: Some("dcy_test".into()),
        };
        let resp = backend.summarize(&request).expect("summarize");
        server_thread.join().expect("server thread joined");
        assert_eq!(resp.claim, summary_text);
        assert_eq!(resp.model_name_echoed, "claude-3-5-sonnet-20241022");
        assert!(resp.token_usage.is_some());
    }
}