1use crate::adapter::{blake3_hex, LlmAdapter, LlmMessage, LlmRequest, LlmRole};
47use crate::ollama::OllamaConfig;
48use crate::ollama_http::OllamaHttpAdapter;
49use crate::summary::{SummaryBackend, SummaryError, SummaryRequest, SummaryResponse};
50use crate::TokenUsage;
51
/// Prompt sent to the model for every summary. The `{events}` placeholder is
/// replaced with the bulleted list of source claims. The BLAKE3 pin that
/// requests must carry is computed over this exact text, so editing it
/// changes the pin.
const SUMMARY_PROMPT_TEMPLATE: &str =
    "Summarize the following memory entries into a single concise statement preserving key facts: {events}";

/// Initial value of the backend's `max_output_bytes` ceiling (bytes of
/// summary text).
const DEFAULT_MAX_OUTPUT_BYTES: usize = 4096;

/// `max_tokens` value placed on the adapter request for summaries.
const SUMMARY_MAX_TOKENS: u32 = 1024;

/// `timeout_ms` value placed on the adapter request for summaries.
const SUMMARY_TIMEOUT_MS: u64 = 60_000;
68
69#[must_use]
74pub fn canonical_prompt_template_blake3() -> String {
75 format!("blake3:{}", blake3_hex(SUMMARY_PROMPT_TEMPLATE.as_bytes()))
76}
77
/// Summary backend that forwards completion requests to an
/// `OllamaHttpAdapter`.
#[derive(Debug, Clone)]
pub struct OllamaSummaryBackend {
    /// Adapter used for every completion call (`probe` and `summarize`).
    adapter: OllamaHttpAdapter,
    /// Model name copied from the `OllamaConfig` at construction. Sent by
    /// `probe`, and used as the fallback `model_name_echoed` when the
    /// adapter's echoed model does not match the request.
    model_name: String,
    /// Upper bound, in bytes, on an accepted summary. Per-request budgets
    /// are clamped to this value.
    max_output_bytes: usize,
}
90
91impl OllamaSummaryBackend {
92 pub fn new(config: OllamaConfig) -> Result<Self, SummaryError> {
101 let model_name = config.model.clone();
102 let adapter = OllamaHttpAdapter::new(config)
103 .map_err(|e| SummaryError::CallFailed(format!("ollama adapter construction: {e}")))?;
104 Ok(Self {
105 adapter,
106 model_name,
107 max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES,
108 })
109 }
110
111 pub fn probe(&self) -> Result<(), SummaryError> {
121 let req = LlmRequest {
122 model: self.model_name.clone(),
123 system: String::new(),
124 messages: vec![LlmMessage {
125 role: LlmRole::User,
126 content: "ping".into(),
127 }],
128 temperature: 0.0,
129 max_tokens: 1,
130 json_schema: None,
131 timeout_ms: 5_000,
132 };
133 let rt = build_rt()?;
134 rt.block_on(self.adapter.complete(req))
135 .map(|_| ())
136 .map_err(|e| SummaryError::CallFailed(format!("ollama probe: {e}")))
137 }
138}
139
140impl SummaryBackend for OllamaSummaryBackend {
141 fn summarize(&self, request: &SummaryRequest) -> Result<SummaryResponse, SummaryError> {
142 let expected_blake3 = canonical_prompt_template_blake3();
144 if request.prompt_template_blake3 != expected_blake3 {
145 return Err(SummaryError::PromptTemplateMismatch(format!(
146 "request pin `{}` != backend template `{}`",
147 request.prompt_template_blake3, expected_blake3,
148 )));
149 }
150
151 let events_joined = request
153 .source_claims
154 .iter()
155 .map(|c| format!("- {c}"))
156 .collect::<Vec<_>>()
157 .join("\n");
158 let prompt_text = SUMMARY_PROMPT_TEMPLATE.replace("{events}", &events_joined);
159
160 let byte_budget = request
161 .max_output_bytes
162 .unwrap_or(self.max_output_bytes)
163 .min(self.max_output_bytes);
164
165 let llm_req = LlmRequest {
166 model: request.model_name.clone(),
167 system: String::new(),
168 messages: vec![LlmMessage {
169 role: LlmRole::User,
170 content: prompt_text,
171 }],
172 temperature: 0.0,
173 max_tokens: SUMMARY_MAX_TOKENS,
174 json_schema: None,
175 timeout_ms: SUMMARY_TIMEOUT_MS,
176 };
177
178 let rt = build_rt()?;
179 let llm_resp = rt
180 .block_on(self.adapter.complete(llm_req))
181 .map_err(|e| SummaryError::CallFailed(e.to_string()))?;
182
183 let echoed_model = if llm_resp.model == request.model_name {
188 llm_resp.model.clone()
189 } else {
190 tracing::debug!(
191 adapter_echoed = %llm_resp.model,
192 configured = %self.model_name,
193 "ollama_summary: model echo mismatch; substituting configured model name"
194 );
195 self.model_name.clone()
196 };
197
198 if llm_resp.text.is_empty() {
199 return Err(SummaryError::OutputValidationFailed(
200 "ollama returned an empty summary".to_string(),
201 ));
202 }
203 if llm_resp.text.len() > byte_budget {
204 return Err(SummaryError::OutputValidationFailed(format!(
205 "summary byte length {} exceeds budget {}",
206 llm_resp.text.len(),
207 byte_budget,
208 )));
209 }
210
211 let token_usage = llm_resp.usage.map(|u| TokenUsage {
212 prompt_tokens: u.prompt_tokens,
213 completion_tokens: u.completion_tokens,
214 });
215
216 Ok(SummaryResponse {
217 claim: llm_resp.text,
218 token_usage,
219 model_name_echoed: echoed_model,
220 })
221 }
222}
223
224fn build_rt() -> Result<tokio::runtime::Runtime, SummaryError> {
225 tokio::runtime::Builder::new_current_thread()
226 .enable_all()
227 .build()
228 .map_err(|e| SummaryError::CallFailed(format!("tokio runtime construction failed: {e}")))
229}
230
#[cfg(test)]
mod tests {
    use super::*;
    use crate::summary::{ReplaySummaryBackend, ReplaySummaryFixtureEntry, SummaryResponse};

    /// Digest-pinned model identifier shared by every fixture in this module.
    const TEST_MODEL: &str =
        "llama3.1:8b@sha256:0000000000000000000000000000000000000000000000000000000000000000";

    /// Request carrying the canonical template pin and two source claims.
    fn sample_request() -> SummaryRequest {
        SummaryRequest {
            model_name: TEST_MODEL.into(),
            prompt_template_blake3: canonical_prompt_template_blake3(),
            source_claims: vec!["fact A".into(), "fact B".into()],
            max_output_bytes: Some(512),
            decay_job_id: Some("dcy_01ARZ3NDEKTSV4RRFFQ69G5FAV".into()),
        }
    }

    /// Response with the given claim text, no token usage, and the test model echoed.
    fn sample_response(claim: &str) -> SummaryResponse {
        SummaryResponse {
            claim: claim.into(),
            token_usage: None,
            model_name_echoed: TEST_MODEL.into(),
        }
    }

    #[test]
    fn canonical_blake3_has_expected_prefix() {
        let pin = canonical_prompt_template_blake3();
        assert!(pin.starts_with("blake3:"), "got {pin}");
        // 7 bytes of prefix plus 64 further characters.
        assert_eq!(pin.len(), 71, "got {pin}");
    }

    #[test]
    fn prompt_template_mismatch_returns_error() {
        // The pin check fires before any network call is attempted.
        let req = SummaryRequest {
            prompt_template_blake3: "blake3:wrong".into(),
            ..sample_request()
        };

        let backend =
            OllamaSummaryBackend::new(OllamaConfig::new("http://127.0.0.1:19999", TEST_MODEL))
                .expect("construct");

        let err = backend.summarize(&req).unwrap_err();
        assert!(
            matches!(err, SummaryError::PromptTemplateMismatch(_)),
            "got {err:?}"
        );
    }

    #[test]
    fn replay_backend_round_trips_ollama_request() {
        let req = sample_request();
        let resp = sample_response("fact A and fact B combined");

        let fixture = ReplaySummaryFixtureEntry {
            request: req.clone(),
            response: resp.clone(),
        };
        let backend =
            ReplaySummaryBackend::from_entries(vec![fixture]).expect("build replay backend");

        let got = backend.summarize(&req).expect("hit");
        assert_eq!(got.claim, resp.claim);
        assert_eq!(got.model_name_echoed, resp.model_name_echoed);
    }

    #[test]
    fn replay_backend_miss_returns_backend_not_configured() {
        let fixture = ReplaySummaryFixtureEntry {
            request: sample_request(),
            response: sample_response("some summary"),
        };
        let backend = ReplaySummaryBackend::from_entries(vec![fixture]).expect("build");

        // Same model, pin and budget as the fixture, but claims it has never seen.
        let other = SummaryRequest {
            source_claims: vec!["never seen claim".into()],
            decay_job_id: None,
            ..sample_request()
        };

        let err = backend.summarize(&other).unwrap_err();
        assert_eq!(err, SummaryError::BackendNotConfigured);
    }

    #[test]
    fn end_to_end_via_mock_tcp_server() {
        use std::io::{BufRead, BufReader, Read, Write};
        use std::net::TcpListener;

        // Port 0 lets the OS choose a free port for the one-shot mock server.
        let listener = TcpListener::bind("127.0.0.1:0").expect("bind mock");
        let addr = listener.local_addr().expect("local addr");

        let summary_text = "Fact A and fact B are both true.";
        let response_body = serde_json::json!({
            "model": TEST_MODEL,
            "message": { "role": "assistant", "content": summary_text },
            "done": true
        })
        .to_string();
        let http_response = format!(
            "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{}",
            response_body.len(),
            response_body
        );

        let server = std::thread::spawn(move || {
            let (mut stream, _) = listener.accept().expect("accept");
            let mut reader = BufReader::new(stream.try_clone().expect("clone"));

            // Skip request headers: stop at the blank line or at EOF.
            let mut header = String::new();
            loop {
                header.clear();
                reader.read_line(&mut header).expect("read line");
                if header.is_empty() || header == "\r\n" {
                    break;
                }
            }

            // Best-effort single read of the request body before replying.
            let mut scratch = vec![0u8; 8192];
            let _ = reader.read(&mut scratch);

            stream
                .write_all(http_response.as_bytes())
                .expect("write response");
        });

        let backend =
            OllamaSummaryBackend::new(OllamaConfig::new(format!("http://{addr}"), TEST_MODEL))
                .expect("construct");

        let request = SummaryRequest {
            model_name: TEST_MODEL.into(),
            prompt_template_blake3: canonical_prompt_template_blake3(),
            source_claims: vec!["fact A".into(), "fact B".into()],
            max_output_bytes: None,
            decay_job_id: Some("dcy_test".into()),
        };

        let resp = backend.summarize(&request).expect("summarize");
        server.join().expect("server thread");

        assert_eq!(resp.claim, summary_text);
        assert_eq!(resp.model_name_echoed, TEST_MODEL);
    }
}