use crate::adapter::{blake3_hex, LlmAdapter, LlmMessage, LlmRequest, LlmRole};
use crate::ollama::OllamaConfig;
use crate::ollama_http::OllamaHttpAdapter;
use crate::summary::{SummaryBackend, SummaryError, SummaryRequest, SummaryResponse};
use crate::TokenUsage;
/// Prompt text sent to the model by `summarize`; the `{events}` placeholder is
/// replaced with the bulleted source claims.
///
/// The exact bytes of this template are hashed by
/// `canonical_prompt_template_blake3`, so any edit (including whitespace)
/// changes the pin that requests are checked against.
const SUMMARY_PROMPT_TEMPLATE: &str =
"Summarize the following memory entries into a single concise statement preserving key facts: {events}";
/// Output byte budget assigned to a backend built with `OllamaSummaryBackend::new`.
const DEFAULT_MAX_OUTPUT_BYTES: usize = 4096;
/// `max_tokens` value placed on every summarization request.
const SUMMARY_MAX_TOKENS: u32 = 1024;
/// `timeout_ms` value placed on every summarization request.
const SUMMARY_TIMEOUT_MS: u64 = 60_000;
/// Returns the pin for the built-in summary prompt template.
///
/// The result is the literal prefix `blake3:` followed by whatever
/// `blake3_hex` yields for the bytes of `SUMMARY_PROMPT_TEMPLATE`.
/// `summarize` compares this value against the pin carried by each request.
#[must_use]
pub fn canonical_prompt_template_blake3() -> String {
    let digest = blake3_hex(SUMMARY_PROMPT_TEMPLATE.as_bytes());
    format!("blake3:{digest}")
}
/// Summary backend that obtains summaries by sending a completion request
/// through an [`OllamaHttpAdapter`].
#[derive(Debug, Clone)]
pub struct OllamaSummaryBackend {
/// Adapter used to issue completion requests.
adapter: OllamaHttpAdapter,
/// Model name copied from the `OllamaConfig` at construction time; used by
/// `probe` and as the fallback value for the echoed model name.
model_name: String,
/// Upper bound, in bytes, on accepted summary text. A request may ask for
/// a smaller budget but cannot raise it above this value.
max_output_bytes: usize,
}
impl OllamaSummaryBackend {
    /// Builds a backend from `config`.
    ///
    /// The model name is copied out of `config` before the config is moved
    /// into [`OllamaHttpAdapter::new`]. The output budget starts at
    /// `DEFAULT_MAX_OUTPUT_BYTES`.
    ///
    /// # Errors
    ///
    /// Returns [`SummaryError::CallFailed`] when the adapter cannot be
    /// constructed.
    pub fn new(config: OllamaConfig) -> Result<Self, SummaryError> {
        // Must be captured first: `config` is consumed by the adapter below.
        let model_name = config.model.clone();
        match OllamaHttpAdapter::new(config) {
            Ok(adapter) => Ok(Self {
                adapter,
                model_name,
                max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES,
            }),
            Err(e) => Err(SummaryError::CallFailed(format!(
                "ollama adapter construction: {e}"
            ))),
        }
    }

    /// Sends a minimal one-token `"ping"` completion to the configured model
    /// and discards the reply.
    ///
    /// The call blocks the current thread on a freshly built runtime and uses
    /// a 5 000 ms timeout.
    ///
    /// # Errors
    ///
    /// Returns [`SummaryError::CallFailed`] when the runtime cannot be built
    /// or the adapter call fails.
    pub fn probe(&self) -> Result<(), SummaryError> {
        let ping = LlmMessage {
            role: LlmRole::User,
            content: "ping".into(),
        };
        let request = LlmRequest {
            model: self.model_name.clone(),
            system: String::new(),
            messages: vec![ping],
            temperature: 0.0,
            max_tokens: 1,
            json_schema: None,
            timeout_ms: 5_000,
        };

        let runtime = build_rt()?;
        let outcome = runtime.block_on(self.adapter.complete(request));
        match outcome {
            Ok(_) => Ok(()),
            Err(e) => Err(SummaryError::CallFailed(format!("ollama probe: {e}"))),
        }
    }
}
impl SummaryBackend for OllamaSummaryBackend {
    /// Summarizes `request.source_claims` with a single completion call.
    ///
    /// Steps, in order:
    /// 1. Reject the request if its prompt-template pin differs from
    ///    `canonical_prompt_template_blake3()`.
    /// 2. Render the prompt by substituting the claims (one `- claim` line
    ///    each, joined by `\n`) for `{events}` in the template.
    /// 3. Compute the byte budget: the request's `max_output_bytes` if given,
    ///    capped at the backend's own limit.
    /// 4. Block the current thread on the adapter call.
    /// 5. Validate the returned text and map token usage into the response.
    ///
    /// # Errors
    ///
    /// * [`SummaryError::PromptTemplateMismatch`] – the request pin does not
    ///   match this backend's template.
    /// * [`SummaryError::CallFailed`] – the runtime could not be built or the
    ///   adapter call failed.
    /// * [`SummaryError::OutputValidationFailed`] – the returned text is empty
    ///   or whitespace-only, or its byte length exceeds the budget.
    fn summarize(&self, request: &SummaryRequest) -> Result<SummaryResponse, SummaryError> {
        let expected_blake3 = canonical_prompt_template_blake3();
        if request.prompt_template_blake3 != expected_blake3 {
            return Err(SummaryError::PromptTemplateMismatch(format!(
                "request pin `{}` != backend template `{}`",
                request.prompt_template_blake3, expected_blake3,
            )));
        }

        let events_joined = request
            .source_claims
            .iter()
            .map(|c| format!("- {c}"))
            .collect::<Vec<_>>()
            .join("\n");
        let prompt_text = SUMMARY_PROMPT_TEMPLATE.replace("{events}", &events_joined);

        // The caller may tighten the budget but never exceed the backend cap.
        let byte_budget = request
            .max_output_bytes
            .unwrap_or(self.max_output_bytes)
            .min(self.max_output_bytes);

        let llm_req = LlmRequest {
            model: request.model_name.clone(),
            system: String::new(),
            messages: vec![LlmMessage {
                role: LlmRole::User,
                content: prompt_text,
            }],
            temperature: 0.0,
            max_tokens: SUMMARY_MAX_TOKENS,
            json_schema: None,
            timeout_ms: SUMMARY_TIMEOUT_MS,
        };

        let rt = build_rt()?;
        let llm_resp = rt
            .block_on(self.adapter.complete(llm_req))
            .map_err(|e| SummaryError::CallFailed(e.to_string()))?;

        // NOTE(review): the comparison is against the request's model name
        // while the fallback (and the log field) use the backend's configured
        // name. Presumably the two are always equal in practice — confirm.
        let echoed_model = if llm_resp.model == request.model_name {
            llm_resp.model.clone()
        } else {
            tracing::debug!(
                adapter_echoed = %llm_resp.model,
                configured = %self.model_name,
                "ollama_summary: model echo mismatch; substituting configured model name"
            );
            self.model_name.clone()
        };

        // A reply made only of whitespace (e.g. a lone newline) carries no
        // summary content, so it is rejected the same way as an empty string.
        if llm_resp.text.trim().is_empty() {
            return Err(SummaryError::OutputValidationFailed(
                "ollama returned an empty summary".to_string(),
            ));
        }
        // The budget is measured in bytes of the untrimmed text, which is
        // what gets returned as the claim.
        if llm_resp.text.len() > byte_budget {
            return Err(SummaryError::OutputValidationFailed(format!(
                "summary byte length {} exceeds budget {}",
                llm_resp.text.len(),
                byte_budget,
            )));
        }

        let token_usage = llm_resp.usage.map(|u| TokenUsage {
            prompt_tokens: u.prompt_tokens,
            completion_tokens: u.completion_tokens,
        });

        Ok(SummaryResponse {
            claim: llm_resp.text,
            token_usage,
            model_name_echoed: echoed_model,
        })
    }
}
/// Builds a current-thread Tokio runtime with all drivers enabled, used to
/// drive the adapter's async calls from this module's synchronous methods.
///
/// NOTE(review): callers immediately `block_on` the returned runtime; Tokio
/// presumably panics if that happens on a thread already driving async tasks,
/// so these entry points look intended for synchronous callers — confirm.
///
/// # Errors
///
/// Returns [`SummaryError::CallFailed`] when the runtime cannot be built.
fn build_rt() -> Result<tokio::runtime::Runtime, SummaryError> {
    let mut builder = tokio::runtime::Builder::new_current_thread();
    builder.enable_all();
    match builder.build() {
        Ok(runtime) => Ok(runtime),
        Err(e) => Err(SummaryError::CallFailed(format!(
            "tokio runtime construction failed: {e}"
        ))),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::summary::{ReplaySummaryBackend, ReplaySummaryFixtureEntry, SummaryResponse};

    /// Digest-pinned model identifier shared by every fixture in this module.
    const TEST_MODEL: &str =
        "llama3.1:8b@sha256:0000000000000000000000000000000000000000000000000000000000000000";

    /// Request fixture carrying the canonical prompt pin and two source claims.
    fn sample_request() -> SummaryRequest {
        SummaryRequest {
            model_name: TEST_MODEL.into(),
            prompt_template_blake3: canonical_prompt_template_blake3(),
            source_claims: vec!["fact A".into(), "fact B".into()],
            max_output_bytes: Some(512),
            decay_job_id: Some("dcy_01ARZ3NDEKTSV4RRFFQ69G5FAV".into()),
        }
    }

    /// Response fixture echoing `TEST_MODEL` with the given claim text.
    fn sample_response(claim: &str) -> SummaryResponse {
        SummaryResponse {
            claim: claim.into(),
            token_usage: None,
            model_name_echoed: TEST_MODEL.into(),
        }
    }

    /// The pin is the `blake3:` prefix followed by a 64-character digest.
    #[test]
    fn canonical_blake3_has_expected_prefix() {
        let d = canonical_prompt_template_blake3();
        assert!(d.starts_with("blake3:"), "got {d}");
        assert_eq!(d.len(), "blake3:".len() + 64, "got {d}");
    }

    /// A wrong pin is rejected before any network call is attempted, so the
    /// unreachable port is never contacted.
    #[test]
    fn prompt_template_mismatch_returns_error() {
        let mut req = sample_request();
        req.prompt_template_blake3 = "blake3:wrong".into();
        let config = OllamaConfig::new("http://127.0.0.1:19999", TEST_MODEL);
        let backend = OllamaSummaryBackend::new(config).expect("construct");
        let err = backend.summarize(&req).unwrap_err();
        assert!(
            matches!(err, SummaryError::PromptTemplateMismatch(_)),
            "got {err:?}"
        );
    }

    /// A request shaped like the Ollama backend's input replays its recorded
    /// response.
    #[test]
    fn replay_backend_round_trips_ollama_request() {
        let req = sample_request();
        let resp = sample_response("fact A and fact B combined");
        let backend = ReplaySummaryBackend::from_entries(vec![ReplaySummaryFixtureEntry {
            request: req.clone(),
            response: resp.clone(),
        }])
        .expect("build replay backend");
        let got = backend.summarize(&req).expect("hit");
        assert_eq!(got.claim, resp.claim);
        assert_eq!(got.model_name_echoed, resp.model_name_echoed);
    }

    /// A request that was never recorded yields `BackendNotConfigured`.
    #[test]
    fn replay_backend_miss_returns_backend_not_configured() {
        let backend = ReplaySummaryBackend::from_entries(vec![ReplaySummaryFixtureEntry {
            request: sample_request(),
            response: sample_response("some summary"),
        }])
        .expect("build");
        let other = SummaryRequest {
            model_name: TEST_MODEL.into(),
            prompt_template_blake3: canonical_prompt_template_blake3(),
            source_claims: vec!["never seen claim".into()],
            max_output_bytes: Some(512),
            decay_job_id: None,
        };
        let err = backend.summarize(&other).unwrap_err();
        assert_eq!(err, SummaryError::BackendNotConfigured);
    }

    /// Drives `summarize` against a one-shot HTTP server on a loopback port
    /// and checks the claim and echoed model name.
    #[test]
    fn end_to_end_via_mock_tcp_server() {
        use std::io::{BufRead, BufReader, Read, Write};
        use std::net::TcpListener;

        let listener = TcpListener::bind("127.0.0.1:0").expect("bind mock");
        let addr = listener.local_addr().expect("local addr");
        let summary_text = "Fact A and fact B are both true.";
        let response_body = serde_json::json!({
            "model": TEST_MODEL,
            "message": { "role": "assistant", "content": summary_text },
            "done": true
        })
        .to_string();
        let http_response = format!(
            "HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{}",
            response_body.len(),
            response_body
        );

        let server = std::thread::spawn(move || {
            let (mut stream, _) = listener.accept().expect("accept");
            let mut reader = BufReader::new(stream.try_clone().expect("clone"));

            // Consume the request head, remembering the declared body length.
            let mut content_length: Option<usize> = None;
            let mut line = String::new();
            loop {
                line.clear();
                let read = reader.read_line(&mut line).expect("read line");
                if read == 0 || line == "\r\n" {
                    break;
                }
                if let Some((name, value)) = line.split_once(':') {
                    if name.trim().eq_ignore_ascii_case("content-length") {
                        content_length = value.trim().parse().ok();
                    }
                }
            }

            // Drain the whole body before replying: closing the socket with
            // unread request bytes can reset the connection and make the
            // client miss the response.
            match content_length {
                Some(len) => {
                    let mut body = vec![0u8; len];
                    reader.read_exact(&mut body).expect("read request body");
                }
                None => {
                    // No declared length: best-effort single read.
                    let mut scratch = vec![0u8; 8192];
                    let _ = reader.read(&mut scratch);
                }
            }

            stream
                .write_all(http_response.as_bytes())
                .expect("write response");
        });

        let config = OllamaConfig::new(format!("http://{addr}"), TEST_MODEL);
        let backend = OllamaSummaryBackend::new(config).expect("construct");
        let request = SummaryRequest {
            model_name: TEST_MODEL.into(),
            prompt_template_blake3: canonical_prompt_template_blake3(),
            source_claims: vec!["fact A".into(), "fact B".into()],
            max_output_bytes: None,
            decay_job_id: Some("dcy_test".into()),
        };
        let resp = backend.summarize(&request).expect("summarize");
        server.join().expect("server thread");
        assert_eq!(resp.claim, summary_text);
        assert_eq!(resp.model_name_echoed, TEST_MODEL);
    }
}