use crate::session::{MessageRole, SessionMessage, ToolCallState};
use rig::streaming::StreamingChat;
pub(crate) fn serialize_conversation(messages: &[SessionMessage]) -> String {
let mut result = String::new();
for msg in messages {
let role_tag = match msg.role {
MessageRole::User => "User",
MessageRole::Assistant => "Assistant",
MessageRole::System => "System",
};
result.push_str(&format!("[{}]: {}\n", role_tag, msg.content));
for tc in &msg.tool_calls {
let args_str = serde_json::to_string(&tc.args).unwrap_or_else(|_| "{}".to_string());
result.push_str(&format!("[Tool: {}({})]\n", tc.name, args_str));
match &tc.state {
ToolCallState::Completed { result: out } => {
const PER_TOOL_CAP: usize = 2048;
if out.len() > PER_TOOL_CAP {
let trimmed: String = out.chars().take(PER_TOOL_CAP).collect();
result.push_str(&format!(
"[Result: {} ... (truncated, {} bytes total)]\n",
trimmed,
out.len()
));
} else {
result.push_str(&format!("[Result: {}]\n", out));
}
}
ToolCallState::Interrupted => {
result.push_str("[Result: <interrupted>]\n");
}
ToolCallState::Failed { error } => {
result.push_str(&format!("[Result: <failed: {}>]\n", error));
}
}
}
result.push('\n');
}
result
}
pub(crate) async fn summarize_with_model(
model: super::AnyModel,
prompt: String,
) -> anyhow::Result<String> {
oneshot_with_model(
model,
"summarizer",
"You are a conversation summarizer.",
prompt,
)
.await
}
pub(crate) async fn oneshot_with_model(
model: super::AnyModel,
label: &'static str,
preamble: &'static str,
mut prompt: String,
) -> anyhow::Result<String> {
const ONESHOT_PROMPT_BUDGET_BYTES: usize = 128 * 1024; if prompt.len() > ONESHOT_PROMPT_BUDGET_BYTES {
prompt = head_tail_truncate(&prompt, ONESHOT_PROMPT_BUDGET_BYTES);
}
match model {
super::AnyModel::OpenRouter(m) => run_oneshot(m, label, preamble, prompt).await,
super::AnyModel::OpenAI(m) => run_oneshot(m, label, preamble, prompt).await,
super::AnyModel::Anthropic(m) => run_oneshot(m, label, preamble, prompt).await,
super::AnyModel::Gemini(m) => run_oneshot(m, label, preamble, prompt).await,
super::AnyModel::DeepSeek(m) => run_oneshot(m, label, preamble, prompt).await,
super::AnyModel::Glm(m) => run_oneshot(m, label, preamble, prompt).await,
super::AnyModel::Ollama(m) => run_oneshot(m, label, preamble, prompt).await,
super::AnyModel::Custom(m) => run_oneshot(m, label, preamble, prompt).await,
}
}
pub(crate) fn head_tail_truncate(prompt: &str, budget: usize) -> String {
if prompt.len() <= budget {
return prompt.to_string();
}
let head_budget = budget * 4 / 10;
let tail_budget = budget - head_budget - 128;
let head_end = prompt[..head_budget.min(prompt.len())]
.rfind('\n')
.unwrap_or(head_budget.min(prompt.len()));
let head_end = crate::text::char_boundary_at_or_before(prompt, head_end);
let tail_start_target = prompt.len().saturating_sub(tail_budget);
let tail_start = prompt[tail_start_target..]
.find('\n')
.map(|i| tail_start_target + i + 1)
.unwrap_or(tail_start_target);
let tail_start = crate::text::char_boundary_at_or_after(prompt, tail_start);
if tail_start <= head_end {
return prompt.to_string();
}
let dropped = tail_start - head_end;
format!(
"{}\n\n[... {} bytes truncated by summarizer-prompt budget ...]\n\n{}",
&prompt[..head_end],
dropped,
&prompt[tail_start..],
)
}
async fn run_oneshot<M>(
model: M,
label: &'static str,
preamble: &'static str,
prompt: String,
) -> anyhow::Result<String>
where
M: rig::completion::CompletionModel + Clone + 'static,
M::StreamingResponse: Send + Sync + Unpin + Clone + 'static,
{
use crate::agent::recovery::{RecoveryPolicy, run_with_retry};
let policy = RecoveryPolicy::default();
let response = run_with_retry(&policy, label, || {
let model = model.clone();
let prompt = prompt.clone();
async move {
let agent = rig::agent::AgentBuilder::new(model)
.preamble(preamble)
.build();
let mut stream = agent
.stream_chat(prompt, Vec::<rig::completion::Message>::new())
.multi_turn(1)
.await;
let mut response = String::new();
use futures::StreamExt;
while let Some(item) = stream.next().await {
match item {
Ok(rig::agent::MultiTurnStreamItem::StreamAssistantItem(
rig::streaming::StreamedAssistantContent::Text(text),
)) => response.push_str(&text.text),
Ok(rig::agent::MultiTurnStreamItem::FinalResponse(res)) => {
return Ok(res.response().to_string());
}
Err(e) => return Err(e.to_string()),
_ => {}
}
}
Ok(response)
}
})
.await
.map_err(|msg| anyhow::anyhow!("one-shot LLM call failed: {msg}"))?;
if response.is_empty() {
anyhow::bail!("one-shot LLM call returned empty response");
}
Ok(response)
}
#[cfg(test)]
mod tests {
use super::head_tail_truncate;
#[test]
fn head_tail_truncate_short_prompt_passes_through() {
let s = "line 1\nline 2\nline 3";
assert_eq!(head_tail_truncate(s, 1024), s);
}
#[test]
fn head_tail_truncate_keeps_head_and_tail() {
let mut s = String::new();
for i in 0..2000 {
s.push_str(&format!("line {}\n", i));
}
let out = head_tail_truncate(&s, 4096);
assert!(out.len() < s.len(), "output should be shorter");
assert!(out.starts_with("line 0\n"));
assert!(out.contains("truncated by summarizer-prompt budget"));
assert!(out.ends_with("line 1999\n"));
}
}