use crate::session::{MessageRole, SessionMessage, ToolCallState};
use rig::streaming::StreamingChat;
/// Renders a slice of session messages as a plain-text transcript.
///
/// Every message produces a `[Role]: content` line. Each of its tool calls
/// adds a `[Tool: name(args)]` line followed by a `[Result: ...]` line that
/// reflects the call's state (completed, interrupted, or failed). A blank
/// line separates consecutive messages.
///
/// Completed tool results longer than `PER_TOOL_CAP` bytes are cut down to at
/// most that many bytes (never splitting a UTF-8 character) and annotated
/// with the original byte length.
pub(crate) fn serialize_conversation(messages: &[SessionMessage]) -> String {
    // Maximum number of bytes of a completed tool result kept in the output.
    const PER_TOOL_CAP: usize = 2048;

    let mut result = String::new();
    for msg in messages {
        let role_tag = match msg.role {
            MessageRole::User => "User",
            MessageRole::Assistant => "Assistant",
            MessageRole::System => "System",
        };
        result.push_str(&format!("[{}]: {}\n", role_tag, msg.content));

        for tc in &msg.tool_calls {
            // Arguments that cannot be serialized degrade to an empty JSON
            // object instead of aborting the whole transcript.
            let args_str =
                serde_json::to_string(&tc.args).unwrap_or_else(|_| "{}".to_string());
            result.push_str(&format!("[Tool: {}({})]\n", tc.name, args_str));

            match &tc.state {
                ToolCallState::Completed { result: out } => {
                    if out.len() > PER_TOOL_CAP {
                        // The cap and the reported total are both byte
                        // counts, so cut on bytes too. Back up to the nearest
                        // char boundary so the slice is valid UTF-8; index 0
                        // is always a boundary, so the loop terminates.
                        let mut cut = PER_TOOL_CAP;
                        while !out.is_char_boundary(cut) {
                            cut -= 1;
                        }
                        result.push_str(&format!(
                            "[Result: {} ... (truncated, {} bytes total)]\n",
                            &out[..cut],
                            out.len()
                        ));
                    } else {
                        result.push_str(&format!("[Result: {}]\n", out));
                    }
                }
                ToolCallState::Interrupted => {
                    result.push_str("[Result: <interrupted>]\n");
                }
                ToolCallState::Failed { error } => {
                    result.push_str(&format!("[Result: <failed: {}>]\n", error));
                }
            }
        }
        result.push('\n');
    }
    result
}
/// Sends `prompt` to `model` as a single summarization request.
///
/// This is a thin wrapper over [`oneshot_with_model`] that fixes the label to
/// `"summarizer"` and supplies the summarizer preamble. The result (or error)
/// of that call is returned unchanged.
pub(crate) async fn summarize_with_model(
    model: super::AnyModel,
    prompt: String,
) -> anyhow::Result<String> {
    const LABEL: &str = "summarizer";
    const PREAMBLE: &str = "You are a conversation summarizer.";

    oneshot_with_model(model, LABEL, PREAMBLE, prompt).await
}
/// Runs a single prompt against whichever provider backs `model`.
///
/// Steps, in order:
/// 1. Prompts larger than `ONESHOT_PROMPT_BUDGET_BYTES` are shortened with
///    [`head_tail_truncate`].
/// 2. The label, preamble and final prompt are passed to
///    `crate::provider::wire::dump_oneshot`.
/// 3. Provider-specific "disable reasoning" parameters are looked up from the
///    provider kind.
/// 4. The concrete model inside the `AnyModel` variant is handed to
///    [`run_oneshot`], whose result is returned as-is.
pub(crate) async fn oneshot_with_model(
    model: super::AnyModel,
    label: &'static str,
    preamble: &'static str,
    mut prompt: String,
) -> anyhow::Result<String> {
    use super::AnyModel as M;

    const ONESHOT_PROMPT_BUDGET_BYTES: usize = 128 * 1024;

    let over_budget = prompt.len() > ONESHOT_PROMPT_BUDGET_BYTES;
    if over_budget {
        prompt = head_tail_truncate(&prompt, ONESHOT_PROMPT_BUDGET_BYTES);
    }

    crate::provider::wire::dump_oneshot(label, preamble, &prompt);

    // Computed up front: the `match` below moves the model out of `model`.
    let provider_kind = oneshot_provider_kind(&model);
    let disable = reasoning_disable_for_kind(provider_kind);

    match model {
        M::OpenRouter(inner) => run_oneshot(inner, label, preamble, prompt, disable).await,
        M::OpenAI(inner) => run_oneshot(inner, label, preamble, prompt, disable).await,
        M::ChatGptOpenAI(inner) => run_oneshot(inner, label, preamble, prompt, disable).await,
        M::OpenAICodex(inner) => run_oneshot(inner, label, preamble, prompt, disable).await,
        M::Anthropic(inner) => run_oneshot(inner, label, preamble, prompt, disable).await,
        M::AnthropicOauth(inner) => run_oneshot(inner, label, preamble, prompt, disable).await,
        M::Gemini(inner) => run_oneshot(inner, label, preamble, prompt, disable).await,
        M::DeepSeek(inner) => run_oneshot(inner, label, preamble, prompt, disable).await,
        M::Glm(inner) => run_oneshot(inner, label, preamble, prompt, disable).await,
        M::Ollama(inner) => run_oneshot(inner, label, preamble, prompt, disable).await,
        M::Custom(inner) => run_oneshot(inner, label, preamble, prompt, disable).await,
    }
}
/// Maps an `AnyModel` variant to the provider-kind string understood by
/// [`reasoning_disable_for_kind`].
///
/// The three OpenAI-backed variants share `"openai"` and the two Anthropic
/// variants share `"anthropic"`; every other variant has its own kind.
fn oneshot_provider_kind(model: &super::AnyModel) -> &'static str {
    use super::AnyModel::{
        Anthropic, AnthropicOauth, ChatGptOpenAI, Custom, DeepSeek, Gemini, Glm, Ollama, OpenAI,
        OpenAICodex, OpenRouter,
    };

    match model {
        OpenAI(_) | ChatGptOpenAI(_) | OpenAICodex(_) => "openai",
        Anthropic(_) | AnthropicOauth(_) => "anthropic",
        OpenRouter(_) => "openrouter",
        Gemini(_) => "gemini",
        DeepSeek(_) => "deepseek",
        Glm(_) => "glm",
        Ollama(_) => "ollama",
        Custom(_) => "custom",
    }
}
/// Returns the extra request parameters that switch off model "thinking" for
/// the given provider kind, or `None` when no such parameters are defined.
///
/// * `"deepseek"`, `"glm"`, `"custom"`, `"openrouter"`:
///   `{"chat_template_kwargs": {"thinking": false}}`
/// * `"ollama"`: `{"think": false}`
/// * `"gemini"`: `{"thinking_config": {"thinking_budget": 0}}`
/// * anything else: `None`
fn reasoning_disable_for_kind(kind: &str) -> Option<serde_json::Value> {
    let params = match kind {
        "ollama" => serde_json::json!({ "think": false }),
        "gemini" => serde_json::json!({ "thinking_config": { "thinking_budget": 0 } }),
        "deepseek" | "glm" | "custom" | "openrouter" => {
            serde_json::json!({ "chat_template_kwargs": { "thinking": false } })
        }
        // Unknown or unlisted providers get no extra parameters.
        _ => return None,
    };
    Some(params)
}
/// Shortens `prompt` to roughly `budget` bytes by keeping its beginning and
/// end and replacing the middle with a one-line truncation marker.
///
/// * A prompt that already fits (`prompt.len() <= budget`) is returned as-is.
/// * About 40% of the budget goes to the head; the rest, minus a 128-byte
///   reserve, goes to the tail.
/// * The head is cut back to the last newline inside its budget and the tail
///   is advanced to just past the first newline inside its window, so whole
///   lines are kept whenever newlines are available.
/// * All cut points are snapped to UTF-8 character boundaries *before* any
///   slicing, so multi-byte text never causes a panic.
/// * If the computed head and tail would meet or overlap, the prompt is
///   returned unchanged.
///
/// The marker reports how many bytes were dropped.
pub(crate) fn head_tail_truncate(prompt: &str, budget: usize) -> String {
    // Bytes withheld from the tail budget (presumably headroom for the
    // truncation marker -- confirm with the original author).
    const TAIL_RESERVE_BYTES: usize = 128;

    // Largest char boundary that is `<= idx`; index 0 is always a boundary.
    fn boundary_at_or_before(s: &str, idx: usize) -> usize {
        let mut i = idx.min(s.len());
        while !s.is_char_boundary(i) {
            i -= 1;
        }
        i
    }

    // Smallest char boundary that is `>= idx`; `s.len()` is always a boundary.
    fn boundary_at_or_after(s: &str, idx: usize) -> usize {
        let mut i = idx.min(s.len());
        while !s.is_char_boundary(i) {
            i += 1;
        }
        i
    }

    if prompt.len() <= budget {
        return prompt.to_string();
    }

    let head_budget = budget * 4 / 10;
    // Saturating: for small budgets the reserve exceeds what is left after
    // the head, and a plain subtraction would underflow.
    let tail_budget = (budget - head_budget).saturating_sub(TAIL_RESERVE_BYTES);

    // Snap to a char boundary first: slicing a `str` in the middle of a
    // multi-byte character panics.
    let head_limit = boundary_at_or_before(prompt, head_budget);
    // A newline is a single byte, so its index is itself a valid boundary.
    let head_end = prompt[..head_limit].rfind('\n').unwrap_or(head_limit);

    let tail_floor = boundary_at_or_after(prompt, prompt.len().saturating_sub(tail_budget));
    let tail_start = prompt[tail_floor..]
        .find('\n')
        .map_or(tail_floor, |offset| tail_floor + offset + 1);

    // Nothing sensible to drop: head and tail touch or overlap.
    if tail_start <= head_end {
        return prompt.to_string();
    }

    let dropped = tail_start - head_end;
    format!(
        "{}\n\n[... {} bytes truncated by summarizer-prompt budget ...]\n\n{}",
        &prompt[..head_end],
        dropped,
        &prompt[tail_start..],
    )
}
/// Executes one prompt against a concrete completion model and returns the
/// assistant's text.
///
/// Each attempt builds a fresh agent from a clone of `model` with `preamble`
/// (plus `reasoning_disable` as additional parameters when present), streams
/// a chat with an empty history, and collects the reply. Attempts are driven
/// by `run_with_retry` under the default `RecoveryPolicy`.
///
/// # Errors
///
/// * The retry helper's failure message, wrapped as
///   `one-shot LLM call failed: ...`.
/// * `one-shot LLM call returned empty response` when the call succeeds but
///   yields an empty string.
async fn run_oneshot<M>(
    model: M,
    label: &'static str,
    preamble: &'static str,
    prompt: String,
    reasoning_disable: Option<serde_json::Value>,
) -> anyhow::Result<String>
where
    M: rig::completion::CompletionModel + Clone + 'static,
    M::StreamingResponse: Send + Sync + Unpin + Clone + 'static,
{
    use crate::agent::recovery::{RecoveryPolicy, run_with_retry};
    use futures::StreamExt;
    use rig::agent::MultiTurnStreamItem;
    use rig::streaming::StreamedAssistantContent;

    let policy = RecoveryPolicy::default();

    let response = run_with_retry(&policy, label, || {
        // Every attempt needs its own owned copies of the inputs.
        let attempt_model = model.clone();
        let attempt_prompt = prompt.clone();
        let attempt_params = reasoning_disable.clone();

        async move {
            let mut builder = rig::agent::AgentBuilder::new(attempt_model).preamble(preamble);
            if let Some(extra) = attempt_params {
                builder = builder.additional_params(extra);
            }
            let agent = builder.build();

            let no_history = Vec::<rig::completion::Message>::new();
            let mut stream = agent
                .stream_chat(attempt_prompt, no_history)
                .multi_turn(1)
                .await;

            // Text chunks are accumulated as a fallback in case the stream
            // ends without ever yielding a final response item.
            let mut collected = String::new();
            while let Some(item) = stream.next().await {
                match item {
                    Err(e) => return Err(e.to_string()),
                    // The final response takes precedence over the chunks
                    // gathered so far.
                    Ok(MultiTurnStreamItem::FinalResponse(res)) => {
                        return Ok(res.response().to_string());
                    }
                    Ok(MultiTurnStreamItem::StreamAssistantItem(
                        StreamedAssistantContent::Text(chunk),
                    )) => collected.push_str(&chunk.text),
                    // All other stream items are ignored.
                    _ => {}
                }
            }
            Ok(collected)
        }
    })
    .await
    .map_err(|msg| anyhow::anyhow!("one-shot LLM call failed: {msg}"))?;

    anyhow::ensure!(
        !response.is_empty(),
        "one-shot LLM call returned empty response"
    );
    Ok(response)
}
#[cfg(test)]
mod tests {
    use super::{head_tail_truncate, reasoning_disable_for_kind};

    /// A prompt that already fits the budget comes back unchanged.
    #[test]
    fn head_tail_truncate_short_prompt_passes_through() {
        let prompt = "line 1\nline 2\nline 3";
        let out = head_tail_truncate(prompt, 1024);
        assert_eq!(out, prompt);
    }

    /// Each provider kind maps to its own parameter shape, or to `None`.
    #[test]
    fn reasoning_disable_shapes_per_provider() {
        let template_kwargs = serde_json::json!({ "chat_template_kwargs": { "thinking": false } });
        for kind in ["deepseek", "glm", "custom", "openrouter"] {
            assert_eq!(
                reasoning_disable_for_kind(kind),
                Some(template_kwargs.clone()),
                "{kind} should disable thinking via chat_template_kwargs",
            );
        }

        let ollama = reasoning_disable_for_kind("ollama");
        assert_eq!(ollama, Some(serde_json::json!({ "think": false })));

        let gemini = reasoning_disable_for_kind("gemini");
        assert_eq!(
            gemini,
            Some(serde_json::json!({ "thinking_config": { "thinking_budget": 0 } })),
        );

        for kind in ["anthropic", "openai"] {
            assert_eq!(reasoning_disable_for_kind(kind), None);
        }
    }

    /// An oversized prompt keeps its first and last lines around the marker.
    #[test]
    fn head_tail_truncate_keeps_head_and_tail() {
        let prompt: String = (0..2000).map(|i| format!("line {}\n", i)).collect();
        let out = head_tail_truncate(&prompt, 4096);

        assert!(out.len() < prompt.len(), "output should be shorter");
        assert!(out.starts_with("line 0\n"));
        assert!(out.contains("truncated by summarizer-prompt budget"));
        assert!(out.ends_with("line 1999\n"));
    }
}