use std::sync::Arc;
use futures::StreamExt;
use tokio_util::sync::CancellationToken;
use crate::llm::{
CompletionRequest, HostedCapabilities, LlmProvider, Message, MessageContent, ProviderChunk,
Role, SamplingParams, StopReason, ThinkingConfig, ToolChoice, ToolResultBody,
ToolResultContent,
};
use crate::session::CompactionReport;
use crate::session::history::estimate_message_tokens;
use crate::tool::ToolSchema;
/// Lower clamp bound for the token budget of the tail that is kept verbatim.
const MIN_TAIL_TOKENS: u64 = 2_000;
/// Upper clamp bound for the token budget of the tail that is kept verbatim.
const MAX_TAIL_TOKENS: u64 = 8_000;
/// Character limit applied to tool results before they are sent to the summarizer.
const TOOL_RESULT_MAX_CHARS: usize = 2_000;
/// Marker line placed at the start of every summary message. It is also the prefix
/// searched for when looking for an earlier summary to update.
pub(super) const SUMMARY_PREFIX: &str =
"[Compacted context summary — earlier conversation was condensed to save context.]";
/// System prompt attached to the summarization request.
const SUMMARIZER_SYSTEM: &str = "\
You are a context-summarization assistant for a coding agent session. You are given the \
earlier part of a conversation that is about to be dropped to free up context. Summarize \
ONLY what you are given. The newest turns are kept verbatim outside your summary, so focus \
on older context that still matters for continuing the work.
If a <previous-summary> block is present, treat it as the current anchored summary and UPDATE \
it: keep still-true facts, drop stale ones, merge in new facts. Always follow the exact \
section structure the user asks for, keep every section even if empty, preserve exact file \
paths / identifiers / commands / error strings, and prefer terse bullets over prose. Do not \
answer or continue the task itself, and do not mention that you are summarizing. Respond in \
the same language as the conversation.";
/// Markdown skeleton the summarizer is asked to fill in. It forms the final user
/// message of the summarization request, optionally preceded by the previous summary.
const SUMMARY_TEMPLATE: &str = "\
Summarize the conversation above into the following Markdown structure. Keep every heading \
even if a section is empty (write `(none)`):
## Goal
The user's overall objective and the current concrete task.
## Constraints & Preferences
Hard requirements, user preferences, and conventions to respect.
## Progress
### Done
### In Progress
### Blocked
## Key Decisions
Important choices made and why.
## Next Steps
Concrete, ordered next actions to continue the work.
## Key Context
Critical facts, data, snippets, or references needed to continue.
## Relevant Files
`path` — why it matters (one per line).";
/// Everything needed to issue a summarization request to the model.
#[derive(Clone)]
pub(crate) struct CompactionCtx {
/// Provider whose `complete` method is called to produce the summary.
pub provider: Arc<dyn LlmProvider>,
/// Model name copied into the summarization request.
pub model: String,
/// Base sampling parameters; the summarization request reuses them with thinking disabled.
pub sampling: SamplingParams,
/// Tool schemas forwarded with the request (the request sets `ToolChoice::None`).
pub tools: Vec<ToolSchema>,
/// Token passed to the provider and polled while streaming; cancelling aborts the summary.
pub cancel: CancellationToken,
}
/// Result of planning a compaction: what to summarize and how much history it replaces.
pub(super) struct CompactionPlan {
/// Copy of the messages before the chosen boundary; these are summarized.
pub head: Vec<Message>,
/// Text of an earlier summary found in `head`, with the marker prefix removed.
pub prev_summary: Option<String>,
/// Number of leading messages covered by `head` (the boundary index).
pub drop_count: usize,
/// Estimated token count of the whole history at planning time.
pub tokens_before: u64,
}
/// Decides which leading part of `messages` should be replaced by a summary.
///
/// The tail kept verbatim gets a budget of a quarter of `threshold`, clamped to
/// `MIN_TAIL_TOKENS..=MAX_TAIL_TOKENS`. Returns `None`, after logging a warning, when
/// `select_boundary` finds no index to split at.
pub(super) fn plan(messages: &[Message], threshold: u64) -> Option<CompactionPlan> {
    let tail_budget = (threshold / 4).clamp(MIN_TAIL_TOKENS, MAX_TAIL_TOKENS);
    match select_boundary(messages, tail_budget) {
        Some(boundary) => {
            // Everything before the boundary is summarized; the rest stays untouched.
            let head = &messages[..boundary];
            Some(CompactionPlan {
                head: head.to_vec(),
                prev_summary: extract_previous_summary(head),
                drop_count: boundary,
                tokens_before: estimate_total(messages),
            })
        }
        None => {
            tracing::warn!(
                messages = messages.len(),
                tail_budget,
                "compaction skipped: no safe boundary to summarize before (history too short to split)"
            );
            None
        }
    }
}
/// Wraps `summary` in an assistant message whose text starts with `SUMMARY_PREFIX`
/// on its own line.
pub(super) fn summary_message(summary: &str) -> Message {
    let text = format!("{SUMMARY_PREFIX}\n{summary}");
    let blocks = vec![MessageContent::Text { text }];
    Message {
        role: Role::Assistant,
        content: blocks.into(),
    }
}
pub(crate) async fn run_sync(
history: &dyn crate::session::History,
ctx: &CompactionCtx,
threshold: u64,
) -> Option<CompactionReport> {
let messages = history.snapshot();
let plan = plan(&messages, threshold)?;
let summary = summarize(ctx, &plan.head, plan.prev_summary.as_deref()).await?;
let summary_msg = summary_message(&summary);
history.splice_prefix(plan.drop_count, summary_msg);
let tokens_after = estimate_total(&history.snapshot());
tracing::info!(
drop_count = plan.drop_count,
tokens_before = plan.tokens_before,
tokens_after,
"context compacted (sync)"
);
Some(CompactionReport {
tokens_before: plan.tokens_before,
tokens_after,
})
}
/// Picks the index at which `messages` is split into a summarized head and a kept tail.
///
/// Turn starts (see `is_turn_start`) are preferred as split points. If the newest turn
/// start is at index 0, or there is none, assistant messages after index 0 are used
/// instead. Returns `None` when neither kind of boundary exists.
fn select_boundary(messages: &[Message], tail_budget: u64) -> Option<usize> {
    let turn_starts: Vec<usize> = messages
        .iter()
        .enumerate()
        .filter_map(|(idx, msg)| is_turn_start(msg).then_some(idx))
        .collect();
    if let Some(&newest_turn) = turn_starts.last().filter(|idx| **idx > 0) {
        return Some(select_in_budget(
            messages,
            &turn_starts,
            tail_budget,
            newest_turn,
        ));
    }

    // Fallback: split in front of an assistant message. Index 0 is skipped because
    // splitting there would leave nothing to summarize.
    let assistant_starts: Vec<usize> = messages
        .iter()
        .enumerate()
        .skip(1)
        .filter_map(|(idx, msg)| (msg.role == Role::Assistant).then_some(idx))
        .collect();
    let newest_assistant = assistant_starts.last().copied()?;
    Some(select_in_budget(
        messages,
        &assistant_starts,
        tail_budget,
        newest_assistant,
    ))
}
/// Chooses the oldest candidate boundary whose tail still fits in `tail_budget`.
///
/// Candidates are visited newest to oldest while the estimated size of the messages
/// from the candidate to the end is accumulated. The walk stops at the first candidate
/// that exceeds the budget or sits at index 0. If no candidate fits, `min_boundary`
/// is returned.
///
/// Assumes `boundaries` is sorted ascending, as the callers in this file build it.
fn select_in_budget(
    messages: &[Message],
    boundaries: &[usize],
    tail_budget: u64,
    min_boundary: usize,
) -> usize {
    let mut chosen = min_boundary;
    let mut tail_tokens: u64 = 0;
    let mut tail_end = messages.len();
    for &candidate in boundaries.iter().rev() {
        tail_tokens = tail_tokens.saturating_add(estimate_range(messages, candidate, tail_end));
        tail_end = candidate;
        if candidate == 0 || tail_tokens > tail_budget {
            break;
        }
        chosen = candidate;
    }
    chosen
}
/// Returns `true` for a user message that has at least one content block other than a
/// tool result. User messages with only tool results, or with no content, do not count.
pub(super) fn is_turn_start(msg: &Message) -> bool {
    if msg.role != Role::User {
        return false;
    }
    let only_tool_results = msg
        .content
        .iter()
        .all(|block| matches!(block, MessageContent::ToolResult { .. }));
    !only_tool_results
}
fn estimate_range(messages: &[Message], start: usize, end: usize) -> u64 {
messages
.iter()
.take(end)
.skip(start)
.map(estimate_message_tokens)
.fold(0u64, u64::saturating_add)
}
fn estimate_total(messages: &[Message]) -> u64 {
messages
.iter()
.map(estimate_message_tokens)
.fold(0u64, u64::saturating_add)
}
/// Finds the first assistant text block in `head` that starts with `SUMMARY_PREFIX`
/// and returns the text after the prefix, with leading whitespace trimmed.
fn extract_previous_summary(head: &[Message]) -> Option<String> {
    for msg in head {
        if msg.role != Role::Assistant {
            continue;
        }
        for block in msg.content.iter() {
            let MessageContent::Text { text } = block else {
                continue;
            };
            if let Some(rest) = text.strip_prefix(SUMMARY_PREFIX) {
                return Some(rest.trim_start().to_string());
            }
        }
    }
    None
}
pub(super) async fn summarize(
ctx: &CompactionCtx,
head: &[Message],
prev_summary: Option<&str>,
) -> Option<String> {
let mut messages: Vec<Message> = head.iter().map(prepare_head_message).collect();
messages.push(Message {
role: Role::User,
content: vec![MessageContent::Text {
text: build_prompt(prev_summary),
}]
.into(),
});
let messages = super::sanitize::sanitize_tool_pairing(messages);
let req = CompletionRequest {
model: ctx.model.clone(),
system: Some(SUMMARIZER_SYSTEM.into()),
messages,
tools: ctx.tools.clone(),
tool_choice: ToolChoice::None,
sampling: SamplingParams {
thinking: ThinkingConfig::Disabled,
..ctx.sampling.clone()
},
hosted_capabilities: HostedCapabilities::default(),
};
let mut stream = match ctx.provider.complete(req, ctx.cancel.clone()).await {
Ok(s) => s,
Err(err) => {
tracing::warn!(error = %err, "compaction summarize failed: provider error");
return None;
}
};
let mut text = String::new();
loop {
tokio::select! {
biased;
() = ctx.cancel.cancelled() => {
tracing::warn!("compaction summarize cancelled");
return None;
}
next = stream.next() => match next {
None => break,
Some(Ok(ProviderChunk::TextDelta { text: delta })) => text.push_str(&delta),
Some(Ok(ProviderChunk::Stop { reason })) => {
if matches!(reason, StopReason::Refusal) {
tracing::warn!("compaction summarize refused by model");
return None;
}
}
Some(Ok(_)) => {}
Some(Err(err)) => {
tracing::warn!(error = %err, "compaction summarize failed: stream error");
return None;
}
}
}
}
let text = text.trim().to_string();
if text.is_empty() {
tracing::warn!("compaction summarize produced empty summary");
return None;
}
Some(text)
}
/// Builds the final user instruction for the summarizer.
///
/// Without a previous summary this is just `SUMMARY_TEMPLATE`. With one, the template
/// is preceded by an update instruction and the old summary in a `<previous-summary>`
/// block.
fn build_prompt(prev_summary: Option<&str>) -> String {
    let Some(prev) = prev_summary else {
        return SUMMARY_TEMPLATE.to_string();
    };
    format!(
        "Update the anchored summary below with the new conversation history.\n\n\
         <previous-summary>\n{prev}\n</previous-summary>\n\n{SUMMARY_TEMPLATE}"
    )
}
/// Produces a copy of `msg` suitable for the summarization request.
///
/// Tool result outputs go through `truncate_tool_output`, images become a short text
/// placeholder, and every other block is cloned unchanged. The role is kept.
fn prepare_head_message(msg: &Message) -> Message {
    let mut blocks: Vec<MessageContent> = Vec::new();
    for block in msg.content.iter() {
        let prepared = match block {
            MessageContent::ToolResult {
                tool_use_id,
                output,
                is_error,
            } => MessageContent::ToolResult {
                tool_use_id: tool_use_id.clone(),
                output: truncate_tool_output(output),
                is_error: *is_error,
            },
            MessageContent::Image { .. } => MessageContent::Text {
                text: "[image omitted from summary]".to_string(),
            },
            other => other.clone(),
        };
        blocks.push(prepared);
    }
    Message {
        role: msg.role,
        content: blocks.into(),
    }
}
/// Shrinks a tool result body to at most `TOOL_RESULT_MAX_CHARS` characters.
///
/// * `Text` is truncated with `truncate_chars`.
/// * `Json` is kept as JSON when its serialized form fits the limit; otherwise the
///   serialized form is truncated and returned as `Text`.
/// * `Content` blocks are flattened into one string (text blocks concatenated, images
///   replaced by an `[image: <mime>]` line), truncated, and returned as `Text`.
///
/// The limit is measured in characters in every branch, to match `truncate_chars`.
fn truncate_tool_output(output: &ToolResultBody) -> ToolResultBody {
    match output {
        ToolResultBody::Text { text } => ToolResultBody::Text {
            text: truncate_chars(text, TOOL_RESULT_MAX_CHARS),
        },
        ToolResultBody::Json { value } => {
            let rendered = value.to_string();
            // Byte length is never smaller than the character count, so it is a cheap
            // fast path. Only multi-byte payloads need the exact count. Comparing bytes
            // alone would turn such JSON into `Text` without shortening it.
            let fits = rendered.len() <= TOOL_RESULT_MAX_CHARS
                || rendered.chars().count() <= TOOL_RESULT_MAX_CHARS;
            if fits {
                ToolResultBody::Json {
                    value: value.clone(),
                }
            } else {
                ToolResultBody::Text {
                    text: truncate_chars(&rendered, TOOL_RESULT_MAX_CHARS),
                }
            }
        }
        ToolResultBody::Content { blocks } => {
            let mut flattened = String::new();
            for block in blocks {
                match block {
                    ToolResultContent::Text { text: t } => flattened.push_str(t),
                    ToolResultContent::Image { mime, .. } => {
                        flattened.push_str(&format!("\n[image: {mime}]"));
                    }
                }
            }
            ToolResultBody::Text {
                text: truncate_chars(&flattened, TOOL_RESULT_MAX_CHARS),
            }
        }
    }
}
/// Returns `s` unchanged when it has at most `max_chars` characters. Otherwise returns
/// its first `max_chars` characters followed by a truncation marker on a new line.
///
/// Lengths are counted in `char`s, so the cut never lands inside a UTF-8 sequence.
fn truncate_chars(s: &str, max_chars: usize) -> String {
    // The byte offset of the character at position `max_chars` is where the kept prefix
    // ends. If there is no such character, the string already fits.
    match s.char_indices().nth(max_chars) {
        None => s.to_string(),
        Some((cut, _)) => {
            let kept = &s[..cut];
            format!("{kept}\n…[truncated for summary]")
        }
    }
}
// Unit tests for this module are declared out of line in the `tests` submodule.
#[cfg(test)]
mod tests;