use async_trait::async_trait;
use std::sync::Arc;
use crate::model::{
collect_model_response, ChatMessage, ModelClient, ModelClientError, ModelResponse,
ModelTurnInput,
};
use crate::tools::ToolSpec;
/// Default for `SummarizeCompactionStrategy::trigger_fraction`: the fraction of the context
/// window that the estimated history size must exceed before compaction is requested.
pub const DEFAULT_TRIGGER_FRACTION: f64 = 0.90;
/// Default for `SummarizeCompactionStrategy::tail_min_messages`: histories with this many
/// messages or fewer are never compacted.
pub const DEFAULT_TAIL_MIN_MESSAGES: usize = 4;
/// Default for `SummarizeCompactionStrategy::summary_max_tokens`.
pub const DEFAULT_SUMMARY_MAX_TOKENS: i32 = 2_000;
/// Default for `SummarizeCompactionStrategy::user_message_token_budget`: the estimated-token
/// budget for user messages carried over verbatim into the compacted history.
pub const DEFAULT_USER_MESSAGE_TOKEN_BUDGET: u64 = 20_000;
/// Heuristic token estimate for a piece of text.
///
/// Blank input (empty or whitespace-only) is estimated at zero tokens. For anything else,
/// every non-ASCII character counts as one token and ASCII characters count as one token
/// per four characters, rounded up.
pub fn estimate_tokens(s: &str) -> u64 {
    if s.trim().is_empty() {
        return 0;
    }

    // Single pass producing (ascii_count, non_ascii_count).
    let (ascii_chars, wide_chars) = s.chars().fold((0u64, 0u64), |(narrow, wide), ch| {
        if ch.is_ascii() {
            (narrow + 1, wide)
        } else {
            (narrow, wide + 1)
        }
    });

    ascii_chars.div_ceil(4) + wide_chars
}
/// Estimates the token footprint of a single chat message. The result is never below 1.
///
/// * `User`: the estimate of `content`.
/// * `Assistant`: the estimate of `text` (when present), plus, for every tool call, the
///   estimate of its serialized `input` and its `name` plus a flat 8, plus the estimate of
///   the thinking text and its signature (when present).
/// * `Tool`: the estimate of `content` plus a flat 16.
pub fn estimate_chat_message_tokens(m: &ChatMessage) -> u64 {
    let estimate = match m {
        ChatMessage::User { content, .. } => estimate_tokens(content),
        ChatMessage::Tool { content, .. } => estimate_tokens(content) + 16,
        ChatMessage::Assistant {
            text,
            tool_calls,
            thinking,
        } => {
            let mut total = text.as_deref().map_or(0, estimate_tokens);
            for call in tool_calls.iter() {
                total += estimate_tokens(&call.input.to_string()) + estimate_tokens(&call.name) + 8;
            }
            if let Some(block) = thinking.as_ref() {
                total += estimate_tokens(&block.text)
                    + block.signature.as_deref().map_or(0, estimate_tokens);
            }
            total
        }
    };
    // Even an empty message is charged at least one token.
    estimate.max(1)
}
pub fn estimate_messages_tokens(messages: &[ChatMessage]) -> u64 {
messages.iter().map(estimate_chat_message_tokens).sum()
}
/// Maps a model identifier to the context-window size (in tokens) assumed for it.
///
/// Matching is case-insensitive and order-sensitive: the first rule that matches wins, and
/// identifiers that match no rule fall back to 128,000.
pub fn resolve_context_window_tokens(model: &str) -> u64 {
    const LONG_CONTEXT_CLAUDE: [&str; 3] = ["opus-4-7", "opus-4-6", "sonnet-4-6"];
    const O_SERIES_PREFIXES: [&str; 3] = ["o1", "o3", "o4"];
    const MILLION_TOKEN_FAMILIES: [&str; 2] = ["minimax", "deepseek"];

    let name = model.to_ascii_lowercase();
    let mentions_any = |tags: &[&str]| tags.iter().any(|tag| name.contains(*tag));

    if mentions_any(&LONG_CONTEXT_CLAUDE) {
        1_000_000
    } else if name.contains("claude") {
        200_000
    } else if name.contains("gpt-4") {
        // "gpt-4o" and "gpt-4.1" both contain "gpt-4", so one check covers the family.
        128_000
    } else if O_SERIES_PREFIXES.iter().any(|prefix| name.starts_with(*prefix)) {
        200_000
    } else if mentions_any(&MILLION_TOKEN_FAMILIES) {
        1_000_000
    } else {
        128_000
    }
}
/// Everything a `CompactionStrategy` is handed in order to perform a compaction.
pub struct CompactionContext {
/// System prompt forwarded unchanged on the summarization request.
pub system_prompt: Option<String>,
/// Client used to stream the summarization response.
pub model_client: Arc<dyn ModelClient>,
/// Context-window size, in tokens.
/// NOTE(review): not read by the `compact` implementation visible in this file — confirm
/// whether other strategies rely on it.
pub context_window_tokens: u64,
/// Tool specifications forwarded unchanged on the summarization request.
pub tools: Vec<ToolSpec>,
}
/// Result of a compaction attempt.
#[derive(Debug, Clone, PartialEq)]
pub struct CompactionOutcome {
/// The history to continue with: either the compacted history or the input returned unchanged.
pub messages: Vec<ChatMessage>,
/// Usage taken from the summarization response; `None` when no model call was made.
pub usage: Option<crate::event::HarnessUsage>,
}
/// Reasons a compaction attempt can fail.
#[derive(Debug, thiserror::Error)]
pub enum CompactionError {
/// The underlying model client returned an error (converted via `From<ModelClientError>`).
#[error("compaction model call failed: {0}")]
ModelCall(#[from] ModelClientError),
/// The model did not produce a usable summary, so the history was not folded.
#[error("model produced empty summary; refusing to fold history")]
EmptySummary,
}
/// A policy that decides when a conversation history should be compacted and how to do it.
#[async_trait]
pub trait CompactionStrategy: Send + Sync {
/// Returns `true` when `messages` should be compacted for a model whose context window
/// holds `context_window_tokens` tokens.
fn should_compact(&self, messages: &[ChatMessage], context_window_tokens: u64) -> bool;
/// Consumes `messages` and returns the history to continue with, together with any usage
/// incurred while producing it.
///
/// # Errors
///
/// Returns a `CompactionError` when compaction could not be completed.
async fn compact(
&self,
messages: Vec<ChatMessage>,
ctx: &CompactionContext,
) -> Result<CompactionOutcome, CompactionError>;
}
/// Compaction strategy that asks the model for a summary of the conversation and rebuilds the
/// history from recent user messages plus that summary.
pub struct SummarizeCompactionStrategy {
/// Fraction of the context window the estimated history size must exceed to trigger compaction.
pub trigger_fraction: f64,
/// Histories with this many messages or fewer are never compacted.
pub tail_min_messages: usize,
/// Intended cap on the summary length, in tokens.
/// NOTE(review): this field is not read by the code visible in this file — confirm it is
/// applied to the request somewhere.
pub summary_max_tokens: i32,
/// Instruction appended as the final user message of the summarization request.
pub summary_prompt: String,
/// Estimated-token budget for user messages carried over verbatim into the compacted history.
pub user_message_token_budget: u64,
}
impl Default for SummarizeCompactionStrategy {
    /// Builds a strategy whose every field is taken from the corresponding `DEFAULT_*` constant.
    fn default() -> Self {
        let summary_prompt = String::from(DEFAULT_SUMMARY_PROMPT);
        Self {
            summary_prompt,
            user_message_token_budget: DEFAULT_USER_MESSAGE_TOKEN_BUDGET,
            summary_max_tokens: DEFAULT_SUMMARY_MAX_TOKENS,
            tail_min_messages: DEFAULT_TAIL_MIN_MESSAGES,
            trigger_fraction: DEFAULT_TRIGGER_FRACTION,
        }
    }
}
/// Chainable setters: each consumes the strategy and returns it with one field replaced.
impl SummarizeCompactionStrategy {
    /// Replaces `trigger_fraction`.
    pub fn with_trigger_fraction(self, fraction: f64) -> Self {
        Self {
            trigger_fraction: fraction,
            ..self
        }
    }

    /// Replaces `tail_min_messages`.
    pub fn with_tail_min_messages(self, n: usize) -> Self {
        Self {
            tail_min_messages: n,
            ..self
        }
    }

    /// Replaces `summary_max_tokens`.
    pub fn with_summary_max_tokens(self, n: i32) -> Self {
        Self {
            summary_max_tokens: n,
            ..self
        }
    }

    /// Replaces `user_message_token_budget`.
    pub fn with_user_message_token_budget(self, budget: u64) -> Self {
        Self {
            user_message_token_budget: budget,
            ..self
        }
    }
}
/// Default value of `SummarizeCompactionStrategy::summary_prompt`.
///
/// It asks the model for a handoff summary (progress, constraints, next steps, critical
/// references) and, when an earlier `<conversation-summary>` block is present, for an updated
/// summary that supersedes it.
pub const DEFAULT_SUMMARY_PROMPT: &str = "You are performing a CONTEXT CHECKPOINT COMPACTION. \
Create a handoff summary for another agent instance that will resume this task.\n\n\
Include:\n\
- Current progress and key decisions made\n\
- Important context, constraints, or user preferences that must be respected\n\
- What remains to be done (clear next steps)\n\
- Any critical data, file paths, command outputs, or references needed to continue\n\n\
If a prior <conversation-summary> block exists in this conversation, produce an UPDATED \
summary that supersedes it (incorporating all activity since). \
Output only the summary text — no preamble, no closing remarks.";
#[async_trait]
impl CompactionStrategy for SummarizeCompactionStrategy {
    /// Requests compaction when the history has more than `tail_min_messages` messages and its
    /// estimated size exceeds `trigger_fraction` of `context_window_tokens` (rounded).
    fn should_compact(&self, messages: &[ChatMessage], context_window_tokens: u64) -> bool {
        let long_enough = messages.len() > self.tail_min_messages;
        long_enough && {
            let limit = (context_window_tokens as f64 * self.trigger_fraction).round() as u64;
            estimate_messages_tokens(messages) > limit
        }
    }

    /// Summarizes `messages` with the model and rebuilds the history as the most recent user
    /// messages (within `user_message_token_budget`) followed by the wrapped summary.
    ///
    /// Histories of `tail_min_messages` messages or fewer are returned untouched without a
    /// model call.
    ///
    /// # Errors
    ///
    /// * `CompactionError::ModelCall` when the request or the response stream fails.
    /// * `CompactionError::EmptySummary` when the model answers with a tool call or with
    ///   blank text.
    async fn compact(
        &self,
        messages: Vec<ChatMessage>,
        ctx: &CompactionContext,
    ) -> Result<CompactionOutcome, CompactionError> {
        if messages.len() <= self.tail_min_messages {
            return Ok(CompactionOutcome {
                messages,
                usage: None,
            });
        }

        // The request is the full history plus the summarization instruction as a last user turn.
        let prompt_turn = ChatMessage::User {
            content: self.summary_prompt.clone(),
            attachments: vec![],
        };
        let request = ModelTurnInput {
            system_prompt: ctx.system_prompt.clone(),
            messages: messages
                .iter()
                .cloned()
                .chain(std::iter::once(prompt_turn))
                .collect(),
            tools: ctx.tools.clone(),
            tool_choice: crate::model::ToolChoice::Auto,
            parallel_tool_calls: None,
        };

        let chunks = ctx.model_client.stream(request).await?;
        let response = collect_model_response(chunks).await?;

        // A tool call carries no summary text, so it is treated like an empty summary.
        let ModelResponse::Message {
            text: summary_text,
            usage,
            ..
        } = response
        else {
            return Err(CompactionError::EmptySummary);
        };
        if summary_text.trim().is_empty() {
            return Err(CompactionError::EmptySummary);
        }

        let user_texts = collect_user_message_texts(&messages);
        // NOTE(review): with no non-summary user messages the history is returned unchanged
        // even though the summary was already paid for — confirm this is intended.
        let messages = if user_texts.is_empty() {
            messages
        } else {
            build_compacted_history(&user_texts, &summary_text, self.user_message_token_budget)
        };

        Ok(CompactionOutcome { messages, usage })
    }
}
/// Wraps `summary` in `<conversation-summary>` tags, one tag per line around the text.
fn serialize_summary(summary: &str) -> String {
    ["<conversation-summary>", summary, "</conversation-summary>"].join("\n")
}
/// Returns the `content` of every user message that is not itself a serialized summary,
/// preserving the original order.
fn collect_user_message_texts(messages: &[ChatMessage]) -> Vec<String> {
    let mut texts = Vec::new();
    for message in messages {
        if let ChatMessage::User { content, .. } = message {
            if !is_summary_message(content) {
                texts.push(content.clone());
            }
        }
    }
    texts
}
/// Reports whether `content`, ignoring leading whitespace, begins with the opening
/// `<conversation-summary>` tag.
fn is_summary_message(content: &str) -> bool {
    content
        .trim_start()
        .strip_prefix("<conversation-summary>")
        .is_some()
}
/// Builds the post-compaction history: the newest user messages that fit within
/// `token_budget` (in their original order), followed by the wrapped summary as a final
/// user message.
///
/// Messages are taken newest-first. The first one that does not fit in the remaining budget
/// is truncated to that remainder and selection stops there.
fn build_compacted_history(
    user_texts: &[String],
    summary_text: &str,
    token_budget: u64,
) -> Vec<ChatMessage> {
    let mut kept: Vec<String> = Vec::new();
    let mut budget_left = token_budget;
    let mut newest_first = user_texts.iter().rev();

    while budget_left > 0 {
        let Some(text) = newest_first.next() else {
            break;
        };
        let cost = estimate_tokens(text);
        if cost > budget_left {
            // Keep whatever prefix still fits, then stop: older messages are dropped.
            kept.push(truncate_to_token_budget(text, budget_left));
            break;
        }
        kept.push(text.clone());
        budget_left -= cost;
    }

    // `kept` is newest-first; flip it back to chronological order and append the summary.
    kept.into_iter()
        .rev()
        .chain(std::iter::once(serialize_summary(summary_text)))
        .map(|content| ChatMessage::User {
            content,
            attachments: vec![],
        })
        .collect()
}
/// Returns the longest prefix of `s` whose estimated cost stays within `budget`.
///
/// Cost is counted as one token per non-ASCII character plus one token per four ASCII
/// characters, rounded up. The cut always falls on a character boundary.
fn truncate_to_token_budget(s: &str, budget: u64) -> String {
    if budget == 0 {
        return String::new();
    }

    let (mut ascii_seen, mut wide_seen) = (0u64, 0u64);
    // Byte offset of the first character that would push the running cost over budget;
    // if no character does, the whole string fits.
    let cut = s
        .char_indices()
        .find(|(_, ch)| {
            if ch.is_ascii() {
                ascii_seen += 1;
            } else {
                wide_seen += 1;
            }
            ascii_seen.div_ceil(4) + wide_seen > budget
        })
        .map_or(s.len(), |(offset, _)| offset);

    s[..cut].to_owned()
}
// Unit tests for the token estimators, the context-window table, and the summarizing
// compaction strategy.
#[cfg(test)]
mod tests {
use super::*;
use crate::model::{ModelChunk, ModelClient};
use crate::tools::ToolInvocation;
use async_trait::async_trait;
use futures::stream::{BoxStream, StreamExt};
// Test double: a model client that ignores its input and always replies with `summary`.
#[derive(Clone)]
struct FixedSummaryClient {
summary: String,
}
#[async_trait]
impl ModelClient for FixedSummaryClient {
// Emits one text delta carrying the fixed summary, then a `Done` chunk with no usage.
async fn stream(
&self,
_input: ModelTurnInput,
) -> Result<BoxStream<'static, Result<ModelChunk, ModelClientError>>, ModelClientError>
{
let chunks = vec![
Ok(ModelChunk::TextDelta {
msg_id: "sum".into(),
delta: self.summary.clone(),
}),
Ok(ModelChunk::Done {
stop_reason: "end_turn".into(),
usage: None,
}),
];
Ok(futures::stream::iter(chunks).boxed())
}
}
// Builds a user message with no attachments.
fn user(s: &str) -> ChatMessage {
ChatMessage::User {
content: s.into(),
attachments: vec![],
}
}
// Builds a text-only assistant message (no tool calls, no thinking).
fn assistant_text(s: &str) -> ChatMessage {
ChatMessage::Assistant {
text: Some(s.into()),
tool_calls: vec![],
thinking: None,
}
}
// Builds a successful tool-result message with no attachments.
fn tool_msg(id: &str, content: &str) -> ChatMessage {
ChatMessage::Tool {
tool_call_id: id.into(),
content: content.into(),
is_error: false,
attachments: vec![],
}
}
// A longer message must estimate higher than a shorter one.
#[test]
fn token_estimate_grows_with_content_size() {
let small = user("hi");
let big = user(&"x".repeat(8000));
assert!(estimate_chat_message_tokens(&big) > estimate_chat_message_tokens(&small));
}
// Pins the heuristic: blank input is 0, ASCII is ceil(n / 4), non-ASCII is one per character.
#[test]
fn estimate_tokens_splits_ascii_and_cjk() {
assert_eq!(estimate_tokens(""), 0);
assert_eq!(estimate_tokens(" \n"), 0);
assert_eq!(estimate_tokens("abcd"), 1);
assert_eq!(estimate_tokens("abcde"), 2); assert_eq!(estimate_tokens("你好世界"), 4); assert_eq!(estimate_tokens("hi你好"), 3); }
// 1000 CJK characters must not be estimated below 1000 tokens.
#[test]
fn token_estimate_counts_cjk_near_one_per_char() {
let cjk = user(&"汉".repeat(1000));
let estimate = estimate_chat_message_tokens(&cjk);
assert!(
estimate >= 1000,
"CJK undercounted: got {estimate}, want >= 1000"
);
}
// An assistant message with a tool call must estimate higher than the same message without one.
#[test]
fn token_estimate_includes_tool_call_input() {
let bare = assistant_text("done");
let with_tool = ChatMessage::Assistant {
text: Some("done".into()),
tool_calls: vec![ToolInvocation {
id: "tc".into(),
name: "bash".into(),
input: serde_json::json!({"command": "echo lots of bytes here for sure"}),
}],
thinking: None,
};
assert!(estimate_chat_message_tokens(&with_tool) > estimate_chat_message_tokens(&bare));
}
// Pins the context-window table, including case-insensitive matching and the fallback.
#[test]
fn context_window_table_known_models() {
assert_eq!(resolve_context_window_tokens("claude-opus-4-7"), 1_000_000);
assert_eq!(
resolve_context_window_tokens("claude-sonnet-4-6"),
1_000_000
);
assert_eq!(resolve_context_window_tokens("claude-haiku-4-5"), 200_000);
assert_eq!(resolve_context_window_tokens("claude-3-5-sonnet"), 200_000);
assert_eq!(resolve_context_window_tokens("gpt-4o"), 128_000);
assert_eq!(resolve_context_window_tokens("gpt-4.1-mini"), 128_000);
assert_eq!(resolve_context_window_tokens("o3-mini"), 200_000);
assert_eq!(resolve_context_window_tokens("MiniMax-M2"), 1_000_000);
assert_eq!(resolve_context_window_tokens("unknown-model"), 128_000);
}
// A tiny history against a large window must not trigger compaction.
#[test]
fn should_compact_skips_when_below_threshold() {
let strat = SummarizeCompactionStrategy::default();
let messages = vec![user("hello"), assistant_text("hi")];
assert!(!strat.should_compact(&messages, 200_000));
}
// Five messages of 8000 ASCII chars estimate to 10_000 tokens, above 90% of 11_000 (9_900).
#[test]
fn should_compact_fires_when_above_threshold() {
let strat = SummarizeCompactionStrategy::default();
let messages = vec![
user(&"x".repeat(8000)),
assistant_text(&"y".repeat(8000)),
user(&"x".repeat(8000)),
assistant_text(&"y".repeat(8000)),
user(&"x".repeat(8000)),
];
assert!(strat.should_compact(&messages, 11_000));
}
// Two messages are at or below the default tail minimum, so size alone must not trigger.
#[test]
fn should_compact_respects_tail_min_floor() {
let strat = SummarizeCompactionStrategy::default();
let messages = vec![
user(&"x".repeat(100_000)),
assistant_text(&"y".repeat(100_000)),
];
assert!(!strat.should_compact(&messages, 1_000));
}
// The compacted history keeps the three user messages in order, drops assistant and tool
// messages, and ends with the wrapped summary.
#[tokio::test]
async fn compact_folds_history_into_summary_plus_tail() {
let strat = SummarizeCompactionStrategy::default().with_tail_min_messages(2);
let ctx = CompactionContext {
system_prompt: None,
model_client: Arc::new(FixedSummaryClient {
summary: "we ran ls and grep".into(),
}),
context_window_tokens: 10_000,
tools: vec![],
};
let messages = vec![
user("first user"),
assistant_text("response 1"),
user("second user"),
tool_msg("tc1", "tool result"),
user("third user"),
assistant_text("final response"),
];
let outcome = strat.compact(messages, &ctx).await.unwrap();
let out = outcome.messages;
assert_eq!(out.len(), 4, "3 user messages + 1 summary");
match &out[0] {
ChatMessage::User { content, .. } => assert_eq!(content, "first user"),
other => panic!("expected User at [0], got {other:?}"),
}
match &out[1] {
ChatMessage::User { content, .. } => assert_eq!(content, "second user"),
other => panic!("expected User at [1], got {other:?}"),
}
match &out[2] {
ChatMessage::User { content, .. } => assert_eq!(content, "third user"),
other => panic!("expected User at [2], got {other:?}"),
}
assert!(
matches!(&out[3], ChatMessage::User { content, .. }
if content.contains("<conversation-summary>") && content.contains("we ran ls and grep"))
);
assert!(out.len() < 6);
assert!(outcome.usage.is_none());
}
// A blank model reply must surface as `EmptySummary` rather than folding the history.
#[tokio::test]
async fn compact_returns_empty_summary_error_on_blank_response() {
let strat = SummarizeCompactionStrategy::default().with_tail_min_messages(2);
let ctx = CompactionContext {
system_prompt: None,
model_client: Arc::new(FixedSummaryClient { summary: "".into() }),
context_window_tokens: 10_000,
tools: vec![],
};
let messages = vec![
user("a"),
assistant_text("b"),
user("c"),
assistant_text("d"),
];
let err = strat.compact(messages, &ctx).await.unwrap_err();
assert!(matches!(err, CompactionError::EmptySummary));
}
// With exactly `tail_min_messages` messages the history is returned unchanged with no usage.
#[tokio::test]
async fn compact_skips_when_messages_at_or_below_tail_min() {
let strat = SummarizeCompactionStrategy::default().with_tail_min_messages(4);
let ctx = CompactionContext {
system_prompt: None,
model_client: Arc::new(FixedSummaryClient {
summary: "irrelevant".into(),
}),
context_window_tokens: 1_000,
tools: vec![],
};
let messages = vec![
user("1"),
assistant_text("2"),
user("3"),
assistant_text("4"),
];
let outcome = strat.compact(messages.clone(), &ctx).await.unwrap();
assert_eq!(outcome.messages, messages);
assert!(outcome.usage.is_none());
}
}