use async_trait::async_trait;
use std::sync::Arc;
use crate::model::{
collect_model_response, ChatMessage, ModelClient, ModelClientError, ModelResponse,
ModelTurnInput,
};
use crate::tools::ToolSpec;
/// Default for `SummarizeCompactionStrategy::trigger_fraction`: compaction is requested once the
/// estimated token count of the history exceeds this fraction of the context window.
pub const DEFAULT_TRIGGER_FRACTION: f64 = 0.75;
/// Default for `SummarizeCompactionStrategy::tail_min_messages`: histories with at most this many
/// messages are never compacted, and the preserved tail is searched for within this many trailing
/// messages.
pub const DEFAULT_TAIL_MIN_MESSAGES: usize = 4;
/// Default for `SummarizeCompactionStrategy::summary_max_tokens`.
/// NOTE(review): the summarization code in this file stores this value but never reads it.
pub const DEFAULT_SUMMARY_MAX_TOKENS: i32 = 2_000;
/// Returns a rough token estimate for `s` without running a tokenizer.
///
/// Every four ASCII characters (rounded up) count as one token and every non-ASCII character
/// counts as one token on its own. Text that is empty or consists only of whitespace yields `0`.
pub fn estimate_tokens(s: &str) -> u64 {
    if s.trim().is_empty() {
        return 0;
    }

    // Single pass over the characters, tallying the two classes side by side.
    let (narrow, wide) = s.chars().fold((0u64, 0u64), |(narrow, wide), ch| {
        if ch.is_ascii() {
            (narrow + 1, wide)
        } else {
            (narrow, wide + 1)
        }
    });

    narrow.div_ceil(4) + wide
}
/// Estimates the token footprint of a single chat message.
///
/// * `User`: the estimate of its content.
/// * `Assistant`: the sum of its optional text, every tool call (serialized input + name + a flat
///   8-token overhead per call) and the optional thinking block (text plus optional signature).
/// * `Tool`: the estimate of its content plus a flat 16-token overhead.
///
/// The result is never below `1`, so even an empty message contributes to a running total.
pub fn estimate_chat_message_tokens(m: &ChatMessage) -> u64 {
    let estimate = match m {
        ChatMessage::User { content, .. } => estimate_tokens(content),
        ChatMessage::Tool { content, .. } => 16 + estimate_tokens(content),
        ChatMessage::Assistant {
            text,
            tool_calls,
            thinking,
        } => {
            let mut running = text.as_deref().map_or(0, estimate_tokens);

            for call in tool_calls.iter() {
                running +=
                    estimate_tokens(&call.input.to_string()) + estimate_tokens(&call.name) + 8;
            }

            if let Some(block) = thinking.as_ref() {
                running += estimate_tokens(&block.text);
                if let Some(signature) = block.signature.as_deref() {
                    running += estimate_tokens(signature);
                }
            }

            running
        }
    };

    estimate.max(1)
}
pub fn estimate_messages_tokens(messages: &[ChatMessage]) -> u64 {
messages.iter().map(estimate_chat_message_tokens).sum()
}
/// Maps a model identifier to the context-window size (in tokens) assumed for it.
///
/// Matching is case-insensitive for ASCII and rule order matters: the first matching rule wins.
/// Identifiers that match no rule fall back to `128_000`.
pub fn resolve_context_window_tokens(model: &str) -> u64 {
    /// True when `haystack` contains at least one of `needles`.
    fn contains_any(haystack: &str, needles: &[&str]) -> bool {
        needles.iter().copied().any(|needle| haystack.contains(needle))
    }

    /// True when `haystack` begins with at least one of `prefixes`.
    fn starts_with_any(haystack: &str, prefixes: &[&str]) -> bool {
        prefixes
            .iter()
            .copied()
            .any(|prefix| haystack.starts_with(prefix))
    }

    let name = model.to_ascii_lowercase();

    if contains_any(&name, &["opus-4-7", "opus-4-6", "sonnet-4-6"]) {
        // Checked before the generic "claude" rule so these variants get the larger window.
        1_000_000
    } else if name.contains("claude") {
        200_000
    } else if name.contains("gpt-4") {
        // "gpt-4" is a substring of "gpt-4o" and "gpt-4.1", so one check covers all three.
        128_000
    } else if starts_with_any(&name, &["o1", "o3", "o4"]) {
        200_000
    } else if contains_any(&name, &["minimax", "deepseek"]) {
        1_000_000
    } else {
        128_000
    }
}
/// Everything a `CompactionStrategy` receives besides the message history itself.
pub struct CompactionContext {
/// System prompt forwarded unchanged on the summarization request, if any.
pub system_prompt: Option<String>,
/// Client used to stream the summarization request.
pub model_client: Arc<dyn ModelClient>,
/// Context-window size in tokens. `SummarizeCompactionStrategy::compact` does not read it.
pub context_window_tokens: u64,
/// Tool specifications cloned onto the summarization request.
pub tools: Vec<ToolSpec>,
}
/// Result of a compaction attempt.
#[derive(Debug, Clone, PartialEq)]
pub struct CompactionOutcome {
/// The history to use from now on: either the compacted history or, when the strategy decided
/// not to fold anything, the input messages unchanged.
pub messages: Vec<ChatMessage>,
/// Usage reported by the model response for the summarization call; `None` when the strategy
/// returned without calling the model or the response carried no usage.
pub usage: Option<crate::event::HarnessUsage>,
}
/// Failures a compaction attempt can report.
#[derive(Debug, thiserror::Error)]
pub enum CompactionError {
/// The summarization request failed; wraps the client error (converted automatically by `?`).
#[error("compaction model call failed: {0}")]
ModelCall(#[from] ModelClientError),
/// The model answered with blank text or with a tool call instead of a summary, so the
/// history is left alone rather than replaced by nothing.
#[error("model produced empty summary; refusing to fold history")]
EmptySummary,
}
/// Policy deciding when a conversation history should be compacted and how to do it.
///
/// Implementors must be `Send + Sync`.
#[async_trait]
pub trait CompactionStrategy: Send + Sync {
/// Returns `true` when `messages` should be compacted for a model whose context window holds
/// `context_window_tokens` tokens.
fn should_compact(&self, messages: &[ChatMessage], context_window_tokens: u64) -> bool;
/// Consumes `messages` and returns the history to continue with, together with any usage
/// incurred while compacting, or a `CompactionError` when compaction could not be completed.
async fn compact(
&self,
messages: Vec<ChatMessage>,
ctx: &CompactionContext,
) -> Result<CompactionOutcome, CompactionError>;
}
/// Compaction strategy that asks the model for a summary of the conversation and replaces the
/// older history with that summary followed by a short verbatim tail.
pub struct SummarizeCompactionStrategy {
/// Fraction of the context window above which `should_compact` returns `true`.
pub trigger_fraction: f64,
/// Histories with at most this many messages are never compacted; the preserved tail is
/// searched for within this many trailing messages.
pub tail_min_messages: usize,
/// Intended cap on the summary length in tokens.
/// NOTE(review): no code in this file reads this field — confirm whether it should be
/// forwarded to the model request.
pub summary_max_tokens: i32,
/// Instruction appended as a final user message when requesting the summary.
pub summary_prompt: String,
}
impl Default for SummarizeCompactionStrategy {
    /// Builds a strategy from the module-level `DEFAULT_*` constants.
    fn default() -> Self {
        let summary_prompt = String::from(DEFAULT_SUMMARY_PROMPT);
        Self {
            summary_prompt,
            summary_max_tokens: DEFAULT_SUMMARY_MAX_TOKENS,
            tail_min_messages: DEFAULT_TAIL_MIN_MESSAGES,
            trigger_fraction: DEFAULT_TRIGGER_FRACTION,
        }
    }
}
/// Chainable setters: each consumes the strategy and returns it with one field replaced.
impl SummarizeCompactionStrategy {
    /// Replaces `trigger_fraction` with `fraction`.
    pub fn with_trigger_fraction(self, fraction: f64) -> Self {
        Self {
            trigger_fraction: fraction,
            ..self
        }
    }

    /// Replaces `tail_min_messages` with `n`.
    pub fn with_tail_min_messages(self, n: usize) -> Self {
        Self {
            tail_min_messages: n,
            ..self
        }
    }

    /// Replaces `summary_max_tokens` with `n`.
    pub fn with_summary_max_tokens(self, n: i32) -> Self {
        Self {
            summary_max_tokens: n,
            ..self
        }
    }
}
/// Default instruction used as `SummarizeCompactionStrategy::summary_prompt`.
///
/// It asks for a concise summary, lists what must be preserved, and tells the model to supersede
/// any earlier `<conversation-summary>` block so repeated compactions stay cumulative.
pub const DEFAULT_SUMMARY_PROMPT: &str = "Produce a concise summary of the conversation above. \
Preserve: key decisions, file paths, command outputs, in-flight tasks, and explicit \
next steps. If a prior <conversation-summary> block exists in this conversation, \
produce an UPDATED summary that supersedes it (incorporating new activity since). \
Output only the summary text — no preamble, no closing remarks.";
#[async_trait]
impl CompactionStrategy for SummarizeCompactionStrategy {
    /// Returns `true` when the estimated token count of `messages` exceeds
    /// `trigger_fraction * context_window_tokens` (rounded to the nearest integer).
    ///
    /// Histories with at most `tail_min_messages` messages are never reported as compactable,
    /// regardless of their size.
    fn should_compact(&self, messages: &[ChatMessage], context_window_tokens: u64) -> bool {
        if messages.len() <= self.tail_min_messages {
            return false;
        }
        let tokens = estimate_messages_tokens(messages);
        // Float-to-integer `as` saturates and maps NaN to 0, so an unusual fraction cannot panic.
        let threshold = ((context_window_tokens as f64) * self.trigger_fraction).round() as u64;
        tokens > threshold
    }

    /// Replaces the older part of `messages` with a model-written summary.
    ///
    /// The preserved tail starts at the first `User` message found within the last
    /// `tail_min_messages` messages. The output is a single `User` message carrying the summary
    /// wrapped in `<conversation-summary>` tags, followed by that tail.
    ///
    /// `messages` is returned unchanged with `usage: None`, and the model is not called, when
    /// either:
    /// * the history has at most `tail_min_messages` messages, or
    /// * no `User` message exists inside the tail window, so there is no boundary to cut at.
    ///
    /// # Errors
    ///
    /// * `CompactionError::ModelCall` when the summarization request or its stream fails.
    /// * `CompactionError::EmptySummary` when the model answers with a tool call or blank text.
    async fn compact(
        &self,
        messages: Vec<ChatMessage>,
        ctx: &CompactionContext,
    ) -> Result<CompactionOutcome, CompactionError> {
        let total = messages.len();
        if total <= self.tail_min_messages {
            return Ok(CompactionOutcome {
                messages,
                usage: None,
            });
        }

        // Decide where the tail begins *before* paying for a model call. The guard above
        // guarantees `total > tail_min_messages`, so this subtraction cannot underflow.
        let window_start = total - self.tail_min_messages;
        let Some(offset) = messages[window_start..]
            .iter()
            .position(|message| matches!(message, ChatMessage::User { .. }))
        else {
            // No `User` message to cut at: the history would be returned unchanged anyway, so
            // skip the summarization request instead of issuing it and discarding the result.
            return Ok(CompactionOutcome {
                messages,
                usage: None,
            });
        };
        let tail_start = window_start + offset;

        // The model sees the full history plus the summarization instruction as a last user turn.
        let mut summarize_messages = messages.clone();
        summarize_messages.push(ChatMessage::User {
            content: self.summary_prompt.clone(),
            attachments: vec![],
        });
        // NOTE(review): `summary_max_tokens` is not forwarded; the request as constructed here
        // has no field that could carry it.
        let request = ModelTurnInput {
            system_prompt: ctx.system_prompt.clone(),
            messages: summarize_messages,
            tools: ctx.tools.clone(),
            tool_choice: crate::model::ToolChoice::Auto,
            parallel_tool_calls: None,
        };
        let stream = ctx.model_client.stream(request).await?;
        let response = collect_model_response(stream).await?;

        let (summary_text, usage) = match response {
            ModelResponse::Message { text, usage, .. } => (text, usage),
            // A tool call carries no summary text, so it is treated like an empty summary.
            ModelResponse::ToolCall { .. } => return Err(CompactionError::EmptySummary),
        };
        if summary_text.trim().is_empty() {
            return Err(CompactionError::EmptySummary);
        }

        let mut out = Vec::with_capacity(total - tail_start + 1);
        out.push(ChatMessage::User {
            content: serialize_summary(&summary_text),
            attachments: vec![],
        });
        // Move the tail out of the original vector rather than cloning it a second time.
        out.extend(messages.into_iter().skip(tail_start));

        Ok(CompactionOutcome {
            messages: out,
            usage,
        })
    }
}
/// Wraps `summary` in `<conversation-summary>` tags, with the opening tag, the summary text and
/// the closing tag each separated by a single newline.
fn serialize_summary(summary: &str) -> String {
    ["<conversation-summary>", summary, "</conversation-summary>"].join("\n")
}
// Unit tests for the token estimators, the context-window lookup and the summarizing strategy.
#[cfg(test)]
mod tests {
use super::*;
use crate::model::{ModelChunk, ModelClient};
use crate::tools::ToolInvocation;
use async_trait::async_trait;
use futures::stream::{BoxStream, StreamExt};
/// Test double for `ModelClient` that ignores its input and always streams the same text.
#[derive(Clone)]
struct FixedSummaryClient {
summary: String,
}
#[async_trait]
impl ModelClient for FixedSummaryClient {
/// Emits one text delta containing `summary`, then a `Done` chunk without usage.
async fn stream(
&self,
_input: ModelTurnInput,
) -> Result<BoxStream<'static, Result<ModelChunk, ModelClientError>>, ModelClientError>
{
let chunks = vec![
Ok(ModelChunk::TextDelta {
msg_id: "sum".into(),
delta: self.summary.clone(),
}),
Ok(ModelChunk::Done {
stop_reason: "end_turn".into(),
usage: None,
}),
];
Ok(futures::stream::iter(chunks).boxed())
}
}
/// Builds a `User` message with no attachments.
fn user(s: &str) -> ChatMessage {
ChatMessage::User {
content: s.into(),
attachments: vec![],
}
}
/// Builds an `Assistant` message with text only (no tool calls, no thinking).
fn assistant_text(s: &str) -> ChatMessage {
ChatMessage::Assistant {
text: Some(s.into()),
tool_calls: vec![],
thinking: None,
}
}
/// Builds a successful `Tool` result message for tool call `id`.
fn tool_msg(id: &str, content: &str) -> ChatMessage {
ChatMessage::Tool {
tool_call_id: id.into(),
content: content.into(),
is_error: false,
attachments: vec![],
}
}
/// A longer message must produce a larger estimate than a short one.
#[test]
fn token_estimate_grows_with_content_size() {
let small = user("hi");
let big = user(&"x".repeat(8000));
assert!(estimate_chat_message_tokens(&big) > estimate_chat_message_tokens(&small));
}
/// Pins the exact heuristic: blank text is 0, ASCII is ceil(len / 4), and each non-ASCII
/// character counts as one token (so "hi你好" is 1 + 2 = 3).
#[test]
fn estimate_tokens_splits_ascii_and_cjk() {
assert_eq!(estimate_tokens(""), 0);
assert_eq!(estimate_tokens(" \n"), 0);
assert_eq!(estimate_tokens("abcd"), 1);
assert_eq!(estimate_tokens("abcde"), 2); assert_eq!(estimate_tokens("你好世界"), 4); assert_eq!(estimate_tokens("hi你好"), 3); }
/// Guards against undercounting CJK text: 1000 characters must estimate to at least 1000 tokens.
#[test]
fn token_estimate_counts_cjk_near_one_per_char() {
let cjk = user(&"汉".repeat(1000));
let estimate = estimate_chat_message_tokens(&cjk);
assert!(
estimate >= 1000,
"CJK undercounted: got {estimate}, want >= 1000"
);
}
/// An assistant message carrying a tool call must estimate higher than the same text alone.
#[test]
fn token_estimate_includes_tool_call_input() {
let bare = assistant_text("done");
let with_tool = ChatMessage::Assistant {
text: Some("done".into()),
tool_calls: vec![ToolInvocation {
id: "tc".into(),
name: "bash".into(),
input: serde_json::json!({"command": "echo lots of bytes here for sure"}),
}],
thinking: None,
};
assert!(estimate_chat_message_tokens(&with_tool) > estimate_chat_message_tokens(&bare));
}
/// Spot-checks the lookup table, including mixed-case input and the unknown-model fallback.
#[test]
fn context_window_table_known_models() {
assert_eq!(resolve_context_window_tokens("claude-opus-4-7"), 1_000_000);
assert_eq!(
resolve_context_window_tokens("claude-sonnet-4-6"),
1_000_000
);
assert_eq!(resolve_context_window_tokens("claude-haiku-4-5"), 200_000);
assert_eq!(resolve_context_window_tokens("claude-3-5-sonnet"), 200_000);
assert_eq!(resolve_context_window_tokens("gpt-4o"), 128_000);
assert_eq!(resolve_context_window_tokens("gpt-4.1-mini"), 128_000);
assert_eq!(resolve_context_window_tokens("o3-mini"), 200_000);
assert_eq!(resolve_context_window_tokens("MiniMax-M2"), 1_000_000);
assert_eq!(resolve_context_window_tokens("unknown-model"), 128_000);
}
/// A tiny history must not trigger compaction.
/// NOTE(review): with only two messages the default tail-size guard already returns `false`,
/// so this case never reaches the token-threshold comparison.
#[test]
fn should_compact_skips_when_below_threshold() {
let strat = SummarizeCompactionStrategy::default();
let messages = vec![user("hello"), assistant_text("hi")];
assert!(!strat.should_compact(&messages, 200_000));
}
/// Five 8000-character ASCII messages estimate to 10_000 tokens, above 75% of a 12_000 window.
#[test]
fn should_compact_fires_when_above_threshold() {
let strat = SummarizeCompactionStrategy::default();
let messages = vec![
user(&"x".repeat(8000)),
assistant_text(&"y".repeat(8000)),
user(&"x".repeat(8000)),
assistant_text(&"y".repeat(8000)),
user(&"x".repeat(8000)),
];
assert!(strat.should_compact(&messages, 12_000));
}
/// Even a huge history is left alone when it has no more messages than the tail size.
#[test]
fn should_compact_respects_tail_min_floor() {
let strat = SummarizeCompactionStrategy::default();
let messages = vec![
user(&"x".repeat(100_000)),
assistant_text(&"y".repeat(100_000)),
];
assert!(!strat.should_compact(&messages, 1_000));
}
/// With a tail size of 2, the output is the wrapped summary followed by the tail that starts at
/// the last user message ("third user").
#[tokio::test]
async fn compact_folds_history_into_summary_plus_tail() {
let strat = SummarizeCompactionStrategy::default().with_tail_min_messages(2);
let ctx = CompactionContext {
system_prompt: None,
model_client: Arc::new(FixedSummaryClient {
summary: "we ran ls and grep".into(),
}),
context_window_tokens: 10_000,
tools: vec![],
};
let messages = vec![
user("first user"),
assistant_text("response 1"),
user("second user"),
tool_msg("tc1", "tool result"),
user("third user"),
assistant_text("final response"),
];
let outcome = strat.compact(messages, &ctx).await.unwrap();
let out = outcome.messages;
assert!(
matches!(&out[0], ChatMessage::User { content, .. } if content.contains("<conversation-summary>") && content.contains("we ran ls and grep"))
);
match &out[1] {
ChatMessage::User { content, .. } => assert_eq!(content, "third user"),
other => panic!("expected User in tail, got {other:?}"),
}
assert!(out.len() < 6);
// The fake client reports no usage, so none may be invented.
assert!(outcome.usage.is_none());
}
/// A blank model response must surface as `EmptySummary` rather than erase the history.
#[tokio::test]
async fn compact_returns_empty_summary_error_on_blank_response() {
let strat = SummarizeCompactionStrategy::default().with_tail_min_messages(2);
let ctx = CompactionContext {
system_prompt: None,
model_client: Arc::new(FixedSummaryClient { summary: "".into() }),
context_window_tokens: 10_000,
tools: vec![],
};
let messages = vec![
user("a"),
assistant_text("b"),
user("c"),
assistant_text("d"),
];
let err = strat.compact(messages, &ctx).await.unwrap_err();
assert!(matches!(err, CompactionError::EmptySummary));
}
/// When the history is exactly as long as the tail size, `compact` returns it untouched with
/// no usage.
#[tokio::test]
async fn compact_skips_when_messages_at_or_below_tail_min() {
let strat = SummarizeCompactionStrategy::default().with_tail_min_messages(4);
let ctx = CompactionContext {
system_prompt: None,
model_client: Arc::new(FixedSummaryClient {
summary: "irrelevant".into(),
}),
context_window_tokens: 1_000,
tools: vec![],
};
let messages = vec![
user("1"),
assistant_text("2"),
user("3"),
assistant_text("4"),
];
let outcome = strat.compact(messages.clone(), &ctx).await.unwrap();
assert_eq!(outcome.messages, messages);
assert!(outcome.usage.is_none());
}
}