1use async_trait::async_trait;
28use std::sync::Arc;
29
30use crate::model::{
31 collect_model_response, ChatMessage, ModelClient, ModelClientError, ModelResponse,
32 ModelTurnInput,
33};
34use crate::tools::ToolSpec;
35
/// Default for [`SummarizeCompactionStrategy::trigger_fraction`]: the share of the
/// context window that the estimated history size must exceed before
/// `should_compact` reports `true`.
pub const DEFAULT_TRIGGER_FRACTION: f64 = 0.90;

/// Default for [`SummarizeCompactionStrategy::tail_min_messages`]: histories with
/// this many messages or fewer are never compacted.
pub const DEFAULT_TAIL_MIN_MESSAGES: usize = 4;

/// Default for [`SummarizeCompactionStrategy::summary_max_tokens`].
pub const DEFAULT_SUMMARY_MAX_TOKENS: i32 = 2_000;

/// Default for [`SummarizeCompactionStrategy::user_message_token_budget`]: the
/// estimated-token budget for user messages kept verbatim in a compacted history.
pub const DEFAULT_USER_MESSAGE_TOKEN_BUDGET: u64 = 20_000;
57
/// Produces a rough token estimate for `s`.
///
/// ASCII characters are charged at four per token (rounded up) and every
/// non-ASCII character is charged one token. A string that is empty or made up
/// solely of whitespace is estimated at zero.
pub fn estimate_tokens(s: &str) -> u64 {
    // Nothing but whitespace (or nothing at all) costs nothing.
    if s.chars().all(char::is_whitespace) {
        return 0;
    }
    let (narrow, wide) = s.chars().fold((0u64, 0u64), |(narrow, wide), ch| {
        if ch.is_ascii() {
            (narrow + 1, wide)
        } else {
            (narrow, wide + 1)
        }
    });
    narrow.div_ceil(4) + wide
}
80
81pub fn estimate_chat_message_tokens(m: &ChatMessage) -> u64 {
86 let tokens = match m {
87 ChatMessage::User { content, .. } => estimate_tokens(content),
88 ChatMessage::Assistant {
89 text,
90 tool_calls,
91 thinking,
92 } => {
93 let text_tokens = text.as_deref().map(estimate_tokens).unwrap_or(0);
94 let tc_tokens: u64 = tool_calls
95 .iter()
96 .map(|tc| estimate_tokens(&tc.input.to_string()) + estimate_tokens(&tc.name) + 8)
97 .sum();
98 let thinking_tokens = thinking
99 .as_ref()
100 .map(|t| {
101 estimate_tokens(&t.text)
102 + t.signature.as_deref().map(estimate_tokens).unwrap_or(0)
103 })
104 .unwrap_or(0);
105 text_tokens + tc_tokens + thinking_tokens
106 }
107 ChatMessage::Tool { content, .. } => estimate_tokens(content) + 16,
108 };
109 tokens.max(1)
110}
111
112pub fn estimate_messages_tokens(messages: &[ChatMessage]) -> u64 {
115 messages.iter().map(estimate_chat_message_tokens).sum()
116}
117
/// Maps a model name to the context-window size (in tokens) assumed for it.
///
/// Matching is case-insensitive and the first rule that matches wins:
///
/// 1. contains `opus-4-7`, `opus-4-6` or `sonnet-4-6` → 1,000,000
/// 2. contains `claude` → 200,000
/// 3. contains `gpt-4` (which also covers `gpt-4o` and `gpt-4.1`) → 128,000
/// 4. starts with `o1`, `o3` or `o4` → 200,000
/// 5. contains `minimax` or `deepseek` → 1,000,000
///
/// Anything else falls back to 128,000.
pub fn resolve_context_window_tokens(model: &str) -> u64 {
    fn contains_any(haystack: &str, needles: &[&str]) -> bool {
        needles.iter().any(|needle| haystack.contains(*needle))
    }
    fn starts_with_any(haystack: &str, prefixes: &[&str]) -> bool {
        prefixes.iter().any(|prefix| haystack.starts_with(*prefix))
    }

    let name = model.to_ascii_lowercase();

    // Order matters: the specific Claude variants must be tested before the
    // generic `claude` rule.
    if contains_any(&name, &["opus-4-7", "opus-4-6", "sonnet-4-6"]) {
        1_000_000
    } else if contains_any(&name, &["claude"]) {
        200_000
    } else if contains_any(&name, &["gpt-4", "gpt-4o", "gpt-4.1"]) {
        128_000
    } else if starts_with_any(&name, &["o1", "o3", "o4"]) {
        200_000
    } else if contains_any(&name, &["minimax", "deepseek"]) {
        1_000_000
    } else {
        128_000
    }
}
151
/// Everything a [`CompactionStrategy`] is handed when asked to compact a history.
pub struct CompactionContext {
    /// Optional system prompt; `SummarizeCompactionStrategy` forwards it unchanged
    /// with the summarization request.
    pub system_prompt: Option<String>,
    /// Client through which the summarization request is streamed.
    pub model_client: Arc<dyn ModelClient>,
    /// Context-window size in tokens (going by the name). NOTE(review): the
    /// strategy implementation in this file never reads this field — confirm
    /// which callers rely on it.
    pub context_window_tokens: u64,
    /// Tool specifications; `SummarizeCompactionStrategy` forwards them unchanged
    /// with the summarization request.
    pub tools: Vec<ToolSpec>,
}
161
/// What a call to [`CompactionStrategy::compact`] hands back on success.
#[derive(Debug, Clone, PartialEq)]
pub struct CompactionOutcome {
    /// The history to use from now on. `SummarizeCompactionStrategy` returns the
    /// input unchanged when it decides not to fold anything.
    pub messages: Vec<ChatMessage>,
    /// Usage taken from the summarization response. `None` when no model call was
    /// made or when the response carried no usage.
    pub usage: Option<crate::event::HarnessUsage>,
}
172
/// Failures reported by [`CompactionStrategy::compact`].
#[derive(Debug, thiserror::Error)]
pub enum CompactionError {
    /// Starting the model stream or collecting its response failed; wraps the
    /// underlying [`ModelClientError`].
    #[error("compaction model call failed: {0}")]
    ModelCall(#[from] ModelClientError),
    /// The model answered with a tool call or with blank text instead of a
    /// summary, so the history is left as it was.
    #[error("model produced empty summary; refusing to fold history")]
    EmptySummary,
}
184
/// A policy that decides when a conversation history needs compacting and knows
/// how to compact it.
#[async_trait]
pub trait CompactionStrategy: Send + Sync {
    /// Reports whether `messages` should be compacted, given a context window of
    /// `context_window_tokens` tokens.
    fn should_compact(&self, messages: &[ChatMessage], context_window_tokens: u64) -> bool;

    /// Compacts `messages` and returns the replacement history together with any
    /// usage recorded along the way.
    ///
    /// # Errors
    ///
    /// Returns a [`CompactionError`] when the compaction cannot be completed.
    async fn compact(
        &self,
        messages: Vec<ChatMessage>,
        ctx: &CompactionContext,
    ) -> Result<CompactionOutcome, CompactionError>;
}
200
/// A [`CompactionStrategy`] that asks the model for a summary of the history and
/// replaces the history with retained user messages followed by that summary.
pub struct SummarizeCompactionStrategy {
    /// Share of the context window that the estimated history size must exceed
    /// before `should_compact` returns `true`.
    pub trigger_fraction: f64,
    /// Histories with this many messages or fewer are never compacted.
    pub tail_min_messages: usize,
    /// NOTE(review): not read anywhere in this file — presumably intended as a cap
    /// on the summary length; confirm where (or whether) it is applied.
    pub summary_max_tokens: i32,
    /// Text of the user message appended to the history to request the summary.
    pub summary_prompt: String,
    /// Estimated-token budget for the user messages kept verbatim in the
    /// compacted history.
    pub user_message_token_budget: u64,
}
224
impl Default for SummarizeCompactionStrategy {
    /// Builds a strategy from this module's `DEFAULT_*` constants, with
    /// [`DEFAULT_SUMMARY_PROMPT`] as the summarization prompt.
    fn default() -> Self {
        Self {
            trigger_fraction: DEFAULT_TRIGGER_FRACTION,
            tail_min_messages: DEFAULT_TAIL_MIN_MESSAGES,
            summary_max_tokens: DEFAULT_SUMMARY_MAX_TOKENS,
            summary_prompt: DEFAULT_SUMMARY_PROMPT.into(),
            user_message_token_budget: DEFAULT_USER_MESSAGE_TOKEN_BUDGET,
        }
    }
}
236
237impl SummarizeCompactionStrategy {
238 pub fn with_trigger_fraction(mut self, fraction: f64) -> Self {
239 self.trigger_fraction = fraction;
240 self
241 }
242
243 pub fn with_tail_min_messages(mut self, n: usize) -> Self {
244 self.tail_min_messages = n;
245 self
246 }
247
248 pub fn with_summary_max_tokens(mut self, n: i32) -> Self {
249 self.summary_max_tokens = n;
250 self
251 }
252
253 pub fn with_user_message_token_budget(mut self, budget: u64) -> Self {
254 self.user_message_token_budget = budget;
255 self
256 }
257}
258
/// Prompt used by default to request the summary.
///
/// `SummarizeCompactionStrategy::compact` appends it to the history as a final
/// user message. It refers to `<conversation-summary>` blocks, which is the tag
/// `serialize_summary` wraps summaries in.
pub const DEFAULT_SUMMARY_PROMPT: &str = "You are performing a CONTEXT CHECKPOINT COMPACTION. \
    Create a handoff summary for another agent instance that will resume this task.\n\n\
    Include:\n\
    - Current progress and key decisions made\n\
    - Important context, constraints, or user preferences that must be respected\n\
    - What remains to be done (clear next steps)\n\
    - Any critical data, file paths, command outputs, or references needed to continue\n\n\
    If a prior <conversation-summary> block exists in this conversation, produce an UPDATED \
    summary that supersedes it (incorporating all activity since). \
    Output only the summary text — no preamble, no closing remarks.";
272
273#[async_trait]
274impl CompactionStrategy for SummarizeCompactionStrategy {
275 fn should_compact(&self, messages: &[ChatMessage], context_window_tokens: u64) -> bool {
276 if messages.len() <= self.tail_min_messages {
279 return false;
280 }
281 let tokens = estimate_messages_tokens(messages);
282 let threshold = ((context_window_tokens as f64) * self.trigger_fraction).round() as u64;
283 tokens > threshold
284 }
285
286 async fn compact(
287 &self,
288 messages: Vec<ChatMessage>,
289 ctx: &CompactionContext,
290 ) -> Result<CompactionOutcome, CompactionError> {
291 if messages.len() <= self.tail_min_messages {
295 return Ok(CompactionOutcome {
296 messages,
297 usage: None,
298 });
299 }
300
301 let mut summarize_messages = messages.clone();
307 summarize_messages.push(ChatMessage::User {
308 content: self.summary_prompt.clone(),
309 attachments: vec![],
310 });
311 let request = ModelTurnInput {
312 system_prompt: ctx.system_prompt.clone(),
313 messages: summarize_messages,
314 tools: ctx.tools.clone(),
315 tool_choice: crate::model::ToolChoice::Auto,
316 parallel_tool_calls: None,
317 };
318
319 let stream = ctx.model_client.stream(request).await?;
326 let response = collect_model_response(stream).await?;
327 let (summary_text, usage) = match response {
328 ModelResponse::Message { text, usage, .. } => (text, usage),
329 ModelResponse::ToolCall { .. } => return Err(CompactionError::EmptySummary),
332 };
333 if summary_text.trim().is_empty() {
334 return Err(CompactionError::EmptySummary);
335 }
336
337 let user_texts = collect_user_message_texts(&messages);
340 if user_texts.is_empty() {
341 return Ok(CompactionOutcome { messages, usage });
344 }
345
346 let out = build_compacted_history(
351 &user_texts,
352 &summary_text,
353 self.user_message_token_budget,
354 );
355 Ok(CompactionOutcome {
356 messages: out,
357 usage,
358 })
359 }
360}
361
/// Wraps `summary` in a `<conversation-summary>` element, with the opening and
/// closing tags each on their own line.
fn serialize_summary(summary: &str) -> String {
    const OPEN_TAG: &str = "<conversation-summary>\n";
    const CLOSE_TAG: &str = "\n</conversation-summary>";

    let mut wrapped = String::with_capacity(OPEN_TAG.len() + summary.len() + CLOSE_TAG.len());
    wrapped.push_str(OPEN_TAG);
    wrapped.push_str(summary);
    wrapped.push_str(CLOSE_TAG);
    wrapped
}
365
366fn collect_user_message_texts(messages: &[ChatMessage]) -> Vec<String> {
370 messages
371 .iter()
372 .filter_map(|m| match m {
373 ChatMessage::User { content, .. } if !is_summary_message(content) => {
374 Some(content.clone())
375 }
376 _ => None,
377 })
378 .collect()
379}
380
/// Tells whether `content`, ignoring leading whitespace, opens with the
/// `<conversation-summary>` tag.
fn is_summary_message(content: &str) -> bool {
    content
        .trim_start()
        .strip_prefix("<conversation-summary>")
        .is_some()
}
384
385fn build_compacted_history(
392 user_texts: &[String],
393 summary_text: &str,
394 token_budget: u64,
395) -> Vec<ChatMessage> {
396 let mut selected: Vec<String> = Vec::new();
397 let mut remaining = token_budget;
398 for text in user_texts.iter().rev() {
399 if remaining == 0 {
400 break;
401 }
402 let tokens = estimate_tokens(text);
403 if tokens <= remaining {
404 selected.push(text.clone());
405 remaining -= tokens;
406 } else {
407 selected.push(truncate_to_token_budget(text, remaining));
410 break;
411 }
412 }
413 selected.reverse(); let mut out = Vec::with_capacity(selected.len() + 1);
415 for text in selected {
416 out.push(ChatMessage::User {
417 content: text,
418 attachments: vec![],
419 });
420 }
421 out.push(ChatMessage::User {
423 content: serialize_summary(summary_text),
424 attachments: vec![],
425 });
426 out
427}
428
/// Returns the longest prefix of `s` whose estimated token count stays within
/// `budget`.
///
/// The estimate is the one `estimate_tokens` applies to non-blank text: ASCII
/// characters at four per token (rounded up) plus one token per non-ASCII
/// character. The cut always falls on a character boundary. A `budget` of zero
/// yields an empty string.
fn truncate_to_token_budget(s: &str, budget: u64) -> String {
    if budget == 0 {
        return String::new();
    }

    let mut narrow_seen: u64 = 0;
    let mut wide_seen: u64 = 0;
    let cut = s
        .char_indices()
        .take_while(|&(_, ch)| {
            if ch.is_ascii() {
                narrow_seen += 1;
            } else {
                wide_seen += 1;
            }
            // Stop at the first character that would push the running estimate
            // past the budget.
            narrow_seen.div_ceil(4) + wide_seen <= budget
        })
        .last()
        .map_or(0, |(start, ch)| start + ch.len_utf8());

    s[..cut].to_owned()
}
454
#[cfg(test)]
mod tests {
    //! Unit tests for the token estimators, the context-window lookup and
    //! `SummarizeCompactionStrategy`.

    use super::*;
    use crate::model::{ModelChunk, ModelClient};
    use crate::tools::ToolInvocation;
    use async_trait::async_trait;
    use futures::stream::{BoxStream, StreamExt};

    /// Test double whose `stream` ignores its input and yields a single text delta
    /// containing `summary`, followed by a `Done` chunk that carries no usage.
    #[derive(Clone)]
    struct FixedSummaryClient {
        summary: String,
    }
    #[async_trait]
    impl ModelClient for FixedSummaryClient {
        async fn stream(
            &self,
            _input: ModelTurnInput,
        ) -> Result<BoxStream<'static, Result<ModelChunk, ModelClientError>>, ModelClientError>
        {
            let chunks = vec![
                Ok(ModelChunk::TextDelta {
                    msg_id: "sum".into(),
                    delta: self.summary.clone(),
                }),
                Ok(ModelChunk::Done {
                    stop_reason: "end_turn".into(),
                    usage: None,
                }),
            ];
            Ok(futures::stream::iter(chunks).boxed())
        }
    }

    /// Builds a user message with no attachments.
    fn user(s: &str) -> ChatMessage {
        ChatMessage::User {
            content: s.into(),
            attachments: vec![],
        }
    }

    /// Builds an assistant message that has text only (no tool calls, no thinking).
    fn assistant_text(s: &str) -> ChatMessage {
        ChatMessage::Assistant {
            text: Some(s.into()),
            tool_calls: vec![],
            thinking: None,
        }
    }

    /// Builds a non-error tool result message with no attachments.
    fn tool_msg(id: &str, content: &str) -> ChatMessage {
        ChatMessage::Tool {
            tool_call_id: id.into(),
            content: content.into(),
            is_error: false,
            attachments: vec![],
        }
    }

    /// A much longer message must be estimated higher than a short one.
    #[test]
    fn token_estimate_grows_with_content_size() {
        let small = user("hi");
        let big = user(&"x".repeat(8000));
        assert!(estimate_chat_message_tokens(&big) > estimate_chat_message_tokens(&small));
    }

    /// Pins the two counting rules: ASCII at four characters per token (rounded
    /// up) and non-ASCII at one token per character; blank input is zero.
    #[test]
    fn estimate_tokens_splits_ascii_and_cjk() {
        assert_eq!(estimate_tokens(""), 0);
        assert_eq!(estimate_tokens("  \n"), 0);
        assert_eq!(estimate_tokens("abcd"), 1);
        // Five ASCII characters round up to two tokens.
        assert_eq!(estimate_tokens("abcde"), 2);
        // Four non-ASCII characters: one token each.
        assert_eq!(estimate_tokens("你好世界"), 4);
        // Two ASCII characters (one token) plus two non-ASCII characters.
        assert_eq!(estimate_tokens("hi你好"), 3);
    }

    /// 1000 CJK characters must be estimated at no fewer than 1000 tokens.
    #[test]
    fn token_estimate_counts_cjk_near_one_per_char() {
        let cjk = user(&"汉".repeat(1000));
        let estimate = estimate_chat_message_tokens(&cjk);
        assert!(
            estimate >= 1000,
            "CJK undercounted: got {estimate}, want >= 1000"
        );
    }

    /// An assistant message with a tool call must cost more than the same message
    /// without one.
    #[test]
    fn token_estimate_includes_tool_call_input() {
        let bare = assistant_text("done");
        let with_tool = ChatMessage::Assistant {
            text: Some("done".into()),
            tool_calls: vec![ToolInvocation {
                id: "tc".into(),
                name: "bash".into(),
                input: serde_json::json!({"command": "echo lots of bytes here for sure"}),
            }],
            thinking: None,
        };
        assert!(estimate_chat_message_tokens(&with_tool) > estimate_chat_message_tokens(&bare));
    }

    /// Spot-checks every rule of `resolve_context_window_tokens`, including the
    /// case-insensitive match and the fallback.
    #[test]
    fn context_window_table_known_models() {
        assert_eq!(resolve_context_window_tokens("claude-opus-4-7"), 1_000_000);
        assert_eq!(
            resolve_context_window_tokens("claude-sonnet-4-6"),
            1_000_000
        );
        assert_eq!(resolve_context_window_tokens("claude-haiku-4-5"), 200_000);
        assert_eq!(resolve_context_window_tokens("claude-3-5-sonnet"), 200_000);
        assert_eq!(resolve_context_window_tokens("gpt-4o"), 128_000);
        assert_eq!(resolve_context_window_tokens("gpt-4.1-mini"), 128_000);
        assert_eq!(resolve_context_window_tokens("o3-mini"), 200_000);
        assert_eq!(resolve_context_window_tokens("MiniMax-M2"), 1_000_000);
        assert_eq!(resolve_context_window_tokens("unknown-model"), 128_000);
    }

    /// A tiny two-message history must not trigger compaction. Note that two
    /// messages is also within the default `tail_min_messages`, so the length
    /// guard alone already yields `false` here.
    #[test]
    fn should_compact_skips_when_below_threshold() {
        let strat = SummarizeCompactionStrategy::default();
        let messages = vec![user("hello"), assistant_text("hi")];
        assert!(!strat.should_compact(&messages, 200_000));
    }

    /// Five messages of 8000 ASCII characters are estimated at 2000 tokens each
    /// (10,000 in total), which exceeds 90% of an 11,000-token window (9,900).
    #[test]
    fn should_compact_fires_when_above_threshold() {
        let strat = SummarizeCompactionStrategy::default();
        let messages = vec![
            user(&"x".repeat(8000)),
            assistant_text(&"y".repeat(8000)),
            user(&"x".repeat(8000)),
            assistant_text(&"y".repeat(8000)),
            user(&"x".repeat(8000)),
        ];
        assert!(strat.should_compact(&messages, 11_000));
    }

    /// Even a history far larger than the window is left alone while it has no
    /// more than `tail_min_messages` messages.
    #[test]
    fn should_compact_respects_tail_min_floor() {
        let strat = SummarizeCompactionStrategy::default();
        let messages = vec![
            user(&"x".repeat(100_000)),
            assistant_text(&"y".repeat(100_000)),
        ];
        assert!(!strat.should_compact(&messages, 1_000));
    }

    /// Compaction keeps the three user messages in order, drops the assistant and
    /// tool messages, and appends the summary as a final user message.
    #[tokio::test]
    async fn compact_folds_history_into_summary_plus_tail() {
        let strat = SummarizeCompactionStrategy::default().with_tail_min_messages(2);
        let ctx = CompactionContext {
            system_prompt: None,
            model_client: Arc::new(FixedSummaryClient {
                summary: "we ran ls and grep".into(),
            }),
            context_window_tokens: 10_000,
            tools: vec![],
        };
        let messages = vec![
            user("first user"),
            assistant_text("response 1"),
            user("second user"),
            tool_msg("tc1", "tool result"),
            user("third user"),
            assistant_text("final response"),
        ];
        let outcome = strat.compact(messages, &ctx).await.unwrap();
        let out = outcome.messages;
        assert_eq!(out.len(), 4, "3 user messages + 1 summary");
        match &out[0] {
            ChatMessage::User { content, .. } => assert_eq!(content, "first user"),
            other => panic!("expected User at [0], got {other:?}"),
        }
        match &out[1] {
            ChatMessage::User { content, .. } => assert_eq!(content, "second user"),
            other => panic!("expected User at [1], got {other:?}"),
        }
        match &out[2] {
            ChatMessage::User { content, .. } => assert_eq!(content, "third user"),
            other => panic!("expected User at [2], got {other:?}"),
        }
        // The last message is the tagged summary produced by the fake client.
        assert!(
            matches!(&out[3], ChatMessage::User { content, .. }
                if content.contains("<conversation-summary>") && content.contains("we ran ls and grep"))
        );
        // The compacted history is shorter than the six-message input.
        assert!(out.len() < 6);
        // The fake client's `Done` chunk carries no usage.
        assert!(outcome.usage.is_none());
    }

    /// A blank model response must surface as `CompactionError::EmptySummary`.
    #[tokio::test]
    async fn compact_returns_empty_summary_error_on_blank_response() {
        let strat = SummarizeCompactionStrategy::default().with_tail_min_messages(2);
        let ctx = CompactionContext {
            system_prompt: None,
            model_client: Arc::new(FixedSummaryClient { summary: "".into() }),
            context_window_tokens: 10_000,
            tools: vec![],
        };
        let messages = vec![
            user("a"),
            assistant_text("b"),
            user("c"),
            assistant_text("d"),
        ];
        let err = strat.compact(messages, &ctx).await.unwrap_err();
        assert!(matches!(err, CompactionError::EmptySummary));
    }

    /// With exactly `tail_min_messages` messages, `compact` returns the input
    /// unchanged and reports no usage.
    #[tokio::test]
    async fn compact_skips_when_messages_at_or_below_tail_min() {
        let strat = SummarizeCompactionStrategy::default().with_tail_min_messages(4);
        let ctx = CompactionContext {
            system_prompt: None,
            model_client: Arc::new(FixedSummaryClient {
                summary: "irrelevant".into(),
            }),
            context_window_tokens: 1_000,
            tools: vec![],
        };
        let messages = vec![
            user("1"),
            assistant_text("2"),
            user("3"),
            assistant_text("4"),
        ];
        let outcome = strat.compact(messages.clone(), &ctx).await.unwrap();
        assert_eq!(outcome.messages, messages);
        assert!(outcome.usage.is_none());
    }
}