1use crate::llm::message::{
21 ContentBlock, Message, MessageLevel, SystemMessage, SystemMessageType, UserMessage,
22};
23use crate::services::tokens;
24use uuid::Uuid;
25
/// Headroom subtracted from the effective context window when computing the
/// auto-compact threshold, so compaction triggers before the hard limit.
const AUTOCOMPACT_BUFFER_TOKENS: u64 = 13_000;

/// Cap on the output-token reservation used by `effective_context_window`;
/// the reservation is the smaller of this and the model's own max output.
const MAX_OUTPUT_TOKENS_FOR_SUMMARY: u64 = 20_000;

/// Circuit breaker: after this many consecutive compaction failures,
/// `should_auto_compact` stops requesting further compactions.
const MAX_CONSECUTIVE_FAILURES: u32 = 3;

/// Maximum automatic recovery attempts after a max-output-token stop.
/// NOTE(review): enforcement is not in this module — confirm at call site.
pub const MAX_OUTPUT_TOKENS_RECOVERY_LIMIT: u32 = 3;

/// Tool names (matched case-insensitively) whose old results are safe to
/// clear during microcompaction.
const COMPACTABLE_TOOLS: &[&str] = &["FileRead", "Bash", "Grep", "Glob", "FileEdit", "FileWrite"];
40
/// Snapshot of how close a conversation is to the model's context limit,
/// computed from an estimated token count (see `token_warning_state`).
#[derive(Debug, Clone)]
pub struct TokenWarningState {
    /// Rounded percentage of the effective context window still free (0–100).
    pub percent_left: u64,
    /// Usage has crossed the warning threshold (within ~20k of the window).
    pub is_above_warning: bool,
    /// Usage has crossed the error threshold (see `token_warning_state`).
    pub is_above_error: bool,
    /// Usage has crossed the auto-compact threshold.
    pub should_compact: bool,
    /// Usage is within ~3k tokens of the window; further requests should block.
    pub is_blocking: bool,
}
55
/// Book-keeping for compaction attempts across a session.
#[derive(Debug, Clone, Default)]
pub struct CompactTracking {
    /// Consecutive failed compaction attempts; feeds the circuit breaker
    /// in `should_auto_compact`.
    pub consecutive_failures: u32,
    /// Whether this conversation has been compacted at least once.
    pub was_compacted: bool,
}
62
63pub fn effective_context_window(model: &str) -> u64 {
65 let context = tokens::context_window_for_model(model);
66 let reserved = tokens::max_output_tokens_for_model(model).min(MAX_OUTPUT_TOKENS_FOR_SUMMARY);
67 context.saturating_sub(reserved)
68}
69
70pub fn auto_compact_threshold(model: &str) -> u64 {
72 effective_context_window(model).saturating_sub(AUTOCOMPACT_BUFFER_TOKENS)
73}
74
75pub fn token_warning_state(messages: &[Message], model: &str) -> TokenWarningState {
77 let token_count = tokens::estimate_context_tokens(messages);
78 let threshold = auto_compact_threshold(model);
79 let effective = effective_context_window(model);
80
81 let percent_left = if effective > 0 {
82 ((effective.saturating_sub(token_count)) as f64 / effective as f64 * 100.0)
83 .round()
84 .max(0.0) as u64
85 } else {
86 0
87 };
88
89 let warning_buffer = 20_000;
90
91 TokenWarningState {
92 percent_left,
93 is_above_warning: token_count >= effective.saturating_sub(warning_buffer),
94 is_above_error: token_count >= effective.saturating_sub(warning_buffer),
95 should_compact: token_count >= threshold,
96 is_blocking: token_count >= effective.saturating_sub(3_000),
97 }
98}
99
100pub fn should_auto_compact(messages: &[Message], model: &str, tracking: &CompactTracking) -> bool {
102 if tracking.consecutive_failures >= MAX_CONSECUTIVE_FAILURES {
104 return false;
105 }
106
107 let state = token_warning_state(messages, model);
108 state.should_compact
109}
110
111pub fn microcompact(messages: &mut [Message], keep_recent: usize) -> u64 {
116 let keep_recent = keep_recent.max(1);
117
118 let mut compactable_indices: Vec<(usize, usize)> = Vec::new(); for (msg_idx, msg) in messages.iter().enumerate() {
122 if let Message::User(u) = msg {
123 for (block_idx, block) in u.content.iter().enumerate() {
124 if let ContentBlock::ToolResult { tool_use_id, .. } = block {
125 if is_compactable_tool_result(messages, tool_use_id) {
127 compactable_indices.push((msg_idx, block_idx));
128 }
129 }
130 }
131 }
132 }
133
134 if compactable_indices.len() <= keep_recent {
135 return 0;
136 }
137
138 let clear_count = compactable_indices.len() - keep_recent;
140 let to_clear = &compactable_indices[..clear_count];
141
142 let mut freed_tokens = 0u64;
143
144 for &(msg_idx, block_idx) in to_clear {
145 if let Message::User(ref mut u) = messages[msg_idx]
146 && let ContentBlock::ToolResult {
147 ref mut content,
148 tool_use_id: _,
149 is_error: _,
150 ..
151 } = u.content[block_idx]
152 {
153 let old_tokens = tokens::estimate_tokens(content);
154 let placeholder = "[Old tool result cleared]".to_string();
155 let new_tokens = tokens::estimate_tokens(&placeholder);
156 *content = placeholder;
157 freed_tokens += old_tokens.saturating_sub(new_tokens);
158 }
159 }
160
161 freed_tokens
162}
163
164fn is_compactable_tool_result(messages: &[Message], tool_use_id: &str) -> bool {
166 for msg in messages {
167 if let Message::Assistant(a) = msg {
168 for block in &a.content {
169 if let ContentBlock::ToolUse { id, name, .. } = block
170 && id == tool_use_id
171 {
172 return COMPACTABLE_TOOLS
173 .iter()
174 .any(|t| t.eq_ignore_ascii_case(name));
175 }
176 }
177 }
178 }
179 false
180}
181
182pub fn compact_boundary_message(summary: &str) -> Message {
184 Message::System(SystemMessage {
185 uuid: Uuid::new_v4(),
186 timestamp: chrono::Utc::now().to_rfc3339(),
187 subtype: SystemMessageType::CompactBoundary,
188 content: format!("[Conversation compacted. Summary: {summary}]"),
189 level: MessageLevel::Info,
190 })
191}
192
193pub fn build_compact_summary_prompt(messages: &[Message]) -> String {
196 let mut context = String::new();
197 for msg in messages {
198 match msg {
199 Message::User(u) => {
200 context.push_str("User: ");
201 for block in &u.content {
202 if let ContentBlock::Text { text } = block {
203 context.push_str(text);
204 }
205 }
206 context.push('\n');
207 }
208 Message::Assistant(a) => {
209 context.push_str("Assistant: ");
210 for block in &a.content {
211 if let ContentBlock::Text { text } = block {
212 context.push_str(text);
213 }
214 }
215 context.push('\n');
216 }
217 _ => {}
218 }
219 }
220
221 format!(
222 "Summarize this conversation concisely, preserving key decisions, \
223 file changes made, and important context. Focus on what the user \
224 was trying to accomplish and what was done.\n\n{context}"
225 )
226}
227
228pub fn max_output_recovery_message() -> Message {
230 Message::User(UserMessage {
231 uuid: Uuid::new_v4(),
232 timestamp: chrono::Utc::now().to_rfc3339(),
233 content: vec![ContentBlock::Text {
234 text: "Output token limit hit. Resume directly — no apology, no recap \
235 of what you were doing. Pick up mid-thought if that is where the \
236 cut happened. Break remaining work into smaller pieces."
237 .to_string(),
238 }],
239 is_meta: true,
240 is_compact_summary: false,
241 })
242}
243
244pub fn parse_prompt_too_long_gap(error_text: &str) -> Option<u64> {
249 let re = regex::Regex::new(r"(\d+)\s*tokens?\s*>\s*(\d+)").ok()?;
250 let captures = re.captures(error_text)?;
251 let actual: u64 = captures.get(1)?.as_str().parse().ok()?;
252 let limit: u64 = captures.get(2)?.as_str().parse().ok()?;
253 let gap = actual.saturating_sub(limit);
254 if gap > 0 { Some(gap) } else { None }
255}
256
/// Compacts the conversation by summarizing older messages with an LLM call
/// and replacing them with a boundary marker plus a meta summary message.
///
/// On success `messages` becomes:
/// `[compact boundary, meta summary message, <kept recent tail>]`
/// and the number of removed messages is returned. Returns `None` — leaving
/// `messages` untouched — when the conversation is too small, the LLM call
/// fails, or the streamed summary comes back empty.
pub async fn compact_with_llm(
    messages: &mut Vec<Message>,
    llm: &dyn crate::llm::provider::Provider,
    model: &str,
) -> Option<usize> {
    // Too small to be worth compacting.
    if messages.len() < 4 {
        return None;
    }

    // Everything before `split_point` is summarized; the tail is kept as-is.
    let keep_count = calculate_keep_count(messages);
    let split_point = messages.len().saturating_sub(keep_count);

    // Require at least two messages to summarize, else skip.
    if split_point < 2 {
        return None;
    }

    let to_summarize = &messages[..split_point];
    let summary_prompt = build_compact_summary_prompt(to_summarize);

    // One-shot summarization request: no tools, no caching.
    let summary_messages = vec![crate::llm::message::user_message(&summary_prompt)];
    let request = crate::llm::provider::ProviderRequest {
        messages: summary_messages,
        system_prompt: "You are a conversation summarizer. Produce a concise summary \
                        preserving key decisions, file changes, and important context. \
                        Do not use tools."
            .to_string(),
        tools: vec![],
        model: model.to_string(),
        max_tokens: 4096,
        temperature: None,
        enable_caching: false,
        tool_choice: Default::default(),
        metadata: None,
    };

    let mut rx = match llm.stream(&request).await {
        Ok(rx) => rx,
        Err(e) => {
            tracing::warn!("Compact LLM call failed: {e}");
            return None;
        }
    };

    // Drain the stream, accumulating only text deltas; other events ignored.
    let mut summary = String::new();
    while let Some(event) = rx.recv().await {
        if let crate::llm::stream::StreamEvent::TextDelta(text) = event {
            summary.push_str(&text);
        }
    }

    // An empty summary means nothing useful came back — abort untouched.
    if summary.is_empty() {
        return None;
    }

    // Rebuild the list: boundary marker, summary message, then the kept tail.
    // The tail is cloned out first because we clear `messages` in place.
    let kept = messages[split_point..].to_vec();
    let removed = split_point;

    messages.clear();
    messages.push(compact_boundary_message(&summary));
    messages.push(Message::User(UserMessage {
        uuid: Uuid::new_v4(),
        timestamp: chrono::Utc::now().to_rfc3339(),
        content: vec![ContentBlock::Text {
            text: format!("[Conversation compacted. Prior context summary:]\n\n{summary}"),
        }],
        is_meta: true,
        is_compact_summary: true,
    }));
    messages.extend(kept);

    tracing::info!("Compacted {removed} messages into summary");
    Some(removed)
}
345
346fn calculate_keep_count(messages: &[Message]) -> usize {
351 let min_text_messages = 5;
352 let min_tokens = 10_000u64;
353 let max_tokens = 40_000u64;
354
355 let mut count = 0usize;
356 let mut text_count = 0usize;
357 let mut token_total = 0u64;
358
359 for msg in messages.iter().rev() {
361 let tokens = crate::services::tokens::estimate_message_tokens(msg);
362 token_total += tokens;
363 count += 1;
364
365 let has_text = match msg {
367 Message::User(u) => u
368 .content
369 .iter()
370 .any(|b| matches!(b, ContentBlock::Text { .. })),
371 Message::Assistant(a) => a
372 .content
373 .iter()
374 .any(|b| matches!(b, ContentBlock::Text { .. })),
375 _ => false,
376 };
377 if has_text {
378 text_count += 1;
379 }
380
381 if text_count >= min_text_messages && token_total >= min_tokens {
383 break;
384 }
385 if token_total >= max_tokens {
387 break;
388 }
389 }
390
391 count
392}
393
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_auto_compact_threshold() {
        let threshold = auto_compact_threshold("claude-sonnet");
        assert_eq!(threshold, 200_000 - 16_384 - 13_000);
    }

    #[test]
    fn test_parse_prompt_too_long_gap() {
        let msg = "prompt is too long: 137500 tokens > 135000 maximum";
        assert_eq!(parse_prompt_too_long_gap(msg), Some(2500));
    }

    #[test]
    fn test_parse_prompt_too_long_no_match() {
        assert_eq!(parse_prompt_too_long_gap("some other error"), None);
    }

    #[test]
    fn test_effective_context_window() {
        let eff = effective_context_window("claude-sonnet");
        assert!(eff > 100_000);
        assert!(eff < 200_000);
    }

    #[test]
    fn test_token_warning_state_empty() {
        let state = token_warning_state(&[], "claude-sonnet");
        assert_eq!(state.percent_left, 100);
        assert!(!state.is_above_warning);
        assert!(!state.is_blocking);
    }

    #[test]
    fn test_should_auto_compact_empty() {
        let tracking = CompactTracking::default();
        assert!(!should_auto_compact(&[], "claude-sonnet", &tracking));
    }

    #[test]
    fn test_should_auto_compact_circuit_breaker() {
        let tracking = CompactTracking {
            consecutive_failures: 5,
            was_compacted: false,
        };
        assert!(!should_auto_compact(&[], "claude-sonnet", &tracking));
    }

    #[test]
    fn test_microcompact_empty() {
        let mut messages = vec![];
        let freed = microcompact(&mut messages, 2);
        assert_eq!(freed, 0);
    }

    #[test]
    fn test_microcompact_keeps_recent() {
        use crate::llm::message::*;
        let mut messages = vec![
            Message::Assistant(AssistantMessage {
                uuid: uuid::Uuid::new_v4(),
                timestamp: String::new(),
                content: vec![ContentBlock::ToolUse {
                    id: "call_1".into(),
                    name: "FileRead".into(),
                    input: serde_json::json!({}),
                }],
                model: None,
                usage: None,
                stop_reason: None,
                request_id: None,
            }),
            Message::User(UserMessage {
                uuid: uuid::Uuid::new_v4(),
                timestamp: String::new(),
                content: vec![ContentBlock::ToolResult {
                    tool_use_id: "call_1".into(),
                    content: "file content here".repeat(100),
                    is_error: false,
                    extra_content: vec![],
                }],
                is_meta: true,
                is_compact_summary: false,
            }),
        ];
        // Only one compactable result and keep_recent = 5 — nothing cleared.
        let freed = microcompact(&mut messages, 5);
        assert_eq!(freed, 0);
    }

    #[test]
    fn test_compact_boundary_message() {
        let msg = compact_boundary_message("test summary");
        if let Message::System(s) = msg {
            assert_eq!(
                s.subtype,
                crate::llm::message::SystemMessageType::CompactBoundary
            );
        } else {
            panic!("Expected system message");
        }
    }

    #[test]
    fn test_max_output_recovery_message() {
        let msg = max_output_recovery_message();
        match msg {
            Message::User(u) => {
                assert!(!u.content.is_empty());
            }
            _ => panic!("Expected user message"),
        }
    }

    #[test]
    fn test_build_compact_summary_prompt() {
        use crate::llm::message::*;
        let messages = vec![user_message("hello"), user_message("world")];
        let prompt = build_compact_summary_prompt(&messages);
        assert!(prompt.contains("Summarize"));
    }

    #[test]
    fn test_effective_context_window_gpt_model() {
        let eff = effective_context_window("gpt-4o");
        assert_eq!(eff, 128_000 - 16_384);
    }

    #[test]
    fn test_auto_compact_threshold_gpt_model() {
        let threshold = auto_compact_threshold("gpt-4o");
        assert_eq!(threshold, 128_000 - 16_384 - 13_000);
    }

    // Renamed from `..._with_comma_format`: the old test's input contained
    // no commas and byte-duplicated `test_parse_prompt_too_long_gap`. This
    // covers a genuinely different overage instead.
    #[test]
    fn test_parse_prompt_too_long_gap_large_overage() {
        let msg = "prompt is too long: 250000 tokens > 200000 maximum";
        assert_eq!(parse_prompt_too_long_gap(msg), Some(50_000));
    }

    #[test]
    fn test_parse_prompt_too_long_gap_equal_tokens_returns_none() {
        let msg = "prompt is too long: 135000 tokens > 135000 maximum";
        assert_eq!(parse_prompt_too_long_gap(msg), None);
    }

    #[test]
    fn test_token_warning_state_large_count_should_compact() {
        use crate::llm::message::*;
        let big_text = "a".repeat(800_000);
        let messages = vec![user_message(&big_text)];
        let state = token_warning_state(&messages, "claude-sonnet");
        assert!(state.should_compact);
    }

    #[test]
    fn test_should_auto_compact_empty_tracking_small_conversation() {
        let tracking = CompactTracking::default();
        let messages = vec![crate::llm::message::user_message("tiny")];
        assert!(!should_auto_compact(&messages, "claude-sonnet", &tracking));
    }

    #[test]
    fn test_compact_boundary_message_content_format() {
        let msg = compact_boundary_message("my summary");
        if let Message::System(s) = &msg {
            assert!(s.content.contains("my summary"));
            assert!(s.content.starts_with("[Conversation compacted."));
        } else {
            panic!("Expected System message");
        }
    }

    #[test]
    fn test_build_compact_summary_prompt_includes_user_and_assistant() {
        use crate::llm::message::*;
        let messages = vec![
            user_message("user said this"),
            Message::Assistant(AssistantMessage {
                uuid: uuid::Uuid::new_v4(),
                timestamp: String::new(),
                content: vec![ContentBlock::Text {
                    text: "assistant said that".into(),
                }],
                model: None,
                usage: None,
                stop_reason: None,
                request_id: None,
            }),
        ];
        let prompt = build_compact_summary_prompt(&messages);
        assert!(prompt.contains("user said this"));
        assert!(prompt.contains("assistant said that"));
        assert!(prompt.contains("User:"));
        assert!(prompt.contains("Assistant:"));
    }

    #[test]
    fn test_max_output_recovery_message_is_meta() {
        let msg = max_output_recovery_message();
        if let Message::User(u) = &msg {
            assert!(u.is_meta);
        } else {
            panic!("Expected User message");
        }
    }

    #[test]
    fn test_calculate_keep_count_returns_at_least_5_for_large_list() {
        use crate::llm::message::*;
        let messages: Vec<Message> = (0..20)
            .map(|i| user_message(format!("message {i}")))
            .collect();
        let keep = calculate_keep_count(&messages);
        assert!(keep >= 5, "keep_count was {keep}, expected at least 5");
    }
}