1use crate::llm::message::{
21 ContentBlock, Message, MessageLevel, SystemMessage, SystemMessageType, UserMessage,
22};
23use crate::services::tokens;
24use uuid::Uuid;
25
/// Safety margin (in tokens) held back below the effective context window
/// before auto-compaction triggers; see `auto_compact_threshold`.
const AUTOCOMPACT_BUFFER_TOKENS: u64 = 13_000;

/// Upper bound on the output-token reservation subtracted from the context
/// window in `effective_context_window` (caps the model's own max-output).
const MAX_OUTPUT_TOKENS_FOR_SUMMARY: u64 = 20_000;

/// Circuit breaker: once this many compaction attempts fail in a row,
/// `should_auto_compact` stops trying (prevents compaction loops).
const MAX_CONSECUTIVE_FAILURES: u32 = 3;

/// Public cap on recovery retries after hitting the output-token limit.
/// NOTE(review): not referenced within this file — presumably consumed by the
/// caller that sends `max_output_recovery_message`; confirm at call sites.
pub const MAX_OUTPUT_TOKENS_RECOVERY_LIMIT: u32 = 3;

/// Tool names whose results are safe for `microcompact` to clear
/// (matched case-insensitively against `ContentBlock::ToolUse.name`).
const COMPACTABLE_TOOLS: &[&str] = &["FileRead", "Bash", "Grep", "Glob", "FileEdit", "FileWrite"];
40
/// Snapshot of how close the current conversation is to the model's
/// effective context window, as computed by `token_warning_state`.
#[derive(Debug, Clone)]
pub struct TokenWarningState {
    /// Rounded percentage of the effective context window still free (0–100).
    pub percent_left: u64,
    /// Usage has crossed the warning buffer below the effective window.
    pub is_above_warning: bool,
    /// Error-level proximity indicator; threshold defined in `token_warning_state`.
    pub is_above_error: bool,
    /// Usage has crossed the auto-compaction threshold.
    pub should_compact: bool,
    /// Usage is close enough to the hard limit that further requests should be blocked.
    pub is_blocking: bool,
}
55
/// Bookkeeping for auto-compaction attempts across turns.
#[derive(Debug, Clone, Default)]
pub struct CompactTracking {
    /// Consecutive failed compaction attempts; feeds the circuit breaker in
    /// `should_auto_compact` (see `MAX_CONSECUTIVE_FAILURES`).
    pub consecutive_failures: u32,
    /// Whether this conversation has been compacted at least once.
    pub was_compacted: bool,
}
62
63pub fn effective_context_window(model: &str) -> u64 {
65 let context = tokens::context_window_for_model(model);
66 let reserved = tokens::max_output_tokens_for_model(model).min(MAX_OUTPUT_TOKENS_FOR_SUMMARY);
67 context.saturating_sub(reserved)
68}
69
/// Token count at which auto-compaction should kick in: the effective
/// context window minus a fixed safety buffer. Saturates at zero.
pub fn auto_compact_threshold(model: &str) -> u64 {
    effective_context_window(model).saturating_sub(AUTOCOMPACT_BUFFER_TOKENS)
}
74
75pub fn token_warning_state(messages: &[Message], model: &str) -> TokenWarningState {
77 let token_count = tokens::estimate_context_tokens(messages);
78 let threshold = auto_compact_threshold(model);
79 let effective = effective_context_window(model);
80
81 let percent_left = if effective > 0 {
82 ((effective.saturating_sub(token_count)) as f64 / effective as f64 * 100.0)
83 .round()
84 .max(0.0) as u64
85 } else {
86 0
87 };
88
89 let warning_buffer = 20_000;
90
91 TokenWarningState {
92 percent_left,
93 is_above_warning: token_count >= effective.saturating_sub(warning_buffer),
94 is_above_error: token_count >= effective.saturating_sub(warning_buffer),
95 should_compact: token_count >= threshold,
96 is_blocking: token_count >= effective.saturating_sub(3_000),
97 }
98}
99
100pub fn should_auto_compact(messages: &[Message], model: &str, tracking: &CompactTracking) -> bool {
102 if tracking.consecutive_failures >= MAX_CONSECUTIVE_FAILURES {
104 return false;
105 }
106
107 let state = token_warning_state(messages, model);
108 state.should_compact
109}
110
111pub fn microcompact(messages: &mut [Message], keep_recent: usize) -> u64 {
116 let keep_recent = keep_recent.max(1);
117
118 let mut compactable_indices: Vec<(usize, usize)> = Vec::new(); for (msg_idx, msg) in messages.iter().enumerate() {
122 if let Message::User(u) = msg {
123 for (block_idx, block) in u.content.iter().enumerate() {
124 if let ContentBlock::ToolResult { tool_use_id, .. } = block {
125 if is_compactable_tool_result(messages, tool_use_id) {
127 compactable_indices.push((msg_idx, block_idx));
128 }
129 }
130 }
131 }
132 }
133
134 if compactable_indices.len() <= keep_recent {
135 return 0;
136 }
137
138 let clear_count = compactable_indices.len() - keep_recent;
140 let to_clear = &compactable_indices[..clear_count];
141
142 let mut freed_tokens = 0u64;
143
144 for &(msg_idx, block_idx) in to_clear {
145 if let Message::User(ref mut u) = messages[msg_idx]
146 && let ContentBlock::ToolResult {
147 ref mut content,
148 tool_use_id: _,
149 is_error: _,
150 ..
151 } = u.content[block_idx]
152 {
153 let old_tokens = tokens::estimate_tokens(content);
154 let placeholder = "[Old tool result cleared]".to_string();
155 let new_tokens = tokens::estimate_tokens(&placeholder);
156 *content = placeholder;
157 freed_tokens += old_tokens.saturating_sub(new_tokens);
158 }
159 }
160
161 freed_tokens
162}
163
164fn is_compactable_tool_result(messages: &[Message], tool_use_id: &str) -> bool {
166 for msg in messages {
167 if let Message::Assistant(a) = msg {
168 for block in &a.content {
169 if let ContentBlock::ToolUse { id, name, .. } = block
170 && id == tool_use_id
171 {
172 return COMPACTABLE_TOOLS
173 .iter()
174 .any(|t| t.eq_ignore_ascii_case(name));
175 }
176 }
177 }
178 }
179 false
180}
181
/// Build the system message that marks a compaction boundary in the history,
/// embedding `summary` in its visible content.
pub fn compact_boundary_message(summary: &str) -> Message {
    Message::System(SystemMessage {
        uuid: Uuid::new_v4(),
        timestamp: chrono::Utc::now().to_rfc3339(),
        subtype: SystemMessageType::CompactBoundary,
        content: format!("[Conversation compacted. Summary: {summary}]"),
        level: MessageLevel::Info,
    })
}
192
193pub fn build_compact_summary_prompt(messages: &[Message]) -> String {
196 let mut context = String::new();
197 for msg in messages {
198 match msg {
199 Message::User(u) => {
200 context.push_str("User: ");
201 for block in &u.content {
202 if let ContentBlock::Text { text } = block {
203 context.push_str(text);
204 }
205 }
206 context.push('\n');
207 }
208 Message::Assistant(a) => {
209 context.push_str("Assistant: ");
210 for block in &a.content {
211 if let ContentBlock::Text { text } = block {
212 context.push_str(text);
213 }
214 }
215 context.push('\n');
216 }
217 _ => {}
218 }
219 }
220
221 format!(
222 "Summarize this conversation concisely, preserving key decisions, \
223 file changes made, and important context. Focus on what the user \
224 was trying to accomplish and what was done.\n\n{context}"
225 )
226}
227
/// Build the meta user message injected after the model hits its output-token
/// limit, instructing it to resume without preamble. Marked `is_meta` so it
/// is treated as synthetic rather than genuine user input.
pub fn max_output_recovery_message() -> Message {
    Message::User(UserMessage {
        uuid: Uuid::new_v4(),
        timestamp: chrono::Utc::now().to_rfc3339(),
        content: vec![ContentBlock::Text {
            text: "Output token limit hit. Resume directly — no apology, no recap \
                   of what you were doing. Pick up mid-thought if that is where the \
                   cut happened. Break remaining work into smaller pieces."
                .to_string(),
        }],
        is_meta: true,
        is_compact_summary: false,
    })
}
243
244pub fn parse_prompt_too_long_gap(error_text: &str) -> Option<u64> {
249 let re = regex::Regex::new(r"(\d+)\s*tokens?\s*>\s*(\d+)").ok()?;
250 let captures = re.captures(error_text)?;
251 let actual: u64 = captures.get(1)?.as_str().parse().ok()?;
252 let limit: u64 = captures.get(2)?.as_str().parse().ok()?;
253 let gap = actual.saturating_sub(limit);
254 if gap > 0 { Some(gap) } else { None }
255}
256
/// Compact the conversation by summarizing its older half with an LLM call.
///
/// Splits the history at a point chosen by `calculate_keep_count`, asks the
/// provider for a summary of everything before the split, then rebuilds
/// `messages` as: compaction-boundary system message, a meta user message
/// carrying the summary, and the kept recent messages.
///
/// Returns the number of messages removed, or `None` when the conversation
/// is too short, the split would remove almost nothing, the provider call
/// fails, or the streamed summary comes back empty. On `None`, `messages`
/// is left unmodified.
pub async fn compact_with_llm(
    messages: &mut Vec<Message>,
    llm: &dyn crate::llm::provider::Provider,
    model: &str,
    cancel: tokio_util::sync::CancellationToken,
) -> Option<usize> {
    // Too short to be worth compacting.
    if messages.len() < 4 {
        return None;
    }

    let keep_count = calculate_keep_count(messages);
    let split_point = messages.len().saturating_sub(keep_count);

    // Keeping nearly everything — summarizing <2 messages isn't useful.
    if split_point < 2 {
        return None;
    }

    let to_summarize = &messages[..split_point];
    let summary_prompt = build_compact_summary_prompt(to_summarize);

    // The summarization request is a fresh single-message conversation;
    // caching and tools are disabled since this is a one-shot call.
    let summary_messages = vec![crate::llm::message::user_message(&summary_prompt)];
    let request = crate::llm::provider::ProviderRequest {
        messages: summary_messages,
        system_prompt: "You are a conversation summarizer. Produce a concise summary \
                        preserving key decisions, file changes, and important context. \
                        Do not use tools."
            .to_string(),
        tools: vec![],
        model: model.to_string(),
        max_tokens: 4096,
        temperature: None,
        enable_caching: false,
        tool_choice: Default::default(),
        metadata: None,
        cancel,
    };

    let mut rx = match llm.stream(&request).await {
        Ok(rx) => rx,
        Err(e) => {
            tracing::warn!("Compact LLM call failed: {e}");
            return None;
        }
    };

    // Accumulate only text deltas; other stream events are ignored here.
    let mut summary = String::new();
    while let Some(event) = rx.recv().await {
        if let crate::llm::stream::StreamEvent::TextDelta(text) = event {
            summary.push_str(&text);
        }
    }

    if summary.is_empty() {
        return None;
    }

    // Copy the tail out before clearing, then rebuild in place:
    // boundary marker + summary carrier + kept recent messages.
    let kept = messages[split_point..].to_vec();
    let removed = split_point;

    messages.clear();
    messages.push(compact_boundary_message(&summary));
    messages.push(Message::User(UserMessage {
        uuid: Uuid::new_v4(),
        timestamp: chrono::Utc::now().to_rfc3339(),
        content: vec![ContentBlock::Text {
            text: format!("[Conversation compacted. Prior context summary:]\n\n{summary}"),
        }],
        is_meta: true,
        is_compact_summary: true,
    }));
    messages.extend(kept);

    tracing::info!("Compacted {removed} messages into summary");
    Some(removed)
}
347
348fn calculate_keep_count(messages: &[Message]) -> usize {
353 let min_text_messages = 5;
354 let min_tokens = 10_000u64;
355 let max_tokens = 40_000u64;
356
357 let mut count = 0usize;
358 let mut text_count = 0usize;
359 let mut token_total = 0u64;
360
361 for msg in messages.iter().rev() {
363 let tokens = crate::services::tokens::estimate_message_tokens(msg);
364 token_total += tokens;
365 count += 1;
366
367 let has_text = match msg {
369 Message::User(u) => u
370 .content
371 .iter()
372 .any(|b| matches!(b, ContentBlock::Text { .. })),
373 Message::Assistant(a) => a
374 .content
375 .iter()
376 .any(|b| matches!(b, ContentBlock::Text { .. })),
377 _ => false,
378 };
379 if has_text {
380 text_count += 1;
381 }
382
383 if text_count >= min_text_messages && token_total >= min_tokens {
385 break;
386 }
387 if token_total >= max_tokens {
389 break;
390 }
391 }
392
393 count
394}
395
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_auto_compact_threshold() {
        // claude-sonnet: 200k window minus 16_384 output reservation minus compact buffer.
        let threshold = auto_compact_threshold("claude-sonnet");
        assert_eq!(threshold, 200_000 - 16_384 - 13_000);
    }

    #[test]
    fn test_parse_prompt_too_long_gap() {
        let msg = "prompt is too long: 137500 tokens > 135000 maximum";
        assert_eq!(parse_prompt_too_long_gap(msg), Some(2500));
    }

    #[test]
    fn test_parse_prompt_too_long_no_match() {
        assert_eq!(parse_prompt_too_long_gap("some other error"), None);
    }

    #[test]
    fn test_effective_context_window() {
        let eff = effective_context_window("claude-sonnet");
        assert!(eff > 100_000);
        assert!(eff < 200_000);
    }

    #[test]
    fn test_token_warning_state_empty() {
        let state = token_warning_state(&[], "claude-sonnet");
        assert_eq!(state.percent_left, 100);
        assert!(!state.is_above_warning);
        assert!(!state.is_blocking);
    }

    #[test]
    fn test_should_auto_compact_empty() {
        let tracking = CompactTracking::default();
        assert!(!should_auto_compact(&[], "claude-sonnet", &tracking));
    }

    #[test]
    fn test_should_auto_compact_circuit_breaker() {
        // Failures past the limit must suppress compaction regardless of size.
        let tracking = CompactTracking {
            consecutive_failures: 5,
            was_compacted: false,
        };
        assert!(!should_auto_compact(&[], "claude-sonnet", &tracking));
    }

    #[test]
    fn test_microcompact_empty() {
        let mut messages = vec![];
        let freed = microcompact(&mut messages, 2);
        assert_eq!(freed, 0);
    }

    #[test]
    fn test_microcompact_keeps_recent() {
        use crate::llm::message::*;
        // One compactable result with keep_recent=5: nothing should be cleared.
        let mut messages = vec![
            Message::Assistant(AssistantMessage {
                uuid: uuid::Uuid::new_v4(),
                timestamp: String::new(),
                content: vec![ContentBlock::ToolUse {
                    id: "call_1".into(),
                    name: "FileRead".into(),
                    input: serde_json::json!({}),
                }],
                model: None,
                usage: None,
                stop_reason: None,
                request_id: None,
            }),
            Message::User(UserMessage {
                uuid: uuid::Uuid::new_v4(),
                timestamp: String::new(),
                content: vec![ContentBlock::ToolResult {
                    tool_use_id: "call_1".into(),
                    content: "file content here".repeat(100),
                    is_error: false,
                    extra_content: vec![],
                }],
                is_meta: true,
                is_compact_summary: false,
            }),
        ];
        let freed = microcompact(&mut messages, 5);
        assert_eq!(freed, 0);
    }

    #[test]
    fn test_compact_boundary_message() {
        let msg = compact_boundary_message("test summary");
        if let Message::System(s) = msg {
            assert_eq!(
                s.subtype,
                crate::llm::message::SystemMessageType::CompactBoundary
            );
        } else {
            panic!("Expected system message");
        }
    }

    #[test]
    fn test_max_output_recovery_message() {
        let msg = max_output_recovery_message();
        match msg {
            Message::User(u) => {
                assert!(!u.content.is_empty());
            }
            _ => panic!("Expected user message"),
        }
    }

    #[test]
    fn test_build_compact_summary_prompt() {
        use crate::llm::message::*;
        let messages = vec![user_message("hello"), user_message("world")];
        let prompt = build_compact_summary_prompt(&messages);
        assert!(prompt.contains("Summarize"));
    }

    #[test]
    fn test_effective_context_window_gpt_model() {
        let eff = effective_context_window("gpt-4o");
        assert_eq!(eff, 128_000 - 16_384);
    }

    #[test]
    fn test_auto_compact_threshold_gpt_model() {
        let threshold = auto_compact_threshold("gpt-4o");
        assert_eq!(threshold, 128_000 - 16_384 - 13_000);
    }

    // Was `test_parse_prompt_too_long_gap_with_comma_format`, but it
    // duplicated `test_parse_prompt_too_long_gap` byte-for-byte and contained
    // no commas (the \d+ pattern does not handle comma-grouped numbers).
    // Replaced with a genuinely different error phrasing.
    #[test]
    fn test_parse_prompt_too_long_gap_alternate_phrasing() {
        let msg = "input length 137500 tokens > 135000 tokens maximum";
        assert_eq!(parse_prompt_too_long_gap(msg), Some(2500));
    }

    #[test]
    fn test_parse_prompt_too_long_gap_equal_tokens_returns_none() {
        let msg = "prompt is too long: 135000 tokens > 135000 maximum";
        assert_eq!(parse_prompt_too_long_gap(msg), None);
    }

    #[test]
    fn test_token_warning_state_large_count_should_compact() {
        use crate::llm::message::*;
        // Large enough to exceed the compaction threshold for claude-sonnet.
        let big_text = "a".repeat(800_000);
        let messages = vec![user_message(&big_text)];
        let state = token_warning_state(&messages, "claude-sonnet");
        assert!(state.should_compact);
    }

    #[test]
    fn test_should_auto_compact_empty_tracking_small_conversation() {
        let tracking = CompactTracking::default();
        let messages = vec![crate::llm::message::user_message("tiny")];
        assert!(!should_auto_compact(&messages, "claude-sonnet", &tracking));
    }

    #[test]
    fn test_compact_boundary_message_content_format() {
        let msg = compact_boundary_message("my summary");
        if let Message::System(s) = &msg {
            assert!(s.content.contains("my summary"));
            assert!(s.content.starts_with("[Conversation compacted."));
        } else {
            panic!("Expected System message");
        }
    }

    #[test]
    fn test_build_compact_summary_prompt_includes_user_and_assistant() {
        use crate::llm::message::*;
        let messages = vec![
            user_message("user said this"),
            Message::Assistant(AssistantMessage {
                uuid: uuid::Uuid::new_v4(),
                timestamp: String::new(),
                content: vec![ContentBlock::Text {
                    text: "assistant said that".into(),
                }],
                model: None,
                usage: None,
                stop_reason: None,
                request_id: None,
            }),
        ];
        let prompt = build_compact_summary_prompt(&messages);
        assert!(prompt.contains("user said this"));
        assert!(prompt.contains("assistant said that"));
        assert!(prompt.contains("User:"));
        assert!(prompt.contains("Assistant:"));
    }

    #[test]
    fn test_max_output_recovery_message_is_meta() {
        let msg = max_output_recovery_message();
        if let Message::User(u) = &msg {
            assert!(u.is_meta);
        } else {
            panic!("Expected User message");
        }
    }

    #[test]
    fn test_calculate_keep_count_returns_at_least_5_for_large_list() {
        use crate::llm::message::*;
        let messages: Vec<Message> = (0..20)
            .map(|i| user_message(format!("message {i}")))
            .collect();
        let keep = calculate_keep_count(&messages);
        assert!(keep >= 5, "keep_count was {keep}, expected at least 5");
    }
}