1use crate::constants::env::{ai, ai_code};
8use crate::services::token_estimation::{
9 rough_token_count_estimation, rough_token_count_estimation_for_message,
10};
11use crate::types::*;
12
/// Context window size returned by [`get_default_context_window`] when the
/// environment does not supply a valid, positive override.
pub const DEFAULT_CONTEXT_WINDOW: u32 = 200_000;
15
16pub fn get_default_context_window() -> u32 {
18 if let Ok(override_val) = std::env::var(ai::CONTEXT_WINDOW) {
19 if let Ok(parsed) = override_val.parse::<u32>() {
20 if parsed > 0 {
21 return parsed;
22 }
23 }
24 }
25 DEFAULT_CONTEXT_WINDOW
26}
27
/// Builds the instruction prompt sent to the model when a conversation is compacted.
///
/// The prompt forbids tool calls and asks for an `<analysis>` block followed
/// by a `<summary>` block covering nine numbered sections. The text is static;
/// a fresh `String` is returned on every call.
pub fn get_compact_prompt() -> String {
    // The sub-bullets under step 1 are indented beneath their parent so the
    // list hierarchy ("Specific details like:" -> file names, snippets, ...)
    // survives in the rendered prompt.
    String::from(
        r#"CRITICAL: Respond with TEXT ONLY. Do NOT call any tools.

- Do NOT use Read, Bash, Grep, Glob, Edit, Write, or ANY other tool.
- You already have all the context you need in the conversation above.
- Tool calls will be REJECTED and will waste your only turn — you will fail the task.
- Your entire response must be plain text: an <analysis> block followed by a <summary> block.

Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions.
This summary should be thorough in capturing technical details, code patterns, and architectural decisions that would be essential for continuing development work without losing context.

Before providing your final summary, wrap your analysis in <analysis> tags to organize your thoughts and ensure you've covered all necessary points. In your analysis process:

1. Chronologically analyze each message and section of the conversation. For each section thoroughly identify:
   - The user's explicit requests and intents
   - Your approach to addressing the user's requests
   - Key decisions, technical concepts and code patterns
   - Specific details like:
     - file names
     - full code snippets
     - function signatures
     - file edits
   - Errors that you ran into and how you fixed them
   - Pay special attention to specific user feedback that you received, especially if the user told you to do something differently.
2. Double-check for technical accuracy and completeness, addressing each required element thoroughly.

Your summary should include the following sections:

1. Primary Request and Intent: Capture all of the user's explicit requests and intents in detail
2. Key Technical Concepts: List all important technical concepts, technologies, and frameworks discussed.
3. Files and Code Sections: Enumerate specific files and code sections examined, modified, or created. Pay special attention to the most recent messages and include full code snippets where applicable and include a summary of why this file read or edit is important.
4. Errors and fixes: List all errors that you ran into, and how you fixed them. Pay special attention to specific user feedback that you received, especially if the user told you to do something differently.
5. Problem Solving: Document problems solved and any ongoing troubleshooting efforts.
6. All user messages: List ALL user messages that are not tool results. These are critical for understanding the users' feedback and changing intent.
7. Pending Tasks: Outline any pending tasks that you have explicitly been asked to work on.
8. Current Work: Describe in detail precisely what was being worked on immediately before this summary request, paying special attention to the most recent messages from both user and assistant. Include file names and code snippets where applicable.
9. Context for Continuing Work: Key context, decisions, or state needed to continue the work.

IMPORTANT: Be extremely thorough — include ALL important technical details, code patterns, and architectural decisions. This summary must provide enough context for the next turn to continue seamlessly.

REMINDER: Do NOT call any tools. Respond with plain text only — an <analysis> block followed by a <summary> block. Tool calls will be rejected and you will fail the task.
"#,
    )
}
73
/// Amount subtracted from a model's context window by
/// `get_effective_context_window_size` to obtain the effective window.
pub const MAX_OUTPUT_TOKENS_FOR_SUMMARY: u32 = 20_000;

/// Headroom subtracted from the effective window by
/// `get_auto_compact_threshold` to obtain the default auto-compact threshold.
pub const AUTOCOMPACT_BUFFER_TOKENS: u32 = 13_000;

/// Distance below the active threshold at which
/// `calculate_token_warning_state` starts reporting the warning flag.
pub const WARNING_THRESHOLD_BUFFER_TOKENS: u32 = 20_000;

/// Distance below the active threshold at which
/// `calculate_token_warning_state` starts reporting the error flag.
/// Currently equal to [`WARNING_THRESHOLD_BUFFER_TOKENS`], so both flags flip together.
pub const ERROR_THRESHOLD_BUFFER_TOKENS: u32 = 20_000;
86
87pub fn get_blocking_limit(model: &str) -> u32 {
89 let effective_window = get_effective_context_window_size(model);
90 let default_blocking_limit = effective_window.saturating_sub(MANUAL_COMPACT_BUFFER_TOKENS);
91
92 if let Ok(override_val) = std::env::var(ai::BLOCKING_LIMIT_OVERRIDE) {
94 if let Ok(parsed) = override_val.parse::<u32>() {
95 if parsed > 0 {
96 return parsed;
97 }
98 }
99 }
100
101 default_blocking_limit
102}
103
/// Headroom subtracted from the effective window to obtain the default
/// blocking limit (see `get_blocking_limit` and `calculate_token_warning_state`).
pub const MANUAL_COMPACT_BUFFER_TOKENS: u32 = 3_000;

/// NOTE(review): not referenced in the visible part of this file; presumably
/// the number of back-to-back auto-compact failures tolerated before giving up — confirm against callers.
pub const MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES: u32 = 3;

/// NOTE(review): not referenced in the visible part of this file; presumably
/// caps how many files are re-attached after a compaction — confirm against callers.
pub const POST_COMPACT_MAX_FILES_TO_RESTORE: u32 = 5;

/// NOTE(review): not referenced in the visible part of this file; presumably
/// the overall token budget for content restored after a compaction — confirm.
pub const POST_COMPACT_TOKEN_BUDGET: u32 = 50_000;

/// NOTE(review): not referenced in the visible part of this file; presumably
/// the per-file token cap for restored files — confirm.
pub const POST_COMPACT_MAX_TOKENS_PER_FILE: u32 = 5_000;

/// NOTE(review): not referenced in the visible part of this file; presumably
/// the per-skill token cap for restored skills — confirm.
pub const POST_COMPACT_MAX_TOKENS_PER_SKILL: u32 = 5_000;

/// NOTE(review): not referenced in the visible part of this file; presumably
/// the overall token budget for restored skills — confirm.
pub const POST_COMPACT_SKILLS_TOKEN_BUDGET: u32 = 25_000;
124
125pub fn get_effective_context_window_size(model: &str) -> u32 {
127 let context_window = get_context_window_for_model(model);
128 context_window.saturating_sub(MAX_OUTPUT_TOKENS_FOR_SUMMARY)
129}
130
131pub fn get_context_window_for_model(model: &str) -> u32 {
133 if let Ok(override_val) = std::env::var(ai::AUTO_COMPACT_WINDOW) {
135 if let Ok(parsed) = override_val.parse::<u32>() {
136 if parsed > 0 {
137 return parsed;
138 }
139 }
140 }
141
142 let lower = model.to_lowercase();
144 if lower.contains("sonnet") {
145 get_default_context_window()
147 } else if lower.contains("haiku") {
148 get_default_context_window()
150 } else if lower.contains("opus") {
151 get_default_context_window()
153 } else {
154 get_default_context_window()
155 }
156}
157
158pub fn get_auto_compact_threshold(model: &str) -> u32 {
160 let effective_window = get_effective_context_window_size(model);
161
162 let autocompact_threshold = effective_window.saturating_sub(AUTOCOMPACT_BUFFER_TOKENS);
163
164 if let Ok(env_percent) = std::env::var(ai::AUTOCOMPACT_PCT_OVERRIDE) {
166 if let Ok(parsed) = env_percent.parse::<f64>() {
167 if parsed > 0.0 && parsed <= 100.0 {
168 let percentage_threshold =
169 ((effective_window as f64 * (parsed / 100.0)) as u32).min(effective_window);
170 return percentage_threshold.min(autocompact_threshold);
171 }
172 }
173 }
174
175 autocompact_threshold
176}
177
/// Result of comparing a token usage figure against the compaction thresholds,
/// as produced by `calculate_token_warning_state`.
#[derive(Debug, Clone)]
pub struct TokenWarningState {
    /// Percentage of the active threshold that is still unused, never below
    /// `0.0`; `100.0` when the active threshold is zero. The active threshold
    /// is the auto-compact threshold when auto-compact is enabled, otherwise
    /// the effective context window.
    pub percent_left: f64,
    /// Usage is at or beyond the active threshold minus `WARNING_THRESHOLD_BUFFER_TOKENS`.
    pub is_above_warning_threshold: bool,
    /// Usage is at or beyond the active threshold minus `ERROR_THRESHOLD_BUFFER_TOKENS`.
    pub is_above_error_threshold: bool,
    /// Auto-compact is enabled and usage is at or beyond the auto-compact threshold.
    pub is_above_auto_compact_threshold: bool,
    /// Usage is at or beyond the blocking limit.
    pub is_at_blocking_limit: bool,
}
188
189pub fn calculate_token_warning_state(token_usage: u32, model: &str) -> TokenWarningState {
190 let auto_compact_threshold = get_auto_compact_threshold(model);
191 let effective_window = get_effective_context_window_size(model);
192
193 let threshold = if is_auto_compact_enabled_for_calculation() {
195 auto_compact_threshold
196 } else {
197 effective_window
198 };
199
200 let percent_left = if threshold > 0 {
201 ((threshold.saturating_sub(token_usage) as f64 / threshold as f64) * 100.0).max(0.0)
202 } else {
203 100.0
204 };
205
206 let warning_threshold = threshold.saturating_sub(WARNING_THRESHOLD_BUFFER_TOKENS);
207 let error_threshold = threshold.saturating_sub(ERROR_THRESHOLD_BUFFER_TOKENS);
208
209 let is_above_warning_threshold = token_usage >= warning_threshold;
210 let is_above_error_threshold = token_usage >= error_threshold;
211 let is_above_auto_compact_threshold =
212 is_auto_compact_enabled_for_calculation() && token_usage >= auto_compact_threshold;
213
214 let default_blocking_limit = effective_window.saturating_sub(MANUAL_COMPACT_BUFFER_TOKENS);
216
217 let blocking_limit = if let Ok(override_val) = std::env::var(ai_code::BLOCKING_LIMIT_OVERRIDE) {
219 if let Ok(parsed) = override_val.parse::<u32>() {
220 if parsed > 0 {
221 parsed
222 } else {
223 default_blocking_limit
224 }
225 } else {
226 default_blocking_limit
227 }
228 } else {
229 default_blocking_limit
230 };
231
232 let is_at_blocking_limit = token_usage >= blocking_limit;
233
234 TokenWarningState {
235 percent_left,
236 is_above_warning_threshold,
237 is_above_error_threshold,
238 is_above_auto_compact_threshold,
239 is_at_blocking_limit,
240 }
241}
242
243fn is_auto_compact_enabled_for_calculation() -> bool {
246 use crate::utils::env_utils::is_env_truthy;
247
248 if is_env_truthy(Some("DISABLE_COMPACT")) {
249 return false;
250 }
251 if is_env_truthy(Some("DISABLE_AUTO_COMPACT")) {
252 return false;
253 }
254 true
257}
258
/// Bundle of values describing the outcome of a compaction.
///
/// NOTE(review): nothing in the visible part of this file constructs or reads
/// this type; the field notes below are inferred from names and types only —
/// confirm against the code that produces it.
#[derive(Debug, Clone)]
pub struct CompactionResult {
    /// Presumably a marker message separating pre- and post-compaction history.
    pub boundary_marker: Message,
    /// Presumably the message(s) carrying the generated summary.
    pub summary_messages: Vec<Message>,
    /// Presumably original messages retained verbatim, when there are any.
    pub messages_to_keep: Option<Vec<Message>>,
    /// Presumably extra messages re-attached after compaction.
    pub attachments: Vec<Message>,
    /// Presumably the estimated token count before compaction.
    pub pre_compact_token_count: u32,
    /// Presumably the estimated token count after compaction.
    pub post_compact_token_count: u32,
}
275
276pub fn strip_images_from_messages(messages: &[Message]) -> Vec<Message> {
279 messages
280 .iter()
281 .map(|msg| {
282 if let Message {
283 role: MessageRole::User,
284 content: _,
285 ..
286 } = msg
287 {
288 msg.clone()
291 } else {
292 msg.clone()
293 }
294 })
295 .collect()
296}
297
298pub fn estimate_token_count(messages: &[Message], max_output_tokens: u32) -> u32 {
303 let non_tool_chars: usize = messages
305 .iter()
306 .filter(|msg| msg.role != MessageRole::Tool)
307 .map(|msg| msg.content.len())
308 .sum();
309
310 let tool_result_chars: usize = messages
313 .iter()
314 .filter(|msg| msg.role == MessageRole::Tool)
315 .map(|msg| msg.content.len())
316 .sum();
317
318 let base_estimate = (non_tool_chars / 4) as u32;
319 let tool_buffer = (tool_result_chars / 2) as u32; base_estimate + tool_buffer + max_output_tokens
323}
324
325pub fn should_compact(token_usage: u32, model: &str) -> bool {
327 let state = calculate_token_warning_state(token_usage, model);
328 state.is_above_auto_compact_threshold
329}
330
331pub fn truncate_messages_for_summary(
336 messages: &[Message],
337 model: &str,
338 max_output_tokens: u32,
339) -> (Vec<Message>, u32) {
340 let context_window = get_context_window_for_model(model);
341 let safe_limit = ((context_window.saturating_sub(max_output_tokens)) as f64 * 0.50) as u32;
343
344 let total_messages = messages.len();
345 if total_messages == 0 {
346 return (vec![], 0);
347 }
348
349 let non_system_messages: Vec<Message> = messages
352 .iter()
353 .filter(|m| m.role != MessageRole::System)
354 .cloned()
355 .collect();
356
357 let mut current_tokens = 0u32;
359 let mut history_messages = Vec::new();
360
361 for msg in non_system_messages.iter().rev() {
362 let msg_tokens = rough_token_count_estimation_for_message(msg) as u32;
363 if current_tokens + msg_tokens > safe_limit {
364 break;
365 }
366 current_tokens += msg_tokens;
367 history_messages.insert(0, msg.clone());
368 }
369
370 if history_messages.is_empty() && !non_system_messages.is_empty() {
372 let last_msg = non_system_messages.last().unwrap();
374 let max_chars = (safe_limit as usize) * 4;
375 let chars_to_keep = last_msg.content.len().min(max_chars);
376 let truncated_content = last_msg
377 .content
378 .chars()
379 .take(chars_to_keep)
380 .collect::<String>();
381
382 current_tokens = rough_token_count_estimation(&truncated_content, 4.0) as u32;
383
384 history_messages = vec![Message {
385 role: last_msg.role.clone(),
386 content: truncated_content,
387 ..Default::default()
388 }];
389 }
390
391 let total_estimated = current_tokens;
392
393 (history_messages, total_estimated)
394}
395
#[cfg(test)]
mod tests {
    use super::*;

    /// Model id used by every threshold test. The expected numbers below
    /// assume no override variables are set in the environment.
    const MODEL: &str = "claude-sonnet-4-6";

    #[test]
    fn test_effective_context_window() {
        // 200_000 default window - 20_000 reserved for the summary output.
        assert_eq!(get_effective_context_window_size(MODEL), 180_000);
    }

    #[test]
    fn test_auto_compact_threshold() {
        // 180_000 effective window - 13_000 auto-compact buffer.
        assert_eq!(get_auto_compact_threshold(MODEL), 167_000);
    }

    #[test]
    fn test_token_warning_state_normal() {
        let normal = calculate_token_warning_state(50_000, MODEL);
        assert!(!normal.is_above_warning_threshold);
        assert!(!normal.is_above_error_threshold);
        assert!(!normal.is_above_auto_compact_threshold);
        assert!(normal.percent_left > 50.0);
    }

    #[test]
    fn test_token_warning_state_warning() {
        // Inside the warning/error band but still below the compact threshold.
        let near_limit = calculate_token_warning_state(165_000, MODEL);
        assert!(near_limit.is_above_warning_threshold);
        assert!(near_limit.is_above_error_threshold);
        assert!(!near_limit.is_above_auto_compact_threshold);
    }

    #[test]
    fn test_token_warning_state_compact() {
        let over_limit = calculate_token_warning_state(170_000, MODEL);
        assert!(over_limit.is_above_warning_threshold);
        assert!(over_limit.is_above_auto_compact_threshold);
    }

    #[test]
    fn test_should_compact() {
        assert!(!should_compact(50_000, MODEL));
        assert!(should_compact(170_000, MODEL));
    }

    #[test]
    fn test_estimate_token_count() {
        let user_turn = Message {
            role: MessageRole::User,
            content: "Hello, this is a test message".to_string(),
            ..Default::default()
        };
        let assistant_turn = Message {
            role: MessageRole::Assistant,
            content: "Hi! How can I help you today?".to_string(),
            ..Default::default()
        };

        let estimate = estimate_token_count(&[user_turn, assistant_turn], 0);
        assert!(estimate > 0);
    }
}
466
/// Interprets `env_var` — the *value* of an environment variable, not its
/// name — as a boolean flag.
///
/// Returns `true` only when the value, lowercased and trimmed, is one of
/// `1`, `true`, `yes` or `on`. An empty value is falsy.
fn is_env_truthy(env_var: &str) -> bool {
    matches!(env_var.to_lowercase().trim(), "1" | "true" | "yes" | "on")
}

/// Reports whether the compact command is enabled, i.e. the
/// `AI_DISABLE_COMPACT` environment variable does not hold a truthy value.
/// An unset or non-Unicode variable counts as "not disabled".
fn compact_enabled_from_env() -> bool {
    !std::env::var("AI_DISABLE_COMPACT").map_or(false, |value| is_env_truthy(&value))
}

/// Descriptor for the `/compact` command.
#[derive(Debug, Clone)]
pub struct CompactCommand {
    /// Command kind; `"local"` for the built-in descriptor.
    pub command_type: String,
    /// Command name; `"compact"` for the built-in descriptor.
    pub name: String,
    /// Human-readable description of the command.
    pub description: String,
    /// Callback deciding whether the command is currently available.
    /// Prefer calling [`CompactCommand::is_enabled`].
    pub is_enabled: fn() -> bool,
    /// Whether the command declares support for non-interactive use.
    pub supports_non_interactive: bool,
    /// Hint describing the optional argument.
    pub argument_hint: String,
}

impl Default for CompactCommand {
    fn default() -> Self {
        Self::new()
    }
}

impl CompactCommand {
    /// Creates the built-in `/compact` descriptor.
    pub fn new() -> Self {
        Self {
            command_type: "local".to_string(),
            name: "compact".to_string(),
            description: "Clear conversation history but keep a summary in context. Optional: /compact [instructions for summarization]".to_string(),
            // Reads the variable's value at call time. Handing the variable
            // *name* to `is_env_truthy` would always be falsy, leaving the
            // command impossible to disable.
            is_enabled: compact_enabled_from_env,
            supports_non_interactive: true,
            argument_hint: "<optional custom summarization instructions>".to_string(),
        }
    }

    /// Invokes the stored `is_enabled` callback.
    pub fn is_enabled(&self) -> bool {
        (self.is_enabled)()
    }
}
525
526pub fn get_compact_command() -> CompactCommand {
528 CompactCommand::new()
529}
530
/// Message strings for compaction failures.
///
/// NOTE(review): the code that emits these is not in the visible part of this
/// file; the notes below restate the message text only.
pub mod compact_errors {
    /// The model's response during compaction was incomplete.
    pub const ERROR_MESSAGE_INCOMPLETE_RESPONSE: &str =
        "Incomplete response from model during compaction";
    /// There were not enough messages to compact.
    pub const ERROR_MESSAGE_NOT_ENOUGH_MESSAGES: &str = "Not enough messages to compact";
    /// The user aborted the compaction.
    pub const ERROR_MESSAGE_USER_ABORT: &str = "User aborted compaction";
}