Skip to main content

agent_code_lib/services/
compact.rs

//! History compaction.
//!
//! Manages conversation history size by summarizing older messages
//! when the context window limit approaches. Implements three
//! compaction strategies:
//!
//! - **Auto-compact**: triggered when estimated tokens exceed the threshold
//! - **Reactive compact**: triggered by API `prompt_too_long` errors
//! - **Microcompact**: clears stale tool results to free tokens
//!
//! # Thresholds
//!
//! ```text
//! |<--- context window (e.g., 200K) -------------------------------->|
//! |<--- effective window (context - output reservation, max 20K) --->|
//! |<--- auto-compact threshold (effective - 13K buffer) ------------>|
//! |                                                    ↑ compact fires here
//! ```
19
20use crate::llm::message::{
21    ContentBlock, Message, MessageLevel, SystemMessage, SystemMessageType, UserMessage,
22};
23use crate::services::{secret_masker, tokens};
24use serde::{Deserialize, Serialize};
25use sha2::{Digest, Sha256};
26use std::collections::HashMap;
27use std::path::{Path, PathBuf};
28use uuid::Uuid;
29
/// Number of recent turns during which file reads are locked at `Full`
/// fidelity and cannot be compressed by the summarizer.
///
/// A record counts as protected while `current_turn - last_referenced_turn`
/// (saturating at zero) is strictly less than this value; see
/// [`FileCompressionRecord::is_protected`].
pub const PROTECTED_TURN_WINDOW: usize = 2;
33
/// Fidelity of a file's representation in conversation history.
///
/// Variants are declared from most to least detailed. Because of
/// `rename_all = "snake_case"`, they serialize as `"full"`, `"partial"`,
/// `"summary"` and `"excluded"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CompressionLevel {
    /// Complete file contents in context (recently read).
    Full,
    /// Key sections only — functions referenced, changed lines.
    Partial,
    /// LLM-generated 2-3 sentence summary of the file's role.
    Summary,
    /// File removed from context entirely.
    Excluded,
}
47
/// Per-file tracking record used by the history compressor.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileCompressionRecord {
    /// Path of the tracked file.
    pub path: PathBuf,
    /// Current fidelity of this file in the conversation history.
    pub level: CompressionLevel,
    /// 12-byte SHA256 slice — short enough to be cheap, long enough
    /// to detect any real content change. Serialized through the
    /// `hex_hash` helper module instead of as a raw byte array.
    #[serde(with = "hex_hash")]
    pub content_hash: [u8; 12],
    /// Line range retained at `Partial` level, if any.
    ///
    /// NOTE(review): whether the bounds are inclusive, or 0- vs 1-based,
    /// is not established in this file — confirm with whoever sets it.
    pub line_range: Option<(usize, usize)>,
    /// Turn index where this file was last referenced by any tool.
    pub last_referenced_turn: usize,
}
62
63impl FileCompressionRecord {
64    /// True while the file is within the protected turn window.
65    pub fn is_protected(&self, current_turn: usize) -> bool {
66        current_turn.saturating_sub(self.last_referenced_turn) < PROTECTED_TURN_WINDOW
67    }
68}
69
/// Set of file compression records tracked for a session.
///
/// `Default` yields an empty set.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FileCompressionState {
    /// Records keyed by file path; `record_read` inserts each record under
    /// the same path it stores in the record itself.
    pub files: HashMap<PathBuf, FileCompressionRecord>,
}
75
76impl FileCompressionState {
77    pub fn new() -> Self {
78        Self::default()
79    }
80
81    /// Record a file read at the given turn, setting level to `Full`.
82    /// If the content has changed since the last record, the level is
83    /// reset to `Full` (stale summaries must be discarded). If unchanged,
84    /// the existing level is preserved but the turn marker is updated.
85    pub fn record_read(&mut self, path: &Path, content: &str, turn: usize) {
86        let hash = hash_content(content);
87        match self.files.get_mut(path) {
88            Some(existing) => {
89                if existing.content_hash != hash {
90                    existing.content_hash = hash;
91                    existing.level = CompressionLevel::Full;
92                    existing.line_range = None;
93                }
94                existing.last_referenced_turn = turn;
95            }
96            None => {
97                self.files.insert(
98                    path.to_path_buf(),
99                    FileCompressionRecord {
100                        path: path.to_path_buf(),
101                        level: CompressionLevel::Full,
102                        content_hash: hash,
103                        line_range: None,
104                        last_referenced_turn: turn,
105                    },
106                );
107            }
108        }
109    }
110
111    /// Demote a file's compression level, unless it is currently protected.
112    pub fn demote(&mut self, path: &Path, level: CompressionLevel, current_turn: usize) -> bool {
113        if let Some(rec) = self.files.get_mut(path) {
114            if rec.is_protected(current_turn) {
115                return false;
116            }
117            rec.level = level;
118            return true;
119        }
120        false
121    }
122
123    /// Persist the state to the standard compression_state.json path
124    /// next to a session file.
125    pub fn save(&self, session_id: &str) -> Result<PathBuf, String> {
126        let path = compression_state_path(session_id)
127            .ok_or_else(|| "Could not determine cache dir".to_string())?;
128        if let Some(parent) = path.parent() {
129            std::fs::create_dir_all(parent)
130                .map_err(|e| format!("create compression state dir: {e}"))?;
131        }
132        let json = serde_json::to_string_pretty(self)
133            .map_err(|e| format!("serialize compression state: {e}"))?;
134        std::fs::write(&path, json).map_err(|e| format!("write compression state: {e}"))?;
135        Ok(path)
136    }
137
138    /// Load state from disk for a session id. Returns `None` if no
139    /// state file exists yet (fresh session).
140    pub fn load(session_id: &str) -> Option<Self> {
141        let path = compression_state_path(session_id)?;
142        if !path.exists() {
143            return None;
144        }
145        let content = std::fs::read_to_string(&path).ok()?;
146        serde_json::from_str(&content).ok()
147    }
148}
149
150/// Compute a 12-byte SHA256 slice of `content` for change detection.
151pub fn hash_content(content: &str) -> [u8; 12] {
152    let digest = Sha256::digest(content.as_bytes());
153    let mut out = [0u8; 12];
154    out.copy_from_slice(&digest[..12]);
155    out
156}
157
158/// Path to the compression state sidecar file for a session.
159fn compression_state_path(session_id: &str) -> Option<PathBuf> {
160    dirs::cache_dir().map(|d| {
161        d.join("agent-code")
162            .join("sessions")
163            .join(format!("{session_id}.compression.json"))
164    })
165}
166
167/// Serde helper: store `[u8; 12]` as a 24-char lowercase hex string.
168mod hex_hash {
169    use serde::{Deserialize, Deserializer, Serializer};
170
171    pub fn serialize<S: Serializer>(bytes: &[u8; 12], ser: S) -> Result<S::Ok, S::Error> {
172        let hex: String = bytes.iter().map(|b| format!("{b:02x}")).collect();
173        ser.serialize_str(&hex)
174    }
175
176    pub fn deserialize<'de, D: Deserializer<'de>>(de: D) -> Result<[u8; 12], D::Error> {
177        let s = String::deserialize(de)?;
178        if s.len() != 24 {
179            return Err(serde::de::Error::custom("expected 24 hex chars"));
180        }
181        let mut out = [0u8; 12];
182        for (i, chunk) in s.as_bytes().chunks(2).enumerate() {
183            let byte = u8::from_str_radix(std::str::from_utf8(chunk).unwrap_or(""), 16)
184                .map_err(serde::de::Error::custom)?;
185            out[i] = byte;
186        }
187        Ok(out)
188    }
189}
190
/// Buffer tokens before auto-compact fires: subtracted from the effective
/// context window to obtain the auto-compact threshold.
const AUTOCOMPACT_BUFFER_TOKENS: u64 = 13_000;

/// Tokens reserved for the compact summary output. Used as the upper bound
/// on the output reservation subtracted from a model's context window.
const MAX_OUTPUT_TOKENS_FOR_SUMMARY: u64 = 20_000;

/// Maximum consecutive auto-compact failures before circuit breaker trips
/// (`should_auto_compact` returns `false` once this count is reached).
const MAX_CONSECUTIVE_FAILURES: u32 = 3;

/// Maximum recovery attempts for max-output-tokens errors.
pub const MAX_OUTPUT_TOKENS_RECOVERY_LIMIT: u32 = 3;

/// Tools whose results can be cleared by microcompact. Names are compared
/// ASCII-case-insensitively.
const COMPACTABLE_TOOLS: &[&str] = &["FileRead", "Bash", "Grep", "Glob", "FileEdit", "FileWrite"];
205
/// Token warning state for the UI, as computed by `token_warning_state`.
#[derive(Debug, Clone)]
pub struct TokenWarningState {
    /// Percentage of the effective context window remaining (rounded).
    pub percent_left: u64,
    /// Whether to show a warning in the UI.
    pub is_above_warning: bool,
    /// Whether to show an error in the UI.
    pub is_above_error: bool,
    /// Whether auto-compact should fire.
    pub should_compact: bool,
    /// Whether the context is at the blocking limit.
    pub is_blocking: bool,
}
220
/// Tracking state for auto-compact across turns.
#[derive(Debug, Clone, Default)]
pub struct CompactTracking {
    /// Number of auto-compact failures in a row; `should_auto_compact`
    /// stops firing once this reaches `MAX_CONSECUTIVE_FAILURES`.
    pub consecutive_failures: u32,
    /// NOTE(review): presumably set once a compaction has happened; it is
    /// not read anywhere in this file — confirm against callers.
    pub was_compacted: bool,
}
227
228/// Calculate the effective context window (total minus output reservation).
229pub fn effective_context_window(model: &str) -> u64 {
230    let context = tokens::context_window_for_model(model);
231    let reserved = tokens::max_output_tokens_for_model(model).min(MAX_OUTPUT_TOKENS_FOR_SUMMARY);
232    context.saturating_sub(reserved)
233}
234
235/// Calculate the auto-compact threshold.
236pub fn auto_compact_threshold(model: &str) -> u64 {
237    effective_context_window(model).saturating_sub(AUTOCOMPACT_BUFFER_TOKENS)
238}
239
240/// Calculate token warning state for the current conversation.
241pub fn token_warning_state(messages: &[Message], model: &str) -> TokenWarningState {
242    let token_count = tokens::estimate_context_tokens(messages);
243    let threshold = auto_compact_threshold(model);
244    let effective = effective_context_window(model);
245
246    let percent_left = if effective > 0 {
247        ((effective.saturating_sub(token_count)) as f64 / effective as f64 * 100.0)
248            .round()
249            .max(0.0) as u64
250    } else {
251        0
252    };
253
254    let warning_buffer = 20_000;
255
256    TokenWarningState {
257        percent_left,
258        is_above_warning: token_count >= effective.saturating_sub(warning_buffer),
259        is_above_error: token_count >= effective.saturating_sub(warning_buffer),
260        should_compact: token_count >= threshold,
261        is_blocking: token_count >= effective.saturating_sub(3_000),
262    }
263}
264
265/// Check whether auto-compact should fire for this conversation.
266pub fn should_auto_compact(messages: &[Message], model: &str, tracking: &CompactTracking) -> bool {
267    // Circuit breaker.
268    if tracking.consecutive_failures >= MAX_CONSECUTIVE_FAILURES {
269        return false;
270    }
271
272    let state = token_warning_state(messages, model);
273    state.should_compact
274}
275
276/// Perform microcompact: clear stale tool results to free tokens.
277///
278/// Replaces the content of old tool_result blocks with a placeholder,
279/// keeping the most recent `keep_recent` results intact.
280pub fn microcompact(messages: &mut [Message], keep_recent: usize) -> u64 {
281    let keep_recent = keep_recent.max(1);
282
283    // Collect indices of compactable tool results (in order).
284    let mut compactable_indices: Vec<(usize, usize)> = Vec::new(); // (msg_idx, block_idx)
285
286    for (msg_idx, msg) in messages.iter().enumerate() {
287        if let Message::User(u) = msg {
288            for (block_idx, block) in u.content.iter().enumerate() {
289                if let ContentBlock::ToolResult { tool_use_id, .. } = block {
290                    // Check if this tool_use_id corresponds to a compactable tool.
291                    if is_compactable_tool_result(messages, tool_use_id) {
292                        compactable_indices.push((msg_idx, block_idx));
293                    }
294                }
295            }
296        }
297    }
298
299    if compactable_indices.len() <= keep_recent {
300        return 0;
301    }
302
303    // Clear all but the most recent `keep_recent`.
304    let clear_count = compactable_indices.len() - keep_recent;
305    let to_clear = &compactable_indices[..clear_count];
306
307    let mut freed_tokens = 0u64;
308
309    for &(msg_idx, block_idx) in to_clear {
310        if let Message::User(ref mut u) = messages[msg_idx]
311            && let ContentBlock::ToolResult {
312                ref mut content,
313                tool_use_id: _,
314                is_error: _,
315                ..
316            } = u.content[block_idx]
317        {
318            let old_tokens = tokens::estimate_tokens(content);
319            let placeholder = "[Old tool result cleared]".to_string();
320            let new_tokens = tokens::estimate_tokens(&placeholder);
321            *content = placeholder;
322            freed_tokens += old_tokens.saturating_sub(new_tokens);
323        }
324    }
325
326    freed_tokens
327}
328
329/// Check if a tool_use_id corresponds to a compactable tool.
330fn is_compactable_tool_result(messages: &[Message], tool_use_id: &str) -> bool {
331    for msg in messages {
332        if let Message::Assistant(a) = msg {
333            for block in &a.content {
334                if let ContentBlock::ToolUse { id, name, .. } = block
335                    && id == tool_use_id
336                {
337                    return COMPACTABLE_TOOLS
338                        .iter()
339                        .any(|t| t.eq_ignore_ascii_case(name));
340                }
341            }
342        }
343    }
344    false
345}
346
347/// Create a compact boundary marker message.
348pub fn compact_boundary_message(summary: &str) -> Message {
349    Message::System(SystemMessage {
350        uuid: Uuid::new_v4(),
351        timestamp: chrono::Utc::now().to_rfc3339(),
352        subtype: SystemMessageType::CompactBoundary,
353        content: format!("[Conversation compacted. Summary: {summary}]"),
354        level: MessageLevel::Info,
355    })
356}
357
358/// Build a compact summary request: asks the LLM to summarize
359/// the conversation up to a certain point.
360///
361/// All message text is run through [`secret_masker`] before being
362/// passed to the summarizer, so secrets that appeared in tool output
363/// never end up baked into the summary.
364pub fn build_compact_summary_prompt(messages: &[Message]) -> String {
365    let mut context = String::new();
366    for msg in messages {
367        match msg {
368            Message::User(u) => {
369                context.push_str("User: ");
370                for block in &u.content {
371                    if let ContentBlock::Text { text } = block {
372                        context.push_str(&secret_masker::mask(text));
373                    }
374                }
375                context.push('\n');
376            }
377            Message::Assistant(a) => {
378                context.push_str("Assistant: ");
379                for block in &a.content {
380                    if let ContentBlock::Text { text } = block {
381                        context.push_str(&secret_masker::mask(text));
382                    }
383                }
384                context.push('\n');
385            }
386            _ => {}
387        }
388    }
389
390    format!(
391        "Summarize this conversation concisely, preserving key decisions, \
392         file changes made, and important context. Focus on what the user \
393         was trying to accomplish and what was done.\n\n{context}"
394    )
395}
396
397/// Build the recovery message injected when max-output-tokens is hit.
398pub fn max_output_recovery_message() -> Message {
399    Message::User(UserMessage {
400        uuid: Uuid::new_v4(),
401        timestamp: chrono::Utc::now().to_rfc3339(),
402        content: vec![ContentBlock::Text {
403            text: "Output token limit hit. Resume directly — no apology, no recap \
404                   of what you were doing. Pick up mid-thought if that is where the \
405                   cut happened. Break remaining work into smaller pieces."
406                .to_string(),
407        }],
408        is_meta: true,
409        is_compact_summary: false,
410    })
411}
412
413/// Parse a "prompt too long" error to extract the token gap.
414///
415/// Looks for patterns like "prompt is too long: 137500 tokens > 135000 maximum"
416/// and returns the difference (2500 in this example).
417pub fn parse_prompt_too_long_gap(error_text: &str) -> Option<u64> {
418    let re = regex::Regex::new(r"(\d+)\s*tokens?\s*>\s*(\d+)").ok()?;
419    let captures = re.captures(error_text)?;
420    let actual: u64 = captures.get(1)?.as_str().parse().ok()?;
421    let limit: u64 = captures.get(2)?.as_str().parse().ok()?;
422    let gap = actual.saturating_sub(limit);
423    if gap > 0 { Some(gap) } else { None }
424}
425
426/// Perform full LLM-based compaction of the conversation history.
427///
428/// Splits the message history into two parts: messages to summarize
429/// (older) and messages to keep (recent). Calls the LLM to generate
430/// a summary, then replaces the old messages with:
431/// 1. A compact boundary marker
432/// 2. A summary message (as a user message with is_compact_summary=true)
433/// 3. The kept recent messages
434///
435/// Returns the number of messages removed, or None if compaction failed.
436pub async fn compact_with_llm(
437    messages: &mut Vec<Message>,
438    llm: &dyn crate::llm::provider::Provider,
439    model: &str,
440    cancel: tokio_util::sync::CancellationToken,
441) -> Option<usize> {
442    if messages.len() < 4 {
443        return None; // Not enough messages to compact.
444    }
445
446    // Keep the most recent messages (at least 40K tokens worth, or
447    // minimum 5 messages with text content).
448    let keep_count = calculate_keep_count(messages);
449    let split_point = messages.len().saturating_sub(keep_count);
450
451    if split_point < 2 {
452        return None; // Not enough to summarize.
453    }
454
455    let to_summarize = &messages[..split_point];
456    let summary_prompt = build_compact_summary_prompt(to_summarize);
457
458    // Call the LLM to generate the summary.
459    let summary_messages = vec![crate::llm::message::user_message(&summary_prompt)];
460    let request = crate::llm::provider::ProviderRequest {
461        messages: summary_messages,
462        system_prompt: "You are a conversation summarizer. Produce a concise summary \
463                        preserving key decisions, file changes, and important context. \
464                        Do not use tools."
465            .to_string(),
466        tools: vec![],
467        model: model.to_string(),
468        max_tokens: 4096,
469        temperature: None,
470        enable_caching: false,
471        tool_choice: Default::default(),
472        metadata: None,
473        cancel,
474    };
475
476    let mut rx = match llm.stream(&request).await {
477        Ok(rx) => rx,
478        Err(e) => {
479            tracing::warn!("Compact LLM call failed: {e}");
480            return None;
481        }
482    };
483
484    // Collect the summary text.
485    let mut summary = String::new();
486    while let Some(event) = rx.recv().await {
487        if let crate::llm::stream::StreamEvent::TextDelta(text) = event {
488            summary.push_str(&text);
489        }
490    }
491
492    if summary.is_empty() {
493        return None;
494    }
495
496    // Replace old messages with boundary + summary + kept messages.
497    let kept = messages[split_point..].to_vec();
498    let removed = split_point;
499
500    messages.clear();
501    messages.push(compact_boundary_message(&summary));
502    messages.push(Message::User(UserMessage {
503        uuid: Uuid::new_v4(),
504        timestamp: chrono::Utc::now().to_rfc3339(),
505        content: vec![ContentBlock::Text {
506            text: format!("[Conversation compacted. Prior context summary:]\n\n{summary}"),
507        }],
508        is_meta: true,
509        is_compact_summary: true,
510    }));
511    messages.extend(kept);
512
513    tracing::info!("Compacted {removed} messages into summary");
514    Some(removed)
515}
516
517/// Calculate how many recent messages to keep during compaction.
518///
519/// Keeps at least 5 messages with text content, or messages totaling
520/// at least 10K estimated tokens, whichever is more.
521fn calculate_keep_count(messages: &[Message]) -> usize {
522    let min_text_messages = 5;
523    let min_tokens = 10_000u64;
524    let max_tokens = 40_000u64;
525
526    let mut count = 0usize;
527    let mut text_count = 0usize;
528    let mut token_total = 0u64;
529
530    // Walk backwards from the end.
531    for msg in messages.iter().rev() {
532        let tokens = crate::services::tokens::estimate_message_tokens(msg);
533        token_total += tokens;
534        count += 1;
535
536        // Count messages with text content.
537        let has_text = match msg {
538            Message::User(u) => u
539                .content
540                .iter()
541                .any(|b| matches!(b, ContentBlock::Text { .. })),
542            Message::Assistant(a) => a
543                .content
544                .iter()
545                .any(|b| matches!(b, ContentBlock::Text { .. })),
546            _ => false,
547        };
548        if has_text {
549            text_count += 1;
550        }
551
552        // Stop if we've met both minimums.
553        if text_count >= min_text_messages && token_total >= min_tokens {
554            break;
555        }
556        // Hard cap.
557        if token_total >= max_tokens {
558            break;
559        }
560    }
561
562    count
563}
564
565#[cfg(test)]
566mod tests {
567    use super::*;
568
569    #[test]
570    fn hash_content_detects_change() {
571        let a = hash_content("hello world");
572        let b = hash_content("hello world");
573        let c = hash_content("hello world!");
574        assert_eq!(a, b);
575        assert_ne!(a, c);
576    }
577
578    #[test]
579    fn file_record_protected_inside_window() {
580        let rec = FileCompressionRecord {
581            path: PathBuf::from("src/main.rs"),
582            level: CompressionLevel::Full,
583            content_hash: hash_content("fn main() {}"),
584            line_range: None,
585            last_referenced_turn: 5,
586        };
587        assert!(rec.is_protected(5));
588        assert!(rec.is_protected(6));
589        assert!(!rec.is_protected(7));
590    }
591
592    #[test]
593    fn record_read_resets_level_on_content_change() {
594        let mut state = FileCompressionState::new();
595        let path = PathBuf::from("src/lib.rs");
596        state.record_read(&path, "original", 1);
597        // Demote outside protection window.
598        state.files.get_mut(&path).unwrap().last_referenced_turn = 0;
599        state.demote(&path, CompressionLevel::Summary, 10);
600        assert_eq!(
601            state.files.get(&path).unwrap().level,
602            CompressionLevel::Summary
603        );
604        // Re-read with new content — level must reset to Full.
605        state.record_read(&path, "changed content", 11);
606        assert_eq!(
607            state.files.get(&path).unwrap().level,
608            CompressionLevel::Full
609        );
610    }
611
612    #[test]
613    fn record_read_preserves_level_on_unchanged_content() {
614        let mut state = FileCompressionState::new();
615        let path = PathBuf::from("src/lib.rs");
616        state.record_read(&path, "same", 1);
617        state.files.get_mut(&path).unwrap().last_referenced_turn = 0;
618        state.demote(&path, CompressionLevel::Partial, 10);
619        state.record_read(&path, "same", 11);
620        assert_eq!(
621            state.files.get(&path).unwrap().level,
622            CompressionLevel::Partial
623        );
624    }
625
626    #[test]
627    fn demote_refuses_protected_files() {
628        let mut state = FileCompressionState::new();
629        let path = PathBuf::from("src/hot.rs");
630        state.record_read(&path, "contents", 5);
631        let ok = state.demote(&path, CompressionLevel::Summary, 5);
632        assert!(!ok);
633        assert_eq!(
634            state.files.get(&path).unwrap().level,
635            CompressionLevel::Full
636        );
637    }
638
639    #[test]
640    fn compression_state_empty_roundtrip() {
641        let state = FileCompressionState::new();
642        let json = serde_json::to_string(&state).unwrap();
643        let back: FileCompressionState = serde_json::from_str(&json).unwrap();
644        assert!(back.files.is_empty());
645    }
646
647    #[test]
648    fn compression_state_handles_unicode_paths() {
649        let mut state = FileCompressionState::new();
650        let path = PathBuf::from("src/crates/café/niño.rs");
651        state.record_read(&path, "contents", 1);
652        let json = serde_json::to_string(&state).unwrap();
653        let back: FileCompressionState = serde_json::from_str(&json).unwrap();
654        assert_eq!(back.files.len(), 1);
655        assert!(back.files.contains_key(&path));
656    }
657
658    #[test]
659    fn compression_state_demote_after_protection_window_expires() {
660        // Read at turn 0 → protected until turn 2 (PROTECTED_TURN_WINDOW = 2).
661        // Demote attempts inside the window fail; one past the window succeeds.
662        let mut state = FileCompressionState::new();
663        let path = PathBuf::from("src/hot.rs");
664        state.record_read(&path, "contents", 0);
665        assert!(!state.demote(&path, CompressionLevel::Summary, 0));
666        assert!(!state.demote(&path, CompressionLevel::Summary, 1));
667        assert!(state.demote(&path, CompressionLevel::Summary, 2));
668        assert_eq!(
669            state.files.get(&path).unwrap().level,
670            CompressionLevel::Summary
671        );
672    }
673
674    #[test]
675    fn compression_state_roundtrip() {
676        let mut state = FileCompressionState::new();
677        state.record_read(Path::new("a.rs"), "alpha", 1);
678        state.record_read(Path::new("b.rs"), "beta", 2);
679        let json = serde_json::to_string(&state).unwrap();
680        let back: FileCompressionState = serde_json::from_str(&json).unwrap();
681        assert_eq!(back.files.len(), 2);
682        assert_eq!(
683            back.files.get(Path::new("a.rs")).unwrap().content_hash,
684            hash_content("alpha"),
685        );
686    }
687
688    #[test]
689    fn test_auto_compact_threshold() {
690        // Sonnet: 200K context, 16K max output (capped at 20K), effective = 180K
691        // Threshold = 180K - 13K = 167K
692        let threshold = auto_compact_threshold("claude-sonnet");
693        assert_eq!(threshold, 200_000 - 16_384 - 13_000);
694    }
695
696    #[test]
697    fn test_parse_prompt_too_long_gap() {
698        let msg = "prompt is too long: 137500 tokens > 135000 maximum";
699        assert_eq!(parse_prompt_too_long_gap(msg), Some(2500));
700    }
701
702    #[test]
703    fn test_parse_prompt_too_long_no_match() {
704        assert_eq!(parse_prompt_too_long_gap("some other error"), None);
705    }
706
707    #[test]
708    fn test_effective_context_window() {
709        // Sonnet: 200K context - 16K output = 184K (capped at 20K → 180K)
710        let eff = effective_context_window("claude-sonnet");
711        assert!(eff > 100_000);
712        assert!(eff < 200_000);
713    }
714
715    #[test]
716    fn test_token_warning_state_empty() {
717        let state = token_warning_state(&[], "claude-sonnet");
718        assert_eq!(state.percent_left, 100);
719        assert!(!state.is_above_warning);
720        assert!(!state.is_blocking);
721    }
722
723    #[test]
724    fn test_should_auto_compact_empty() {
725        let tracking = CompactTracking::default();
726        assert!(!should_auto_compact(&[], "claude-sonnet", &tracking));
727    }
728
729    #[test]
730    fn test_should_auto_compact_circuit_breaker() {
731        let tracking = CompactTracking {
732            consecutive_failures: 5,
733            was_compacted: false,
734        };
735        // Even with huge message list, circuit breaker should prevent compaction.
736        assert!(!should_auto_compact(&[], "claude-sonnet", &tracking));
737    }
738
739    #[test]
740    fn test_microcompact_empty() {
741        let mut messages = vec![];
742        let freed = microcompact(&mut messages, 2);
743        assert_eq!(freed, 0);
744    }
745
746    #[test]
747    fn test_microcompact_keeps_recent() {
748        use crate::llm::message::*;
749        // Create a tool result message.
750        let mut messages = vec![
751            Message::Assistant(AssistantMessage {
752                uuid: uuid::Uuid::new_v4(),
753                timestamp: String::new(),
754                content: vec![ContentBlock::ToolUse {
755                    id: "call_1".into(),
756                    name: "FileRead".into(),
757                    input: serde_json::json!({}),
758                }],
759                model: None,
760                usage: None,
761                stop_reason: None,
762                request_id: None,
763            }),
764            Message::User(UserMessage {
765                uuid: uuid::Uuid::new_v4(),
766                timestamp: String::new(),
767                content: vec![ContentBlock::ToolResult {
768                    tool_use_id: "call_1".into(),
769                    content: "file content here".repeat(100),
770                    is_error: false,
771                    extra_content: vec![],
772                }],
773                is_meta: true,
774                is_compact_summary: false,
775            }),
776        ];
777        // keep_recent=5 means this single result should be kept.
778        let freed = microcompact(&mut messages, 5);
779        assert_eq!(freed, 0);
780    }
781
782    #[test]
783    fn test_compact_boundary_message() {
784        let msg = compact_boundary_message("test summary");
785        if let Message::System(s) = msg {
786            assert_eq!(
787                s.subtype,
788                crate::llm::message::SystemMessageType::CompactBoundary
789            );
790        } else {
791            panic!("Expected system message");
792        }
793    }
794
795    #[test]
796    fn test_max_output_recovery_message() {
797        let msg = max_output_recovery_message();
798        match msg {
799            Message::User(u) => {
800                assert!(!u.content.is_empty());
801            }
802            _ => panic!("Expected user message"),
803        }
804    }
805
806    #[test]
807    fn test_build_compact_summary_prompt() {
808        use crate::llm::message::*;
809        let messages = vec![user_message("hello"), user_message("world")];
810        let prompt = build_compact_summary_prompt(&messages);
811        assert!(prompt.contains("Summarize"));
812    }
813
814    #[test]
815    fn test_effective_context_window_gpt_model() {
816        let eff = effective_context_window("gpt-4o");
817        // gpt-4: 128K context, 16K max output (capped at 20K → 16K), effective = 128K - 16K = 112K
818        assert_eq!(eff, 128_000 - 16_384);
819    }
820
821    #[test]
822    fn test_auto_compact_threshold_gpt_model() {
823        let threshold = auto_compact_threshold("gpt-4o");
824        assert_eq!(threshold, 128_000 - 16_384 - 13_000);
825    }
826
827    #[test]
828    fn test_parse_prompt_too_long_gap_with_comma_format() {
829        // Numbers without commas embedded, but different magnitudes.
830        let msg = "prompt is too long: 137500 tokens > 135000 maximum";
831        assert_eq!(parse_prompt_too_long_gap(msg), Some(2500));
832    }
833
/// When the reported token count equals the maximum, no gap is returned.
#[test]
fn test_parse_prompt_too_long_gap_equal_tokens_returns_none() {
    // 135000 vs 135000: the difference is zero, and the parser is expected
    // to answer `None` in that case.
    let error_text = "prompt is too long: 135000 tokens > 135000 maximum";
    let gap = parse_prompt_too_long_gap(error_text);
    assert_eq!(gap, None);
}
840
/// A single oversized user message makes `token_warning_state` report
/// `should_compact` for "claude-sonnet".
#[test]
fn test_token_warning_state_large_count_should_compact() {
    use crate::llm::message::*;
    // 800_000 characters of filler; the original author's estimate is
    // roughly 200K tokens, which should exceed the compaction threshold.
    let oversized = "a".repeat(800_000);
    let history = vec![user_message(&oversized)];
    let warning = token_warning_state(&history, "claude-sonnet");
    assert!(warning.should_compact);
}
850
/// A one-message conversation with default tracking state must not trigger
/// auto-compaction.
#[test]
fn test_should_auto_compact_empty_tracking_small_conversation() {
    let history = vec![crate::llm::message::user_message("tiny")];
    let fresh_tracking = CompactTracking::default();
    let fires = should_auto_compact(&history, "claude-sonnet", &fresh_tracking);
    assert!(!fires);
}
857
/// The boundary message embeds the supplied summary and opens with the
/// `[Conversation compacted.` marker.
#[test]
fn test_compact_boundary_message_content_format() {
    let boundary = compact_boundary_message("my summary");
    match &boundary {
        Message::System(system) => {
            assert!(system.content.contains("my summary"));
            assert!(system.content.starts_with("[Conversation compacted."));
        }
        _ => panic!("Expected System message"),
    }
}
868
/// The summary prompt carries both the user's and the assistant's text,
/// each under its role label.
#[test]
fn test_build_compact_summary_prompt_includes_user_and_assistant() {
    use crate::llm::message::*;

    let from_user = user_message("user said this");
    // Minimal assistant turn: one text block, all optional metadata unset.
    let from_assistant = Message::Assistant(AssistantMessage {
        uuid: uuid::Uuid::new_v4(),
        timestamp: String::new(),
        content: vec![ContentBlock::Text {
            text: "assistant said that".into(),
        }],
        model: None,
        usage: None,
        stop_reason: None,
        request_id: None,
    });
    let history = vec![from_user, from_assistant];

    let prompt = build_compact_summary_prompt(&history);

    // Message bodies survive into the prompt...
    assert!(prompt.contains("user said this"));
    assert!(prompt.contains("assistant said that"));
    // ...and so do the role labels.
    assert!(prompt.contains("User:"));
    assert!(prompt.contains("Assistant:"));
}
892
/// An AWS access key pasted into a user message must not reach the summary
/// prompt verbatim; a redaction marker appears in its place.
#[test]
fn build_compact_summary_prompt_masks_secrets_in_user_messages() {
    use crate::llm::message::*;

    let aws_key = "AKIAIOSFODNN7EXAMPLE";
    let pasted = format!("I pasted my AWS key {aws_key} into the file");
    let history = vec![user_message(pasted)];

    let prompt = build_compact_summary_prompt(&history);

    assert!(
        !prompt.contains(aws_key),
        "raw AWS key survived compaction prompt: {prompt}",
    );
    assert!(prompt.contains("[REDACTED:aws_access_key]"));
}
907
/// A token echoed in an assistant message must be redacted before it is
/// placed in the summary prompt.
#[test]
fn build_compact_summary_prompt_masks_secrets_in_assistant_messages() {
    use crate::llm::message::*;

    let secret = "ghp_abcdefghijklmnopqrstuvwxyz0123456789";
    // Assistant turn whose only text block quotes the secret.
    let leaking_block = ContentBlock::Text {
        text: format!("I used this token: {secret}"),
    };
    let assistant_turn = AssistantMessage {
        uuid: uuid::Uuid::new_v4(),
        timestamp: String::new(),
        content: vec![leaking_block],
        model: None,
        usage: None,
        stop_reason: None,
        request_id: None,
    };
    let history = vec![Message::Assistant(assistant_turn)];

    let prompt = build_compact_summary_prompt(&history);

    assert!(!prompt.contains(secret));
    assert!(prompt.contains("REDACTED"));
}
927
/// The max-output recovery message is a user message flagged `is_meta`.
#[test]
fn test_max_output_recovery_message_is_meta() {
    let recovery = max_output_recovery_message();
    match &recovery {
        Message::User(user) => assert!(user.is_meta),
        _ => panic!("Expected User message"),
    }
}
937
/// With 20 short messages, `calculate_keep_count` keeps at least 5 of them.
#[test]
fn test_calculate_keep_count_returns_at_least_5_for_large_list() {
    use crate::llm::message::*;

    // Build 20 small text messages: "message 0" through "message 19".
    let mut messages: Vec<Message> = Vec::with_capacity(20);
    for i in 0..20 {
        messages.push(user_message(format!("message {i}")));
    }

    let keep = calculate_keep_count(&messages);
    assert!(keep >= 5, "keep_count was {keep}, expected at least 5");
}
948}