// ai-agent-sdk 0.5.0
//
// Idiomatic agent SDK inspired by the Claude Code source leak.
// Documentation:
//! Context compaction module.
//!
//! Handles automatic context compaction when the conversation gets too long.
//! This includes token threshold detection, summary generation, and message management.

use crate::types::*;

/// Default context window sizes by model (in tokens)
pub const DEFAULT_CONTEXT_WINDOW: u32 = 200_000;

/// Get default context window from environment or use default.
///
/// Honors the `AI_CONTEXT_WINDOW` environment variable when it parses as a
/// positive `u32`; unset, unparsable, or zero values fall back to
/// [`DEFAULT_CONTEXT_WINDOW`].
pub fn get_default_context_window() -> u32 {
    std::env::var("AI_CONTEXT_WINDOW")
        .ok()
        .and_then(|raw| raw.parse::<u32>().ok())
        // Zero would make every threshold degenerate; treat it as unset.
        .filter(|&tokens| tokens > 0)
        .unwrap_or(DEFAULT_CONTEXT_WINDOW)
}

/// Reserve tokens for output during compaction.
/// Based on p99.99 of compact summary output; subtracted from the raw
/// context window to get the "effective" window.
pub const MAX_OUTPUT_TOKENS_FOR_SUMMARY: u32 = 20_000;

/// Buffer tokens for auto-compact trigger (subtracted from the effective
/// window to decide when compaction fires automatically).
pub const AUTOCOMPACT_BUFFER_TOKENS: u32 = 13_000;

/// Buffer tokens for warning threshold.
pub const WARNING_THRESHOLD_BUFFER_TOKENS: u32 = 20_000;

/// Buffer tokens for error threshold.
/// NOTE(review): currently identical to the warning buffer, so the warning
/// and error thresholds coincide — confirm this is intentional.
pub const ERROR_THRESHOLD_BUFFER_TOKENS: u32 = 20_000;

/// Manual compact uses smaller buffer (more aggressive).
pub const MANUAL_COMPACT_BUFFER_TOKENS: u32 = 3_000;

/// Maximum consecutive auto-compact failures before giving up.
pub const MAX_CONSECUTIVE_AUTOCOMPACT_FAILURES: u32 = 3;

/// Post-compaction: max files to restore.
pub const POST_COMPACT_MAX_FILES_TO_RESTORE: u32 = 5;

/// Post-compaction: token budget for restored files.
pub const POST_COMPACT_TOKEN_BUDGET: u32 = 50_000;

/// Post-compaction: max tokens per file.
pub const POST_COMPACT_MAX_TOKENS_PER_FILE: u32 = 5_000;

/// Post-compaction: max tokens per skill.
pub const POST_COMPACT_MAX_TOKENS_PER_SKILL: u32 = 5_000;

/// Post-compaction: skills token budget.
pub const POST_COMPACT_SKILLS_TOKEN_BUDGET: u32 = 25_000;

/// Get effective context window size (total - output reserve)
pub fn get_effective_context_window_size(model: &str) -> u32 {
    let context_window = get_context_window_for_model(model);
    context_window.saturating_sub(MAX_OUTPUT_TOKENS_FOR_SUMMARY)
}

/// Get context window size for a model
pub fn get_context_window_for_model(model: &str) -> u32 {
    // Check environment override for auto compact window
    if let Ok(override_val) = std::env::var("AI_AUTO_COMPACT_WINDOW") {
        if let Ok(parsed) = override_val.parse::<u32>() {
            if parsed > 0 {
                return parsed;
            }
        }
    }

    // Default context windows by model
    let lower = model.to_lowercase();
    if lower.contains("sonnet") {
        // Claude Sonnet models typically have 200K context
        get_default_context_window()
    } else if lower.contains("haiku") {
        // Haiku has 200K context
        get_default_context_window()
    } else if lower.contains("opus") {
        // Opus models typically have 200K context
        get_default_context_window()
    } else {
        get_default_context_window()
    }
}

/// Get the auto-compact threshold (when to trigger compaction).
///
/// Token count at which auto-compaction should fire: the effective window
/// minus the auto-compact buffer.
pub fn get_auto_compact_threshold(model: &str) -> u32 {
    let usable_window = get_effective_context_window_size(model);
    usable_window.saturating_sub(AUTOCOMPACT_BUFFER_TOKENS)
}

/// Snapshot of how close the conversation is to the context limits.
#[derive(Debug, Clone)]
pub struct TokenWarningState {
    /// Percentage of the effective context window still unused.
    pub percent_left: f64,
    /// Usage has reached the warning threshold.
    pub is_above_warning_threshold: bool,
    /// Usage has reached the error threshold.
    pub is_above_error_threshold: bool,
    /// Usage has reached the point where auto-compaction should trigger.
    pub is_above_auto_compact_threshold: bool,
}

/// Calculate the token warning state for the given usage and model.
///
/// `percent_left` can be negative when usage already exceeds the effective
/// window. The previous implementation computed `context_window - token_usage`
/// on `u32`, which panics in debug builds and wraps in release builds
/// whenever `token_usage > context_window`; the subtraction is now done in
/// `f64` so overflow cannot occur.
pub fn calculate_token_warning_state(
    token_usage: u32,
    model: &str,
) -> TokenWarningState {
    let context_window = get_effective_context_window_size(model);
    let percent_left = if context_window > 0 {
        // f64 arithmetic: usage above the window yields a negative percentage
        // instead of a u32 underflow.
        (f64::from(context_window) - f64::from(token_usage)) / f64::from(context_window) * 100.0
    } else {
        // Degenerate window configuration: report "all free" rather than
        // dividing by zero.
        100.0
    };

    let auto_compact_threshold = get_auto_compact_threshold(model);
    let warning_threshold = context_window.saturating_sub(WARNING_THRESHOLD_BUFFER_TOKENS);
    let error_threshold = context_window.saturating_sub(ERROR_THRESHOLD_BUFFER_TOKENS);

    TokenWarningState {
        percent_left,
        is_above_warning_threshold: token_usage >= warning_threshold,
        is_above_error_threshold: token_usage >= error_threshold,
        is_above_auto_compact_threshold: token_usage >= auto_compact_threshold,
    }
}

/// Compact result containing the new messages after compaction.
#[derive(Debug, Clone)]
pub struct CompactionResult {
    /// The boundary marker message (separates pre- and post-compaction history).
    pub boundary_marker: Message,
    /// Summary messages to keep in place of the compacted history.
    pub summary_messages: Vec<Message>,
    /// Messages that were kept (not summarized); `None` when nothing was kept.
    pub messages_to_keep: Option<Vec<Message>>,
    /// Attachments to include after compaction.
    pub attachments: Vec<Message>,
    /// Pre-compaction token count.
    pub pre_compact_token_count: u32,
    /// Post-compaction token count.
    pub post_compact_token_count: u32,
}

/// Strip images from messages before sending for compaction
/// Images are not needed for summary generation
pub fn strip_images_from_messages(messages: &[Message]) -> Vec<Message> {
    messages
        .iter()
        .map(|msg| {
            if let Message {
                role: MessageRole::User,
                content: _,
                ..
            } = msg
            {
                // For user messages, we could strip images
                // For simplicity, return as-is
                msg.clone()
            } else {
                msg.clone()
            }
        })
        .collect()
}

/// Estimate token count for messages (rough estimation).
///
/// Uses the common heuristic of ~4 characters per token over the raw byte
/// length of each message's content. Saturates at `u32::MAX` instead of
/// silently truncating — the previous `as u32` cast wrapped for totals
/// that exceeded `u32::MAX` after division.
pub fn estimate_token_count(messages: &[Message]) -> u32 {
    let total_chars: usize = messages.iter().map(|msg| msg.content.len()).sum();
    u32::try_from(total_chars / 4).unwrap_or(u32::MAX)
}

/// Check if conversation should be compacted.
///
/// True once `token_usage` has crossed the model's auto-compact threshold.
pub fn should_compact(token_usage: u32, model: &str) -> bool {
    calculate_token_warning_state(token_usage, model).is_above_auto_compact_threshold
}

#[cfg(test)]
mod tests {
    use super::*;

    // Derived thresholds for "claude-sonnet-4-6" used throughout:
    //   effective window : 200_000 - 20_000 = 180_000
    //   auto-compact     : 180_000 - 13_000 = 167_000
    //   warning / error  : 180_000 - 20_000 = 160_000 (buffers coincide)

    #[test]
    fn test_effective_context_window() {
        assert_eq!(
            get_effective_context_window_size("claude-sonnet-4-6"),
            180_000
        );
    }

    #[test]
    fn test_auto_compact_threshold() {
        assert_eq!(get_auto_compact_threshold("claude-sonnet-4-6"), 167_000);
    }

    #[test]
    fn test_token_warning_state_normal() {
        // Well under every threshold: nothing should fire.
        let state = calculate_token_warning_state(50_000, "claude-sonnet-4-6");
        assert!(state.percent_left > 50.0);
        assert!(!state.is_above_warning_threshold);
        assert!(!state.is_above_error_threshold);
        assert!(!state.is_above_auto_compact_threshold);
    }

    #[test]
    fn test_token_warning_state_warning() {
        // 165K is past the shared warning/error threshold (160K) but still
        // below the auto-compact trigger (167K).
        let state = calculate_token_warning_state(165_000, "claude-sonnet-4-6");
        assert!(state.is_above_warning_threshold);
        assert!(state.is_above_error_threshold);
        assert!(!state.is_above_auto_compact_threshold);
    }

    #[test]
    fn test_token_warning_state_compact() {
        // 170K exceeds the 167K auto-compact trigger.
        let state = calculate_token_warning_state(170_000, "claude-sonnet-4-6");
        assert!(state.is_above_warning_threshold);
        assert!(state.is_above_auto_compact_threshold);
    }

    #[test]
    fn test_should_compact() {
        assert!(!should_compact(50_000, "claude-sonnet-4-6"));
        assert!(should_compact(170_000, "claude-sonnet-4-6"));
    }

    #[test]
    fn test_estimate_token_count() {
        let msgs = vec![
            Message {
                role: MessageRole::User,
                content: "Hello, this is a test message".to_string(),
                ..Default::default()
            },
            Message {
                role: MessageRole::Assistant,
                content: "Hi! How can I help you today?".to_string(),
                ..Default::default()
            },
        ];
        // ~58 chars / 4 ≈ 14 tokens; only the non-zero property is pinned.
        assert!(estimate_token_count(&msgs) > 0);
    }
}