everruns-core 0.15.0

//! Infinity Context Capability
//!
//! Keeps recent conversation turns in prompt context while exposing a
//! `query_history` tool for older messages that fell out of the active window.

use super::{Capability, CapabilityLocalization, CapabilityStatus};
use crate::message::{ContentPart, Message, MessageRole};
use crate::message_filter::{
    ExcludedNoticeTransform, MessageFilterProvider, MessageQuery, anchored_window,
};
use crate::tool_types::ToolHints;
use crate::tools::{Tool, ToolExecutionResult};
use crate::traits::ToolContext;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use serde_json::{Value, json};
use std::cmp::Ordering;
use std::io::{self, Write};
use std::sync::Arc;

/// Capability ID for infinity context.
pub const INFINITY_CONTEXT_CAPABILITY_ID: &str = "infinity_context";

/// Infinity context capability.
pub struct InfinityContextCapability;

impl Capability for InfinityContextCapability {
    fn id(&self) -> &str {
        INFINITY_CONTEXT_CAPABILITY_ID
    }

    fn name(&self) -> &str {
        "Infinity Context"
    }

    fn description(&self) -> &str {
        r#"Trims older conversation history out of the live prompt while keeping it queryable with `query_history`.

> [!TIP]
> Use this for long-running sessions where earlier discussion still matters but should not consume prompt budget every turn."#
    }

    fn status(&self) -> CapabilityStatus {
        CapabilityStatus::Available
    }

    fn icon(&self) -> Option<&str> {
        Some("infinity")
    }

    fn category(&self) -> Option<&str> {
        Some("Optimization")
    }

    fn system_prompt_addition(&self) -> Option<&str> {
        Some(INFINITY_CONTEXT_SYSTEM_PROMPT)
    }

    fn tools(&self) -> Vec<Box<dyn Tool>> {
        vec![Box::new(QueryHistoryTool)]
    }

    fn message_filter_provider(&self) -> Option<Arc<dyn MessageFilterProvider>> {
        Some(Arc::new(InfinityContextFilterProvider))
    }

    /// All three `InfinityContextConfig` fields are simple numeric knobs users
    /// tune, so all are exposed. Candidate-load sizing (overfetch factor,
    /// per-message token estimate, hard DB limit) is internal and derived from
    /// these values, so it has no schema surface.
    fn config_schema(&self) -> Option<Value> {
        Some(json!({
            "type": "object",
            "properties": {
                "context_budget_tokens": {
                    "type": "integer",
                    "title": "Context budget (tokens)",
                    "description": "Maximum prompt budget reserved for message history.",
                    "minimum": 1,
                    "default": default_context_budget_tokens()
                },
                "min_recent_messages": {
                    "type": "integer",
                    "title": "Minimum recent messages",
                    "description": "Number of recent messages always kept, even when the token budget is tight.",
                    "minimum": 1,
                    "default": default_min_recent_messages()
                },
                "max_recent_messages": {
                    "type": "integer",
                    "title": "Maximum recent messages",
                    "description": "Optional hard cap on recent messages kept in the live prompt.",
                    "minimum": 1
                },
                "keep_first_messages": {
                    "type": "integer",
                    "title": "Anchored first messages",
                    "description": "Leading messages always kept as an anchor (the original task), even under a tight budget. Additional to the maximum recent messages. The anchor is fetched as a head+tail load, so it is guaranteed even for histories far longer than the candidate load window — any value is honored, independent of max_recent_messages. Larger values are not free: every load fetches keep_first_messages extra rows and includes them in the prompt, so keep it small (the default 1 anchors the original task) and raise it only when more leading context is genuinely needed.",
                    "minimum": 0,
                    "default": default_keep_first_messages()
                }
            }
        }))
    }

    fn validate_config(&self, config: &Value) -> Result<(), String> {
        if config.is_null() {
            return Ok(());
        }
        let typed: InfinityContextConfig = serde_json::from_value(config.clone())
            .map_err(|e| format!("invalid infinity_context config: {e}"))?;
        if typed.context_budget_tokens == 0 {
            return Err("context_budget_tokens must be >= 1".to_string());
        }
        if typed.min_recent_messages == 0 {
            return Err("min_recent_messages must be >= 1".to_string());
        }
        if typed.max_recent_messages == Some(0) {
            return Err("max_recent_messages must be >= 1".to_string());
        }
        Ok(())
    }

    fn localizations(&self) -> Vec<CapabilityLocalization> {
        vec![
            CapabilityLocalization {
                locale: "en",
                name: None,
                description: None,
                config_description: Some(
                    "Controls the token budget for history and the minimum/maximum number \
                     of recent messages kept in the prompt.",
                ),
                config_overlay: None,
            },
            CapabilityLocalization {
                locale: "uk",
                name: Some("Нескінченний контекст"),
                description: Some(
                    "Прибирає старішу історію розмови з активного запиту, зберігаючи її \
                     доступною через інструмент query_history.",
                ),
                config_description: Some(
                    "Визначає бюджет токенів для історії та мінімальну й максимальну \
                     кількість останніх повідомлень у запиті.",
                ),
                config_overlay: Some(json!({
                    "properties": {
                        "context_budget_tokens": {
                            "title": "Бюджет контексту (токени)",
                            "description": "Максимальний бюджет запиту, зарезервований для історії повідомлень."
                        },
                        "min_recent_messages": {
                            "title": "Мінімум останніх повідомлень",
                            "description": "Кількість останніх повідомлень, які зберігаються завжди, навіть коли бюджет токенів обмежений."
                        },
                        "max_recent_messages": {
                            "title": "Максимум останніх повідомлень",
                            "description": "Необов'язкове жорстке обмеження кількості останніх повідомлень в активному запиті."
                        }
                    }
                })),
            },
        ]
    }
}

const INFINITY_CONTEXT_SYSTEM_PROMPT: &str = r#"## Conversation history

Earlier messages may be trimmed from the live prompt. Use `query_history`
to retrieve them when needed. The window is trimmed automatically; do not
abandon tasks for token reasons — persist important state via file or
memory tools when available."#;

#[derive(Debug, Clone, Serialize, Deserialize)]
struct InfinityContextConfig {
    /// Maximum prompt budget reserved for message history.
    #[serde(default = "default_context_budget_tokens")]
    context_budget_tokens: usize,

    /// Minimum number of recent messages to keep even when the budget is tight.
    #[serde(default = "default_min_recent_messages")]
    min_recent_messages: usize,

    /// Optional hard cap on recent messages kept in the live prompt.
    ///
    /// Useful for public support chats where the prompt must stay small even
    /// when the token-budget estimate would allow more messages.
    #[serde(default)]
    max_recent_messages: Option<usize>,

    /// Number of leading messages always kept as an anchor (the original task /
    /// goal), regardless of token budget. Defaults to 1 so the model never loses
    /// what it is doing when the window slides. The anchor is additional to
    /// `max_recent_messages`.
    #[serde(default = "default_keep_first_messages")]
    keep_first_messages: usize,

    /// Derived (not user-facing): set by capability collection when the
    /// `compaction` capability is also enabled. When true, infinity context
    /// stops doing token-budget eviction and lets compaction own reduction, so
    /// compaction's summary — not a bare "hidden" notice — covers old turns.
    #[serde(default)]
    compaction_active: bool,
}

fn default_context_budget_tokens() -> usize {
    100_000
}

fn default_min_recent_messages() -> usize {
    10
}

fn default_keep_first_messages() -> usize {
    1
}

impl Default for InfinityContextConfig {
    fn default() -> Self {
        Self {
            context_budget_tokens: default_context_budget_tokens(),
            min_recent_messages: default_min_recent_messages(),
            max_recent_messages: None,
            keep_first_messages: default_keep_first_messages(),
            compaction_active: false,
        }
    }
}

const CANDIDATE_AVG_TOKENS_PER_MESSAGE: usize = 250;
const CANDIDATE_OVERFETCH_FACTOR: usize = 4;
const CANDIDATE_MAX_MESSAGES: usize = 2_000;

struct InfinityContextFilterProvider;

impl MessageFilterProvider for InfinityContextFilterProvider {
    fn apply_filters(&self, query: &mut MessageQuery, config: &Value) {
        let config: InfinityContextConfig =
            serde_json::from_value(config.clone()).unwrap_or_default();

        query.limit = Some(resolve_candidate_load_limit(&config) as i64);
        // Always fetch the first `keep_first_messages` alongside the latest-N tail
        // so the task/goal anchor survives even when the history is far longer than
        // the candidate load window (the tail-only LIMIT would otherwise never
        // fetch the genuine first message).
        if config.keep_first_messages > 0 {
            query.keep_head = Some(config.keep_first_messages);
        }
        query.prepend_transform = Some(Arc::new(ExcludedNoticeTransform::infinity_context()));
    }

    fn post_load(&self, messages: &mut Vec<Message>, config: &Value) {
        let config: InfinityContextConfig =
            serde_json::from_value(config.clone()).unwrap_or_default();
        let existing_notice_count = take_existing_excluded_notice(messages);

        // P2 composition: when compaction is the active reducer, defer
        // token-budget eviction to it. Only re-surface any candidate-window
        // overflow notice (messages the DB load could not fetch).
        if config.compaction_active {
            if existing_notice_count > 0 {
                insert_excluded_notice(messages, config.keep_first_messages, existing_notice_count);
            }
            return;
        }

        let outcome = trim_messages_to_token_budget(messages, &config);
        let total_excluded_count = existing_notice_count.saturating_add(outcome.hidden_count);
        if total_excluded_count > 0 {
            // Place the notice right after the anchored head so the layout reads
            // [task anchor] -> [N hidden] -> [recent window].
            insert_excluded_notice(messages, outcome.head_len, total_excluded_count);
        }
    }

    fn priority(&self) -> i32 {
        100
    }
}

fn resolve_candidate_load_limit(config: &InfinityContextConfig) -> usize {
    // Cap the candidate window unconditionally at `CANDIDATE_MAX_MESSAGES` so a
    // large `min_recent_messages` or `max_recent_messages` cannot turn into an
    // unbounded DB `LIMIT`.
    let budget_derived_limit = (config.context_budget_tokens / CANDIDATE_AVG_TOKENS_PER_MESSAGE)
        .saturating_mul(CANDIDATE_OVERFETCH_FACTOR)
        .max(config.min_recent_messages)
        .clamp(1, CANDIDATE_MAX_MESSAGES);

    if let Some(max_recent_messages) = config.max_recent_messages {
        return budget_derived_limit.min(max_recent_messages.max(1));
    }

    budget_derived_limit
}

fn estimate_message_tokens(message: &Message) -> usize {
    const TOKEN_CHARS: usize = 4;
    let role_overhead = message.role.to_string().len() + 8;
    let content_len: usize = message
        .content
        .iter()
        .map(|part| match part {
            ContentPart::Text(text) => text.text.len(),
            ContentPart::Image(image) => {
                image.url.as_ref().map_or(0, String::len)
                    + image.base64.as_ref().map_or(50, String::len)
                    + image.media_type.as_ref().map_or(0, String::len)
            }
            ContentPart::ImageFile(file) => {
                file.image_id.to_string().len() + file.filename.as_ref().map_or(0, String::len)
            }
            ContentPart::ToolCall(call) => {
                call.id.len() + call.name.len() + estimate_json_value_len(&call.arguments) + 20
            }
            ContentPart::ToolResult(result) => {
                result.tool_call_id.len()
                    + result.result.as_ref().map_or(0, estimate_json_value_len)
                    + result.error.as_ref().map_or(0, String::len)
                    + 20
            }
        })
        .sum();
    (role_overhead + content_len) / TOKEN_CHARS
}

struct CountingWriter {
    len: usize,
}

impl Write for CountingWriter {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.len = self.len.saturating_add(buf.len());
        Ok(buf.len())
    }

    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}

fn estimate_json_value_len(value: &Value) -> usize {
    let mut writer = CountingWriter { len: 0 };
    serde_json::to_writer(&mut writer, value)
        .map(|_| writer.len)
        .unwrap_or(0)
}

fn take_existing_excluded_notice(messages: &mut Vec<Message>) -> usize {
    let Some(first) = messages.first() else {
        return 0;
    };
    let Some(count) = parse_excluded_notice_count(first) else {
        return 0;
    };

    messages.remove(0);
    count
}

fn parse_excluded_notice_count(message: &Message) -> Option<usize> {
    let text = message.text()?;
    let rest = text.strip_prefix("[IMPORTANT: ")?;
    let (count, rest) = rest.split_once(' ')?;
    if !rest.starts_with("earlier messages are NOT visible in this context.") {
        return None;
    }
    count.parse().ok()
}

/// Outcome of token-budget trimming.
#[derive(Default)]
struct TrimOutcome {
    /// Messages dropped from the middle.
    hidden_count: usize,
    /// Length of the preserved leading anchor (where the notice is inserted).
    head_len: usize,
}

/// Insert the hidden-history notice at `position`, clamped to the message list.
fn insert_excluded_notice(messages: &mut Vec<Message>, position: usize, count: usize) {
    let text = ExcludedNoticeTransform::infinity_context()
        .format
        .replace("{}", &count.to_string());
    messages.insert(position.min(messages.len()), Message::system(text));
}

/// Trim the live window to the token budget while always keeping the first
/// `keep_first_messages` (the original task/goal) and the recent tail. Drops a
/// single contiguous block from the middle and reports how many were hidden.
fn trim_messages_to_token_budget(
    messages: &mut Vec<Message>,
    config: &InfinityContextConfig,
) -> TrimOutcome {
    if messages.is_empty() {
        return TrimOutcome::default();
    }

    let costs: Vec<usize> = messages.iter().map(estimate_message_tokens).collect();
    let window = anchored_window(
        &costs,
        config.keep_first_messages,
        config.min_recent_messages,
        config.max_recent_messages,
        config.context_budget_tokens,
    );

    let hidden_count = window.hidden();
    if hidden_count > 0 {
        // Rebuild as [0, head_len) ++ [recent_start, len) without cloning.
        let tail = messages.split_off(window.recent_start);
        messages.truncate(window.head_len);
        messages.extend(tail);
    }

    TrimOutcome {
        hidden_count,
        head_len: window.head_len,
    }
}

/// Tool for querying earlier conversation history.
pub struct QueryHistoryTool;

#[derive(Debug, Deserialize)]
struct QueryHistoryParams {
    #[serde(default)]
    query: Option<String>,
    #[serde(default)]
    message_range: Option<MessageRange>,
    #[serde(default = "default_query_limit")]
    limit: usize,
}

#[derive(Debug, Deserialize)]
struct MessageRange {
    from: usize,
    to: usize,
}

fn default_query_limit() -> usize {
    20
}

#[async_trait]
impl Tool for QueryHistoryTool {
    fn name(&self) -> &str {
        "query_history"
    }

    fn display_name(&self) -> Option<&str> {
        Some("Query History")
    }

    fn description(&self) -> &str {
        "Search or retrieve earlier messages from this conversation that may not be visible in the current prompt."
    }

    fn parameters_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Keyword search over earlier messages"
                },
                "message_range": {
                    "type": "object",
                    "properties": {
                        "from": { "type": "integer", "minimum": 0, "description": "Start index (0-based, inclusive)" },
                        "to": { "type": "integer", "minimum": 0, "description": "End index (0-based, exclusive)" }
                    },
                    "required": ["from", "to"],
                    "additionalProperties": false,
                    "description": "Retrieve messages by absolute position in the conversation"
                },
                "limit": {
                    "type": "integer",
                    "minimum": 1,
                    "default": 20,
                    "description": "Maximum number of messages to return"
                }
            },
            "additionalProperties": false
        })
    }

    fn hints(&self) -> ToolHints {
        ToolHints::default()
            .with_readonly(true)
            .with_idempotent(true)
    }

    async fn execute(&self, _arguments: Value) -> ToolExecutionResult {
        ToolExecutionResult::tool_error(
            "query_history requires session context. Execute it with ToolContext.",
        )
    }

    fn requires_context(&self) -> bool {
        true
    }

    async fn execute_with_context(
        &self,
        arguments: Value,
        context: &ToolContext,
    ) -> ToolExecutionResult {
        let params: QueryHistoryParams = match serde_json::from_value(arguments) {
            Ok(params) => params,
            Err(error) => {
                return ToolExecutionResult::tool_error(format!("Invalid parameters: {error}"));
            }
        };

        let Some(retriever) = &context.message_retriever else {
            return ToolExecutionResult::tool_error("No message retriever available");
        };

        let messages = match retriever.load(context.session_id).await {
            Ok(messages) => messages,
            Err(error) => {
                return ToolExecutionResult::internal_error(error);
            }
        };

        if messages.is_empty() {
            return ToolExecutionResult::success(json!({
                "count": 0,
                "message": "No history available."
            }));
        }

        let limit = params.limit.min(50);
        let total = messages.len();

        if let Some(range) = params.message_range {
            let from = range.from.min(total);
            let to = range.to.min(total).max(from);
            let range_messages: Vec<_> = messages[from..to].iter().take(limit).collect();
            return format_range_result(&range_messages, from, total);
        }

        if let Some(query) = params.query.as_deref() {
            let results = search_messages(&messages, query, limit);
            return format_search_result(&results, total);
        }

        let recent: Vec<_> = messages.iter().rev().take(limit).collect();
        format_recent_result(&recent, total)
    }
}

struct SearchResult<'a> {
    index: usize,
    message: &'a Message,
    score: f64,
}

fn search_messages<'a>(
    messages: &'a [Message],
    query: &str,
    limit: usize,
) -> Vec<SearchResult<'a>> {
    let query_lower = query.to_lowercase();
    let mut results = Vec::new();

    for (index, message) in messages.iter().enumerate() {
        let content = extract_text_content(message).to_lowercase();
        if !content.contains(&query_lower) {
            continue;
        }

        let mut score = 1.0;

        if content.split_whitespace().any(|word| word == query_lower) {
            score += 0.5;
        }

        if !messages.is_empty() {
            score += (index as f64 / messages.len() as f64) * 0.3;
        }

        match message.role {
            MessageRole::User | MessageRole::Agent => score += 0.2,
            MessageRole::System => score += 0.1,
            MessageRole::ToolResult => {}
        }

        results.push(SearchResult {
            index,
            message,
            score,
        });
    }

    results.sort_by(|left, right| {
        right
            .score
            .partial_cmp(&left.score)
            .unwrap_or(Ordering::Equal)
    });
    results.truncate(limit);
    results
}

fn extract_text_content(message: &Message) -> String {
    message
        .content
        .iter()
        .filter_map(|part| match part {
            ContentPart::Text(text) => Some(text.text.clone()),
            ContentPart::ToolResult(result) => result.result.as_ref().map(ToString::to_string),
            _ => None,
        })
        .collect::<Vec<_>>()
        .join(" ")
}

fn truncate_content(content: &str, max_len: usize) -> String {
    let char_count = content.chars().count();
    if char_count <= max_len {
        return content.to_string();
    }

    format!("{}...", content.chars().take(max_len).collect::<String>())
}

fn format_message(message: &Message, index: usize, total: usize) -> Value {
    json!({
        "index": index,
        "position": format!("{}/{}", index + 1, total),
        "role": message.role.to_string(),
        "created_at": message.created_at.to_rfc3339(),
        "content": truncate_content(&extract_text_content(message), 500)
    })
}

fn format_range_result(
    messages: &[&Message],
    start_index: usize,
    total: usize,
) -> ToolExecutionResult {
    if messages.is_empty() {
        return ToolExecutionResult::success(json!({
            "count": 0,
            "message": "No messages in the requested range."
        }));
    }

    let formatted: Vec<Value> = messages
        .iter()
        .enumerate()
        .map(|(offset, message)| format_message(message, start_index + offset, total))
        .collect();

    ToolExecutionResult::success(json!({
        "messages": formatted,
        "count": messages.len(),
        "total_in_history": total,
        "range": format!("{}-{}", start_index + 1, start_index + messages.len())
    }))
}

fn format_search_result(results: &[SearchResult<'_>], total: usize) -> ToolExecutionResult {
    if results.is_empty() {
        return ToolExecutionResult::success(json!({
            "count": 0,
            "message": "No matching messages found."
        }));
    }

    let formatted: Vec<Value> = results
        .iter()
        .map(|result| {
            let mut message = format_message(result.message, result.index, total);
            message["relevance_score"] = json!(format!("{:.2}", result.score));
            message
        })
        .collect();

    ToolExecutionResult::success(json!({
        "messages": formatted,
        "count": results.len(),
        "total_in_history": total
    }))
}

fn format_recent_result(messages: &[&Message], total: usize) -> ToolExecutionResult {
    let formatted: Vec<Value> = messages
        .iter()
        .enumerate()
        .map(|(offset, message)| format_message(message, total - messages.len() + offset, total))
        .collect();

    ToolExecutionResult::success(json!({
        "messages": formatted,
        "count": messages.len(),
        "total_in_history": total,
        "note": "Showing most recent history. Use `query` to search or `message_range` to fetch older messages."
    }))
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::in_memory::InMemoryMessageRetriever;
    use crate::typed_id::SessionId;

    #[test]
    fn test_capability_metadata() {
        let capability = InfinityContextCapability;

        assert_eq!(capability.id(), INFINITY_CONTEXT_CAPABILITY_ID);
        assert_eq!(capability.name(), "Infinity Context");
        assert_eq!(capability.status(), CapabilityStatus::Available);
        assert_eq!(capability.category(), Some("Optimization"));
        assert_eq!(capability.tools().len(), 1);
        assert!(capability.message_filter_provider().is_some());
    }

    #[test]
    fn test_config_schema_and_validate_config() {
        let capability = InfinityContextCapability;

        let schema = capability.config_schema().expect("config schema");
        assert_eq!(schema["type"], "object");
        assert!(schema["properties"]["context_budget_tokens"].is_object());
        assert!(schema["properties"]["min_recent_messages"].is_object());
        assert!(schema["properties"]["max_recent_messages"].is_object());

        // Null, empty, and valid configs are accepted.
        assert!(capability.validate_config(&Value::Null).is_ok());
        assert!(capability.validate_config(&json!({})).is_ok());
        assert!(
            capability
                .validate_config(&json!({
                    "context_budget_tokens": 50_000,
                    "min_recent_messages": 5,
                    "max_recent_messages": 100
                }))
                .is_ok()
        );

        // Wrong types and out-of-range values are rejected.
        assert!(
            capability
                .validate_config(&json!({"context_budget_tokens": "lots"}))
                .is_err()
        );
        assert!(
            capability
                .validate_config(&json!({"context_budget_tokens": 0}))
                .is_err()
        );
        assert!(
            capability
                .validate_config(&json!({"max_recent_messages": 0}))
                .is_err()
        );
    }

    #[test]
    fn test_localizations_resolve_uk() {
        let capability = InfinityContextCapability;
        assert_eq!(
            capability.localized_name(Some("uk-UA")),
            "Нескінченний контекст"
        );
        assert!(capability.describe_schema(None).is_some());
    }

    #[test]
    fn test_filter_provider_sets_bounded_candidate_load_limit_without_hard_cap() {
        let mut query = MessageQuery::new(SessionId::new());
        let provider = InfinityContextFilterProvider;
        provider.apply_filters(
            &mut query,
            &json!({"context_budget_tokens": 1_000, "min_recent_messages": 3}),
        );

        assert_eq!(query.limit, Some(16));
        assert!(query.prepend_transform.is_some());
        // Default keep_first_messages (1) is loaded as a head anchor so the task
        // goal survives even for histories longer than the candidate window.
        assert_eq!(query.keep_head, Some(1));
    }

    #[test]
    fn test_filter_provider_sets_keep_head_from_keep_first_messages() {
        let mut query = MessageQuery::new(SessionId::new());
        let provider = InfinityContextFilterProvider;
        provider.apply_filters(
            &mut query,
            &json!({"context_budget_tokens": 1_000, "keep_first_messages": 3}),
        );
        assert_eq!(query.keep_head, Some(3));
    }

    #[test]
    fn test_filter_provider_omits_keep_head_when_zero() {
        let mut query = MessageQuery::new(SessionId::new());
        let provider = InfinityContextFilterProvider;
        provider.apply_filters(
            &mut query,
            &json!({"context_budget_tokens": 1_000, "keep_first_messages": 0}),
        );
        assert_eq!(query.keep_head, None);
    }

    #[test]
    fn test_filter_provider_caps_explicit_max_to_bounded_candidate_window() {
        let mut query = MessageQuery::new(SessionId::new());
        let provider = InfinityContextFilterProvider;
        provider.apply_filters(
            &mut query,
            &json!({
                "context_budget_tokens": 500_000,
                "min_recent_messages": 10,
                "max_recent_messages": 1_000_000
            }),
        );

        assert_eq!(query.limit, Some(CANDIDATE_MAX_MESSAGES as i64));
        assert!(query.prepend_transform.is_some());
    }

    #[test]
    fn test_filter_provider_caps_large_min_recent_messages() {
        let mut query = MessageQuery::new(SessionId::new());
        let provider = InfinityContextFilterProvider;
        provider.apply_filters(
            &mut query,
            &json!({
                "context_budget_tokens": 1_000,
                "min_recent_messages": 1_000_000,
            }),
        );

        assert_eq!(query.limit, Some(CANDIDATE_MAX_MESSAGES as i64));
        assert!(query.prepend_transform.is_some());
    }

    #[test]
    fn test_filter_provider_allows_small_public_chat_window() {
        let mut query = MessageQuery::new(SessionId::new());
        let provider = InfinityContextFilterProvider;
        provider.apply_filters(
            &mut query,
            &json!({
                "context_budget_tokens": 10_000,
                "min_recent_messages": 10,
                "max_recent_messages": 30
            }),
        );

        assert_eq!(query.limit, Some(30));
        assert!(query.prepend_transform.is_some());
    }

    #[test]
    fn test_filter_provider_falls_back_to_defaults_for_invalid_config() {
        let mut query = MessageQuery::new(SessionId::new());
        let provider = InfinityContextFilterProvider;
        provider.apply_filters(
            &mut query,
            &json!({"context_budget_tokens": "not-a-number"}),
        );

        assert_eq!(query.limit, Some(1_600));
        assert!(query.prepend_transform.is_some());
    }

    #[test]
    fn test_filter_provider_trims_loaded_messages_by_token_budget() {
        let provider = InfinityContextFilterProvider;
        let mut messages = vec![
            Message::user("the original task"),
            Message::assistant("old ".repeat(400)),
            Message::user("recent one"),
            Message::assistant("recent two"),
        ];

        provider.post_load(
            &mut messages,
            &json!({"context_budget_tokens": 1, "min_recent_messages": 2}),
        );

        // Layout: [task anchor] -> [N hidden notice] -> [recent window].
        // The original task survives even under a 1-token budget.
        assert_eq!(messages.len(), 4);
        assert_eq!(extract_text_content(&messages[0]), "the original task");
        assert!(
            extract_text_content(&messages[1])
                .contains("1 earlier messages are NOT visible in this context")
        );
        assert_eq!(extract_text_content(&messages[2]), "recent one");
        assert_eq!(extract_text_content(&messages[3]), "recent two");
    }

    #[test]
    fn test_filter_provider_applies_hard_cap_after_loading() {
        let provider = InfinityContextFilterProvider;
        let mut messages = vec![
            Message::user("one"),
            Message::assistant("two"),
            Message::user("three"),
            Message::assistant("four"),
            Message::user("five"),
        ];

        provider.post_load(
            &mut messages,
            &json!({
                "context_budget_tokens": 10_000,
                "min_recent_messages": 10,
                "max_recent_messages": 2
            }),
        );

        // Hard cap keeps 2 recent; the head anchor ("one") is additional to it.
        assert_eq!(messages.len(), 4);
        assert_eq!(extract_text_content(&messages[0]), "one");
        assert!(
            extract_text_content(&messages[1])
                .contains("2 earlier messages are NOT visible in this context")
        );
        assert_eq!(extract_text_content(&messages[2]), "four");
        assert_eq!(extract_text_content(&messages[3]), "five");
    }

    #[test]
    fn test_filter_provider_anchors_task_through_full_flow() {
        let provider = InfinityContextFilterProvider;
        // Budget large enough that the candidate load fetches every message, but
        // small enough that the big middle turns are dropped by token budget.
        let config = json!({
            "context_budget_tokens": 600,
            "min_recent_messages": 2
        });
        let mut query = MessageQuery::new(SessionId::new());
        provider.apply_filters(&mut query, &config);
        let mut messages = vec![
            Message::user("TASK: build the widget"),
            Message::assistant("X".repeat(2000)),
            Message::assistant("Y".repeat(2000)),
            Message::user("recent a"),
            Message::assistant("recent b"),
        ];

        query.apply_windowing(&mut messages);
        provider.post_load(&mut messages, &config);

        // The original task is anchored at the front, the notice follows it, and
        // the recent tail is intact — the model still knows what it is doing.
        assert_eq!(extract_text_content(&messages[0]), "TASK: build the widget");
        assert!(
            extract_text_content(&messages[1])
                .contains("earlier messages are NOT visible in this context")
        );
        assert_eq!(extract_text_content(messages.last().unwrap()), "recent b");
        // The huge first assistant turn was dropped from the middle.
        assert!(
            !messages
                .iter()
                .any(|m| extract_text_content(m).starts_with("XXX"))
        );
    }

    #[test]
    fn test_filter_provider_defers_eviction_to_compaction() {
        let provider = InfinityContextFilterProvider;
        let mut messages = vec![
            Message::user("task"),
            Message::assistant("old ".repeat(400)),
            Message::user("recent one"),
            Message::assistant("recent two"),
        ];

        provider.post_load(
            &mut messages,
            &json!({
                "context_budget_tokens": 1,
                "min_recent_messages": 2,
                "compaction_active": true
            }),
        );

        // Compaction owns reduction: infinity context neither trims nor injects a
        // notice when compaction is active.
        assert_eq!(messages.len(), 4);
        assert!(
            messages
                .iter()
                .all(|m| !extract_text_content(m).contains("NOT visible"))
        );
    }

    #[test]
    fn test_filter_provider_keep_first_messages_anchors_multiple() {
        let provider = InfinityContextFilterProvider;
        let mut messages = vec![
            Message::user("anchor one"),
            Message::user("anchor two"),
            Message::assistant("mid ".repeat(400)),
            Message::user("recent"),
        ];

        provider.post_load(
            &mut messages,
            &json!({
                "context_budget_tokens": 1,
                "min_recent_messages": 1,
                "keep_first_messages": 2
            }),
        );

        assert_eq!(extract_text_content(&messages[0]), "anchor one");
        assert_eq!(extract_text_content(&messages[1]), "anchor two");
        assert!(extract_text_content(&messages[2]).contains("NOT visible"));
        assert_eq!(extract_text_content(messages.last().unwrap()), "recent");
    }

    #[test]
    fn test_estimate_json_value_len_matches_serialized_length() {
        let value = json!({
            "stdout": ["alpha", "beta"],
            "ok": true,
            "count": 2
        });

        assert_eq!(
            estimate_json_value_len(&value),
            serde_json::to_string(&value).unwrap().len()
        );
    }

    #[test]
    fn test_query_history_requires_context() {
        let tool = QueryHistoryTool;
        assert!(tool.requires_context());
    }

    #[tokio::test]
    async fn test_query_history_tool_errors_without_retriever() {
        let tool = QueryHistoryTool;
        let result = tool
            .execute_with_context(json!({"query": "api"}), &ToolContext::new(SessionId::new()))
            .await;

        match result {
            ToolExecutionResult::ToolError(message) => {
                assert!(message.contains("No message retriever available"));
            }
            other => panic!("expected tool error, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn test_query_history_tool_rejects_invalid_params() {
        let result = QueryHistoryTool.execute(json!({"limit": "oops"})).await;

        match result {
            ToolExecutionResult::ToolError(message) => {
                assert!(message.contains("requires session context"));
            }
            other => panic!("expected tool error, got {other:?}"),
        }

        let session_id = SessionId::new();
        let retriever = InMemoryMessageRetriever::new();
        let result = QueryHistoryTool
            .execute_with_context(
                json!({"message_range": {"from": "bad", "to": 1}}),
                &ToolContext::new(session_id).with_message_retriever(Arc::new(retriever)),
            )
            .await;

        match result {
            ToolExecutionResult::ToolError(message) => {
                assert!(message.contains("Invalid parameters"));
            }
            other => panic!("expected tool error, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn test_query_history_tool_empty_history() {
        let session_id = SessionId::new();
        let retriever = InMemoryMessageRetriever::new();

        let result = QueryHistoryTool
            .execute_with_context(
                json!({}),
                &ToolContext::new(session_id).with_message_retriever(Arc::new(retriever)),
            )
            .await;

        match result {
            ToolExecutionResult::Success(value) => {
                assert_eq!(value["count"], 0);
                assert_eq!(value["message"], "No history available.");
            }
            other => panic!("expected success, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn test_query_history_tool_searches_history() {
        let session_id = SessionId::new();
        let retriever = InMemoryMessageRetriever::new();
        retriever
            .seed(
                session_id,
                vec![
                    Message::user("First topic"),
                    Message::assistant("The API key is abc123"),
                    Message::user("We should keep discussing logging"),
                ],
            )
            .await;

        let result = QueryHistoryTool
            .execute_with_context(
                json!({"query": "api key"}),
                &ToolContext::new(session_id).with_message_retriever(Arc::new(retriever)),
            )
            .await;

        match result {
            ToolExecutionResult::Success(value) => {
                assert_eq!(value["count"], 1);
                assert_eq!(value["messages"][0]["content"], "The API key is abc123");
            }
            other => panic!("expected success, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn test_query_history_tool_search_no_match() {
        let session_id = SessionId::new();
        let retriever = InMemoryMessageRetriever::new();
        retriever
            .seed(
                session_id,
                vec![Message::user("one"), Message::assistant("two")],
            )
            .await;

        let result = QueryHistoryTool
            .execute_with_context(
                json!({"query": "missing"}),
                &ToolContext::new(session_id).with_message_retriever(Arc::new(retriever)),
            )
            .await;

        match result {
            ToolExecutionResult::Success(value) => {
                assert_eq!(value["count"], 0);
                assert_eq!(value["message"], "No matching messages found.");
            }
            other => panic!("expected success, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn test_query_history_tool_reads_range() {
        let session_id = SessionId::new();
        let retriever = InMemoryMessageRetriever::new();
        retriever
            .seed(
                session_id,
                vec![
                    Message::user("one"),
                    Message::assistant("two"),
                    Message::user("three"),
                ],
            )
            .await;

        let result = QueryHistoryTool
            .execute_with_context(
                json!({"message_range": {"from": 1, "to": 3}, "limit": 10}),
                &ToolContext::new(session_id).with_message_retriever(Arc::new(retriever)),
            )
            .await;

        match result {
            ToolExecutionResult::Success(value) => {
                assert_eq!(value["count"], 2);
                assert_eq!(value["messages"][0]["content"], "two");
                assert_eq!(value["messages"][1]["content"], "three");
            }
            other => panic!("expected success, got {other:?}"),
        }
    }

    #[tokio::test]
    async fn test_query_history_tool_clamps_out_of_bounds_range() {
        let session_id = SessionId::new();
        let retriever = InMemoryMessageRetriever::new();
        retriever
            .seed(
                session_id,
                vec![
                    Message::user("one"),
                    Message::assistant("two"),
                    Message::user("three"),
                ],
            )
            .await;

        let result = QueryHistoryTool
            .execute_with_context(
                json!({"message_range": {"from": 99, "to": 100}}),
                &ToolContext::new(session_id).with_message_retriever(Arc::new(retriever)),
            )
            .await;

        match result {
            ToolExecutionResult::Success(value) => {
                assert_eq!(value["count"], 0);
                assert_eq!(value["message"], "No messages in the requested range.");
            }
            other => panic!("expected success, got {other:?}"),
        }
    }

    #[test]
    fn test_truncate_content_is_utf8_safe() {
        let truncated = truncate_content("hello🙂world", 6);
        assert_eq!(truncated, "hello🙂...");
    }

    #[test]
    fn trim_preserves_locally_unmatched_tool_result_for_stateful_responses() {
        use crate::tool_types::ToolCall;

        let provider = InfinityContextFilterProvider;
        // min_recent_messages=3 keeps the last 3 messages. With a 1-token budget the
        // two older messages are dropped, including the assistant tool call. The
        // OpenAI Responses path may still have that call in previous_response_id
        // state, so InfinityContext must not drop the tool output before provider
        // serialization decides whether stateful continuation is active.
        let mut messages = vec![
            Message::user("old question"),
            Message::assistant_with_tools(
                "calling tool",
                vec![ToolCall {
                    id: "call_old".to_string(),
                    name: "edit_file".to_string(),
                    arguments: serde_json::json!({}),
                }],
            ),
            // This tool result is in the min-recent window but its call is trimmed away.
            Message::tool_result("call_old", Some(serde_json::json!("done")), None),
            Message::user("new question"),
            Message::assistant("answer"),
        ];

        provider.post_load(
            &mut messages,
            &serde_json::json!({"context_budget_tokens": 1, "min_recent_messages": 3}),
        );

        assert!(
            messages.iter().any(|m| m.role == MessageRole::ToolResult),
            "locally unmatched tool result must be preserved until provider serialization"
        );
    }

    #[test]
    fn trim_keeps_tool_result_when_tool_call_is_visible() {
        use crate::tool_types::ToolCall;

        let provider = InfinityContextFilterProvider;
        // All 3 messages fit in the window: no orphan expected.
        let mut messages = vec![
            Message::assistant_with_tools(
                "calling tool",
                vec![ToolCall {
                    id: "call_1".to_string(),
                    name: "read_file".to_string(),
                    arguments: serde_json::json!({}),
                }],
            ),
            Message::tool_result("call_1", Some(serde_json::json!("content")), None),
            Message::user("thanks"),
        ];

        provider.post_load(
            &mut messages,
            &serde_json::json!({"context_budget_tokens": 100_000, "min_recent_messages": 10}),
        );

        assert!(
            messages.iter().any(|m| m.role == MessageRole::ToolResult),
            "tool result must be kept when its tool call is visible"
        );
    }
}