ai_tokenopt 0.5.10

//! Integration tests for advanced token optimization strategies.
//!
//! Validates the full pipeline: dedup → chain collapse → structured prompts →
//! output budgeting → calibration → metrics — all working together.

use std::sync::Arc;

use ai_tokenopt::config::TokenOptimizationConfig;
use ai_tokenopt::estimator::TokenEstimator;
use ai_tokenopt::metrics::OptimizationMetrics;
use ai_tokenopt::optimizer::TokenOptimizer;
use ai_tokenopt::pipeline::Pipeline;
use ai_tokenopt::types::{ChatMessage, Conversation};

/// Builds a tool message for tests when the `pisovereign` feature is enabled.
///
/// In this configuration `ChatMessage::tool` takes an extra leading argument,
/// for which the fixed string `"test"` is supplied.
#[cfg(feature = "pisovereign")]
fn tool_msg(content: &str) -> ChatMessage {
    ChatMessage::tool("test", content)
}

/// Builds a tool message for tests when the `pisovereign` feature is disabled.
///
/// In this configuration `ChatMessage::tool` takes only the message content.
#[cfg(not(feature = "pisovereign"))]
fn tool_msg(content: &str) -> ChatMessage {
    ChatMessage::tool(content)
}

// ---------------------------------------------------------------------------
// TokenOptimizer integration
// ---------------------------------------------------------------------------

/// Optimizing a short conversation with the default configuration must not
/// yield a recommended output-token limit.
#[tokio::test]
async fn optimize_conversation_returns_no_output_budget_by_default() {
    let mut conversation = Conversation::with_system_prompt("You are helpful.");
    conversation.add_user_message("What is Rust?");
    conversation.add_assistant_message("Rust is a systems programming language.");
    conversation.add_user_message("Is it fast?");

    let optimizer = TokenOptimizer::new(TokenOptimizationConfig::default());
    let outcome = optimizer
        .optimize_conversation(&mut conversation, None)
        .await
        .expect("should succeed");

    // Nothing in this test sets an output cap, so no recommendation is expected.
    let recommendation = &outcome.recommended_max_tokens;
    assert!(recommendation.is_none());
}

/// A single optimization run must be reflected in the shared metrics handle
/// as exactly one recorded optimization.
#[tokio::test]
async fn optimizer_with_metrics_records_counters() {
    let metrics = Arc::new(OptimizationMetrics::new());
    // The optimizer gets its own handle; the test keeps `metrics` for inspection.
    let metrics_handle = Arc::clone(&metrics);
    let optimizer =
        TokenOptimizer::new(TokenOptimizationConfig::default()).with_metrics(metrics_handle);

    let mut conversation = Conversation::with_system_prompt("You are helpful.");
    conversation.add_user_message("Hello!");

    let _outcome = optimizer
        .optimize_conversation(&mut conversation, None)
        .await
        .expect("should succeed");

    let recorded = metrics.total_optimizations();
    assert_eq!(recorded, 1);
}

/// Five optimization runs through the same optimizer must add up to five
/// recorded optimizations on the shared metrics handle.
#[tokio::test]
async fn optimizer_with_metrics_accumulates_across_calls() {
    let metrics = Arc::new(OptimizationMetrics::new());
    let metrics_handle = Arc::clone(&metrics);
    let optimizer =
        TokenOptimizer::new(TokenOptimizationConfig::default()).with_metrics(metrics_handle);

    // Each round uses a fresh conversation; only the optimizer is reused.
    for _round in 0..5 {
        let mut conversation = Conversation::new();
        conversation.add_user_message("Hi!");

        let _outcome = optimizer
            .optimize_conversation(&mut conversation, None)
            .await
            .expect("should succeed");
    }

    let recorded = metrics.total_optimizations();
    assert_eq!(recorded, 5);
}

/// Reports two token observations to a calibration-enabled optimizer, then
/// checks that a later optimization still succeeds and yields a non-zero
/// post-optimization total.
#[tokio::test]
async fn optimizer_with_calibration_accepts_observations() {
    let optimizer = TokenOptimizer::new(TokenOptimizationConfig::default()).with_calibration();

    // Two reports for the same model label; neither call returns anything
    // that this test inspects.
    optimizer.report_actual_tokens("llama3.2", 100, 110);
    optimizer.report_actual_tokens("llama3.2", 100, 115);

    // Optimization after the reports must neither panic nor return an error.
    let mut conversation = Conversation::new();
    conversation.add_user_message("Test");

    let outcome = optimizer
        .optimize_conversation(&mut conversation, None)
        .await
        .expect("should succeed");

    let total_after = outcome.estimate_after.total;
    assert!(total_after > 0);
}

/// Optimizing a bare prompt with the default configuration must produce
/// non-empty text, positive token figures and a complexity value, but no
/// recommended output-token limit.
#[tokio::test]
async fn optimize_prompt_returns_no_output_budget_by_default() {
    let optimizer = TokenOptimizer::new(TokenOptimizationConfig::default());

    let optimized = optimizer
        .optimize_prompt("What time is it?", None)
        .await
        .expect("should succeed");

    // Output text and (absent) output budget.
    assert!(!optimized.text.is_empty());
    assert!(optimized.recommended_max_tokens.is_none());

    // Token accounting and metadata populated by the optimizer.
    let details = &optimized.metadata;
    assert!(optimized.tokens_estimated > 0);
    assert!(details.tokens_before > 0);
    assert!(details.complexity.is_some());
}

// ---------------------------------------------------------------------------
// Pipeline integration
// ---------------------------------------------------------------------------

/// With dedup enabled, a conversation containing the same user message twice
/// must end up with fewer messages after optimization.
#[tokio::test]
async fn pipeline_dedup_removes_duplicates() {
    let mut conversation = Conversation::with_system_prompt("You are helpful.");
    // The same question is added twice on purpose.
    for _ in 0..2 {
        conversation.add_user_message("What is the weather?");
    }
    conversation.add_assistant_message("It's sunny!");
    let messages_before = conversation.messages.len();

    let mut pipeline = Pipeline::default().context_window(8192).enable_dedup(true);
    let _outcome = pipeline
        .optimize_conversation(&mut conversation)
        .await
        .expect("should succeed");

    let messages_after = conversation.messages.len();
    assert!(
        messages_after < messages_before,
        "dedup should remove duplicate"
    );
}

/// With chain collapse enabled and dedup disabled, a run of three consecutive
/// tool messages must not make the conversation longer.
///
/// NOTE(review): the test name says "reduces", but the assertion only
/// requires that the message count does not grow.
#[tokio::test]
async fn pipeline_chain_collapse_reduces_tool_chains() {
    // user -> three tool results in a row -> assistant
    let mut conversation = Conversation::new();
    for message in [
        ChatMessage::user("Check things"),
        tool_msg("Result from tool A: 42"),
        tool_msg("Result from tool B: hello world"),
        tool_msg("Result from tool C: done"),
        ChatMessage::assistant("All done."),
    ] {
        conversation.messages.push(message);
    }
    let messages_before = conversation.messages.len();

    let mut pipeline = Pipeline::default()
        .context_window(8192)
        .enable_chain_collapse(true)
        .enable_dedup(false);
    let _outcome = pipeline
        .optimize_conversation(&mut conversation)
        .await
        .expect("should succeed");

    let messages_after = conversation.messages.len();
    assert!(
        messages_after <= messages_before,
        "chain collapse should reduce or keep message count"
    );
}

/// Runs a filler-heavy system prompt through a pipeline with structured
/// prompts enabled and checks that a non-empty optimized prompt comes back.
///
/// NOTE(review): only non-emptiness is asserted; the test does not verify
/// that any filler phrase was actually removed.
#[tokio::test]
async fn pipeline_structured_prompts_apply_filler_stripping() {
    // One short opening paragraph, then a single line of filler-style sentences.
    let verbose_prompt = concat!(
        "You are a helpful assistant.\n\n",
        "Please note that you should always respond concisely. ",
        "It is important to remember that accuracy matters. ",
        "Make sure to provide relevant information. ",
        "Please be aware that the user expects clear answers.",
    );

    let pipeline = Pipeline::default()
        .context_window(8192)
        .enable_structured_prompts(true);

    let outcome = pipeline
        .optimize_text(verbose_prompt, "Hello!")
        .await
        .expect("should succeed");

    let optimized_prompt = &outcome.optimized_prompt;
    assert!(!optimized_prompt.is_empty());
}

/// A pipeline configured only with a context window must not recommend an
/// output-token limit.
#[tokio::test]
async fn pipeline_output_budget_none_by_default() {
    let recommended = Pipeline::default()
        .context_window(8192)
        .recommended_max_tokens("What is 2+2?");

    assert!(recommended.is_none(), "no output budget by default");
}

/// When an explicit output cap of 256 tokens is configured, the pipeline must
/// recommend exactly that value.
#[tokio::test]
async fn pipeline_output_budget_honours_explicit_cap() {
    let recommended = Pipeline::default()
        .context_window(8192)
        .output_max_tokens(Some(256))
        .recommended_max_tokens("Write a Python function to sort a list");

    assert_eq!(recommended, Some(256));
}

/// Enables dedup, structured prompts, chain collapse and output budgeting at
/// once, and checks that the post-optimization token total does not exceed
/// the estimate taken before optimization.
#[tokio::test]
async fn pipeline_all_v2_features_together() {
    let mut conversation = Conversation::with_system_prompt(
        "You are a helpful assistant. Please note that you should be concise.",
    );
    // The same user question, twice.
    for _ in 0..2 {
        conversation.add_user_message("What is Rust?");
    }
    conversation.add_assistant_message("Rust is a programming language.");
    // Two consecutive tool results.
    for tool_output in ["Tool result 1: compiled", "Tool result 2: passed tests"] {
        conversation.messages.push(tool_msg(tool_output));
    }
    conversation.add_user_message("Summarize the results");

    // Baseline estimate, taken before the pipeline touches the conversation.
    let original_total = TokenEstimator::estimate_conversation(&conversation).total;

    let mut pipeline = Pipeline::default()
        .context_window(8192)
        .enable_dedup(true)
        .enable_structured_prompts(true)
        .enable_chain_collapse(true)
        .enable_output_budget(true);
    let outcome = pipeline
        .optimize_conversation(&mut conversation)
        .await
        .expect("should succeed");

    // Only "no increase" is required here, not a strict reduction.
    let after_total = outcome.estimate_after.total;
    assert!(
        after_total <= original_total,
        "combined v2 strategies should not increase tokens: before={original_total}, after={after_total}"
    );
}

// ---------------------------------------------------------------------------
// Language-aware estimation integration
// ---------------------------------------------------------------------------

/// Estimates tokens for an English sentence and for a Chinese sentence and
/// checks that both estimates are positive.
///
/// NOTE(review): despite the name, no relationship between the two estimates
/// is asserted; only positivity is checked.
#[test]
fn estimator_handles_cjk_text_accurately() {
    let samples = [
        "The quick brown fox jumps over the lazy dog",
        "快速的棕色狐狸跳过了懒狗快速的棕色狐狸跳过",
    ];

    // English first, then Chinese, matching the order of `samples`.
    for text in samples {
        let tokens = TokenEstimator::estimate_tokens(text);
        assert!(tokens > 0);
    }
}

/// A string mixing Latin, Chinese, Cyrillic and Arabic text must still get a
/// positive token estimate.
#[test]
fn estimator_handles_mixed_script_text() {
    let tokens = TokenEstimator::estimate_tokens("Hello 你好 Привет مرحبا World");
    assert!(tokens > 0);
}

// ---------------------------------------------------------------------------
// Progressive tool compression integration
// ---------------------------------------------------------------------------

/// Optimizes the same tool list twice with a shared usage tracker, marking
/// the first result as seen in between, and checks that the second result is
/// estimated at strictly fewer tokens than the first.
#[test]
fn optimizer_progressive_tools_strips_on_repeat() {
    use ai_tokenopt::tools::progressive::ToolUsageTracker;
    use ai_tokenopt::types::{ParameterProperty, ToolDefinition, ToolParameters};

    // Fixture: one `get_weather` tool with a single required `location` string.
    let location_param = ParameterProperty {
        param_type: String::from("string"),
        description: String::from("The city name to get weather for"),
        enum_values: Vec::new(),
    };
    let weather_tool = ToolDefinition {
        name: String::from("get_weather"),
        description: String::from("Get the current weather for a location"),
        parameters: ToolParameters {
            schema_type: String::from("object"),
            properties: std::collections::HashMap::from([(
                String::from("location"),
                location_param,
            )]),
            required: vec![String::from("location")],
        },
        icon: None,
    };
    let tools = vec![weather_tool];

    let optimizer = TokenOptimizer::new(TokenOptimizationConfig::default());
    let mut tracker = ToolUsageTracker::new();

    // Pass 1: nothing has been marked as seen yet.
    let first = optimizer.optimize_tools_progressive("weather?", &tools, &tracker);
    assert!(!first.is_empty());
    let first_tokens = TokenEstimator::estimate_tool_definitions(&first);

    // Record the first result with the tracker before asking again.
    tracker.mark_seen(&first);

    // Pass 2: same query and tools, now with the tracker updated.
    let second = optimizer.optimize_tools_progressive("weather?", &tools, &tracker);
    let second_tokens = TokenEstimator::estimate_tool_definitions(&second);

    assert!(
        second_tokens < first_tokens,
        "progressive compression should reduce tokens: first={first_tokens}, second={second_tokens}"
    );
}

// ---------------------------------------------------------------------------
// PromptContext.structured_format integration
// ---------------------------------------------------------------------------

/// `PromptContext::new(false, false)` must yield a context whose
/// `structured_format` flag is set.
#[test]
fn prompt_context_new_defaults_structured_format_true() {
    use ai_tokenopt::prompt::system_prompt::PromptContext;

    let context = PromptContext::new(false, false);
    let structured = context.structured_format;
    assert!(structured);
}

/// Optimizes the same system prompt with `structured_format` on and off.
///
/// Both results must still mention "PiSovereign", and the structured result
/// must be no longer than the unstructured one.
#[test]
fn structured_format_false_skips_filler_stripping() {
    use ai_tokenopt::prompt::system_prompt::{PromptContext, optimize_system_prompt};

    // Three paragraphs: an identity/rule line followed by two filler-style lines.
    let prompt = concat!(
        "You are PiSovereign. You must not lie.\n\n",
        "Please note that you should always respond concisely.\n\n",
        "It is important to remember that accuracy matters.",
    );

    // The two contexts differ only in `structured_format`.
    let make_context = |structured_format: bool| PromptContext {
        has_tools: false,
        has_rag: false,
        structured_format,
    };
    let structured_ctx = make_context(true);
    let unstructured_ctx = make_context(false);

    let structured = optimize_system_prompt(prompt, 50, &structured_ctx);
    let unstructured = optimize_system_prompt(prompt, 50, &unstructured_ctx);

    // The product name must be present in both variants.
    assert!(structured.contains("PiSovereign"));
    assert!(unstructured.contains("PiSovereign"));
    // Equal length is accepted; the structured variant just must not be longer.
    assert!(structured.len() <= unstructured.len());
}

// ---------------------------------------------------------------------------
// adjust_profile integration
// ---------------------------------------------------------------------------

/// For a model reported with 2 billion parameters and an 8192-token context,
/// `adjust_profile` must turn `Standard` into `Performance`.
#[test]
fn adjust_profile_small_model_upgrades_standard() {
    use ai_tokenopt::profile::{HardwareProfile, ModelInfo, adjust_profile};

    let small_model = ModelInfo {
        name: String::from("gemma2:2b"),
        context_length: 8192,
        parameter_count: Some(2_000_000_000),
    };

    let adjusted = adjust_profile(HardwareProfile::Standard, &small_model);
    assert_eq!(adjusted, HardwareProfile::Performance);
}

/// For a model reported with 70 billion parameters and a 128 000-token
/// context, `adjust_profile` must leave `Standard` unchanged.
#[test]
fn adjust_profile_large_model_never_upgrades() {
    use ai_tokenopt::profile::{HardwareProfile, ModelInfo, adjust_profile};

    let large_model = ModelInfo {
        name: String::from("llama3.1:70b"),
        context_length: 128_000,
        parameter_count: Some(70_000_000_000),
    };

    let adjusted = adjust_profile(HardwareProfile::Standard, &large_model);
    assert_eq!(adjusted, HardwareProfile::Standard);
}

// ---------------------------------------------------------------------------
// Budget pressure priority integration
// ---------------------------------------------------------------------------

/// Allocates a budget for a 6500-token estimate (800 system prompt + 200
/// summary + 5500 history) and checks that the system prompt keeps at least
/// its 800 tokens while compaction is flagged as required.
#[test]
fn budget_pressure_priority_protects_system_prompt() {
    use ai_tokenopt::budget::TokenBudget;
    use ai_tokenopt::config::TokenOptimizationConfig;
    use ai_tokenopt::estimator::ConversationTokenEstimate;

    // NOTE(review): these figures are meant to push budget pressure above 0.9
    // under the default configuration — confirm against `TokenBudget`.
    let heavy_estimate = ConversationTokenEstimate {
        system_prompt: 800,
        summary: 200,
        history: 5500,
        total: 6500,
    };

    let budget = TokenBudget::new(&TokenOptimizationConfig::default());
    let allocation = budget.allocate_with_pressure_priority(&heavy_estimate, true, 10);

    // The system prompt allocation must cover the full 800 estimated tokens.
    let system_prompt_share = allocation.system_prompt;
    assert!(
        system_prompt_share >= 800,
        "system prompt should be protected"
    );
    // The allocation must signal that compaction is needed.
    let needs_compaction = allocation.requires_compaction;
    assert!(needs_compaction, "history compaction should be required");
}

// ---------------------------------------------------------------------------
// Cached prompt tokens integration (standalone only)
// ---------------------------------------------------------------------------

/// Checks that optimizing a conversation populates its `cached_prompt_tokens`
/// field with a positive value.
///
/// Only compiled when the `pisovereign` feature is disabled.
#[cfg(not(feature = "pisovereign"))]
#[tokio::test]
async fn cached_prompt_tokens_set_after_optimization() {
    let config = TokenOptimizationConfig::default();
    let optimizer = TokenOptimizer::new(config);

    let mut conv = Conversation::with_system_prompt("You are helpful.");
    conv.add_user_message("Hello!");

    // A freshly built conversation carries no cached token count.
    assert!(
        conv.cached_prompt_tokens.is_none(),
        "cache should be empty before optimization, got {:?}",
        conv.cached_prompt_tokens
    );

    let _result = optimizer
        .optimize_conversation(&mut conv, None)
        .await
        .expect("should succeed");

    // One pattern check covers both "is populated" and "is positive", and the
    // failure message reports the value that was actually found.
    assert!(
        matches!(conv.cached_prompt_tokens, Some(tokens) if tokens > 0),
        "cache should hold a positive token count after optimization, got {:?}",
        conv.cached_prompt_tokens
    );
}

// ---------------------------------------------------------------------------
// Build-time YAML prompts
// ---------------------------------------------------------------------------

/// The crate-level `YAML_PROMPTS` constant must be reachable as a slice of
/// string pairs and, in this build, contain no entries.
#[test]
fn yaml_prompts_const_is_available() {
    // The explicit type annotation doubles as a compile-time check of the
    // constant's type. The constant is generated by `build.rs`.
    let generated: &[(&str, &str)] = ai_tokenopt::YAML_PROMPTS;

    // With no `.prompt.txt` inputs present, nothing is generated.
    assert!(generated.is_empty());
}