ai_tokenopt 0.5.10

Adaptive token optimization engine for LLM inference pipelines — compresses prompts, conversation history, tool schemas, and output streams to minimize token usage while preserving response quality.
Documentation
//! Integration tests for `TokenOptimizationConfig` serialization and loading.
//!
//! Verifies that the configuration round-trips through TOML and JSON,
//! that defaults are sensible, and that partial configs merge correctly.

use ai_tokenopt::config::TokenOptimizationConfig;

// ============================================================================
// Default values
// ============================================================================

/// Pins the values produced by `TokenOptimizationConfig::default()`.
#[test]
fn default_config_has_sensible_values() {
    // Float settings are compared within `f32::EPSILON` instead of with `==`.
    let close = |actual: f32, expected: f32| (actual - expected).abs() < f32::EPSILON;

    let defaults = TokenOptimizationConfig::default();

    assert!(defaults.enabled);
    assert_eq!(defaults.context_window_tokens, 8192);
    assert!(close(defaults.response_headroom_ratio, 0.25));
    assert!(close(defaults.compaction_trigger_ratio, 0.70));
    assert_eq!(defaults.max_summary_tokens, 256);
    assert!(close(defaults.system_prompt_budget_ratio, 0.15));
    assert!(close(defaults.rag_budget_ratio, 0.15));
    assert!(defaults.repetition_detection_enabled);
    assert_eq!(defaults.repetition_ngram_size, 3);
    assert!(close(defaults.repetition_threshold, 0.3));
    assert_eq!(defaults.max_tools_per_request, 8);
}

// ============================================================================
// TOML serialization round-trip
// ============================================================================

/// Serializes a fully specified configuration to TOML, parses it back, and
/// checks that every explicitly set, non-optional field survives the trip.
///
/// The `Option` fields are all `None` in the input and are not compared here.
#[test]
fn config_toml_round_trip() {
    let original = TokenOptimizationConfig {
        enabled: true,
        context_window_tokens: 16384,
        response_headroom_ratio: 0.30,
        compaction_trigger_ratio: 0.60,
        max_summary_tokens: 512,
        system_prompt_budget_ratio: 0.20,
        rag_budget_ratio: 0.10,
        repetition_detection_enabled: false,
        repetition_ngram_size: 4,
        repetition_threshold: 0.5,
        max_tools_per_request: 12,
        tokenizer_model: None,
        output_max_tokens: None,
        frequency_penalty: None,
        presence_penalty: None,
        progressive_tool_compression: true,
        conciseness_pressure_threshold: 0.7,
        tool_result_max_tokens: 100,
        max_history_tokens: None,
        max_profile_prompt_tokens: 300,
        prompt_template_dir: None,
    };

    let toml_str = toml::to_string(&original).expect("TOML serialization should succeed");
    let deserialized: TokenOptimizationConfig =
        toml::from_str(&toml_str).expect("TOML deserialization should succeed");

    assert_eq!(deserialized.enabled, original.enabled);
    assert_eq!(
        deserialized.context_window_tokens,
        original.context_window_tokens
    );
    assert!(
        (deserialized.response_headroom_ratio - original.response_headroom_ratio).abs()
            < f32::EPSILON
    );
    assert!(
        (deserialized.compaction_trigger_ratio - original.compaction_trigger_ratio).abs()
            < f32::EPSILON
    );
    assert_eq!(deserialized.max_summary_tokens, original.max_summary_tokens);
    assert!(
        (deserialized.system_prompt_budget_ratio - original.system_prompt_budget_ratio).abs()
            < f32::EPSILON
    );
    assert!((deserialized.rag_budget_ratio - original.rag_budget_ratio).abs() < f32::EPSILON);
    assert_eq!(
        deserialized.repetition_detection_enabled,
        original.repetition_detection_enabled
    );
    assert_eq!(
        deserialized.repetition_ngram_size,
        original.repetition_ngram_size
    );
    assert!(
        (deserialized.repetition_threshold - original.repetition_threshold).abs() < f32::EPSILON
    );
    assert_eq!(
        deserialized.max_tools_per_request,
        original.max_tools_per_request
    );

    // The remaining non-optional fields are also set explicitly above, so
    // they must come back unchanged as well.
    assert_eq!(
        deserialized.progressive_tool_compression,
        original.progressive_tool_compression
    );
    // An untyped literal tolerance is used so the comparison does not depend
    // on the float width of this field.
    assert!(
        (deserialized.conciseness_pressure_threshold - original.conciseness_pressure_threshold)
            .abs()
            < 1e-6
    );
    assert_eq!(
        deserialized.tool_result_max_tokens,
        original.tool_result_max_tokens
    );
    assert_eq!(
        deserialized.max_profile_prompt_tokens,
        original.max_profile_prompt_tokens
    );
}

/// Parses a TOML document that sets only two keys and checks that the keys
/// it omits come back with their default values.
#[test]
fn partial_toml_uses_defaults_for_missing_fields() {
    let partial_doc = r"
enabled = false
context_window_tokens = 4096
";

    let parsed =
        toml::from_str::<TokenOptimizationConfig>(partial_doc).expect("partial TOML should parse");

    // Keys present in the document override the defaults.
    assert!(!parsed.enabled);
    assert_eq!(parsed.context_window_tokens, 4096);

    // Keys absent from the document fall back to the defaults.
    let headroom_delta = (parsed.response_headroom_ratio - 0.25).abs();
    assert!(headroom_delta < f32::EPSILON);
    assert_eq!(parsed.max_summary_tokens, 256);
    assert_eq!(parsed.max_tools_per_request, 8);
}

/// Parses an empty TOML document and compares the result, field by field,
/// with `TokenOptimizationConfig::default()`.
///
/// Covers the switch, size, and ratio fields; float fields are compared
/// within `f32::EPSILON`.
#[test]
fn empty_toml_gives_all_defaults() {
    let config: TokenOptimizationConfig =
        toml::from_str("").expect("empty TOML should parse with defaults");

    let default = TokenOptimizationConfig::default();
    let close = |actual: f32, expected: f32| (actual - expected).abs() < f32::EPSILON;

    assert_eq!(config.enabled, default.enabled);
    assert_eq!(config.context_window_tokens, default.context_window_tokens);
    assert!(close(
        config.response_headroom_ratio,
        default.response_headroom_ratio
    ));
    assert!(close(
        config.compaction_trigger_ratio,
        default.compaction_trigger_ratio
    ));
    assert_eq!(config.max_summary_tokens, default.max_summary_tokens);
    assert!(close(
        config.system_prompt_budget_ratio,
        default.system_prompt_budget_ratio
    ));
    assert!(close(config.rag_budget_ratio, default.rag_budget_ratio));
    assert_eq!(
        config.repetition_detection_enabled,
        default.repetition_detection_enabled
    );
    assert_eq!(config.repetition_ngram_size, default.repetition_ngram_size);
    assert!(close(
        config.repetition_threshold,
        default.repetition_threshold
    ));
    assert_eq!(config.max_tools_per_request, default.max_tools_per_request);
}

// ============================================================================
// JSON serialization round-trip
// ============================================================================

/// Serializes the default configuration to JSON, parses it back, and checks
/// that nothing was lost along the way.
///
/// Two fields are compared directly; the parsed value is then serialized
/// again and the output compared with the first JSON string, which covers
/// every serialized field without needing `PartialEq` on the config type.
#[test]
fn config_json_round_trip() {
    let original = TokenOptimizationConfig::default();
    let json = serde_json::to_string(&original).expect("JSON serialization should succeed");
    let deserialized: TokenOptimizationConfig =
        serde_json::from_str(&json).expect("JSON deserialization should succeed");

    assert_eq!(deserialized.enabled, original.enabled);
    assert_eq!(
        deserialized.context_window_tokens,
        original.context_window_tokens
    );

    // Any field that changed during the round trip would change this output.
    let reserialized =
        serde_json::to_string(&deserialized).expect("JSON re-serialization should succeed");
    assert_eq!(reserialized, json);
}

/// Parses a TOML document that sets eleven keys explicitly and checks that
/// each one lands in the matching configuration field.
#[test]
fn config_from_example_toml_section() {
    let toml_str = r"
enabled = true
context_window_tokens = 8192
response_headroom_ratio = 0.25
compaction_trigger_ratio = 0.70
max_summary_tokens = 256
system_prompt_budget_ratio = 0.15
rag_budget_ratio = 0.15
repetition_detection_enabled = true
repetition_ngram_size = 3
repetition_threshold = 0.3
max_tools_per_request = 8
";

    let config: TokenOptimizationConfig =
        toml::from_str(toml_str).expect("example config should parse");

    assert!(config.enabled);
    assert_eq!(config.context_window_tokens, 8192);
    assert!((config.response_headroom_ratio - 0.25).abs() < f32::EPSILON);
    assert!((config.compaction_trigger_ratio - 0.70).abs() < f32::EPSILON);
    assert_eq!(config.max_summary_tokens, 256);
    // Every remaining key in the document is verified too, so a mis-mapped
    // key cannot go unnoticed.
    assert!((config.system_prompt_budget_ratio - 0.15).abs() < f32::EPSILON);
    assert!((config.rag_budget_ratio - 0.15).abs() < f32::EPSILON);
    assert!(config.repetition_detection_enabled);
    assert_eq!(config.repetition_ngram_size, 3);
    assert!((config.repetition_threshold - 0.3).abs() < f32::EPSILON);
    assert_eq!(config.max_tools_per_request, 8);
}

// ============================================================================
// Edge cases
// ============================================================================

/// Builds an optimizer from a configuration whose context window is zero
/// tokens; construction must complete and the optimizer must report enabled.
#[test]
fn config_with_zero_context_window() {
    let optimizer = ai_tokenopt::TokenOptimizer::new(TokenOptimizationConfig {
        context_window_tokens: 0,
        ..TokenOptimizationConfig::default()
    });

    // Reaching this line means `new` did not panic on the zero-sized window.
    assert!(optimizer.is_enabled());
}

/// Builds an optimizer from a configuration whose ratio fields sit at
/// extreme values; construction must complete and the optimizer must report
/// enabled.
#[test]
fn config_with_extreme_ratios() {
    let extreme = TokenOptimizationConfig {
        response_headroom_ratio: 0.99,
        compaction_trigger_ratio: 0.01,
        system_prompt_budget_ratio: 0.5,
        rag_budget_ratio: 0.5,
        ..TokenOptimizationConfig::default()
    };

    // Reaching the assertion means `new` did not panic on these ratios.
    let optimizer = ai_tokenopt::TokenOptimizer::new(extreme);
    assert!(optimizer.is_enabled());
}

/// An optimizer built from a configuration with `enabled: false` must report
/// itself as disabled.
#[test]
fn disabled_config_reflects_in_optimizer() {
    let optimizer = ai_tokenopt::TokenOptimizer::new(TokenOptimizationConfig {
        enabled: false,
        ..TokenOptimizationConfig::default()
    });

    assert!(!optimizer.is_enabled());
}