ai_tokenopt 0.5.7

Adaptive token optimization engine for LLM inference pipelines — compresses prompts, conversation history, tool schemas, and output streams to minimize token usage while preserving response quality.
Documentation
//! Output token budget recommendation based on query complexity.
//!
//! Combines query complexity classification with model constraints
//! to produce a concrete `max_tokens` recommendation for inference requests.

use serde::{Deserialize, Serialize};

use crate::output::complexity::{QueryComplexity, classify_query};

/// Recommended output token budget for an inference request.
///
/// Returned by [`compute_output_budget`], which fills both fields from a
/// single classification of the user query. The type is serializable and
/// deserializable via `serde`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OutputBudget {
    /// Recommended `max_tokens` value for the LLM request.
    ///
    /// Obtained from the complexity tier's `max_tokens` method, given the
    /// model's maximum output token count.
    pub recommended_max_tokens: u32,
    /// The detected query complexity tier that the recommendation was
    /// derived from.
    pub complexity: QueryComplexity,
}

/// Derive an [`OutputBudget`] from the text of a user query.
///
/// The query is first classified into a [`QueryComplexity`] tier via
/// [`classify_query`]. That tier is then asked for its `max_tokens` value,
/// with `model_max_tokens` supplied as the model's maximum output size, and
/// both results are returned together.
///
/// # Arguments
///
/// * `query` - the raw user query text to classify.
/// * `model_max_tokens` - the model's maximum output token count, passed
///   through to the tier's `max_tokens` computation.
///
/// # Returns
///
/// An [`OutputBudget`] carrying the recommended `max_tokens` value and the
/// tier it was derived from.
#[must_use]
pub fn compute_output_budget(query: &str, model_max_tokens: u32) -> OutputBudget {
    let tier = classify_query(query);
    OutputBudget {
        // Evaluated before `tier` is moved into the `complexity` field below.
        recommended_max_tokens: tier.max_tokens(model_max_tokens),
        complexity: tier,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `compute_output_budget` and checks both fields of the result.
    ///
    /// `#[track_caller]` makes a failing assertion report the line of the
    /// calling test rather than this helper.
    #[track_caller]
    fn expect_budget(
        query: &str,
        model_max_tokens: u32,
        expected_tier: QueryComplexity,
        expected_tokens: u32,
    ) {
        let budget = compute_output_budget(query, model_max_tokens);
        assert_eq!(budget.complexity, expected_tier);
        assert_eq!(budget.recommended_max_tokens, expected_tokens);
    }

    // Yes/no style question: smallest tier.
    #[test]
    fn simple_question_gets_small_budget() {
        expect_budget("Is water wet?", 2048, QueryComplexity::VeryShort, 64);
    }

    // Code generation: budget equals the model maximum passed in.
    #[test]
    fn code_request_gets_full_budget() {
        expect_budget(
            "Write code to parse JSON in Rust",
            4096,
            QueryComplexity::Unlimited,
            4096,
        );
    }

    // "Explain ..." request: medium tier.
    #[test]
    fn explanation_gets_medium_budget() {
        expect_budget("Explain how TCP works", 2048, QueryComplexity::Medium, 512);
    }

    // "Who is ..." lookup: short tier.
    #[test]
    fn factual_lookup_gets_short_budget() {
        expect_budget("Who is Marie Curie?", 2048, QueryComplexity::Short, 256);
    }

    // Detailed analysis request: long tier.
    #[test]
    fn complex_analysis_gets_long_budget() {
        expect_budget(
            "Analyze the economic impact in detail",
            2048,
            QueryComplexity::Long,
            1024,
        );
    }
}