// ai_tokenopt 0.5.10
//
// Adaptive token optimization engine for LLM inference pipelines — compresses prompts, conversation history, tool schemas, and output streams to minimize token usage while preserving response quality.
// Documentation
//! Criterion benchmark suite for the `ai_tokenopt` crate.
//!
//! Benchmarks are grouped into five functional areas:
//!
//! - `token_estimation` — heuristic estimator throughput across text types
//! - `budget_allocation` — budget calculator with varying component counts
//! - `tool_compression` — tool schema compression throughput
//! - `history_compaction` — full lossless → extractive compaction pipeline
//! - `full_pipeline` — end-to-end optimization latency for realistic convos

// Benchmarks are allowed to use `expect` for infallible setup; panics are acceptable
// in bench harnesses as they indicate broken test data, not production bugs.
#![allow(clippy::expect_used)]

use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main};

use ai_tokenopt::budget::TokenBudget;
use ai_tokenopt::config::TokenOptimizationConfig;
use ai_tokenopt::estimator::TokenEstimator;
use ai_tokenopt::optimizer::TokenOptimizer;
use ai_tokenopt::types::{Conversation, ToolDefinition, ToolParameters};

// ─── Helpers ────────────────────────────────────────────────────────────────

/// Returns the library's default [`TokenOptimizationConfig`].
///
/// The benchmarks in this file use it as their baseline configuration, so any
/// change to the crate defaults is reflected in the measurements.
fn default_config() -> TokenOptimizationConfig {
    TokenOptimizationConfig::default()
}

/// Builds a synthetic conversation fixture.
///
/// The conversation starts with a fixed system prompt and is followed by
/// `message_count` user/assistant exchanges (so `2 * message_count` messages
/// are added). Each exchange embeds its index so the texts differ per turn.
fn build_conversation(message_count: usize) -> Conversation {
    let mut conversation = Conversation::with_system_prompt(
        "You are a helpful AI assistant. Answer questions concisely and accurately.",
    );

    (0..message_count).for_each(|i| {
        let question = format!(
            "Message {i}: What is the capital of country number {i}? Provide a brief explanation."
        );
        conversation.add_user_message(question);

        let answer = format!(
            "Response {i}: The capital of country {i} is City{i}. It is an important cultural \
             and economic centre with a history dating back several centuries."
        );
        conversation.add_assistant_message(answer);
    });

    conversation
}

/// Builds `count` synthetic tool definitions named `tool_0`, `tool_1`, ….
///
/// Every tool gets a multi-sentence description, an `"object"` parameter
/// schema with no properties and no required fields, and no icon.
fn build_tools(count: usize) -> Vec<ToolDefinition> {
    let mut tools = Vec::with_capacity(count);

    for i in 0..count {
        // Deliberately empty schema: only name and description vary per tool.
        let parameters = ToolParameters {
            schema_type: String::from("object"),
            properties: std::collections::HashMap::new(),
            required: vec![],
        };

        tools.push(ToolDefinition {
            name: format!("tool_{i}"),
            description: format!(
                "This tool performs operation number {i}. It accepts various parameters and \
                 returns a structured result. Commonly used in data processing pipelines."
            ),
            parameters,
            icon: None,
        });
    }

    tools
}

// ─── Token Estimation ───────────────────────────────────────────────────────

/// Benchmarks `TokenEstimator::estimate_tokens` over five text samples
/// (short ASCII, medium and long English, German with umlauts, and JSON).
///
/// Each sample is registered under the `token_estimation` group with the
/// function name `heuristic`, and its byte length is declared as the
/// throughput so results can be compared across sample sizes.
fn bench_token_estimation(c: &mut Criterion) {
    use std::hint::black_box;

    let mut group = c.benchmark_group("token_estimation");

    // Held in a named binding so the sample table below can borrow it.
    let long_english = "The quick brown fox jumps over the lazy dog. ".repeat(50);

    let samples: [(&str, &str); 5] = [
        ("short_ascii", "Hello, world!"),
        (
            "medium_english",
            "The quick brown fox jumps over the lazy dog. This is a typical English sentence \
             used for testing purposes.",
        ),
        ("long_english", long_english.as_str()),
        (
            "german_text",
            "Der schnelle braune Fuchs springt über den faulen Hund. Dieses ist ein typischer \
             deutscher Satz für Testzwecke mit Umlauten wie ä, ö, ü und ß.",
        ),
        (
            "json_content",
            r#"{"status": "success", "data": {"items": [1, 2, 3], "total": 3, "page": 1}}"#,
        ),
    ];

    for (label, sample) in samples {
        // Throughput is measured in UTF-8 bytes, not characters.
        group.throughput(Throughput::Bytes(sample.len() as u64));

        let id = BenchmarkId::new("heuristic", label);
        group.bench_with_input(id, &sample, |b, text| {
            b.iter(|| TokenEstimator::estimate_tokens(black_box(text)));
        });
    }

    group.finish();
}

// ─── Budget Allocation ──────────────────────────────────────────────────────

/// Benchmarks conversation estimation followed by budget allocation.
///
/// For conversations of 5, 20, 50 and 200 exchanges, two variants are
/// registered under the `budget_allocation` group:
///
/// - `allocate_no_tools` — calls `budget.allocate(&estimate, false, 0)`
/// - `allocate_with_tools` — calls `budget.allocate(&estimate, true, 8)`
///
/// Note that `TokenEstimator::estimate_conversation` runs inside the timed
/// closure, so each measurement covers estimation *and* allocation.
fn bench_budget_allocation(c: &mut Criterion) {
    let mut group = c.benchmark_group("budget_allocation");
    let config = default_config();
    let budget = TokenBudget::new(&config);

    for msg_count in [5, 20, 50, 200] {
        let conv = build_conversation(msg_count);

        group.bench_with_input(
            BenchmarkId::new("allocate_no_tools", msg_count),
            &conv,
            |b, conv| {
                b.iter(|| {
                    let estimate =
                        TokenEstimator::estimate_conversation(std::hint::black_box(conv));
                    budget.allocate(&estimate, false, 0)
                });
            },
        );

        // `allocate` is only handed a flag and a count here (presumably
        // "tools present" and "number of tools" — confirm against
        // `TokenBudget::allocate`), so no tool definitions need to be built.
        group.bench_with_input(
            BenchmarkId::new("allocate_with_tools", msg_count),
            &conv,
            |b, conv| {
                b.iter(|| {
                    let estimate =
                        TokenEstimator::estimate_conversation(std::hint::black_box(conv));
                    budget.allocate(&estimate, true, 8)
                });
            },
        );
    }

    group.finish();
}

// ─── Tool Compression ───────────────────────────────────────────────────────

/// Benchmarks `TokenOptimizer::optimize_tools` for tool lists of 5, 10, 20
/// and 50 entries.
///
/// Every run uses the same context sentence, which mentions `tool_3` and
/// `tool_7` by name. Results are registered under the `tool_compression`
/// group as `semantic_selection/<tool_count>`, with the tool count declared
/// as element throughput.
fn bench_tool_compression(c: &mut Criterion) {
    use std::hint::black_box;

    let mut group = c.benchmark_group("tool_compression");
    let optimizer = TokenOptimizer::new(default_config());
    let context = "I need tool_3 and tool_7 to analyse the data and generate a report.";

    for tool_count in [5, 10, 20, 50] {
        let tools = build_tools(tool_count);
        let id = BenchmarkId::new("semantic_selection", tool_count);

        group.throughput(Throughput::Elements(tool_count as u64));
        group.bench_with_input(id, &tools, |b, tools| {
            b.iter(|| optimizer.optimize_tools(black_box(context), black_box(tools)));
        });
    }

    group.finish();
}

// ─── History Compaction ─────────────────────────────────────────────────────

/// Benchmarks `TokenOptimizer::optimize_conversation` with a very low
/// compaction trigger ratio, for conversations of 10, 50 and 200 exchanges.
///
/// The async call is driven to completion on a Tokio runtime created once
/// for the whole group. Each iteration works on a fresh clone of the
/// fixture; the clone happens inside the timed closure, so its cost is part
/// of the reported time.
fn bench_history_compaction(c: &mut Criterion) {
    use std::hint::black_box;

    let mut group = c.benchmark_group("history_compaction");

    // A near-zero trigger ratio so that even the smallest fixture is compacted.
    let mut config = default_config();
    config.compaction_trigger_ratio = 0.0001; // always trigger

    let rt = tokio::runtime::Runtime::new().expect("create tokio runtime");
    let optimizer = TokenOptimizer::new(config);

    for msg_count in [10, 50, 200] {
        let fixture = build_conversation(msg_count);
        let id = BenchmarkId::new("compaction_without_llm", msg_count);

        group.bench_with_input(id, &fixture, |b, conv| {
            b.iter(|| {
                // The optimizer mutates its input, so every iteration needs its own copy.
                let mut working = black_box(conv.clone());
                // Kept as a lazy async block so the optimizer call itself runs
                // inside `block_on`, i.e. within the runtime context.
                let job = async { optimizer.optimize_conversation(&mut working, None).await };
                rt.block_on(job)
            });
        });
    }

    group.finish();
}

// ─── Full Pipeline ───────────────────────────────────────────────────────────

/// Benchmarks the optimizer end to end with the default configuration, for
/// conversations of 5, 20 and 50 exchanges.
///
/// Two variants are registered under the `full_pipeline` group:
///
/// - `optimize_conversation` — `optimize_conversation(&mut conv, None)`
/// - `optimize_with_tools` — `optimize_conversation_with_tools(&mut conv, &tools, None)`
///   with a fixed set of eight synthetic tools
///
/// Each iteration clones the fixture inside the timed closure (the optimizer
/// takes `&mut`), so the clone cost is included in the reported time.
fn bench_full_pipeline(c: &mut Criterion) {
    let mut group = c.benchmark_group("full_pipeline");
    let optimizer = TokenOptimizer::new(default_config());
    let rt = tokio::runtime::Runtime::new().expect("create tokio runtime");

    // The tool set does not depend on the conversation size, so build it once
    // instead of once per `msg_count`.
    let tools = build_tools(8);

    for msg_count in [5, 20, 50] {
        let base_conv = build_conversation(msg_count);

        group.bench_with_input(
            BenchmarkId::new("optimize_conversation", msg_count),
            &base_conv,
            |b, conv| {
                b.iter(|| {
                    let mut c = std::hint::black_box(conv.clone());
                    rt.block_on(async { optimizer.optimize_conversation(&mut c, None).await })
                });
            },
        );

        group.bench_with_input(
            BenchmarkId::new("optimize_with_tools", msg_count),
            &base_conv,
            |b, conv| {
                b.iter(|| {
                    let mut c = std::hint::black_box(conv.clone());
                    rt.block_on(async {
                        optimizer
                            .optimize_conversation_with_tools(&mut c, &tools, None)
                            .await
                    })
                });
            },
        );
    }

    group.finish();
}

// ─── Harness ────────────────────────────────────────────────────────────────

// Collects the five benchmark functions of this file into a group named
// `benches`, then hands that group to `criterion_main!`, which provides the
// entry point of the benchmark binary.
criterion_group!(
    benches,
    bench_token_estimation,
    bench_budget_allocation,
    bench_tool_compression,
    bench_history_compaction,
    bench_full_pipeline,
);
criterion_main!(benches);