ai_tokenopt 0.5.9

Adaptive token optimization engine for LLM inference pipelines — compresses prompts, conversation history, tool schemas, and output streams to minimize token usage while preserving response quality.
Documentation
//! Tool schema compressor
//!
//! Reduces the token cost of tool definitions by truncating verbose
//! descriptions while preserving the essential information the LLM
//! needs to select and invoke tools correctly.

use crate::types::{ParameterProperty, ToolDefinition, ToolParameters};

/// Compress tool definitions to reduce token usage.
///
/// Strategy:
/// - Truncate description to first sentence
/// - Shorten parameter descriptions
/// - Preserve name, type, required, and enum constraints unchanged
#[must_use]
pub fn compress_tool_definitions(tools: &[ToolDefinition]) -> Vec<ToolDefinition> {
    tools.iter().map(compress_single_tool).collect()
}

/// Build the compressed counterpart of one tool definition.
///
/// The description is shortened with `first_sentence` and the parameters are
/// rebuilt by `compress_parameters`; the name and icon are cloned as-is.
fn compress_single_tool(tool: &ToolDefinition) -> ToolDefinition {
    let description = first_sentence(&tool.description);
    let parameters = compress_parameters(&tool.parameters);

    ToolDefinition {
        name: tool.name.clone(),
        description,
        parameters,
        icon: tool.icon.clone(),
    }
}

/// Compress tool parameters by shortening descriptions.
fn compress_parameters(params: &ToolParameters) -> ToolParameters {
    let properties = params
        .properties
        .iter()
        .map(|(name, prop)| {
            let compressed = ParameterProperty {
                param_type: prop.param_type.clone(),
                description: first_sentence(&prop.description),
                enum_values: prop.enum_values.clone(),
            };
            (name.clone(), compressed)
        })
        .collect();

    ToolParameters {
        schema_type: params.schema_type.clone(),
        properties,
        required: params.required.clone(),
    }
}

/// Extract the first sentence from text.
///
/// Scans for the first `.`, `!`, or `?` that is not the very first character
/// and is followed by whitespace or the end of the string, and returns the
/// text up to and including that mark.
///
/// If no such boundary exists, text of at most 80 characters is returned
/// unchanged; longer text is cut to its first 77 characters followed by
/// `"..."`, for a total of 80 characters. Lengths are measured in `char`s
/// (Unicode scalar values), not bytes, so short multi-byte text is never
/// given a spurious ellipsis.
///
/// Returns an empty string for empty input.
fn first_sentence(text: &str) -> String {
    /// Maximum length, in characters, of the no-boundary fallback.
    const MAX_CHARS: usize = 80;
    /// Marker appended when the fallback truncates (ASCII, so bytes == chars).
    const ELLIPSIS: &str = "...";

    for (i, ch) in text.char_indices() {
        // A mark at index 0 is skipped so that text starting with punctuation
        // is not reduced to that single character.
        if i > 0 && matches!(ch, '.' | '!' | '?') {
            let end = i + ch.len_utf8();
            let rest = &text[end..];
            if rest.is_empty() || rest.starts_with(char::is_whitespace) {
                return text[..end].to_string();
            }
        }
    }

    // No sentence boundary found. `nth(MAX_CHARS)` is the 81st character, so
    // `None` means the text already fits. Counting chars rather than bytes
    // keeps this check consistent with the char-based truncation below.
    if text.chars().nth(MAX_CHARS).is_none() {
        return text.to_string();
    }

    let keep = MAX_CHARS - ELLIPSIS.len();
    let truncated: String = text.chars().take(keep).collect();
    format!("{truncated}{ELLIPSIS}")
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashMap;

    /// Build a tool fixture with the given name and description and a single
    /// required `query` string parameter whose description spans several
    /// sentences.
    fn make_tool(name: &str, desc: &str) -> ToolDefinition {
        let query = ParameterProperty {
            param_type: "string".to_string(),
            description: "The search query. Must be a non-empty string. \
                          Supports advanced search operators like AND, OR."
                .to_string(),
            enum_values: vec![],
        };
        let parameters = ToolParameters {
            schema_type: "object".to_string(),
            properties: HashMap::from([("query".to_string(), query)]),
            required: vec!["query".to_string()],
        };

        ToolDefinition {
            name: name.to_string(),
            description: desc.to_string(),
            parameters,
            icon: None,
        }
    }

    // A period followed by a space ends the first sentence.
    #[test]
    fn first_sentence_extraction() {
        let input = "Search the web. Returns results from Brave API.";
        assert_eq!(first_sentence(input), "Search the web.");
    }

    // Short text without a sentence boundary is returned as-is.
    #[test]
    fn first_sentence_no_period() {
        let input = "Short description";
        assert_eq!(first_sentence(input), "Short description");
    }

    // Long text without a sentence boundary is cut and marked with an ellipsis.
    #[test]
    fn first_sentence_long_no_period() {
        let long = "A".repeat(100);
        let result = first_sentence(&long);
        assert!(result.len() <= 80);
        assert!(result.ends_with("..."));
    }

    // The tool description is reduced to its first sentence; the name is kept.
    #[test]
    fn compress_shortens_description() {
        let description =
            "Search the web using Brave API. Returns top results with snippets and URLs.";
        let compressed = compress_single_tool(&make_tool("search_web", description));
        assert_eq!(compressed.description, "Search the web using Brave API.");
        assert_eq!(compressed.name, "search_web");
    }

    // Parameter descriptions are reduced to their first sentence.
    #[test]
    fn compress_shortens_parameter_descriptions() {
        let compressed = compress_single_tool(&make_tool("test", "Test tool."));
        let param = &compressed.parameters.properties["query"];
        assert_eq!(param.description, "The search query.");
    }

    // The required list and parameter types pass through unchanged.
    #[test]
    fn compress_preserves_required_and_types() {
        let compressed = compress_single_tool(&make_tool("test", "Test tool."));
        let parameters = &compressed.parameters;
        assert_eq!(parameters.required, vec!["query".to_string()]);
        assert_eq!(parameters.properties["query"].param_type, "string");
    }

    // Batch compression keeps every tool, in order, with its name intact.
    #[test]
    fn compress_round_trip_preserves_name() {
        let tools = vec![
            make_tool("get_weather", "Get weather. More details here."),
            make_tool("search", "Search. Additional info."),
        ];
        let compressed = compress_tool_definitions(&tools);
        assert_eq!(compressed.len(), 2);
        assert_eq!(compressed[0].name, "get_weather");
        assert_eq!(compressed[1].name, "search");
    }
}