// ai_tokenopt 0.5.5
//
// Adaptive token optimization engine for LLM inference pipelines — compresses prompts, conversation history, tool schemas, and output streams to minimize token usage while preserving response quality.
// Documentation
//! Type definitions for the token optimization engine.
//!
//! When the `pisovereign` feature is enabled, these are zero-cost re-exports
//! from the PiSovereign `domain` crate. In standalone mode, minimal
//! implementations are provided that expose only the API surface required
//! by the optimizer.
//!
//! # Standalone usage
//!
//! ```rust
//! use ai_tokenopt::types::{Conversation, ChatMessage, MessageRole};
//!
//! let mut conv = Conversation::with_system_prompt("You are helpful.");
//! conv.add_user_message("Hello!");
//! conv.add_assistant_message("Hi there!");
//! ```

// ---------------------------------------------------------------------------
// PiSovereign mode — re-export domain types directly (zero-cost)
// ---------------------------------------------------------------------------

#[cfg(feature = "pisovereign")]
pub use domain::Conversation;

#[cfg(feature = "pisovereign")]
pub use domain::entities::{ChatMessage, MessageRole};

#[cfg(feature = "pisovereign")]
pub use domain::entities::{ParameterProperty, ToolDefinition, ToolParameters};

// ---------------------------------------------------------------------------
// Standalone mode — minimal type definitions
// ---------------------------------------------------------------------------

#[cfg(not(feature = "pisovereign"))]
mod standalone {
    use std::collections::HashMap;

    /// Identifies the sender of a conversation message.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
    pub enum MessageRole {
        /// The message comes from the user.
        User,
        /// The message is a response from the assistant.
        Assistant,
        /// The message is a system-level instruction.
        System,
        /// The message carries the output of a tool invocation.
        Tool,
    }

    /// One conversation message: a sender role paired with its text.
    #[derive(Debug, Clone)]
    pub struct ChatMessage {
        /// Role of whoever sent the message.
        pub role: MessageRole,
        /// Text content of the message.
        pub content: String,
    }

    impl ChatMessage {
        /// Shared constructor used by the role-specific helpers below.
        fn with_role(role: MessageRole, content: String) -> Self {
            Self { role, content }
        }

        /// Build a message with the [`MessageRole::User`] role.
        #[must_use]
        pub fn user(content: impl Into<String>) -> Self {
            Self::with_role(MessageRole::User, content.into())
        }

        /// Build a message with the [`MessageRole::Assistant`] role.
        #[must_use]
        pub fn assistant(content: impl Into<String>) -> Self {
            Self::with_role(MessageRole::Assistant, content.into())
        }

        /// Build a message with the [`MessageRole::System`] role.
        #[must_use]
        pub fn system(content: impl Into<String>) -> Self {
            Self::with_role(MessageRole::System, content.into())
        }

        /// Build a message with the [`MessageRole::Tool`] role.
        #[must_use]
        pub fn tool(content: impl Into<String>) -> Self {
            Self::with_role(MessageRole::Tool, content.into())
        }
    }

    /// A conversation: its messages plus optional surrounding context.
    ///
    /// This is the minimal representation the optimizer needs; only the
    /// fields relevant to token optimization are present.
    #[derive(Debug, Clone, Default)]
    pub struct Conversation {
        /// System prompt for the conversation, if one was set.
        pub system_prompt: Option<String>,
        /// Conversation messages, ordered oldest first.
        pub messages: Vec<ChatMessage>,
        /// Rolling summary of conversation history that has been compacted.
        pub summary: Option<String>,
        /// Cached token count of the (possibly structured) system prompt.
        ///
        /// Filled in after the first system prompt optimization pass so
        /// that later turns can skip re-estimating it.
        pub cached_prompt_tokens: Option<u32>,
    }

    impl Conversation {
        /// Create a conversation with no system prompt, messages, summary,
        /// or cached token count.
        #[must_use]
        pub fn new() -> Self {
            Self {
                system_prompt: None,
                messages: Vec::new(),
                summary: None,
                cached_prompt_tokens: None,
            }
        }

        /// Create an otherwise empty conversation that starts with the
        /// given system prompt.
        #[must_use]
        pub fn with_system_prompt(prompt: impl Into<String>) -> Self {
            let system_prompt = Some(prompt.into());
            Self {
                system_prompt,
                ..Self::new()
            }
        }

        /// Push a user message onto the end of the message list.
        pub fn add_user_message(&mut self, content: impl Into<String>) {
            let message = ChatMessage::user(content);
            self.messages.push(message);
        }

        /// Push an assistant message onto the end of the message list.
        pub fn add_assistant_message(&mut self, content: impl Into<String>) {
            let message = ChatMessage::assistant(content);
            self.messages.push(message);
        }
    }

    /// Description of a tool the LLM can invoke during a conversation.
    #[derive(Debug, Clone)]
    pub struct ToolDefinition {
        /// Unique name of the tool (e.g. `"get_weather"`).
        pub name: String,
        /// Human-readable explanation of what the tool does.
        pub description: String,
        /// JSON Schema describing the input parameters the tool expects.
        pub parameters: ToolParameters,
        /// Emoji icon for UI display, if any.
        pub icon: Option<String>,
    }

    /// JSON Schema describing a tool's parameters.
    #[derive(Debug, Clone)]
    pub struct ToolParameters {
        /// Schema type; always `"object"` for tool parameters.
        pub schema_type: String,
        /// Property definitions keyed by parameter name.
        pub properties: HashMap<String, ParameterProperty>,
        /// Names of the parameters that are required.
        pub required: Vec<String>,
    }

    /// One parameter property inside a tool's JSON Schema.
    #[derive(Debug, Clone)]
    pub struct ParameterProperty {
        /// JSON Schema type of the parameter (e.g. `"string"`, `"integer"`).
        pub param_type: String,
        /// Human-readable explanation of the parameter.
        pub description: String,
        /// Enum constraint values for the parameter, if any.
        pub enum_values: Vec<String>,
    }
}

#[cfg(not(feature = "pisovereign"))]
pub use standalone::*;

// ---------------------------------------------------------------------------
// Shared types available regardless of feature flags
// ---------------------------------------------------------------------------

/// Result of a text-in/text-out optimization pass.
///
/// Returned by [`TokenOptimizer::optimize_prompt`](crate::optimizer::TokenOptimizer::optimize_prompt)
/// and [`Pipeline::optimize_text`](crate::pipeline::Pipeline::optimize_text).
///
/// This type is declared outside the feature-gated sections of this module,
/// so it is available both with and without the `pisovereign` feature.
#[derive(Debug, Clone)]
pub struct OptimizedPrompt {
    /// The optimized text.
    pub text: String,
    /// Recommended `max_tokens` for the LLM response; `None` when no
    /// recommendation is available.
    pub recommended_max_tokens: Option<u32>,
    /// Estimated number of tokens in the optimized text.
    // NOTE(review): presumably matches `metadata.tokens_after` — confirm
    // against the code that constructs this struct.
    pub tokens_estimated: u32,
    /// Names of the strategies that were applied during this pass.
    pub strategies_applied: Vec<String>,
    /// Detailed metrics about the optimization (token counts before/after,
    /// reduction ratio, and optional complexity classification).
    pub metadata: OptimizationMetadata,
}

/// Metadata about a token optimization pass.
///
/// Carried by [`OptimizedPrompt`] in its `metadata` field.
#[derive(Debug, Clone)]
pub struct OptimizationMetadata {
    /// Estimated tokens before optimization.
    pub tokens_before: u32,
    /// Estimated tokens after optimization.
    pub tokens_after: u32,
    /// Token reduction ratio (0.0 = no reduction, 1.0 = full removal).
    // NOTE(review): presumably derived from `tokens_before` and
    // `tokens_after`; the formula is not visible in this file — confirm.
    pub reduction_ratio: f32,
    /// Query complexity classification, if output budgeting was applied;
    /// `None` otherwise.
    pub complexity: Option<crate::output::complexity::QueryComplexity>,
}