ai_tokenopt 0.5.6

Adaptive token optimization engine for LLM inference pipelines — compresses prompts, conversation history, tool schemas, and output streams to minimize token usage while preserving response quality.
Documentation
//! Port definitions for the token optimization engine.
//!
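//! The [`SummarizationPort`] trait provides LLM-based text summarization
//! used as a fallback in tier-3 compaction. Implement this trait to connect
//! your own LLM backend.
//!
//! The [`ModelInfoPort`] trait lets the optimizer query model metadata
//! (e.g. context window size); its implementation lives outside this crate.
//!
//! When the `pisovereign` feature is enabled, `InferencePortSummarizer`
//! adapts PiSovereign's `InferencePort` to [`SummarizationPort`] automatically.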
//!
//! # Example (standalone)
//!
//! ```rust,ignore
//! use ai_tokenopt::ports::SummarizationPort;
//! use ai_tokenopt::TokenOptError;
//!
//! struct MyLlm;
//!
//! #[async_trait::async_trait]
//! impl SummarizationPort for MyLlm {
//!     async fn summarize(
//!         &self,
//!         system_prompt: &str,
//!         text: &str,
//!     ) -> Result<String, TokenOptError> {
//!         // Call your LLM backend here
//!         todo!()
//!     }
//! }
//! ```

use async_trait::async_trait;

use crate::error::TokenOptError;

/// Minimal trait for LLM-based text summarization.
///
/// The token optimizer uses this as a tier-3 compaction fallback when
/// heuristic (extractive) summarization is insufficient. Implementing
/// this trait allows the optimizer to request LLM-generated summaries
/// of pruned conversation history.
///
/// If no `SummarizationPort` is provided, the optimizer uses tier-2
/// extractive summarization only — no LLM calls are made.
///
/// Implementors must be `Send + Sync`. The async method is declared through
/// the `async_trait` attribute macro, so an implementation needs the same
/// attribute on its `impl` block (as shown in the module-level example).
#[async_trait]
pub trait SummarizationPort: Send + Sync {
    /// Generate a summary of the given text using an LLM.
    ///
    /// # Arguments
    ///
    /// * `system_prompt` — Instructions for the LLM (e.g. "Summarize concisely")
    /// * `text` — The text to summarize
    ///
    /// # Returns
    ///
    /// The generated summary as an owned `String`.
    ///
    /// # Errors
    ///
    /// Returns [`TokenOptError`] if the LLM call fails.
    async fn summarize(&self, system_prompt: &str, text: &str) -> Result<String, TokenOptError>;
}

/// Port for querying model metadata (context window size, etc.).
///
/// The adapter implementation (e.g. calling Ollama `/api/show`) lives
/// outside of `ai_tokenopt`. Provide an implementation when constructing
/// the `TokenOptimizer` or `Pipeline`.
///
/// Implementors must be `Send + Sync`. The async method is declared through
/// the `async_trait` attribute macro, so an implementation needs the same
/// attribute on its `impl` block.
#[async_trait]
pub trait ModelInfoPort: Send + Sync {
    /// Retrieve information about a model.
    ///
    /// # Arguments
    ///
    /// * `model` — Identifier of the model to look up. The accepted format is
    ///   presumably defined by the backing adapter — confirm against the
    ///   concrete implementation.
    ///
    /// # Returns
    ///
    /// A [`ModelInfo`](crate::profile::ModelInfo) describing the requested model.
    ///
    /// # Errors
    ///
    /// Returns [`TokenOptError`] if the model info cannot be fetched.
    async fn get_model_info(&self, model: &str)
    -> Result<crate::profile::ModelInfo, TokenOptError>;
}

/// Adapter wrapping PiSovereign's `InferencePort` as a [`SummarizationPort`].
///
/// Created automatically by the
/// [`TokenOptimizedInferencePort`](crate::decorator::TokenOptimizedInferencePort)
/// decorator. Not needed for standalone usage.
///
/// The single public field is a borrowed `&dyn InferencePort`; the adapter
/// does not own the port and is valid only for the borrow's lifetime `'a`.
///
/// Only compiled when the `pisovereign` feature is enabled.
#[cfg(feature = "pisovereign")]
pub struct InferencePortSummarizer<'a>(pub &'a dyn application::ports::InferencePort);

#[cfg(feature = "pisovereign")]
impl std::fmt::Debug for InferencePortSummarizer<'_> {
    /// Formats the adapter as an opaque struct.
    ///
    /// Only the type name followed by `..` is written; the wrapped
    /// inference port is omitted from the output.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // No fields are registered on the builder, so the non-exhaustive
        // finisher is what produces the `{ .. }` body.
        let mut builder = formatter.debug_struct("InferencePortSummarizer");
        builder.finish_non_exhaustive()
    }
}

#[cfg(feature = "pisovereign")]
#[async_trait]
impl SummarizationPort for InferencePortSummarizer<'_> {
    /// Summarizes `text` by delegating to the wrapped inference port.
    ///
    /// Forwards `system_prompt` and `text` to `generate_with_system` and
    /// returns the `content` field of the successful response.
    ///
    /// # Errors
    ///
    /// Any error from the inference port is converted to its string form and
    /// returned as [`TokenOptError::InferenceError`].
    async fn summarize(&self, system_prompt: &str, text: &str) -> Result<String, TokenOptError> {
        self.0
            .generate_with_system(system_prompt, text)
            .await
            // Keep only the generated text from the response.
            .map(|response| response.content)
            // Flatten the port's error type into the crate's error enum.
            .map_err(|err| TokenOptError::InferenceError(err.to_string()))
    }
}