ai_tokenopt 0.5.7

Adaptive token optimization engine for LLM inference pipelines — compresses prompts, conversation history, tool schemas, and output streams to minimize token usage while preserving response quality.
Documentation
//! Extractive summarizer for conversation messages
//!
//! Generates concise summaries by extracting key sentences from pruned
//! messages. No LLM call required — uses heuristic scoring.

use crate::types::{ChatMessage, MessageRole};
use unicode_segmentation::UnicodeSegmentation;

use crate::estimator::TokenEstimator;

/// Extractive summarizer that selects key sentences from messages.
///
/// This is a stateless unit struct: it holds no configuration, and
/// [`ExtractiveSummarizer::summarize`] is an associated function that does
/// not take `self`, so no instance is needed to produce a summary.
#[derive(Debug)]
pub struct ExtractiveSummarizer;

impl ExtractiveSummarizer {
    /// Build a compact extractive summary of `messages`.
    ///
    /// Every sentence of every message is scored (role, recency, position
    /// inside its message, information density, length) and the sentences
    /// are then taken in descending score order. Selection stops at the
    /// first sentence whose estimated token cost would push the running
    /// total above `max_tokens`. The chosen sentences are joined with a
    /// single space, in score order.
    ///
    /// If not even the best-scored sentence fits, the first sentence of the
    /// most recent user message is returned, truncated to the budget. An
    /// empty string is returned when there are no messages, or when the
    /// fallback finds no user sentence.
    #[must_use]
    pub fn summarize(messages: &[ChatMessage], max_tokens: u32) -> String {
        if messages.is_empty() {
            return String::new();
        }

        // Score each sentence together with its position in the conversation.
        let mut candidates: Vec<ScoredSentence> = messages
            .iter()
            .enumerate()
            .flat_map(|(msg_idx, msg)| {
                let sentences = extract_sentences(&msg.content);
                let sentence_count = sentences.len();
                sentences
                    .into_iter()
                    .enumerate()
                    .map(move |(sent_idx, text)| {
                        let score = compute_sentence_score(
                            &text,
                            msg.role,
                            msg_idx,
                            sent_idx,
                            sentence_count,
                        );
                        ScoredSentence { text, score }
                    })
            })
            .collect();

        // Best first. The sort is stable, so ties keep conversation order;
        // incomparable scores are treated as equal.
        candidates.sort_by(|lhs, rhs| {
            lhs.score
                .partial_cmp(&rhs.score)
                .map_or(std::cmp::Ordering::Equal, std::cmp::Ordering::reverse)
        });

        // Take sentences in rank order until one no longer fits the budget.
        let mut picked: Vec<&str> = Vec::new();
        let mut spent: u32 = 0;
        for candidate in &candidates {
            let cost = TokenEstimator::estimate_tokens(&candidate.text);
            if spent + cost > max_tokens {
                break;
            }
            spent += cost;
            picked.push(candidate.text.as_str());
        }

        if !picked.is_empty() {
            return picked.join(" ");
        }

        // Nothing fit: fall back to the opening sentence of the latest user
        // message, cut down to the budget.
        let latest_user = messages
            .iter()
            .rev()
            .find(|m| m.role == MessageRole::User);
        match latest_user.and_then(|m| extract_sentences(&m.content).into_iter().next()) {
            Some(opening) => truncate_to_tokens(&opening, max_tokens),
            None => String::new(),
        }
    }
}

/// A sentence with its importance score.
///
/// Instances are built while summarizing and sorted by `score` in
/// descending order before sentences are picked.
struct ScoredSentence {
    /// The sentence text as returned by `extract_sentences` (already trimmed).
    text: String,
    /// Heuristic importance from `compute_sentence_score`; higher ranks first.
    score: f64,
}

/// Split `text` into sentences using Unicode sentence segmentation.
///
/// Each segment is trimmed of surrounding whitespace; segments that are
/// empty after trimming are dropped. Returns the remaining sentences as
/// owned strings, in their original order.
fn extract_sentences(text: &str) -> Vec<String> {
    text.unicode_sentences()
        .filter_map(|segment| {
            let trimmed = segment.trim();
            if trimmed.is_empty() {
                None
            } else {
                Some(trimmed.to_owned())
            }
        })
        .collect()
}

/// Score a sentence by multiple heuristic factors.
///
/// The score is the sum of the following components:
///
/// 1. **Role weight** — user `1.5`, assistant `1.2`, tool `0.8`, system `0.5`.
/// 2. **Recency** — `0.1` per message index (`msg_position` is 0-based, so
///    later messages score higher).
/// 3. **Position in message** — `+0.8` for the first sentence, `+0.4` for
///    the last sentence of a multi-sentence message.
/// 4. **Information density** — see [`information_density`].
/// 5. **Length penalty** — `-0.5` for sentences shorter than 10 characters,
///    `-0.2` for sentences longer than 200 characters.
///
/// Length is measured in Unicode scalar values (`char`s), not bytes, so
/// non-ASCII text is judged by the same thresholds as ASCII text.
fn compute_sentence_score(
    sentence: &str,
    role: MessageRole,
    msg_position: usize,
    sent_position: usize,
    total_sentences: usize,
) -> f64 {
    const SHORT_CHARS: usize = 10;
    const LONG_CHARS: usize = 200;

    // 1. Role weight: user questions and assistant answers matter most.
    let role_weight = match role {
        MessageRole::User => 1.5,
        MessageRole::Assistant => 1.2,
        MessageRole::System => 0.5,
        MessageRole::Tool => 0.8,
    };

    // 2. Recency: a higher index means a more recent message.
    // The cast only loses precision for indices beyond 2^53.
    #[allow(clippy::cast_precision_loss)]
    let recency_bonus = (msg_position as f64) * 0.1;

    // 3. Position within the message: openings and closings carry more info.
    let is_first = sent_position == 0;
    let is_last = total_sentences > 1 && sent_position == total_sentences - 1;
    let position_bonus = if is_first {
        0.8
    } else if is_last {
        0.4
    } else {
        0.0
    };

    // 4. Information density: numbers and capitalized words.
    let density = information_density(sentence);

    // 5. Length penalty, measured in chars. `str::len` would count bytes and
    //    misjudge multi-byte text. Counting is capped just past the upper
    //    threshold, so very long sentences are not scanned in full.
    let char_len = sentence.chars().take(LONG_CHARS + 1).count();
    let length_penalty = if char_len < SHORT_CHARS {
        0.5
    } else if char_len > LONG_CHARS {
        0.2
    } else {
        0.0
    };

    role_weight + recency_bonus + position_bonus + density - length_penalty
}

/// Estimate how much specific, factual content a sentence carries.
///
/// Each whitespace-separated word contributes:
///
/// * `1.0` if it contains at least one ASCII digit (numbers, dates, ...);
/// * `0.5` if it is longer than one byte, starts with an uppercase letter
///   and contains a lowercase letter after the first character (a likely
///   proper noun). The sentence-initial word is not excluded from this
///   check.
///
/// The total is divided by the word count and capped at `2.0`. A sentence
/// with no words scores `0.0`.
fn information_density(sentence: &str) -> f64 {
    // Signal contributed by a single word.
    let word_signal = |word: &str| -> f64 {
        let has_digit = word.chars().any(|ch| ch.is_ascii_digit());

        let mut rest = word.chars();
        let leads_upper = matches!(rest.next(), Some(first) if first.is_uppercase());
        let looks_capitalized = word.len() > 1 && leads_upper && rest.any(char::is_lowercase);

        let mut signal = 0.0;
        if has_digit {
            signal += 1.0;
        }
        if looks_capitalized {
            signal += 0.5;
        }
        signal
    };

    // Single pass: count the words and accumulate their signal together.
    let (word_total, signal_sum) = sentence
        .split_whitespace()
        .fold((0_usize, 0.0_f64), |(count, acc), word| {
            (count + 1, acc + word_signal(word))
        });

    if word_total == 0 {
        return 0.0;
    }

    // Normalize by word count.
    #[allow(clippy::cast_precision_loss)]
    let per_word = signal_sum / word_total as f64;
    per_word.min(2.0)
}

/// Truncate text to fit within a token budget.
///
/// The budget is approximated as four characters per token, counted in
/// Unicode scalar values (`char`s) for both the fit check and the cut.
///
/// * If `text` has at most `max_tokens * 4` characters it is returned
///   unchanged.
/// * Otherwise it is cut to `max_tokens * 4 - 3` characters and `"..."` is
///   appended, so the result never exceeds the character budget.
/// * A budget of zero tokens yields an empty string.
fn truncate_to_tokens(text: &str, max_tokens: u32) -> String {
    const CHARS_PER_TOKEN: usize = 4;
    const ELLIPSIS: &str = "...";

    // Saturate instead of overflowing on targets where `usize` is 32 bits.
    let max_chars = (max_tokens as usize).saturating_mul(CHARS_PER_TOKEN);
    if max_chars == 0 {
        return String::new();
    }

    // The text fits when it has no character at index `max_chars`. This scans
    // at most `max_chars + 1` characters, never the whole input.
    if text.char_indices().nth(max_chars).is_none() {
        return text.to_string();
    }

    // Reserve room for the ellipsis, then cut on a char boundary.
    let keep_chars = max_chars.saturating_sub(ELLIPSIS.len());
    let cut = text
        .char_indices()
        .nth(keep_chars)
        .map_or(text.len(), |(byte_idx, _)| byte_idx);

    let mut truncated = String::with_capacity(cut + ELLIPSIS.len());
    truncated.push_str(&text[..cut]);
    truncated.push_str(ELLIPSIS);
    truncated
}

#[cfg(test)]
mod tests {
    use super::*;

    /// No messages in, empty summary out.
    #[test]
    fn empty_messages_produce_empty_summary() {
        let summary = ExtractiveSummarizer::summarize(&[], 100);
        assert_eq!(summary, "");
    }

    /// A single short user message survives into the summary.
    #[test]
    fn single_message_extracts_sentence() {
        let conversation = [ChatMessage::user("What is the capital of France?")];
        let summary = ExtractiveSummarizer::summarize(&conversation, 100);
        assert!(!summary.is_empty());
        assert!(summary.contains("France"));
    }

    /// With a very tight budget the output stays close to the limit.
    #[test]
    fn respects_token_budget() {
        let question = ChatMessage::user("Tell me about quantum computing and its applications.");
        let answer = ChatMessage::assistant(
            "Quantum computing uses qubits. It has applications in cryptography. \
             Drug discovery benefits greatly. Machine learning is another area.",
        );
        let conversation = [question, answer];

        let summary = ExtractiveSummarizer::summarize(&conversation, 10);
        let tokens = TokenEstimator::estimate_tokens(&summary);
        // A small grace margin over the budget of 10 is tolerated.
        assert!(tokens <= 15, "tokens {tokens} exceeds budget");
    }

    /// User or assistant content should win over the system prompt.
    #[test]
    fn prefers_user_messages() {
        let conversation = [
            ChatMessage::system("You are helpful."),
            ChatMessage::user("What is 2 + 2?"),
            ChatMessage::assistant("The answer is 4."),
        ];
        let result = ExtractiveSummarizer::summarize(&conversation, 50);
        let has_dialogue = result.contains("2 + 2") || result.contains("answer");
        assert!(
            has_dialogue,
            "Expected user/assistant content in summary, got: {result}"
        );
    }

    /// Sentences with numbers outrank plain prose on density.
    #[test]
    fn information_density_scores_numbers_higher() {
        let numeric = information_density("The temperature is 22 degrees on March 15");
        let plain = information_density("This is a simple sentence");
        assert!(numeric > plain);
    }

    /// Empty text yields no sentences.
    #[test]
    fn extract_sentences_handles_empty() {
        let sentences = extract_sentences("");
        assert!(sentences.is_empty());
    }
}