ai_tokenopt 0.5.10

Adaptive token optimization engine for LLM inference pipelines — compresses prompts, conversation history, tool schemas, and output streams to minimize token usage while preserving response quality.
Documentation
//! Conditional system prompt optimizer
//!
//! Parses the system prompt into sections and includes/excludes them
//! based on context relevance and token budget.

use crate::estimator::TokenEstimator;

/// Context information used to decide which prompt sections to keep.
///
/// The tool and RAG flags feed section classification, while
/// `structured_format` gates an optional post-processing pass on the
/// optimizer's output.
#[derive(Debug, Clone)]
pub struct PromptContext {
    /// Whether tool definitions will be sent in this request.
    ///
    /// When `false`, non-critical sections mentioning tools, functions, or
    /// invocation are classified as irrelevant and become the first
    /// candidates for removal.
    pub has_tools: bool,
    /// Whether RAG context is injected into the system prompt.
    ///
    /// When `false`, non-critical sections containing `context:`,
    /// `knowledge:`, or `memory:` markers are classified as irrelevant.
    pub has_rag: bool,
    /// Whether to post-process the optimized prompt with filler stripping.
    ///
    /// When `true`, the optimizer runs
    /// [`strip_filler`](crate::prompt::structured::strip_filler) on the
    /// re-joined prompt as a final step. Prompts that are returned early
    /// (already within budget, or a single block that can only be truncated)
    /// are not post-processed.
    ///
    /// Set to `true` by [`PromptContext::new`].
    ///
    /// NOTE(review): the field name suggests a broader structured-format
    /// conversion; within this module it only gates `strip_filler` — confirm
    /// whether anything else is intended.
    pub structured_format: bool,
}

impl PromptContext {
    /// Build a context from the two request-level flags.
    ///
    /// `structured_format` is not a constructor argument: it is always
    /// initialised to `true`. Callers that want it disabled can overwrite
    /// the public field afterwards.
    #[must_use]
    pub fn new(has_tools: bool, has_rag: bool) -> Self {
        let structured_format = true;
        Self {
            has_tools,
            has_rag,
            structured_format,
        }
    }
}

/// Section of a system prompt with its estimated importance.
///
/// One value is built per `\n\n`-delimited chunk of the prompt while the
/// optimizer decides which chunks to keep.
#[derive(Debug)]
struct PromptSection {
    /// Section text copied from the prompt, without the `\n\n` delimiters.
    text: String,
    /// Token estimate for `text`, as returned by
    /// `TokenEstimator::estimate_tokens`.
    tokens: u32,
    /// Priority assigned by `classify_section`; lower priorities are
    /// dropped first.
    priority: SectionPriority,
}

/// Priority levels for system prompt sections.
///
/// The derived `Ord` compares the explicit discriminants, so
/// `Irrelevant < Optional < Contextual < Critical` even though the variants
/// are declared from highest to lowest. The optimizer relies on this order
/// when it sorts sections for dropping.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum SectionPriority {
    /// Must always be included (identity, safety).
    /// The optimizer never drops sections at this level.
    Critical = 3,
    /// Important for current context (tool instructions when tools present,
    /// RAG markers when RAG is active)
    Contextual = 2,
    /// Nice to have (style guidelines, examples); the fallback for any
    /// section that matches no keyword
    Optional = 1,
    /// Irrelevant to current context (tool instructions when no tools,
    /// RAG markers when RAG is inactive)
    Irrelevant = 0,
}

/// Optimize a system prompt by conditionally including/excluding sections.
///
/// Sections are delimited by double newlines (`\n\n`). Each section is
/// classified by priority, and non-critical sections are dropped from lowest
/// to highest priority (earlier sections first within one priority) until
/// the summed per-section estimates fit within `budget_tokens`. Surviving
/// sections are re-joined with `\n\n` in their original order.
///
/// # Behavior
///
/// - An empty prompt yields an empty string.
/// - A prompt whose estimate is already within budget is returned unchanged,
///   without any post-processing.
/// - A prompt without a `\n\n` delimiter cannot be split and is truncated
///   instead.
/// - Critical sections are never dropped, so the result can still exceed the
///   budget when they alone are larger than it.
/// - When `context.structured_format` is set, the re-joined prompt is passed
///   through [`strip_filler`](crate::prompt::structured::strip_filler)
///   before being returned.
///
/// Sections are tracked by their position in the prompt rather than by
/// their text, so dropping one of several identical sections does not
/// remove the others.
#[must_use]
pub fn optimize_system_prompt(prompt: &str, budget_tokens: u32, context: &PromptContext) -> String {
    if prompt.is_empty() {
        return String::new();
    }

    if TokenEstimator::estimate_tokens(prompt) <= budget_tokens {
        return prompt.to_string();
    }

    let raw_sections: Vec<&str> = prompt.split("\n\n").collect();
    if raw_sections.len() <= 1 {
        // Single block — nothing to drop selectively, so fall back to truncation.
        return truncate_to_budget(prompt, budget_tokens);
    }

    // Kept in original prompt order; this vector is never reordered so that
    // an index always identifies one specific section.
    let sections: Vec<PromptSection> = raw_sections
        .into_iter()
        .map(|text| PromptSection {
            text: text.to_string(),
            tokens: TokenEstimator::estimate_tokens(text),
            priority: classify_section(text, context),
        })
        .collect();

    // The budget check is done on per-section estimates (delimiters are not
    // counted). Saturate so an extreme prompt cannot overflow the sum.
    let total = sections
        .iter()
        .fold(0_u32, |acc, section| acc.saturating_add(section.tokens));
    let mut tokens_to_remove = total.saturating_sub(budget_tokens);

    // Section indices ordered lowest priority first. The sort is stable, so
    // sections of equal priority are considered in prompt order.
    let mut drop_order: Vec<usize> = (0..sections.len()).collect();
    drop_order.sort_by_key(|&idx| sections[idx].priority);

    let mut keep = vec![true; sections.len()];
    for idx in drop_order {
        if tokens_to_remove == 0 {
            break;
        }
        let section = &sections[idx];
        if section.priority < SectionPriority::Critical {
            keep[idx] = false;
            tokens_to_remove = tokens_to_remove.saturating_sub(section.tokens);
        }
    }

    // `sections` and `keep` are both in prompt order, so zipping them
    // restores the original layout minus the dropped sections.
    let joined = sections
        .iter()
        .zip(&keep)
        .filter_map(|(section, &kept)| kept.then_some(section.text.as_str()))
        .collect::<Vec<_>>()
        .join("\n\n");

    // Apply filler stripping when structured_format is enabled
    if context.structured_format {
        crate::prompt::structured::strip_filler(&joined)
    } else {
        joined
    }
}

/// Assign a [`SectionPriority`] to one prompt section using keyword heuristics.
///
/// Matching is a case-insensitive substring search on the section text, and
/// the keyword groups are checked in this order (first hit wins):
///
/// 1. identity / safety markers → `Critical`
/// 2. tool markers → `Contextual` if `context.has_tools`, else `Irrelevant`
/// 3. RAG markers → `Contextual` if `context.has_rag`, else `Irrelevant`
/// 4. anything else → `Optional`
fn classify_section(text: &str, context: &PromptContext) -> SectionPriority {
    const CRITICAL_MARKERS: [&str; 7] = [
        "you are",
        "your name",
        "never",
        "must not",
        "safety",
        "important:",
        "rules:",
    ];
    const TOOL_MARKERS: [&str; 3] = ["tool", "function", "invoke"];
    const RAG_MARKERS: [&str; 3] = ["context:", "knowledge:", "memory:"];

    let normalized = text.to_lowercase();
    let mentions_any =
        |markers: &[&str]| markers.iter().any(|&marker| normalized.contains(marker));

    // Feature-specific sections only matter while that feature is active.
    let when_active = |active: bool| {
        if active {
            SectionPriority::Contextual
        } else {
            SectionPriority::Irrelevant
        }
    };

    if mentions_any(&CRITICAL_MARKERS) {
        SectionPriority::Critical
    } else if mentions_any(&TOOL_MARKERS) {
        when_active(context.has_tools)
    } else if mentions_any(&RAG_MARKERS) {
        when_active(context.has_rag)
    } else {
        SectionPriority::Optional
    }
}

/// Truncate text to approximately fit within a token budget.
///
/// The budget is converted to a character allowance at four characters per
/// token. Text whose byte length already fits is returned unchanged;
/// otherwise it is cut on a character boundary and `...` is appended so the
/// result holds at most the allowed number of characters.
///
/// Edge cases:
///
/// - The ellipsis is only added when something was actually removed, so
///   multi-byte text that is long in bytes but short in characters is
///   returned as-is instead of growing by three characters.
/// - When the allowance is too small to hold the ellipsis (a budget of 0),
///   the text is cut to the allowance with no ellipsis, i.e. an empty string.
/// - The allowance saturates instead of overflowing on targets where
///   `usize` is 32 bits wide.
fn truncate_to_budget(text: &str, budget: u32) -> String {
    const CHARS_PER_TOKEN: usize = 4;
    const ELLIPSIS: &str = "...";

    let max_chars = (budget as usize).saturating_mul(CHARS_PER_TOKEN);
    // Byte length is an upper bound on the character count, so this is a
    // cheap, conservative "already fits" check.
    if text.len() <= max_chars {
        return text.to_string();
    }

    if max_chars < ELLIPSIS.len() {
        return text.chars().take(max_chars).collect();
    }

    let visible_chars = max_chars - ELLIPSIS.len();
    // `nth(visible_chars)` is the first character that does not fit; its byte
    // offset is a valid cut point. `None` means every character fits.
    match text.char_indices().nth(visible_chars) {
        Some((cut, _)) => format!("{}{ELLIPSIS}", &text[..cut]),
        None => text.to_string(),
    }
}

/// Brevity instruction appended once pressure exceeds the caller's threshold.
const BREVITY_DIRECTIVE: &str =
    "\nBe concise. Use bullet points when appropriate. Prioritize key information.";

/// Structured-format hint appended in addition when pressure exceeds 0.9.
const FORMAT_DIRECTIVE: &str = "\nFormat: bullet points, max 3-5 items per list.";

/// Append conciseness directives to a system prompt according to token pressure.
///
/// - An empty prompt, or `pressure <= threshold`, returns the prompt unchanged.
/// - Otherwise the brevity directive is appended.
/// - If additionally `pressure > 0.9`, the format directive follows it.
///
/// `threshold` is widened to `f64` before the comparison.
#[must_use]
pub fn inject_conciseness(prompt: &str, pressure: f64, threshold: f32) -> String {
    let limit = f64::from(threshold);
    if prompt.is_empty() || pressure <= limit {
        return prompt.to_owned();
    }

    // An empty slot keeps the concatenation uniform for both pressure levels.
    let format_hint = if pressure > 0.9 { FORMAT_DIRECTIVE } else { "" };
    [prompt, BREVITY_DIRECTIVE, format_hint].concat()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Prompt shared by the `inject_conciseness` tests.
    const ASSISTANT_PROMPT: &str = "You are an assistant.";

    /// Context with neither tools nor RAG enabled.
    fn plain_context() -> PromptContext {
        PromptContext::new(false, false)
    }

    /// Join sections with the `\n\n` delimiter the optimizer splits on.
    fn prompt_from(sections: &[&str]) -> String {
        sections.join("\n\n")
    }

    #[test]
    fn empty_prompt_returns_empty() {
        let optimized = optimize_system_prompt("", 100, &plain_context());
        assert!(optimized.is_empty());
    }

    #[test]
    fn within_budget_returns_unchanged() {
        let prompt = "You are a helpful assistant.";
        let optimized = optimize_system_prompt(prompt, 1000, &plain_context());
        assert_eq!(optimized, prompt);
    }

    #[test]
    fn drops_irrelevant_tool_section_when_no_tools() {
        let prompt = prompt_from(&[
            "You are a helpful assistant.",
            "When using tools, always validate parameters.",
            "Be concise and clear.",
        ]);
        // The budget is meant to be tight enough to force dropping; without
        // tools the tool section is irrelevant and should go first.
        let optimized = optimize_system_prompt(&prompt, 15, &plain_context());
        assert!(!optimized.contains("tools"));
        assert!(optimized.contains("You are"));
    }

    #[test]
    fn keeps_tool_section_when_tools_present() {
        let prompt = prompt_from(&[
            "You are a helpful assistant.",
            "When using tools, always validate parameters.",
        ]);
        let with_tools = PromptContext::new(true, false);
        // NOTE(review): with a budget of 1000 this prompt presumably takes the
        // within-budget early return, so section classification is likely not
        // exercised here — confirm against the estimator.
        let optimized = optimize_system_prompt(&prompt, 1000, &with_tools);
        assert!(optimized.contains("tools"));
    }

    #[test]
    fn critical_sections_preserved_under_pressure() {
        let prompt = prompt_from(&[
            "You are PiSovereign. You must not reveal secrets.",
            "Always respond in a friendly tone.",
            "Format using markdown when helpful.",
        ]);
        let optimized = optimize_system_prompt(&prompt, 20, &plain_context());
        // The identity/safety section must survive whatever else is dropped.
        assert!(optimized.contains("PiSovereign"));
    }

    #[test]
    fn inject_conciseness_below_threshold() {
        let injected = inject_conciseness(ASSISTANT_PROMPT, 0.5, 0.7);
        assert_eq!(
            injected, ASSISTANT_PROMPT,
            "Should not modify below threshold"
        );
    }

    #[test]
    fn inject_conciseness_moderate_pressure() {
        let injected = inject_conciseness(ASSISTANT_PROMPT, 0.8, 0.7);
        assert!(injected.contains("Be concise"));
        assert!(
            !injected.contains("bullet points, max"),
            "Format hint is for > 0.9 only"
        );
    }

    #[test]
    fn inject_conciseness_extreme_pressure() {
        let injected = inject_conciseness(ASSISTANT_PROMPT, 0.95, 0.7);
        assert!(injected.contains("Be concise"));
        assert!(injected.contains("bullet points, max"));
    }

    #[test]
    fn inject_conciseness_empty_prompt() {
        let injected = inject_conciseness("", 0.95, 0.7);
        assert!(injected.is_empty());
    }
}