ai_tokenopt 0.5.10

Adaptive token optimization engine for LLM inference pipelines — compresses prompts, conversation history, tool schemas, and output streams to minimize token usage while preserving response quality.
Documentation
//! Hardware profile auto-detection and adaptive configuration.
//!
//! Detects available system resources (currently only total RAM is
//! consulted) at startup and selects an optimization aggressiveness
//! profile: Minimal, Standard, or Performance. Each profile defines
//! defaults for context window usage, compaction thresholds, the number
//! of tool definitions, and prompt/tool compression.

use serde::{Deserialize, Serialize};

/// Hardware capability tier determining optimization aggressiveness.
///
/// A tier is normally picked by [`detect_profile`], may be refined for a
/// specific model by [`adjust_profile`], and is turned into concrete
/// settings by [`HardwareProfile::config`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum HardwareProfile {
    /// Low-resource device (e.g. Raspberry Pi, < 4 GB RAM).
    /// Most aggressive optimization — smaller context windows,
    /// eager compaction, minimal tool schemas.
    ///
    /// Defaults: fill up to 70% of the context window, compact at 50%,
    /// at most 5 tools, both compression passes enabled.
    Minimal,
    /// Mid-range device (4–16 GB RAM).
    /// Balanced optimization — standard context windows,
    /// moderate compaction thresholds.
    ///
    /// Defaults: fill up to 85% of the context window, compact at 70%,
    /// at most 10 tools, both compression passes enabled.
    Standard,
    /// High-resource device (> 16 GB RAM).
    /// Light-touch optimization — large context windows,
    /// compaction only when approaching limits.
    ///
    /// Defaults: fill up to 95% of the context window, compact at 85%,
    /// at most 20 tools, both compression passes disabled.
    Performance,
}

/// Configuration values derived from a hardware profile.
///
/// Produced by [`HardwareProfile::config`]. The type is plain data (two
/// ratios, a count, and two flags), so it is `Copy` and can be compared
/// with `==`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ProfileConfig {
    /// Maximum fraction of the context window to fill (0.0–1.0).
    pub max_context_fill: f64,
    /// Compaction triggers when usage exceeds this ratio (0.0–1.0).
    pub compaction_threshold: f64,
    /// Maximum number of tool definitions to include.
    pub max_tools: usize,
    /// Whether to apply YAML structured prompt conversion.
    pub structured_prompts: bool,
    /// Whether to apply progressive tool compression.
    pub progressive_tools: bool,
}

impl HardwareProfile {
    /// Returns the default settings associated with this tier.
    ///
    /// | Profile     | Context fill | Compaction at | Max tools | Compression passes |
    /// |-------------|--------------|---------------|-----------|--------------------|
    /// | Minimal     | 0.70         | 0.50          | 5         | enabled            |
    /// | Standard    | 0.85         | 0.70          | 10        | enabled            |
    /// | Performance | 0.95         | 0.85          | 20        | disabled           |
    ///
    /// "Compression passes" covers both `structured_prompts` and
    /// `progressive_tools`, which are always switched together.
    #[must_use]
    pub const fn config(self) -> ProfileConfig {
        // One exhaustive match keeps every tier's numbers on a single line;
        // adding a variant without defaults is a compile error.
        let (max_context_fill, compaction_threshold, max_tools, compress) = match self {
            Self::Minimal => (0.70, 0.50, 5, true),
            Self::Standard => (0.85, 0.70, 10, true),
            Self::Performance => (0.95, 0.85, 20, false),
        };

        ProfileConfig {
            max_context_fill,
            compaction_threshold,
            max_tools,
            structured_prompts: compress,
            progressive_tools: compress,
        }
    }
}

/// Detect the hardware profile based on system resources.
///
/// Uses `sysinfo` to query the machine's total RAM; the CPU core count is
/// not currently consulted. Falls back to [`HardwareProfile::Standard`] if
/// detection fails — a reported total of zero bytes is treated as a failed
/// detection rather than as a tiny machine.
///
/// Thresholds are compared in bytes, with 1 GB taken as 2^30 bytes, so a
/// fractional size is never rounded down before it is classified.
///
/// | RAM        | Cores | Profile     |
/// |------------|-------|-------------|
/// | unknown    | any   | Standard    |
/// | < 4 GB     | any   | Minimal     |
/// | 4–16 GB    | any   | Standard    |
/// | > 16 GB    | any   | Performance |
#[must_use]
pub fn detect_profile() -> HardwareProfile {
    use sysinfo::System;

    const GIB: u64 = 1024 * 1024 * 1024;
    // Totals strictly below this are `Minimal`.
    const MINIMAL_BELOW: u64 = 4 * GIB;
    // Totals up to and including this are `Standard`.
    const STANDARD_UP_TO: u64 = 16 * GIB;

    // Only the memory counters are needed, so refresh just those instead of
    // loading every subsystem with `System::new_all()`.
    let mut sys = System::new();
    sys.refresh_memory();

    match sys.total_memory() {
        // Nothing was reported: use the documented fallback.
        0 => HardwareProfile::Standard,
        bytes if bytes < MINIMAL_BELOW => HardwareProfile::Minimal,
        bytes if bytes <= STANDARD_UP_TO => HardwareProfile::Standard,
        _ => HardwareProfile::Performance,
    }
}

/// Model information used for adaptive context window sizing.
///
/// [`adjust_profile`] consults only `parameter_count`; it does not read
/// `name` or `context_length`.
#[derive(Debug, Clone)]
pub struct ModelInfo {
    /// Model name (e.g. "llama3.2:3b")
    pub name: String,
    /// Maximum context window size in tokens
    pub context_length: u32,
    /// Number of model parameters (e.g. `3_000_000_000` for a 3B model).
    ///
    /// Used by [`adjust_profile`] to decide whether the hardware can run
    /// the model comfortably. `None` means the count is unknown, in which
    /// case [`adjust_profile`] returns the base profile unchanged.
    pub parameter_count: Option<u64>,
}

/// Refine a hardware profile using what is known about the model.
///
/// Rules:
/// - Small model (≤ 3B params) on `Standard` → upgrade to `Performance`
///   (the model is light enough to allow relaxed optimization).
/// - Large model (> 7B params) on any hardware → never upgraded; the base
///   profile is returned.
/// - Everything else — unknown parameter count, mid-sized models, and
///   small models on `Minimal` or `Performance` → base profile unchanged.
///
/// The result is therefore either `base` itself or, in exactly one case,
/// `Performance` in place of `Standard`; a profile is never downgraded.
#[must_use]
pub fn adjust_profile(base: HardwareProfile, model: &ModelInfo) -> HardwareProfile {
    // Largest parameter count that still counts as a "small" model.
    const SMALL_MODEL_MAX_PARAMS: u64 = 3_000_000_000;

    match (base, model.parameter_count) {
        // Small model on mid-range hardware — relax optimization.
        (HardwareProfile::Standard, Some(params)) if params <= SMALL_MODEL_MAX_PARAMS => {
            HardwareProfile::Performance
        }
        // Unknown size, anything above the small-model limit (which includes
        // every > 7B model), and all non-Standard tiers keep the base profile.
        _ => base,
    }
}

#[cfg(test)]
mod tests {
    // Unit tests for the per-tier defaults, hardware detection, and the
    // model-based profile adjustment.
    use super::*;

    /// Builds a `ModelInfo` fixture.
    fn model(name: &str, context_length: u32, parameter_count: Option<u64>) -> ModelInfo {
        ModelInfo {
            name: name.to_string(),
            context_length,
            parameter_count,
        }
    }

    /// Tolerance-based comparison for the ratio fields.
    fn approx_eq(actual: f64, expected: f64) -> bool {
        (actual - expected).abs() < f64::EPSILON
    }

    #[test]
    fn minimal_profile_config() {
        let cfg = HardwareProfile::Minimal.config();
        assert!(approx_eq(cfg.max_context_fill, 0.70));
        assert!(approx_eq(cfg.compaction_threshold, 0.50));
        assert_eq!(cfg.max_tools, 5);
        assert!(cfg.structured_prompts);
        assert!(cfg.progressive_tools);
    }

    #[test]
    fn standard_profile_config() {
        let cfg = HardwareProfile::Standard.config();
        assert!(approx_eq(cfg.max_context_fill, 0.85));
        assert!(approx_eq(cfg.compaction_threshold, 0.70));
        assert_eq!(cfg.max_tools, 10);
        assert!(cfg.structured_prompts);
        assert!(cfg.progressive_tools);
    }

    #[test]
    fn performance_profile_config() {
        let cfg = HardwareProfile::Performance.config();
        assert!(approx_eq(cfg.max_context_fill, 0.95));
        assert!(approx_eq(cfg.compaction_threshold, 0.85));
        assert_eq!(cfg.max_tools, 20);
        assert!(!cfg.structured_prompts);
        assert!(!cfg.progressive_tools);
    }

    #[test]
    fn compaction_triggers_before_context_is_full() {
        // In every tier the compaction threshold sits below the fill limit.
        for profile in [
            HardwareProfile::Minimal,
            HardwareProfile::Standard,
            HardwareProfile::Performance,
        ]
        .iter()
        {
            let cfg = profile.config();
            assert!(
                cfg.compaction_threshold < cfg.max_context_fill,
                "{:?}",
                profile
            );
        }
    }

    #[test]
    fn detect_returns_valid_profile() {
        // The result depends on the host, so only check that detection
        // completes and yields a profile with a usable config.
        let profile = detect_profile();
        let _ = profile.config();
    }

    #[test]
    fn adjust_small_model_on_standard_upgrades() {
        let small = model("llama3.2:1b", 8192, Some(1_000_000_000));
        assert_eq!(
            adjust_profile(HardwareProfile::Standard, &small),
            HardwareProfile::Performance
        );
    }

    #[test]
    fn adjust_small_model_boundary_is_inclusive() {
        // Exactly 3B still counts as small; one parameter more does not.
        let at_limit = model("llama3.2:3b", 8192, Some(3_000_000_000));
        let over_limit = model("just-over-3b", 8192, Some(3_000_000_001));
        assert_eq!(
            adjust_profile(HardwareProfile::Standard, &at_limit),
            HardwareProfile::Performance
        );
        assert_eq!(
            adjust_profile(HardwareProfile::Standard, &over_limit),
            HardwareProfile::Standard
        );
    }

    #[test]
    fn adjust_large_model_never_upgrades() {
        let large = model("llama3.1:70b", 128_000, Some(70_000_000_000));
        // A large model leaves every tier exactly as it was.
        assert_eq!(
            adjust_profile(HardwareProfile::Minimal, &large),
            HardwareProfile::Minimal
        );
        assert_eq!(
            adjust_profile(HardwareProfile::Standard, &large),
            HardwareProfile::Standard
        );
        assert_eq!(
            adjust_profile(HardwareProfile::Performance, &large),
            HardwareProfile::Performance
        );
    }

    #[test]
    fn adjust_no_param_count_keeps_base() {
        let unknown = model("custom-model", 4096, None);
        assert_eq!(
            adjust_profile(HardwareProfile::Minimal, &unknown),
            HardwareProfile::Minimal
        );
        assert_eq!(
            adjust_profile(HardwareProfile::Standard, &unknown),
            HardwareProfile::Standard
        );
    }

    #[test]
    fn adjust_small_model_on_minimal_stays_minimal() {
        let small = model("llama3.2:1b", 8192, Some(1_000_000_000));
        // Don't upgrade Minimal — hardware is genuinely constrained
        assert_eq!(
            adjust_profile(HardwareProfile::Minimal, &small),
            HardwareProfile::Minimal
        );
    }

    #[test]
    fn adjust_small_model_on_performance_stays() {
        let small = model("llama3.2:1b", 8192, Some(1_000_000_000));
        // Already the most relaxed tier; nothing to upgrade to.
        assert_eq!(
            adjust_profile(HardwareProfile::Performance, &small),
            HardwareProfile::Performance
        );
    }

    #[test]
    fn adjust_mid_model_on_standard_stays() {
        let mid = model("llama3.2:7b", 8192, Some(7_000_000_000));
        assert_eq!(
            adjust_profile(HardwareProfile::Standard, &mid),
            HardwareProfile::Standard
        );
    }
}