//! car-memgine 0.15.0
//!
//! Memgine — graph-based memory engine for the Common Agent Runtime.
//!
//! Configuration for the memgine engine.

/// Context assembly mode — controls which layers are included.
///
/// `Full` runs all layers including embedding-based scoring, skill lookup,
/// LLM summarization, and known-unknowns extraction.
///
/// `Fast` skips expensive operations for latency-sensitive paths (voice,
/// real-time). Keeps: identity, constraints, recent conversation (no
/// embedding flush), environment. Skips: skill lookup, embedding-based
/// fact scoring, known-unknowns extraction.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ContextMode {
    /// Run every layer, including the expensive ones (the default).
    #[default]
    Full,
    /// Latency-sensitive subset; see the type-level docs for what is skipped.
    Fast,
}

/// Share of the total token budget allocated to a single context layer.
#[derive(Debug, Clone, Copy)]
pub struct LayerBudget {
    /// Fraction of the total budget, expected in `[0.0, 1.0]`.
    pub fraction: f64,
}

impl LayerBudget {
    /// Number of tokens this layer receives out of `total`.
    ///
    /// The float-to-integer cast truncates toward zero, so the result
    /// never exceeds `total * fraction`.
    pub fn tokens(&self, total: usize) -> usize {
        let share = self.fraction * total as f64;
        share as usize
    }
}

/// Utilization levels at which compaction kicks in.
#[derive(Debug, Clone, Copy)]
pub struct CompactionThresholds {
    /// Fraction of budget utilization where compaction becomes eligible.
    pub soft: f64,
    /// Fraction of budget utilization where compaction is forced.
    pub hard: f64,
}

impl Default for CompactionThresholds {
    /// Soft trigger at 70% utilization, hard trigger at 95%.
    fn default() -> Self {
        Self { soft: 0.70, hard: 0.95 }
    }
}

/// Tunable parameters for the memgine engine: token budgets, layer split,
/// compaction behavior, and content-type weights.
#[derive(Debug, Clone)]
pub struct MemgineConfig {
    /// Fixed token budget for context assembly (used when no model context window is provided).
    pub token_budget: usize,
    /// Share of the budget given to layer 1 (default 0.05).
    pub layer1_budget: LayerBudget,
    /// Share of the budget given to layer 2 (default 0.50).
    pub layer2_budget: LayerBudget,
    /// Share of the budget given to layer 3 (default 0.30).
    pub layer3_budget: LayerBudget,
    /// Share of the budget given to layer 4 (default 0.15).
    pub layer4_budget: LayerBudget,
    /// Soft/hard utilization levels that trigger compaction.
    pub thresholds: CompactionThresholds,
    /// Maximum size of the working set (default 10).
    pub working_set_max: usize,
    /// Most-recent working-set entries always retained (default 3).
    pub working_set_keep_recent: usize,
    /// Maximum environment entries included in context (default 5).
    pub environment_max: usize,
    /// Maximum skills to include in context (SkillRL K parameter, default 6).
    pub max_skills_in_context: usize,
    /// Evolution threshold — trigger evolution when domain success rate drops below this.
    pub evolution_threshold: f64,
    /// Budget weight multiplier for code facts (denser content, needs more tokens).
    pub code_budget_weight: f64,
    /// Budget weight multiplier for structured data facts (compressible key-value).
    pub structured_budget_weight: f64,
    /// Number of most-recent conversation turns to always keep verbatim during compaction.
    pub conversation_keep_recent: usize,
    /// Maximum turns to group per summary batch during compaction.
    pub compaction_batch_size: usize,
    /// Number of conversation turns between speculative compaction runs.
    /// After every N turns, background summaries are pre-computed so they're
    /// ready when context fills up. Set to 0 to disable. Default: 10.
    pub speculative_compaction_interval: usize,
    /// Tokens reserved for the model's response output (default 4096).
    pub response_reservation: usize,
    /// Fraction of remaining context window to use for context assembly (default 0.40).
    /// Only used when a model context window is provided to `effective_budget()`.
    pub context_budget_fraction: f64,
}

impl Default for MemgineConfig {
    /// Defaults tuned for an 8K fixed budget with a 5/50/30/15 layer split.
    fn default() -> Self {
        Self {
            // Fixed-budget fallback plus dynamic-budget parameters.
            token_budget: 8000,
            response_reservation: 4096,
            context_budget_fraction: 0.40,
            // Per-layer budget split — fractions sum to 1.0.
            layer1_budget: LayerBudget { fraction: 0.05 },
            layer2_budget: LayerBudget { fraction: 0.50 },
            layer3_budget: LayerBudget { fraction: 0.30 },
            layer4_budget: LayerBudget { fraction: 0.15 },
            // Compaction behavior.
            thresholds: CompactionThresholds::default(),
            conversation_keep_recent: 6,
            compaction_batch_size: 8,
            speculative_compaction_interval: 10,
            // Working set and environment caps.
            working_set_max: 10,
            working_set_keep_recent: 3,
            environment_max: 5,
            // Skill selection and evolution.
            max_skills_in_context: 6,
            evolution_threshold: 0.6,
            // Content-type budget weight multipliers.
            code_budget_weight: 1.5,
            structured_budget_weight: 0.8,
        }
    }
}

impl MemgineConfig {
    /// Token budget for a numbered layer (1-4) under the fixed `token_budget`.
    /// Unknown layer numbers receive 0 tokens.
    pub fn layer_tokens(&self, layer: u8) -> usize {
        match layer {
            1 => self.layer1_budget.tokens(self.token_budget),
            2 => self.layer2_budget.tokens(self.token_budget),
            3 => self.layer3_budget.tokens(self.token_budget),
            4 => self.layer4_budget.tokens(self.token_budget),
            _ => 0,
        }
    }

    /// Compute the effective token budget for context assembly.
    ///
    /// When `model_context_window` is provided, dynamically sizes the budget:
    ///   budget = (context_window - response_reservation) * context_budget_fraction
    /// Clamped to a minimum of 2000 tokens to remain useful, but never more
    /// than the remaining window (so very small windows yield less than 2000).
    ///
    /// When `None`, falls back to the fixed `token_budget` (default 8000).
    pub fn effective_budget(&self, model_context_window: Option<usize>) -> usize {
        match model_context_window {
            Some(ctx_window) if ctx_window > 0 => {
                let remaining = ctx_window.saturating_sub(self.response_reservation);
                let dynamic = (remaining as f64 * self.context_budget_fraction) as usize;
                // Clamp: at least 2000 tokens, at most the remaining window
                dynamic.max(2000).min(remaining)
            }
            _ => self.token_budget,
        }
    }

    /// Sanity-check the configuration.
    ///
    /// # Errors
    /// Returns a human-readable message when:
    /// - any layer fraction is NaN, negative, or greater than 1.0,
    /// - the four layer fractions do not sum to 1.0 (±0.01),
    /// - either compaction threshold is outside `[0.0, 1.0]`,
    /// - the soft threshold is not strictly below the hard threshold.
    pub fn validate(&self) -> Result<(), String> {
        let fractions = [
            self.layer1_budget.fraction,
            self.layer2_budget.fraction,
            self.layer3_budget.fraction,
            self.layer4_budget.fraction,
        ];
        // Range-check each fraction first: the sum check alone would accept
        // nonsense like (-0.5, 0.5, 0.5, 0.5). The negated `contains` also
        // rejects NaN, which compares false against any range bound.
        if fractions.iter().any(|f| !(0.0..=1.0).contains(f)) {
            return Err(format!(
                "Each layer fraction must be in [0.0, 1.0], got {:?}",
                fractions
            ));
        }
        let total: f64 = fractions.iter().sum();
        if (total - 1.0).abs() > 0.01 {
            return Err(format!("Layer fractions must sum to 1.0, got {}", total));
        }
        // Thresholds are utilization fractions; values outside [0, 1] are
        // meaningless even when soft < hard holds.
        if !(0.0..=1.0).contains(&self.thresholds.soft)
            || !(0.0..=1.0).contains(&self.thresholds.hard)
        {
            return Err("Compaction thresholds must be in [0.0, 1.0]".to_string());
        }
        if self.thresholds.soft >= self.thresholds.hard {
            return Err("Soft threshold must be < hard threshold".to_string());
        }
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn effective_budget_none_uses_fixed() {
        // No model window supplied -> the fixed token_budget wins.
        assert_eq!(MemgineConfig::default().effective_budget(None), 8000);
    }

    #[test]
    fn effective_budget_large_model() {
        // 272K window (GPT-5.2): (272000 - 4096) * 0.40 = 107,161 (truncated).
        let cfg = MemgineConfig::default();
        assert_eq!(cfg.effective_budget(Some(272_000)), 107_161);
    }

    #[test]
    fn effective_budget_small_model() {
        // 8K window: (8000 - 4096) * 0.40 = 1561, raised to the 2000 floor.
        let cfg = MemgineConfig::default();
        assert_eq!(cfg.effective_budget(Some(8_000)), 2000);
    }

    #[test]
    fn effective_budget_zero_window() {
        // A zero-sized window is treated like "unknown" -> fixed budget.
        let cfg = MemgineConfig::default();
        assert_eq!(cfg.effective_budget(Some(0)), 8000);
    }

    #[test]
    fn effective_budget_medium_model() {
        // 128K window: (128000 - 4096) * 0.40 = 49,561 (truncated).
        let cfg = MemgineConfig::default();
        assert_eq!(cfg.effective_budget(Some(128_000)), 49_561);
    }
}