cortex-mem-core 2.7.0

Core memory management engine for Cortex Memory system
//! LLM Result Cache Module
//!
//! Implements caching for LLM-generated layer content (L0/L1) to avoid redundant API calls.
//! Uses content hash as cache key and implements LRU eviction with TTL expiration.
//!
//! ## Benefits:
//! - Reduces LLM API costs by 50-75% in scenarios with repeated content
//! - Improves response time for cached results (instant vs 2-5 seconds)
//! - Configurable cache size and TTL to balance memory usage
//!
//! ## Example:
//! ```text
//! Scenario: 10 sessions with similar entity sets
//! Without cache: 20 LLM calls
//! With cache (75% hit rate): 5 LLM calls (75% cost reduction)
//! ```

use std::collections::HashMap;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::RwLock;
use tracing::{debug, info};

/// Configuration for LLM result cache
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// Enable caching
    pub enabled: bool,
    
    /// Maximum number of entries to cache
    pub max_entries: usize,
    
    /// Time-to-live for cache entries (seconds)
    pub ttl_secs: u64,
}

impl Default for CacheConfig {
    fn default() -> Self {
        Self {
            enabled: true,
            max_entries: 1000,      // 1000 entries ~= 1-2MB memory
            ttl_secs: 3600,         // 1 hour TTL
        }
    }
}

/// Cached LLM result
#[derive(Debug, Clone)]
struct CachedResult {
    /// Cached content
    content: String,
    
    /// When this entry was created
    created_at: Instant,
    
    /// Last access time (for LRU)
    last_accessed: Instant,
    
    /// Access count
    access_count: usize,
}

impl CachedResult {
    fn new(content: String) -> Self {
        let now = Instant::now();
        Self {
            content,
            created_at: now,
            last_accessed: now,
            access_count: 1,
        }
    }
    
    fn access(&mut self) {
        self.last_accessed = Instant::now();
        self.access_count += 1;
    }
    
    fn is_expired(&self, ttl: Duration) -> bool {
        self.created_at.elapsed() > ttl
    }
}

/// Cache statistics
#[derive(Debug, Clone, Default)]
pub struct CacheStats {
    /// Total number of cache lookups
    pub lookups: usize,
    
    /// Number of cache hits
    pub hits: usize,
    
    /// Number of cache misses
    pub misses: usize,
    
    /// Number of entries evicted due to LRU
    pub evictions: usize,
    
    /// Number of entries expired due to TTL
    pub expirations: usize,
}

impl CacheStats {
    pub fn hit_rate(&self) -> f64 {
        if self.lookups == 0 {
            0.0
        } else {
            self.hits as f64 / self.lookups as f64
        }
    }
}

/// LLM Result Cache
///
/// Thread-safe cache for LLM-generated content with LRU eviction and TTL expiration.
pub struct LlmResultCache {
    /// Cache storage
    cache: Arc<RwLock<HashMap<String, CachedResult>>>,
    
    /// Configuration
    config: CacheConfig,
    
    /// Statistics
    stats: Arc<RwLock<CacheStats>>,
}

impl LlmResultCache {
    /// Create a new cache
    pub fn new(config: CacheConfig) -> Self {
        if config.enabled {
            info!(
                "🔧 LLM result cache enabled (max_entries: {}, ttl: {}s)",
                config.max_entries, config.ttl_secs
            );
        } else {
            info!("⚠️  LLM result cache disabled");
        }
        
        Self {
            cache: Arc::new(RwLock::new(HashMap::new())),
            config,
            stats: Arc::new(RwLock::new(CacheStats::default())),
        }
    }
    
    /// Get cached result
    ///
    /// Returns Some(content) if found and not expired, None otherwise
    pub async fn get(&self, key: &str) -> Option<String> {
        if !self.config.enabled {
            return None;
        }
        
        let mut cache = self.cache.write().await;
        let mut stats = self.stats.write().await;
        
        stats.lookups += 1;
        
        if let Some(entry) = cache.get_mut(key) {
            // Check expiration
            let ttl = Duration::from_secs(self.config.ttl_secs);
            if entry.is_expired(ttl) {
                // Expired, remove
                cache.remove(key);
                stats.misses += 1;
                stats.expirations += 1;
                debug!("🗑️  Cache expired for key: {}", &key[..8]);
                return None;
            }
            
            // Hit! Update access time
            entry.access();
            stats.hits += 1;
            
            debug!(
                "✅ Cache HIT for key: {} (accessed {} times, age: {:.1}s)",
                &key[..8],
                entry.access_count,
                entry.created_at.elapsed().as_secs_f64()
            );
            
            Some(entry.content.clone())
        } else {
            // Miss
            stats.misses += 1;
            debug!("❌ Cache MISS for key: {}", &key[..8]);
            None
        }
    }
    
    /// Put result into cache
    pub async fn put(&self, key: String, content: String) {
        if !self.config.enabled {
            return;
        }
        
        let mut cache = self.cache.write().await;
        
        // Check if we need to evict (LRU)
        if cache.len() >= self.config.max_entries && !cache.contains_key(&key) {
            self.evict_lru(&mut cache).await;
        }
        
        // Insert new entry
        cache.insert(key.clone(), CachedResult::new(content));
        
        debug!(
            "💾 Cached result for key: {} (total entries: {})",
            &key[..8],
            cache.len()
        );
    }
    
    /// Evict least recently used entry
    async fn evict_lru(&self, cache: &mut HashMap<String, CachedResult>) {
        if cache.is_empty() {
            return;
        }
        
        // Find LRU entry
        let lru_key = cache
            .iter()
            .min_by_key(|(_, entry)| entry.last_accessed)
            .map(|(key, _)| key.clone());
        
        if let Some(key) = lru_key {
            cache.remove(&key);
            
            let mut stats = self.stats.write().await;
            stats.evictions += 1;
            
            debug!("🗑️  Evicted LRU entry: {}", &key[..8]);
        }
    }
    
    /// Clear all cached entries
    pub async fn clear(&self) {
        let mut cache = self.cache.write().await;
        cache.clear();
        
        info!("🗑️  Cache cleared");
    }
    
    /// Get cache statistics
    pub async fn stats(&self) -> CacheStats {
        self.stats.read().await.clone()
    }
    
    /// Reset statistics
    pub async fn reset_stats(&self) {
        let mut stats = self.stats.write().await;
        *stats = CacheStats::default();
    }
    
    /// Get current cache size
    pub async fn size(&self) -> usize {
        self.cache.read().await.len()
    }
    
    /// Clean up expired entries
    ///
    /// This should be called periodically to remove expired entries
    pub async fn cleanup_expired(&self) {
        if !self.config.enabled {
            return;
        }
        
        let mut cache = self.cache.write().await;
        let ttl = Duration::from_secs(self.config.ttl_secs);
        
        let expired_keys: Vec<String> = cache
            .iter()
            .filter(|(_, entry)| entry.is_expired(ttl))
            .map(|(key, _)| key.clone())
            .collect();
        
        let count = expired_keys.len();
        
        for key in expired_keys {
            cache.remove(&key);
        }
        
        if count > 0 {
            let mut stats = self.stats.write().await;
            stats.expirations += count;
            
            info!("🗑️  Cleaned up {} expired cache entries", count);
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    
    #[tokio::test]
    async fn test_cache_basic() {
        let cache = LlmResultCache::new(CacheConfig::default());
        
        // Miss
        assert_eq!(cache.get("key1").await, None);
        
        // Put
        cache.put("key1".to_string(), "content1".to_string()).await;
        
        // Hit
        assert_eq!(cache.get("key1").await, Some("content1".to_string()));
        
        // Stats
        let stats = cache.stats().await;
        assert_eq!(stats.lookups, 2);
        assert_eq!(stats.hits, 1);
        assert_eq!(stats.misses, 1);
        assert_eq!(stats.hit_rate(), 0.5);
    }
    
    #[tokio::test]
    async fn test_cache_lru_eviction() {
        let config = CacheConfig {
            enabled: true,
            max_entries: 2,
            ttl_secs: 3600,
        };
        let cache = LlmResultCache::new(config);
        
        // Fill cache
        cache.put("key1".to_string(), "content1".to_string()).await;
        cache.put("key2".to_string(), "content2".to_string()).await;
        
        // Access key2 to make key1 LRU
        cache.get("key2").await;
        
        // Add key3, should evict key1
        cache.put("key3".to_string(), "content3".to_string()).await;
        
        // key1 should be evicted
        assert_eq!(cache.get("key1").await, None);
        assert_eq!(cache.get("key2").await, Some("content2".to_string()));
        assert_eq!(cache.get("key3").await, Some("content3".to_string()));
        
        let stats = cache.stats().await;
        assert_eq!(stats.evictions, 1);
    }
    
    #[tokio::test]
    async fn test_cache_disabled() {
        let config = CacheConfig {
            enabled: false,
            max_entries: 100,
            ttl_secs: 3600,
        };
        let cache = LlmResultCache::new(config);
        
        cache.put("key1".to_string(), "content1".to_string()).await;
        assert_eq!(cache.get("key1").await, None);
        
        let stats = cache.stats().await;
        assert_eq!(stats.lookups, 0);  // No lookups when disabled
    }
    
    #[tokio::test]
    async fn test_cache_ttl() {
        let config = CacheConfig {
            enabled: true,
            max_entries: 100,
            ttl_secs: 0,  // Immediate expiration for testing
        };
        let cache = LlmResultCache::new(config);
        
        cache.put("key1".to_string(), "content1".to_string()).await;
        
        // Wait a bit
        tokio::time::sleep(Duration::from_millis(10)).await;
        
        // Should be expired
        assert_eq!(cache.get("key1").await, None);
        
        let stats = cache.stats().await;
        assert_eq!(stats.expirations, 1);
    }
}