oxirs-chat 0.2.4

RAG chat API with LLM integration and natural-language-to-SPARQL translation
//! Advanced Caching System for OxiRS Chat
//!
//! Implements multi-level caching for responses, contexts, embeddings, and query results
//! with LRU eviction, TTL support, semantic caching, and intelligent cache warming strategies.
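//!
//! # Example
//!
//! A minimal usage sketch (the `oxirs_chat::cache` path and the surrounding
//! async runtime are assumptions; adjust to your crate layout):
//!
//! ```ignore
//! use oxirs_chat::cache::{AdvancedCacheManager, CacheConfig};
//!
//! async fn demo() -> anyhow::Result<()> {
//!     let manager = AdvancedCacheManager::new(CacheConfig::default());
//!
//!     // Keys are derived by hashing the query plus optional context.
//!     let key = AdvancedCacheManager::generate_cache_key("who wrote Hamlet?", None);
//!
//!     // Store an embedding, then read it back.
//!     manager.cache_embedding(key.clone(), vec![0.0f32; 384]).await?;
//!     assert!(manager.get_cached_embedding(&key).await.is_some());
//!     Ok(())
//! }
//! ```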

pub mod semantic; // Semantic caching

pub use semantic::{
    CacheStatistics as SemanticCacheStatistics, SemanticCache, SemanticCacheConfig,
};

use anyhow::{anyhow, Result};
use serde::{Deserialize, Serialize};
use std::{
    collections::{HashMap, VecDeque},
    hash::{DefaultHasher, Hash, Hasher},
    sync::Arc,
    time::{Duration, SystemTime},
};
use tokio::sync::RwLock;
use tracing::{debug, info, warn};

use crate::{llm::LLMResponse, rag::AssembledContext, Message};

/// Cache configuration with different strategies and policies
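///
/// All fields are public; a typical override of the defaults (a sketch):
///
/// ```ignore
/// use std::time::Duration;
///
/// let mut config = CacheConfig::default();
/// config.enable_warming = false;
/// config.response_cache.ttl = Duration::from_secs(7200); // keep responses for 2 hours
/// ```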
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheConfig {
    pub response_cache: CacheTierConfig,
    pub context_cache: CacheTierConfig,
    pub embedding_cache: CacheTierConfig,
    pub query_cache: CacheTierConfig,
    pub enable_warming: bool,
    pub warming_strategies: Vec<WarmingStrategy>,
    pub compression_enabled: bool,
    pub persistence_enabled: bool,
    pub persistence_interval: Duration,
}

impl Default for CacheConfig {
    fn default() -> Self {
        Self {
            response_cache: CacheTierConfig {
                max_size: 1000,
                ttl: Duration::from_secs(3600), // 1 hour
                eviction_policy: EvictionPolicy::LRU,
                compression_threshold: 1024, // 1KB
            },
            context_cache: CacheTierConfig {
                max_size: 500,
                ttl: Duration::from_secs(1800), // 30 minutes
                eviction_policy: EvictionPolicy::LFU,
                compression_threshold: 2048, // 2KB
            },
            embedding_cache: CacheTierConfig {
                max_size: 5000,
                ttl: Duration::from_secs(86400), // 24 hours
                eviction_policy: EvictionPolicy::LRU,
                compression_threshold: 0, // No compression for embeddings
            },
            query_cache: CacheTierConfig {
                max_size: 200,
                ttl: Duration::from_secs(300), // 5 minutes
                eviction_policy: EvictionPolicy::TTL,
                compression_threshold: 512,
            },
            enable_warming: true,
            warming_strategies: vec![
                WarmingStrategy::FrequentQueries,
                WarmingStrategy::RecentSessions,
            ],
            compression_enabled: true,
            persistence_enabled: false,
            persistence_interval: Duration::from_secs(300),
        }
    }
}

/// Configuration for individual cache tiers
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheTierConfig {
    pub max_size: usize,
    pub ttl: Duration,
    pub eviction_policy: EvictionPolicy,
    pub compression_threshold: usize,
}

/// Cache eviction policies
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum EvictionPolicy {
    LRU,  // Least Recently Used
    LFU,  // Least Frequently Used
    TTL,  // Time To Live only
    FIFO, // First In, First Out
}

/// Cache warming strategies
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum WarmingStrategy {
    FrequentQueries,
    RecentSessions,
    PopularEntities,
    PredictivePatterns,
}

/// Pattern analysis results for predictive caching
#[derive(Debug, Clone)]
pub struct ConversationPatterns {
    keyword_frequency: HashMap<String, usize>,
    question_patterns: usize,
    sparql_requests: usize,
    graph_operations: usize,
    hourly_activity: [usize; 24],
    total_messages: usize,
    question_confidence: f64,
    sparql_confidence: f64,
    pattern_confidence: f64,
}

impl ConversationPatterns {
    fn new() -> Self {
        Self {
            keyword_frequency: HashMap::new(),
            question_patterns: 0,
            sparql_requests: 0,
            graph_operations: 0,
            hourly_activity: [0; 24],
            total_messages: 0,
            question_confidence: 0.0,
            sparql_confidence: 0.0,
            pattern_confidence: 0.0,
        }
    }

    fn calculate_confidence(&mut self) {
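        // Note: `total_messages` tallies pattern matches rather than distinct
        // messages, so one message may count toward several categories.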
        self.total_messages = self.question_patterns + self.sparql_requests + self.graph_operations;

        if self.total_messages > 0 {
            self.question_confidence =
                (self.question_patterns as f64 / self.total_messages as f64).min(1.0);
            self.sparql_confidence =
                (self.sparql_requests as f64 / self.total_messages as f64).min(1.0);

            // Calculate overall pattern confidence based on activity consistency
            let activity_variance = self.calculate_activity_variance();
            self.pattern_confidence = (1.0 - activity_variance).max(0.3); // Minimum 30% confidence
        }
    }

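    /// Coefficient of variation (σ/μ) of hourly activity, capped at 1.0; flat
    /// activity yields values near 0 while bursty activity approaches 1.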
    fn calculate_activity_variance(&self) -> f64 {
        let total_activity: usize = self.hourly_activity.iter().sum();
        if total_activity == 0 {
            return 1.0;
        }

        let mean = total_activity as f64 / 24.0;
        let variance: f64 = self
            .hourly_activity
            .iter()
            .map(|&activity| {
                let diff = activity as f64 - mean;
                diff * diff
            })
            .sum::<f64>()
            / 24.0;

        (variance.sqrt() / mean).min(1.0)
    }
}

/// Cache prediction for smart warming
#[derive(Debug, Clone)]
pub struct CachePrediction {
    key: String,
    cache_type: PredictiveCacheType,
    confidence: f64,
    context: String,
}

/// Types of predictive cache entries
#[derive(Debug, Clone)]
pub enum PredictiveCacheType {
    Response,
    Context,
    Embedding,
    Query,
}

/// Cache entry with metadata
#[derive(Debug, Clone)]
struct CacheEntry<T> {
    value: T,
    created_at: SystemTime,
    last_accessed: SystemTime,
    access_count: usize,
    ttl: Duration,
    size_bytes: usize,
    #[allow(dead_code)]
    compression_used: bool,
}

impl<T> CacheEntry<T> {
    fn new(value: T, ttl: Duration, size_bytes: usize) -> Self {
        let now = SystemTime::now();
        Self {
            value,
            created_at: now,
            last_accessed: now,
            access_count: 1,
            ttl,
            size_bytes,
            compression_used: false,
        }
    }

    fn is_expired(&self) -> bool {
        self.created_at.elapsed().unwrap_or(Duration::ZERO) > self.ttl
    }

    fn update_access(&mut self) {
        self.last_accessed = SystemTime::now();
        self.access_count += 1;
    }

    fn age(&self) -> Duration {
        self.created_at.elapsed().unwrap_or(Duration::ZERO)
    }
}

/// Multi-level cache with different eviction policies
struct CacheTier<T: Clone> {
    config: CacheTierConfig,
    entries: HashMap<String, CacheEntry<T>>,
    access_order: VecDeque<String>,        // For LRU
    frequency_map: HashMap<String, usize>, // For LFU
    total_size: usize,
}

impl<T: Clone> CacheTier<T> {
    fn new(config: CacheTierConfig) -> Self {
        Self {
            config,
            entries: HashMap::new(),
            access_order: VecDeque::new(),
            frequency_map: HashMap::new(),
            total_size: 0,
        }
    }

    fn get(&mut self, key: &str) -> Option<T> {
        // Check if entry exists and is not expired
        let is_expired = self
            .entries
            .get(key)
            .map_or(true, |entry| entry.is_expired());

        if is_expired {
            self.remove(key);
            return None;
        }

        // Update access and return value
        if let Some(entry) = self.entries.get_mut(key) {
            entry.update_access();
            let value = entry.value.clone();
            // Update tracking after we're done with the mutable borrow
            self.update_access_tracking(key);
            Some(value)
        } else {
            None
        }
    }

    fn put(&mut self, key: String, value: T, size_bytes: usize) -> Result<()> {
        // Remove existing entry if present
        if self.entries.contains_key(&key) {
            self.remove(&key);
        }

        // Ensure space is available
        self.ensure_space(size_bytes)?;

        let entry = CacheEntry::new(value, self.config.ttl, size_bytes);
        self.entries.insert(key.clone(), entry);
        self.total_size += size_bytes;
        self.update_access_tracking(&key);

        debug!("Cache put: {} (size: {} bytes)", key, size_bytes);
        Ok(())
    }

    fn remove(&mut self, key: &str) -> Option<T> {
        match self.entries.remove(key) {
            Some(entry) => {
                self.total_size = self.total_size.saturating_sub(entry.size_bytes);
                self.access_order.retain(|k| k != key);
                self.frequency_map.remove(key);
                debug!("Cache remove: {}", key);
                Some(entry.value)
            }
            _ => None,
        }
    }

    fn clear(&mut self) {
        self.entries.clear();
        self.access_order.clear();
        self.frequency_map.clear();
        self.total_size = 0;
        info!("Cache cleared");
    }

    fn cleanup_expired(&mut self) -> usize {
        let expired_keys: Vec<String> = self
            .entries
            .iter()
            .filter(|(_, entry)| entry.is_expired())
            .map(|(key, _)| key.clone())
            .collect();

        let count = expired_keys.len();
        for key in expired_keys {
            self.remove(&key);
        }

        if count > 0 {
            debug!("Cleaned up {} expired cache entries", count);
        }
        count
    }

    fn ensure_space(&mut self, needed_bytes: usize) -> Result<()> {
        // First, clean up expired entries
        self.cleanup_expired();

        // If still not enough space, evict according to policy
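        // Note: `max_size` is interpreted here as a KiB budget
        // (`max_size * 1024` bytes), not as an entry count.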
        while self.total_size + needed_bytes > self.config.max_size * 1024
            && !self.entries.is_empty()
        {
            match self.config.eviction_policy {
                EvictionPolicy::LRU => self.evict_lru()?,
                EvictionPolicy::LFU => self.evict_lfu()?,
                EvictionPolicy::FIFO => self.evict_fifo()?,
                EvictionPolicy::TTL => {
                    // TTL-only policy - if no expired entries, fail
                    return Err(anyhow!("Cache full and no expired entries to evict"));
                }
            }
        }

        Ok(())
    }

    fn evict_lru(&mut self) -> Result<()> {
        if let Some(key) = self.access_order.front().cloned() {
            self.remove(&key);
            Ok(())
        } else {
            Err(anyhow!("No entries to evict"))
        }
    }

    fn evict_lfu(&mut self) -> Result<()> {
        if let Some((key, _)) = self.frequency_map.iter().min_by_key(|&(_, &count)| count) {
            let key = key.clone();
            self.remove(&key);
            Ok(())
        } else {
            Err(anyhow!("No entries to evict"))
        }
    }

    fn evict_fifo(&mut self) -> Result<()> {
        if let Some((oldest_key, _)) = self
            .entries
            .iter()
            .min_by_key(|(_, entry)| entry.created_at)
        {
            let key = oldest_key.clone();
            self.remove(&key);
            Ok(())
        } else {
            Err(anyhow!("No entries to evict"))
        }
    }

    fn update_access_tracking(&mut self, key: &str) {
        match self.config.eviction_policy {
            EvictionPolicy::LRU => {
                // Remove from current position and add to back
                self.access_order.retain(|k| k != key);
                self.access_order.push_back(key.to_string());
            }
            EvictionPolicy::LFU => {
                *self.frequency_map.entry(key.to_string()).or_insert(0) += 1;
            }
            _ => {} // No tracking needed for other policies
        }
    }

    #[allow(dead_code)]
    fn size(&self) -> usize {
        self.entries.len()
    }

    #[allow(dead_code)]
    fn total_size_bytes(&self) -> usize {
        self.total_size
    }

    fn get_stats(&self) -> CacheTierStats {
        let mut expired_count = 0;
        let mut avg_access_count = 0.0;
        let mut total_age = Duration::ZERO;

        for entry in self.entries.values() {
            if entry.is_expired() {
                expired_count += 1;
            }
            avg_access_count += entry.access_count as f64;
            total_age += entry.age();
        }

        let entry_count = self.entries.len();
        if entry_count > 0 {
            avg_access_count /= entry_count as f64;
        }

        CacheTierStats {
            entry_count,
            total_size_bytes: self.total_size,
            expired_count,
            avg_access_count,
            avg_age_seconds: if entry_count > 0 {
                total_age.as_secs() / entry_count as u64
            } else {
                0
            },
        }
    }
}

/// Statistics for a cache tier
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheTierStats {
    pub entry_count: usize,
    pub total_size_bytes: usize,
    pub expired_count: usize,
    pub avg_access_count: f64,
    pub avg_age_seconds: u64,
}

/// Main cache manager with multiple tiers
pub struct AdvancedCacheManager {
    config: CacheConfig,
    response_cache: Arc<RwLock<CacheTier<CachedResponse>>>,
    context_cache: Arc<RwLock<CacheTier<CachedContext>>>,
    embedding_cache: Arc<RwLock<CacheTier<Vec<f32>>>>,
    query_cache: Arc<RwLock<CacheTier<CachedQueryResult>>>,
    hit_stats: Arc<RwLock<CacheStats>>,
}

/// Cached response data
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedResponse {
    pub content: String,
    pub metadata: HashMap<String, serde_json::Value>,
    pub quality_score: f32,
    pub generation_method: String,
}

/// Cached context data
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedContext {
    pub context_text: String,
    pub quality_score: f32,
    pub coverage_score: f32,
    pub entity_count: usize,
    pub fact_count: usize,
}

/// Cached query result
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CachedQueryResult {
    pub sparql_query: String,
    pub result_bindings: Vec<HashMap<String, String>>,
    pub execution_time_ms: u64,
}

/// Cache statistics
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct CacheStats {
    pub total_requests: usize,
    pub cache_hits: usize,
    pub cache_misses: usize,
    pub response_hits: usize,
    pub context_hits: usize,
    pub embedding_hits: usize,
    pub query_hits: usize,
    pub total_time_saved_ms: u64,
    pub average_hit_time_ms: f64,
}

impl CacheStats {
    pub fn hit_rate(&self) -> f64 {
        if self.total_requests == 0 {
            0.0
        } else {
            self.cache_hits as f64 / self.total_requests as f64
        }
    }

    pub fn miss_rate(&self) -> f64 {
        1.0 - self.hit_rate()
    }

    pub fn time_saved_per_hit(&self) -> f64 {
        if self.cache_hits == 0 {
            0.0
        } else {
            self.total_time_saved_ms as f64 / self.cache_hits as f64
        }
    }
}

impl AdvancedCacheManager {
    pub fn new(config: CacheConfig) -> Self {
        let response_cache = Arc::new(RwLock::new(CacheTier::new(config.response_cache.clone())));
        let context_cache = Arc::new(RwLock::new(CacheTier::new(config.context_cache.clone())));
        let embedding_cache = Arc::new(RwLock::new(CacheTier::new(config.embedding_cache.clone())));
        let query_cache = Arc::new(RwLock::new(CacheTier::new(config.query_cache.clone())));
        let hit_stats = Arc::new(RwLock::new(CacheStats::default()));

        let manager = Self {
            config: config.clone(),
            response_cache,
            context_cache,
            embedding_cache,
            query_cache,
            hit_stats,
        };

        // Start background cleanup task
        manager.start_cleanup_task();

        // Start cache warming if enabled
        if config.enable_warming {
            manager.start_warming_task();
        }

        manager
    }

    /// Generate cache key from query and context
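    ///
    /// Keys are deterministic within a build, but `DefaultHasher`'s algorithm
    /// is unspecified across Rust releases, so keys should not be persisted.
    ///
    /// ```ignore
    /// let a = AdvancedCacheManager::generate_cache_key("q", Some("ctx"));
    /// let b = AdvancedCacheManager::generate_cache_key("q", Some("ctx"));
    /// assert_eq!(a, b);
    /// ```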
    pub fn generate_cache_key(query: &str, context: Option<&str>) -> String {
        let mut hasher = DefaultHasher::new();
        query.hash(&mut hasher);
        if let Some(ctx) = context {
            ctx.hash(&mut hasher);
        }
        format!("key_{:x}", hasher.finish())
    }

    /// Cache a response
    pub async fn cache_response(
        &self,
        key: String,
        response: &LLMResponse,
        quality_score: f32,
    ) -> Result<()> {
        let cached_response = CachedResponse {
            content: response.content.clone(),
            metadata: response.metadata.clone(),
            quality_score,
            generation_method: format!("{} ({})", response.provider_used, response.model_used),
        };

        let size = oxicode::serde::encode_to_vec(&cached_response, oxicode::config::standard())
            .map_err(|e| anyhow!("Failed to encode cached response: {}", e))?
            .len();
        let mut cache = self.response_cache.write().await;
        cache.put(key, cached_response, size)
    }

    /// Get cached response
    pub async fn get_cached_response(&self, key: &str) -> Option<CachedResponse> {
        let mut stats = self.hit_stats.write().await;
        stats.total_requests += 1;

        let mut cache = self.response_cache.write().await;
        if let Some(response) = cache.get(key) {
            stats.cache_hits += 1;
            stats.response_hits += 1;
            drop(stats);
            debug!("Cache hit for response: {}", key);
            Some(response)
        } else {
            stats.cache_misses += 1;
            drop(stats);
            debug!("Cache miss for response: {}", key);
            None
        }
    }

    /// Cache assembled context
    pub async fn cache_context(&self, key: String, context: &AssembledContext) -> Result<()> {
        let cached_context = CachedContext {
            context_text: format!(
                "{} semantic results, {} graph results",
                context.semantic_results.len(),
                context.graph_results.len()
            ),
            quality_score: context.context_score,
            coverage_score: context.context_score,
            entity_count: context.extracted_entities.len(),
            fact_count: context
                .retrieved_triples
                .as_ref()
                .map(|t| t.len())
                .unwrap_or(0),
        };

        let size = oxicode::serde::encode_to_vec(&cached_context, oxicode::config::standard())
            .map_err(|e| anyhow!("Failed to encode cached context: {}", e))?
            .len();
        let mut cache = self.context_cache.write().await;
        cache.put(key, cached_context, size)
    }

    /// Get cached context
    pub async fn get_cached_context(&self, key: &str) -> Option<CachedContext> {
        let mut stats = self.hit_stats.write().await;
        stats.total_requests += 1;

        let mut cache = self.context_cache.write().await;
        if let Some(context) = cache.get(key) {
            stats.cache_hits += 1;
            stats.context_hits += 1;
            drop(stats);
            debug!("Cache hit for context: {}", key);
            Some(context)
        } else {
            stats.cache_misses += 1;
            drop(stats);
            debug!("Cache miss for context: {}", key);
            None
        }
    }

    /// Cache embedding vector
    pub async fn cache_embedding(&self, key: String, embedding: Vec<f32>) -> Result<()> {
        let size = embedding.len() * std::mem::size_of::<f32>();
        let mut cache = self.embedding_cache.write().await;
        cache.put(key, embedding, size)
    }

    /// Get cached embedding
    pub async fn get_cached_embedding(&self, key: &str) -> Option<Vec<f32>> {
        let mut stats = self.hit_stats.write().await;
        stats.total_requests += 1;

        let mut cache = self.embedding_cache.write().await;
        if let Some(embedding) = cache.get(key) {
            stats.cache_hits += 1;
            stats.embedding_hits += 1;
            drop(stats);
            debug!("Cache hit for embedding: {}", key);
            Some(embedding)
        } else {
            stats.cache_misses += 1;
            drop(stats);
            debug!("Cache miss for embedding: {}", key);
            None
        }
    }

    /// Cache SPARQL query result
    pub async fn cache_query_result(
        &self,
        key: String,
        sparql_query: String,
        bindings: Vec<HashMap<String, String>>,
        execution_time_ms: u64,
    ) -> Result<()> {
        let cached_result = CachedQueryResult {
            sparql_query,
            result_bindings: bindings,
            execution_time_ms,
        };

        let size = oxicode::serde::encode_to_vec(&cached_result, oxicode::config::standard())
            .map_err(|e| anyhow!("Failed to encode cached query result: {}", e))?
            .len();
        let mut cache = self.query_cache.write().await;
        cache.put(key, cached_result, size)
    }

    /// Get cached query result
    pub async fn get_cached_query_result(&self, key: &str) -> Option<CachedQueryResult> {
        let mut stats = self.hit_stats.write().await;
        stats.total_requests += 1;

        let mut cache = self.query_cache.write().await;
        if let Some(result) = cache.get(key) {
            stats.cache_hits += 1;
            stats.query_hits += 1;
            stats.total_time_saved_ms += result.execution_time_ms;
            drop(stats);
            debug!("Cache hit for query result: {}", key);
            Some(result)
        } else {
            stats.cache_misses += 1;
            drop(stats);
            debug!("Cache miss for query result: {}", key);
            None
        }
    }

    /// Get comprehensive cache statistics
    pub async fn get_cache_stats(&self) -> CacheStats {
        self.hit_stats.read().await.clone()
    }

    /// Get detailed cache tier statistics
    pub async fn get_detailed_stats(&self) -> HashMap<String, CacheTierStats> {
        let mut stats = HashMap::new();

        stats.insert(
            "response".to_string(),
            self.response_cache.read().await.get_stats(),
        );
        stats.insert(
            "context".to_string(),
            self.context_cache.read().await.get_stats(),
        );
        stats.insert(
            "embedding".to_string(),
            self.embedding_cache.read().await.get_stats(),
        );
        stats.insert(
            "query".to_string(),
            self.query_cache.read().await.get_stats(),
        );

        stats
    }

    /// Clear all caches
    pub async fn clear_all(&self) {
        let mut response_cache = self.response_cache.write().await;
        let mut context_cache = self.context_cache.write().await;
        let mut embedding_cache = self.embedding_cache.write().await;
        let mut query_cache = self.query_cache.write().await;

        response_cache.clear();
        context_cache.clear();
        embedding_cache.clear();
        query_cache.clear();

        let mut stats = self.hit_stats.write().await;
        *stats = CacheStats::default();

        info!("All caches cleared");
    }

    /// Start background cleanup task
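    /// The spawned task runs for the lifetime of the Tokio runtime; its handle
    /// is not retained, so it is not cancelled when the manager is dropped.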
    fn start_cleanup_task(&self) {
        let response_cache = Arc::clone(&self.response_cache);
        let context_cache = Arc::clone(&self.context_cache);
        let embedding_cache = Arc::clone(&self.embedding_cache);
        let query_cache = Arc::clone(&self.query_cache);

        tokio::spawn(async move {
            let mut interval = tokio::time::interval(Duration::from_secs(300)); // Every 5 minutes

            loop {
                interval.tick().await;

                let mut total_cleaned = 0;

                // Cleanup expired entries in all tiers
                {
                    let mut cache = response_cache.write().await;
                    total_cleaned += cache.cleanup_expired();
                }
                {
                    let mut cache = context_cache.write().await;
                    total_cleaned += cache.cleanup_expired();
                }
                {
                    let mut cache = embedding_cache.write().await;
                    total_cleaned += cache.cleanup_expired();
                }
                {
                    let mut cache = query_cache.write().await;
                    total_cleaned += cache.cleanup_expired();
                }

                if total_cleaned > 0 {
                    info!("Cache cleanup: removed {} expired entries", total_cleaned);
                }
            }
        });
    }

    /// Start cache warming task
    fn start_warming_task(&self) {
        let _response_cache = Arc::clone(&self.response_cache);
        let _context_cache = Arc::clone(&self.context_cache);
        let _embedding_cache = Arc::clone(&self.embedding_cache);
        let _query_cache = Arc::clone(&self.query_cache);
        let _hit_stats = Arc::clone(&self.hit_stats);
        let strategies = self.config.warming_strategies.clone();

        tokio::spawn(async move {
            let mut interval = tokio::time::interval(Duration::from_secs(900)); // Every 15 minutes

            // Create a temporary cache manager for warming. Warming is disabled
            // on this inner manager; otherwise every warming task would spawn
            // another manager with its own warming task, recursively.
            let cache_config = CacheConfig {
                enable_warming: false,
                ..CacheConfig::default()
            };
            let cache_manager = Arc::new(AdvancedCacheManager::new(cache_config));
            let warming_service = CacheWarmingService::new(cache_manager, strategies.clone());

            loop {
                interval.tick().await;

                for strategy in &strategies {
                    match strategy {
                        WarmingStrategy::FrequentQueries => {
                            // Use some sample frequent queries for warming
                            let frequent_queries = vec!["sample query".to_string()];
                            if let Err(e) = warming_service
                                .warm_frequent_queries(frequent_queries)
                                .await
                            {
                                warn!("Failed to warm frequent queries: {}", e);
                            }
                        }
                        WarmingStrategy::RecentSessions => {
                            // Stub implementation - in real implementation would analyze recent session patterns
                            debug!("Warming recent sessions - not yet implemented");
                        }
                        WarmingStrategy::PopularEntities => {
                            // Stub implementation - in real implementation would warm popular entity caches
                            debug!("Warming popular entities - not yet implemented");
                        }
                        WarmingStrategy::PredictivePatterns => {
                            // Stub implementation - in real implementation would use ML to predict access patterns
                            debug!("Warming predictive patterns - not yet implemented");
                        }
                    }
                }

                info!("Cache warming cycle completed");
            }
        });

        info!(
            "Cache warming task started with strategies: {:?}",
            self.config.warming_strategies
        );
    }

    /// Optimize cache configuration based on usage patterns
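    ///
    /// Heuristic sketch: grows the response tier by 20% when the overall hit
    /// rate exceeds 0.8, shrinks it by 20% when the rate falls below 0.3, and
    /// stretches its TTL by 10% once more than 100 hits with timing data have
    /// been recorded. The caller is responsible for applying the returned
    /// configuration.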
    pub async fn optimize_configuration(&self) -> Result<CacheConfig> {
        let stats = self.get_cache_stats().await;
        let detailed_stats = self.get_detailed_stats().await;

        let mut optimized_config = self.config.clone();

        // Adjust cache sizes based on hit rates
        if let Some(_response_stats) = detailed_stats.get("response") {
            if stats.response_hits > 0 && stats.hit_rate() > 0.8 {
                // High hit rate - increase cache size
                optimized_config.response_cache.max_size =
                    (optimized_config.response_cache.max_size as f32 * 1.2) as usize;
            } else if stats.hit_rate() < 0.3 {
                // Low hit rate - decrease cache size
                optimized_config.response_cache.max_size =
                    (optimized_config.response_cache.max_size as f32 * 0.8) as usize;
            }
        }

        // Adjust TTL based on access patterns
        if stats.cache_hits > 100 && stats.average_hit_time_ms > 0.0 {
            // If cache is being used effectively, extend TTL
            optimized_config.response_cache.ttl = Duration::from_secs(
                (optimized_config.response_cache.ttl.as_secs() as f32 * 1.1) as u64,
            );
        }

        info!("Cache configuration optimized based on usage patterns");
        Ok(optimized_config)
    }
}

/// Cache warming service for proactive cache population
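///
/// A sketch of driving the warming service manually (async context assumed):
///
/// ```ignore
/// use std::sync::Arc;
///
/// let manager = Arc::new(AdvancedCacheManager::new(CacheConfig::default()));
/// let warmer = CacheWarmingService::new(
///     Arc::clone(&manager),
///     vec![WarmingStrategy::FrequentQueries],
/// );
/// let warmed = warmer.warm_frequent_queries(vec!["list all classes".into()]).await?;
/// ```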
pub struct CacheWarmingService {
    cache_manager: Arc<AdvancedCacheManager>,
    #[allow(dead_code)]
    strategies: Vec<WarmingStrategy>,
}

impl CacheWarmingService {
    pub fn new(cache_manager: Arc<AdvancedCacheManager>, strategies: Vec<WarmingStrategy>) -> Self {
        Self {
            cache_manager,
            strategies,
        }
    }

    /// Warm cache with frequent queries
    pub async fn warm_frequent_queries(&self, queries: Vec<String>) -> Result<usize> {
        let mut warmed_count = 0;

        for query in queries {
            // Generate embeddings and cache them
            let embedding_key = format!(
                "embedding_{}",
                AdvancedCacheManager::generate_cache_key(&query, None)
            );

            // This would ideally generate actual embeddings
            // For now, we'll create a placeholder
            let dummy_embedding = vec![0.0f32; 768]; // Placeholder; 768 dims (BERT-base-sized)

            if self
                .cache_manager
                .cache_embedding(embedding_key, dummy_embedding)
                .await
                .is_ok()
            {
                warmed_count += 1;
            }
        }

        info!("Cache warming completed: {} entries warmed", warmed_count);
        Ok(warmed_count)
    }

    /// Analyze usage patterns for smart warming
    pub async fn analyze_and_warm(&self, recent_messages: &[Message]) -> Result<usize> {
        let mut warmed_count = 0;

        // Analyze patterns from recent messages
        let patterns = self.analyze_message_patterns(recent_messages).await?;

        // Generate predictions based on patterns
        let predictions = self.generate_predictive_cache_keys(&patterns).await?;

        // Warm cache with predicted items
        for prediction in predictions {
            match prediction.cache_type {
                PredictiveCacheType::Response => {
                    // Pre-generate likely responses
                    if self
                        .warm_response_cache(&prediction.key, &prediction.context)
                        .await
                        .is_ok()
                    {
                        warmed_count += 1;
                    }
                }
                PredictiveCacheType::Context => {
                    // Pre-build likely contexts
                    if self
                        .warm_context_cache(&prediction.key, &prediction.context)
                        .await
                        .is_ok()
                    {
                        warmed_count += 1;
                    }
                }
                PredictiveCacheType::Embedding => {
                    // Pre-compute embeddings for likely queries
                    if self.warm_embedding_cache(&prediction.key).await.is_ok() {
                        warmed_count += 1;
                    }
                }
                PredictiveCacheType::Query => {
                    // Pre-execute likely SPARQL queries
                    if self
                        .warm_query_cache(&prediction.key, &prediction.context)
                        .await
                        .is_ok()
                    {
                        warmed_count += 1;
                    }
                }
            }
        }

        info!(
            "Pattern-based cache warming completed: {} entries warmed",
            warmed_count
        );
        Ok(warmed_count)
    }

    /// Analyze message patterns to identify trends and predict future needs
    async fn analyze_message_patterns(&self, messages: &[Message]) -> Result<ConversationPatterns> {
        let mut patterns = ConversationPatterns::new();

        // Extract keywords and entities from recent messages
        for message in messages.iter().rev().take(50) {
            // Analyze last 50 messages
            let text = message.content.to_text();

            // Extract entities (simple word frequency for now)
            let words: Vec<&str> = text
                .split_whitespace()
                .filter(|w| w.len() > 3) // Filter out short words
                .collect();

            for word in words {
                let normalized = word.to_lowercase();
                *patterns.keyword_frequency.entry(normalized).or_insert(0) += 1;
            }

            // Identify query patterns (lowercase once instead of per check)
            let lower = text.to_lowercase();
            if text.contains('?')
                || lower.contains("what")
                || lower.contains("how")
                || lower.contains("when")
                || lower.contains("where")
                || lower.contains("why")
            {
                patterns.question_patterns += 1;
            }

            // Identify domain-specific patterns
            if lower.contains("sparql") || lower.contains("query") {
                patterns.sparql_requests += 1;
            }

            if lower.contains("graph") || lower.contains("triple") {
                patterns.graph_operations += 1;
            }

            // Track temporal patterns: bucket activity by hour of day (UTC)
            let seconds_into_day = message.timestamp.timestamp() % 86400;
            let hour = (seconds_into_day / 3600) as usize;
            if hour < 24 {
                patterns.hourly_activity[hour] += 1;
            }
        }

        // Calculate trends and confidence scores
        patterns.calculate_confidence();

        Ok(patterns)
    }

    /// Generate predictive cache keys based on identified patterns
    async fn generate_predictive_cache_keys(
        &self,
        patterns: &ConversationPatterns,
    ) -> Result<Vec<CachePrediction>> {
        let mut predictions = Vec::new();

        // Generate predictions based on frequent keywords
        for (keyword, frequency) in &patterns.keyword_frequency {
            if *frequency >= 3 {
                // Threshold for prediction
                // Predict similar queries
                predictions.push(CachePrediction {
                    key: format!("similar_to_{keyword}"),
                    cache_type: PredictiveCacheType::Query,
                    confidence: (*frequency as f64 / patterns.total_messages as f64).min(1.0),
                    context: format!("Predicted query related to: {keyword}"),
                });

                // Predict related embeddings
                predictions.push(CachePrediction {
                    key: format!("embedding_{keyword}"),
                    cache_type: PredictiveCacheType::Embedding,
                    confidence: (*frequency as f64 / patterns.total_messages as f64).min(1.0),
                    context: keyword.clone(),
                });
            }
        }

        // Generate predictions based on query patterns
        if patterns.question_patterns > 0 {
            predictions.push(CachePrediction {
                key: "common_question_contexts".to_string(),
                cache_type: PredictiveCacheType::Context,
                confidence: patterns.question_confidence,
                context: "Frequently asked question context".to_string(),
            });
        }

        // Generate SPARQL-related predictions
        if patterns.sparql_requests > 0 {
            predictions.push(CachePrediction {
                key: "sparql_template_context".to_string(),
                cache_type: PredictiveCacheType::Context,
                confidence: patterns.sparql_confidence,
                context: "SPARQL query context".to_string(),
            });
        }

        // Sort by confidence and take top predictions
        // Use unwrap_or to handle NaN values gracefully (treat as less than)
        predictions.sort_by(|a, b| {
            b.confidence
                .partial_cmp(&a.confidence)
                .unwrap_or(std::cmp::Ordering::Less)
        });
        predictions.truncate(20); // Limit to top 20 predictions

        Ok(predictions)
    }

    /// Warm response cache with predicted responses
    async fn warm_response_cache(&self, key: &str, context: &str) -> Result<()> {
        // Create a mock response for caching
        let mock_response = LLMResponse {
            content: format!("Cached response for: {context}"),
            model_used: "cache-warmer".to_string(),
            provider_used: "internal".to_string(),
            usage: crate::llm::Usage {
                prompt_tokens: 0,
                completion_tokens: 0,
                total_tokens: 0,
                cost: 0.0,
            },
            latency: Duration::from_millis(1),
            quality_score: Some(0.9),
            metadata: HashMap::new(),
        };

        self.cache_manager
            .cache_response(key.to_string(), &mock_response, 0.9)
            .await?;
        Ok(())
    }

    /// Warm context cache with predicted contexts
    async fn warm_context_cache(&self, key: &str, _context: &str) -> Result<()> {
        // Create a mock context for caching
        let mock_context = AssembledContext {
            retrieved_triples: None,
            semantic_results: Vec::new(),
            graph_results: Vec::new(),
            quantum_results: None,
            consciousness_insights: None,
            extracted_entities: Vec::new(),
            extracted_relationships: Vec::new(),
            query_constraints: Vec::new(),
            reasoning_results: None,
            extracted_knowledge: None,
            context_score: 0.8,
            assembly_time: Duration::from_millis(100),
        };

        self.cache_manager
            .cache_context(key.to_string(), &mock_context)
            .await?;
        Ok(())
    }

    /// Warm embedding cache with predicted embeddings
    async fn warm_embedding_cache(&self, key: &str) -> Result<()> {
        // Create a mock embedding for caching
        let mock_embedding = vec![0.1f32; 384]; // Placeholder; 384 dims (MiniLM-sized)

        self.cache_manager
            .cache_embedding(key.to_string(), mock_embedding)
            .await?;
        Ok(())
    }

    /// Warm query cache with predicted query results
    async fn warm_query_cache(&self, key: &str, context: &str) -> Result<()> {
        // Create mock SPARQL query results for caching
        let mock_bindings = vec![HashMap::new()]; // Empty binding set

        self.cache_manager
            .cache_query_result(
                key.to_string(),
                format!("SELECT * WHERE {{ # Predicted query for: {context} }}"),
                mock_bindings,
                100, // Mock execution time
            )
            .await?;
        Ok(())
    }
}
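
// Illustrative unit tests, added as a sketch of expected behavior; the byte
// sizes and tier budgets below are hypothetical values chosen only to
// exercise the eviction and statistics logic above.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn cache_key_is_deterministic() {
        let a = AdvancedCacheManager::generate_cache_key("query", Some("ctx"));
        let b = AdvancedCacheManager::generate_cache_key("query", Some("ctx"));
        assert_eq!(a, b);
        // Dropping the context must change the key.
        assert_ne!(a, AdvancedCacheManager::generate_cache_key("query", None));
    }

    #[test]
    fn lru_tier_evicts_least_recently_used() {
        let config = CacheTierConfig {
            max_size: 1, // ensure_space treats this as a 1 KiB byte budget
            ttl: Duration::from_secs(60),
            eviction_policy: EvictionPolicy::LRU,
            compression_threshold: 0,
        };
        let mut tier: CacheTier<u32> = CacheTier::new(config);
        tier.put("a".to_string(), 1, 600).unwrap();
        tier.put("b".to_string(), 2, 600).unwrap(); // exceeds budget, evicts "a"
        assert!(tier.get("a").is_none());
        assert_eq!(tier.get("b"), Some(2));
    }

    #[test]
    fn hit_rate_is_zero_without_requests() {
        let stats = CacheStats::default();
        assert_eq!(stats.hit_rate(), 0.0);
        assert_eq!(stats.miss_rate(), 1.0);
    }
}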