codeprism_mcp/context/cache.rs

//! Analysis result caching system
//!
//! Provides intelligent caching of expensive analysis operations to reduce
//! redundant computations and improve performance.
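//!
//! A minimal usage sketch (illustrative only; the tool name and parameters
//! below are assumptions, not values defined by this module):
//!
//! ```rust,ignore
//! let cache = AnalysisCache::new();
//! let params = serde_json::json!({"symbol_id": "abc123"});
//! cache.put("analyze_complexity", &params, None, serde_json::json!({"score": 3}))?;
//! assert!(cache.get("analyze_complexity", &params, None)?.is_some());
//! ```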

use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use std::sync::{Arc, RwLock};
use std::time::{Duration, SystemTime, UNIX_EPOCH};

/// Unique identifier for cached analysis results
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct CacheKey {
    /// Tool name that generated the result
    pub tool_name: String,
    /// Parameters used (hashed for consistency)
    pub parameters_hash: u64,
    /// Target identifier (e.g., symbol_id, file_path)
    pub target: Option<String>,
}

impl CacheKey {
    /// Create a new cache key
    pub fn new(tool_name: String, parameters: &serde_json::Value, target: Option<String>) -> Self {
        let mut hasher = std::collections::hash_map::DefaultHasher::new();
        parameters.to_string().hash(&mut hasher);
        let parameters_hash = hasher.finish();

        Self {
            tool_name,
            parameters_hash,
            target,
        }
    }
}

/// Cached analysis result
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheEntry {
    /// The cached result
    pub result: serde_json::Value,
    /// When the result was cached
    pub cached_at: u64,
    /// How long the result is valid (in seconds)
    pub ttl_seconds: u64,
    /// Access count for LRU eviction
    pub access_count: u64,
    /// Last access time
    pub last_accessed: u64,
    /// Size estimate in bytes
    pub size_bytes: usize,
}

impl CacheEntry {
    /// Create a new cache entry
    pub fn new(result: serde_json::Value, ttl_seconds: u64) -> Self {
        let now = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or(Duration::from_secs(0))
            .as_secs();

        let size_bytes = result.to_string().len();

        Self {
            result,
            cached_at: now,
            ttl_seconds,
            access_count: 0,
            last_accessed: now,
            size_bytes,
        }
    }

    /// Check if the cache entry is expired
    pub fn is_expired(&self) -> bool {
        let now = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or(Duration::from_secs(0))
            .as_secs();

        // saturating_sub avoids an underflow panic if the clock moves backwards
        now.saturating_sub(self.cached_at) > self.ttl_seconds
    }

    /// Record an access to this entry
    pub fn record_access(&mut self) {
        self.access_count += 1;
        self.last_accessed = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap_or(Duration::from_secs(0))
            .as_secs();
    }

    /// Get LRU score (lower is more likely to be evicted)
    pub fn lru_score(&self) -> u64 {
        // Combine access count and recency
        self.access_count + (self.last_accessed / 3600) // Favor recent access
    }
}

/// Cache statistics
#[derive(Debug, Clone, Default, Serialize)]
pub struct CacheStats {
    /// Total number of cache hits
    pub hits: u64,
    /// Total number of cache misses
    pub misses: u64,
    /// Number of cached entries
    pub entry_count: usize,
    /// Total memory usage in bytes
    pub memory_usage_bytes: usize,
    /// Cache hit rate (0.0 to 1.0)
    pub hit_rate: f64,
}

impl CacheStats {
    /// Calculate hit rate
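    ///
    /// For example, 3 hits and 1 miss give a hit rate of 3 / 4 = 0.75; with no
    /// recorded requests the rate stays at 0.0.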
    pub fn calculate_hit_rate(&mut self) {
        let total = self.hits + self.misses;
        self.hit_rate = if total > 0 {
            self.hits as f64 / total as f64
        } else {
            0.0
        };
    }
}

/// Configuration for cache behavior
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// Maximum number of entries
    pub max_entries: usize,
    /// Maximum memory usage in bytes
    pub max_memory_bytes: usize,
    /// Default TTL for cached results
    pub default_ttl_seconds: u64,
    /// TTL settings for specific tools
    pub tool_ttl_overrides: HashMap<String, u64>,
}

impl Default for CacheConfig {
    fn default() -> Self {
        let mut tool_ttl_overrides = HashMap::new();

        // Long TTL for expensive operations
        tool_ttl_overrides.insert("trace_inheritance".to_string(), 3600); // 1 hour
        tool_ttl_overrides.insert("analyze_decorators".to_string(), 3600); // 1 hour
        tool_ttl_overrides.insert("analyze_complexity".to_string(), 1800); // 30 minutes
        tool_ttl_overrides.insert("analyze_security".to_string(), 1800); // 30 minutes

        // Medium TTL for moderately expensive operations
        tool_ttl_overrides.insert("find_dependencies".to_string(), 900); // 15 minutes
        tool_ttl_overrides.insert("find_references".to_string(), 900); // 15 minutes
        tool_ttl_overrides.insert("detect_patterns".to_string(), 600); // 10 minutes

        // Short TTL for fast-changing results
        tool_ttl_overrides.insert("search_symbols".to_string(), 300); // 5 minutes
        tool_ttl_overrides.insert("search_content".to_string(), 300); // 5 minutes

        Self {
            max_entries: 1000,
            max_memory_bytes: 50 * 1024 * 1024, // 50MB
            default_ttl_seconds: 600,           // 10 minutes
            tool_ttl_overrides,
        }
    }
}

impl CacheConfig {
    /// Get TTL for a specific tool
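    ///
    /// A small sketch of the lookup behaviour (values taken from the default
    /// configuration above):
    ///
    /// ```rust,ignore
    /// let config = CacheConfig::default();
    /// assert_eq!(config.get_ttl_for_tool("trace_inheritance"), 3600); // override
    /// assert_eq!(config.get_ttl_for_tool("unknown_tool"), 600);       // default TTL
    /// ```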
    pub fn get_ttl_for_tool(&self, tool_name: &str) -> u64 {
        self.tool_ttl_overrides
            .get(tool_name)
            .copied()
            .unwrap_or(self.default_ttl_seconds)
    }
}

/// Analysis result cache with LRU eviction and TTL expiration
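///
/// A minimal put/get round trip (illustrative sketch; the tool name, parameters,
/// and target are assumptions, not part of this API):
///
/// ```rust,ignore
/// let cache = AnalysisCache::new();
/// let params = serde_json::json!({"file": "src/main.rs"});
/// cache.put("find_references", &params, Some("src/main.rs"), serde_json::json!([]))?;
/// assert!(cache.get("find_references", &params, Some("src/main.rs"))?.is_some());
/// let stats = cache.get_stats()?;
/// assert_eq!(stats.hits, 1);
/// ```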
#[derive(Debug)]
pub struct AnalysisCache {
    /// Cached entries
    cache: Arc<RwLock<HashMap<CacheKey, CacheEntry>>>,
    /// Cache configuration
    config: CacheConfig,
    /// Cache statistics
    stats: Arc<RwLock<CacheStats>>,
}

impl AnalysisCache {
    /// Create a new analysis cache
    pub fn new() -> Self {
        Self::with_config(CacheConfig::default())
    }

    /// Create a new analysis cache with custom configuration
    pub fn with_config(config: CacheConfig) -> Self {
        Self {
            cache: Arc::new(RwLock::new(HashMap::new())),
            config,
            stats: Arc::new(RwLock::new(CacheStats::default())),
        }
    }

    /// Get a cached result
    pub fn get(
        &self,
        tool_name: &str,
        parameters: &serde_json::Value,
        target: Option<&str>,
    ) -> Result<Option<serde_json::Value>> {
        let key = CacheKey::new(
            tool_name.to_string(),
            parameters,
            target.map(|s| s.to_string()),
        );

        let mut cache = self
            .cache
            .write()
            .map_err(|_| anyhow::anyhow!("Failed to acquire write lock on cache"))?;

        if let Some(entry) = cache.get_mut(&key) {
            if entry.is_expired() {
                // Remove expired entry
                cache.remove(&key);
                self.record_miss()?;
                return Ok(None);
            }

            // Record access and return result
            entry.record_access();
            self.record_hit()?;
            return Ok(Some(entry.result.clone()));
        }

        self.record_miss()?;
        Ok(None)
    }

    /// Store a result in the cache
    pub fn put(
        &self,
        tool_name: &str,
        parameters: &serde_json::Value,
        target: Option<&str>,
        result: serde_json::Value,
    ) -> Result<()> {
        let key = CacheKey::new(
            tool_name.to_string(),
            parameters,
            target.map(|s| s.to_string()),
        );
        let ttl = self.config.get_ttl_for_tool(tool_name);
        let entry = CacheEntry::new(result, ttl);

        let mut cache = self
            .cache
            .write()
            .map_err(|_| anyhow::anyhow!("Failed to acquire write lock on cache"))?;

        // Check if we need to evict entries
        self.maybe_evict_entries(&mut cache)?;

        cache.insert(key, entry);
        self.update_memory_stats(&cache)?;

        Ok(())
    }

    /// Clear expired entries
    pub fn cleanup_expired(&self) -> Result<usize> {
        let mut cache = self
            .cache
            .write()
            .map_err(|_| anyhow::anyhow!("Failed to acquire write lock on cache"))?;

        let initial_count = cache.len();
        cache.retain(|_, entry| !entry.is_expired());

        let removed_count = initial_count - cache.len();
        self.update_memory_stats(&cache)?;

        Ok(removed_count)
    }

    /// Clear all cached entries
    pub fn clear(&self) -> Result<()> {
        let mut cache = self
            .cache
            .write()
            .map_err(|_| anyhow::anyhow!("Failed to acquire write lock on cache"))?;

        cache.clear();

        let mut stats = self
            .stats
            .write()
            .map_err(|_| anyhow::anyhow!("Failed to acquire write lock on stats"))?;

        stats.entry_count = 0;
        stats.memory_usage_bytes = 0;

        Ok(())
    }

    /// Get cache statistics
    pub fn get_stats(&self) -> Result<CacheStats> {
        let stats = self
            .stats
            .read()
            .map_err(|_| anyhow::anyhow!("Failed to acquire read lock on stats"))?;

        Ok(stats.clone())
    }

    /// Check if caching is beneficial for a tool
    pub fn should_cache(&self, tool_name: &str) -> bool {
        // Only cache expensive analysis operations. Fast lookups such as
        // repository_stats, content_stats, find_files, trace_path, and
        // explain_symbol are cheap enough to recompute on every call.
        matches!(
            tool_name,
            "trace_inheritance"
                | "analyze_decorators"
                | "analyze_complexity"
                | "detect_patterns"
                | "find_dependencies"
                | "find_references"
                | "analyze_transitive_dependencies"
        )
    }

    /// Record a cache hit
    fn record_hit(&self) -> Result<()> {
        let mut stats = self
            .stats
            .write()
            .map_err(|_| anyhow::anyhow!("Failed to acquire write lock on stats"))?;

        stats.hits += 1;
        stats.calculate_hit_rate();

        Ok(())
    }

    /// Record a cache miss
    fn record_miss(&self) -> Result<()> {
        let mut stats = self
            .stats
            .write()
            .map_err(|_| anyhow::anyhow!("Failed to acquire write lock on stats"))?;

        stats.misses += 1;
        stats.calculate_hit_rate();

        Ok(())
    }

    /// Update memory usage statistics
    fn update_memory_stats(&self, cache: &HashMap<CacheKey, CacheEntry>) -> Result<()> {
        let mut stats = self
            .stats
            .write()
            .map_err(|_| anyhow::anyhow!("Failed to acquire write lock on stats"))?;

        stats.entry_count = cache.len();
        stats.memory_usage_bytes = cache.values().map(|entry| entry.size_bytes).sum();

        Ok(())
    }

    /// Evict entries if necessary
    fn maybe_evict_entries(&self, cache: &mut HashMap<CacheKey, CacheEntry>) -> Result<()> {
        // Check entry count limit
        if cache.len() >= self.config.max_entries {
            self.evict_lru_entries(cache, cache.len() - self.config.max_entries + 1)?;
        }

        // Check memory limit
        let memory_usage: usize = cache.values().map(|entry| entry.size_bytes).sum();
        if memory_usage > self.config.max_memory_bytes {
            // Evict until we're under the limit
            let target_reduction = memory_usage - self.config.max_memory_bytes;
            self.evict_by_memory(cache, target_reduction)?;
        }

        Ok(())
    }

    /// Evict LRU entries
    fn evict_lru_entries(
        &self,
        cache: &mut HashMap<CacheKey, CacheEntry>,
        count: usize,
    ) -> Result<()> {
        // Collect entries with LRU scores
        let mut entries: Vec<_> = cache
            .iter()
            .map(|(key, entry)| (key.clone(), entry.lru_score()))
            .collect();

        // Sort by LRU score (ascending - lowest scores first)
        entries.sort_by_key(|(_, score)| *score);

        // Remove the least recently used entries
        for (key, _) in entries.into_iter().take(count) {
            cache.remove(&key);
        }

        Ok(())
    }

    /// Evict entries to reduce memory usage
    fn evict_by_memory(
        &self,
        cache: &mut HashMap<CacheKey, CacheEntry>,
        target_reduction: usize,
    ) -> Result<()> {
        // Collect entries sorted by LRU score
        let mut entries: Vec<_> = cache
            .iter()
            .map(|(key, entry)| (key.clone(), entry.lru_score(), entry.size_bytes))
            .collect();

        // Sort by LRU score (ascending)
        entries.sort_by_key(|(_, score, _)| *score);

        // Remove entries until we've freed enough memory
        let mut freed_bytes = 0;
        for (key, _, size) in entries {
            if freed_bytes >= target_reduction {
                break;
            }

            cache.remove(&key);
            freed_bytes += size;
        }

        Ok(())
    }
}

impl Default for AnalysisCache {
    fn default() -> Self {
        Self::new()
    }
}

/// Advanced cache statistics for optimization
#[derive(Debug, Clone, Serialize)]
pub struct AdvancedCacheStats {
    /// Total number of cached entries
    pub total_entries: usize,
    /// Current memory usage
    pub memory_usage_bytes: usize,
    /// Memory efficiency (entries per byte)
    pub memory_efficiency: f64,
    /// Per-tool performance statistics
    pub tool_performance: HashMap<String, ToolCacheStats>,
    /// Cache fragmentation ratio
    pub fragmentation_ratio: f64,
    /// Whether cleanup is recommended
    pub recommended_cleanup: bool,
}

/// Cache statistics per tool
#[derive(Debug, Clone, Serialize)]
pub struct ToolCacheStats {
    /// Cache hits for this tool
    pub hits: u64,
    /// Cache misses for this tool
    pub misses: u64,
    /// Total cached entries for this tool
    pub total_size: usize,
    /// Average TTL for this tool
    pub average_ttl: Duration,
}

impl AnalysisCache {
    /// Pre-populate the cache with placeholder entries for common workflow patterns
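    ///
    /// Sketch of a call site (only the pattern names matched below are handled;
    /// anything else is silently ignored):
    ///
    /// ```rust,ignore
    /// let cache = AnalysisCache::new();
    /// cache.warm_cache(&["repository_overview".to_string(), "security_analysis".to_string()])?;
    /// ```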
    pub fn warm_cache(&self, workflow_patterns: &[String]) -> Result<()> {
        let mut cache = self
            .cache
            .write()
            .map_err(|_| anyhow::anyhow!("Failed to acquire write lock on cache"))?;

        // Pre-populate cache with results for common workflow patterns
        for pattern in workflow_patterns {
            match pattern.as_str() {
                "repository_overview" => {
                    let key =
                        CacheKey::new("repository_stats".to_string(), &serde_json::json!({}), None);
                    let entry = CacheEntry::new(
                        serde_json::json!({"status": "warmed", "pattern": "repository_overview"}),
                        self.config.get_ttl_for_tool("repository_stats"),
                    );
                    cache.insert(key, entry);
                }
                "security_analysis" => {
                    let key =
                        CacheKey::new("analyze_security".to_string(), &serde_json::json!({}), None);
                    let entry = CacheEntry::new(
                        serde_json::json!({"status": "warmed", "pattern": "security_analysis"}),
                        self.config.get_ttl_for_tool("analyze_security"),
                    );
                    cache.insert(key, entry);
                }
                "complexity_analysis" => {
                    let key = CacheKey::new(
                        "analyze_complexity".to_string(),
                        &serde_json::json!({}),
                        None,
                    );
                    let entry = CacheEntry::new(
                        serde_json::json!({"status": "warmed", "pattern": "complexity_analysis"}),
                        self.config.get_ttl_for_tool("analyze_complexity"),
                    );
                    cache.insert(key, entry);
                }
                _ => {} // Ignore unknown patterns
            }
        }

        self.update_memory_stats(&cache)?;
        Ok(())
    }

    /// Invalidate cache entries based on code changes
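    ///
    /// Illustrative sketch: invalidation removes every entry whose tool name or
    /// target contains the given fragment (the path below is an assumed example):
    ///
    /// ```rust,ignore
    /// let removed = cache.invalidate_by_pattern("src/auth.rs")?;
    /// println!("invalidated {removed} entries");
    /// ```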
    pub fn invalidate_by_pattern(&self, pattern: &str) -> Result<usize> {
        let mut cache = self
            .cache
            .write()
            .map_err(|_| anyhow::anyhow!("Failed to acquire write lock on cache"))?;

        let mut invalidated = 0;
        let mut keys_to_remove = Vec::new();

        for (key, _) in cache.iter() {
            // Simple substring matching; a real implementation would be more sophisticated
            if key.tool_name.contains(pattern)
                || key.target.as_ref().is_some_and(|t| t.contains(pattern))
            {
                keys_to_remove.push(key.clone());
                invalidated += 1;
            }
        }

        for key in keys_to_remove {
            cache.remove(&key);
        }

        self.update_memory_stats(&cache)?;
        Ok(invalidated)
    }

    /// Get advanced cache statistics for optimization
    pub fn get_advanced_stats(&self) -> Result<AdvancedCacheStats> {
        let cache = self
            .cache
            .read()
            .map_err(|_| anyhow::anyhow!("Failed to acquire read lock on cache"))?;

        let total_entries = cache.len();
        let memory_usage_bytes: usize = cache.values().map(|entry| entry.size_bytes).sum();

        // Count cached entries per tool; per-tool hit/miss and TTL tracking are
        // not implemented yet, so those fields stay at their default values
        let mut tool_stats = HashMap::new();
        for key in cache.keys() {
            let stats = tool_stats
                .entry(key.tool_name.clone())
                .or_insert(ToolCacheStats {
                    hits: 0,
                    misses: 0,
                    total_size: 0,
                    average_ttl: Duration::from_secs(0),
                });
            stats.total_size += 1;
        }

        Ok(AdvancedCacheStats {
            total_entries,
            memory_usage_bytes,
            memory_efficiency: if memory_usage_bytes > 0 {
                total_entries as f64 / memory_usage_bytes as f64
            } else {
                0.0
            },
            tool_performance: tool_stats,
            fragmentation_ratio: 0.1, // Placeholder - would calculate actual fragmentation
            recommended_cleanup: memory_usage_bytes > self.config.max_memory_bytes / 2,
        })
    }

    /// Persist cache to storage for recovery
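    ///
    /// Persist/restore round trip (sketch; the file path is an assumption):
    ///
    /// ```rust,ignore
    /// cache.persist_to_storage("/tmp/codeprism_cache.json")?;
    /// // ... later, e.g. after a restart ...
    /// cache.restore_from_storage("/tmp/codeprism_cache.json")?;
    /// ```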
    pub fn persist_to_storage(&self, file_path: &str) -> Result<()> {
        let cache = self
            .cache
            .read()
            .map_err(|_| anyhow::anyhow!("Failed to acquire read lock on cache"))?;

        // JSON object keys must be strings, so serialize the map as a list of
        // (key, entry) pairs rather than serializing the HashMap directly
        let entries: Vec<(&CacheKey, &CacheEntry)> = cache.iter().collect();
        let serialized = serde_json::to_string_pretty(&entries)?;
        std::fs::write(file_path, serialized)?;
        Ok(())
    }

    /// Restore cache from persistent storage
    pub fn restore_from_storage(&self, file_path: &str) -> Result<()> {
        if std::path::Path::new(file_path).exists() {
            let content = std::fs::read_to_string(file_path)?;
            // Stored as a list of (key, entry) pairs because JSON keys must be strings
            let entries: Vec<(CacheKey, CacheEntry)> = serde_json::from_str(&content)?;

            let mut cache = self
                .cache
                .write()
                .map_err(|_| anyhow::anyhow!("Failed to acquire write lock on cache"))?;

            // Merge with persisted entries; entries already in memory take precedence
            for (key, entry) in entries {
                cache.entry(key).or_insert(entry);
            }

            self.update_memory_stats(&cache)?;
        }
        Ok(())
    }

    /// Optimize the cache by removing expired entries and enforcing size limits
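    ///
    /// Sketch of a periodic maintenance call (assumed call site):
    ///
    /// ```rust,ignore
    /// let report = cache.optimize_cache()?;
    /// println!(
    ///     "removed {} entries, freed {} bytes",
    ///     report.entries_removed, report.memory_freed
    /// );
    /// ```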
    pub fn optimize_cache(&self) -> Result<CacheOptimizationResult> {
        let mut cache = self
            .cache
            .write()
            .map_err(|_| anyhow::anyhow!("Failed to acquire write lock on cache"))?;

        let initial_count = cache.len();
        let initial_memory: usize = cache.values().map(|e| e.size_bytes).sum();

        // Remove expired entries
        cache.retain(|_, entry| !entry.is_expired());

        // Apply LRU eviction if over limits
        self.maybe_evict_entries(&mut cache)?;

        let final_count = cache.len();
        let final_memory: usize = cache.values().map(|e| e.size_bytes).sum();

        self.update_memory_stats(&cache)?;

        Ok(CacheOptimizationResult {
            entries_before: initial_count,
            entries_after: final_count,
            entries_removed: initial_count - final_count,
            memory_before: initial_memory,
            memory_after: final_memory,
            memory_freed: initial_memory.saturating_sub(final_memory),
        })
    }
}

/// Result of cache optimization operation
#[derive(Debug, Clone, Serialize)]
pub struct CacheOptimizationResult {
    /// Number of entries before optimization
    pub entries_before: usize,
    /// Number of entries after optimization
    pub entries_after: usize,
    /// Number of entries removed
    pub entries_removed: usize,
    /// Memory usage before optimization
    pub memory_before: usize,
    /// Memory usage after optimization
    pub memory_after: usize,
    /// Amount of memory freed
    pub memory_freed: usize,
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_cache_key_creation() {
        let params = serde_json::json!({"symbol_id": "test123"});
        let key1 = CacheKey::new(
            "explain_symbol".to_string(),
            &params,
            Some("target".to_string()),
        );
        let key2 = CacheKey::new(
            "explain_symbol".to_string(),
            &params,
            Some("target".to_string()),
        );

        assert_eq!(key1, key2);
        assert_eq!(key1.tool_name, "explain_symbol");
        assert_eq!(key1.target, Some("target".to_string()));
    }

    #[test]
    fn test_cache_entry() {
        let result = serde_json::json!({"data": "test"});
        let mut entry = CacheEntry::new(result.clone(), 600);

        assert!(!entry.is_expired());
        assert_eq!(entry.access_count, 0);

        entry.record_access();
        assert_eq!(entry.access_count, 1);
    }

    #[test]
    fn test_cache_operations() {
        let cache = AnalysisCache::new();
        let params = serde_json::json!({"test": "value"});
        let result = serde_json::json!({"result": "data"});

        // Test miss
        let cached = cache.get("test_tool", &params, None).unwrap();
        assert!(cached.is_none());

        // Test put and hit
        cache
            .put("test_tool", &params, None, result.clone())
            .unwrap();
        let cached = cache.get("test_tool", &params, None).unwrap();
        assert_eq!(cached, Some(result));

        // Check stats
        let stats = cache.get_stats().unwrap();
        assert_eq!(stats.hits, 1);
        assert_eq!(stats.misses, 1);
        assert_eq!(stats.hit_rate, 0.5);
    }

    #[test]
    fn test_cache_config() {
        let config = CacheConfig::default();

        // Test tool-specific TTL
        assert_eq!(config.get_ttl_for_tool("trace_inheritance"), 3600);
        assert_eq!(
            config.get_ttl_for_tool("unknown_tool"),
            config.default_ttl_seconds
        );
    }

    #[test]
    fn test_cache_cleanup() {
        let cache = AnalysisCache::new();
        let params = serde_json::json!({"test": "value"});
        let result = serde_json::json!({"result": "data"});

        // Add an entry
        cache.put("test_tool", &params, None, result).unwrap();

        // Clear cache
        cache.clear().unwrap();

        // Verify it's empty
        let cached = cache.get("test_tool", &params, None).unwrap();
        assert!(cached.is_none());
    }
}