context_creator/core/semantic/
cache.rs

1//! Modern async cache implementation using moka and parser pools
2//! Provides bounded memory usage and timeout protection
3
4use crate::core::semantic::parser_pool::ParserPoolManager;
5use crate::utils::error::ContextCreatorError;
6use moka::future::Cache;
7use std::collections::hash_map::DefaultHasher;
8use std::hash::{Hash, Hasher};
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11use std::time::Duration;
12use tokio::time::timeout;
13use tree_sitter::Tree;
14
/// Identity of one cached AST.
///
/// Two keys are equal only when the path, the content fingerprint and the
/// language all match, so the same file parsed with different content (or as
/// a different language) occupies a separate cache slot.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct CacheKey {
    /// Path exactly as supplied by the caller (deliberately not canonicalized,
    /// to avoid panics)
    path: PathBuf,
    /// 64-bit fingerprint of the file content, used to validate the entry
    content_hash: u64,
    /// Language the content is parsed as
    language: String,
}
25
26impl CacheKey {
27    fn new(path: &Path, content: &str, language: &str) -> Self {
28        let mut hasher = DefaultHasher::new();
29        content.hash(&mut hasher);
30        let content_hash = hasher.finish();
31
32        Self {
33            path: path.to_path_buf(),
34            content_hash,
35            language: language.to_string(),
36        }
37    }
38}
39
40/// Cached AST entry
41#[derive(Clone)]
42struct CacheEntry {
43    /// Parsed syntax tree (wrapped in Arc for cheap cloning)
44    tree: Arc<Tree>,
45    /// Source content (wrapped in Arc for cheap cloning)
46    content: Arc<String>,
47}
48
49/// Modern async AST cache with bounded memory and timeout protection
50#[derive(Clone)]
51pub struct AstCacheV2 {
52    /// Moka cache with automatic eviction
53    cache: Cache<CacheKey, CacheEntry>,
54    /// Parser pool manager
55    parser_pool: Arc<ParserPoolManager>,
56    /// Parsing timeout duration
57    parse_timeout: Duration,
58}
59
60impl AstCacheV2 {
61    /// Create a new AST cache with the specified capacity
62    pub fn new(capacity: u64) -> Self {
63        let cache = Cache::builder()
64            .max_capacity(capacity)
65            .time_to_live(Duration::from_secs(3600)) // 1 hour TTL
66            .build();
67
68        Self {
69            cache,
70            parser_pool: Arc::new(ParserPoolManager::new()),
71            parse_timeout: Duration::from_secs(30), // 30 second timeout
72        }
73    }
74
75    /// Create a new AST cache with custom configuration
76    pub fn with_config(capacity: u64, ttl: Duration, parse_timeout: Duration) -> Self {
77        let cache = Cache::builder()
78            .max_capacity(capacity)
79            .time_to_live(ttl)
80            .build();
81
82        Self {
83            cache,
84            parser_pool: Arc::new(ParserPoolManager::new()),
85            parse_timeout,
86        }
87    }
88
89    /// Get or parse an AST for the given file
90    pub async fn get_or_parse(
91        &self,
92        path: &Path,
93        content: &str,
94        language: &str,
95    ) -> Result<Arc<Tree>, ContextCreatorError> {
96        let key = CacheKey::new(path, content, language);
97
98        // Clone for the async block
99        let parser_pool = self.parser_pool.clone();
100        let content_clone = content.to_string();
101        let language_clone = language.to_string();
102        let path_clone = path.to_path_buf();
103        let timeout_duration = self.parse_timeout;
104
105        // Use try_get_with for fallible operations
106        let entry = self
107            .cache
108            .try_get_with(key, async move {
109                // Parse with timeout protection
110                let parse_result = timeout(timeout_duration, async {
111                    let mut parser = parser_pool.get_parser(&language_clone).await?;
112
113                    let tree = parser.parse(&content_clone, None).ok_or_else(|| {
114                        ContextCreatorError::ParseError(format!(
115                            "Failed to parse {} file: {}",
116                            language_clone,
117                            path_clone.display()
118                        ))
119                    })?;
120
121                    Ok::<Tree, ContextCreatorError>(tree)
122                })
123                .await;
124
125                match parse_result {
126                    Ok(Ok(tree)) => Ok(CacheEntry {
127                        tree: Arc::new(tree),
128                        content: Arc::new(content_clone),
129                    }),
130                    Ok(Err(e)) => Err(e),
131                    Err(_) => Err(ContextCreatorError::ParseError(format!(
132                        "Parsing timed out after {:?} for file: {}",
133                        timeout_duration,
134                        path_clone.display()
135                    ))),
136                }
137            })
138            .await
139            .map_err(|e| {
140                ContextCreatorError::ParseError(format!("Failed to cache parse result: {e}"))
141            })?;
142
143        Ok(entry.tree.clone())
144    }
145
146    /// Get cached content for a file if available
147    pub async fn get_content(
148        &self,
149        path: &Path,
150        content_hash: &str,
151        language: &str,
152    ) -> Option<Arc<String>> {
153        // Create a temporary key to check cache
154        let mut hasher = DefaultHasher::new();
155        content_hash.hash(&mut hasher);
156        let hash = hasher.finish();
157
158        let key = CacheKey {
159            path: path.to_path_buf(),
160            content_hash: hash,
161            language: language.to_string(),
162        };
163
164        self.cache
165            .get(&key)
166            .await
167            .map(|entry| entry.content.clone())
168    }
169
170    /// Clear the cache
171    pub async fn clear(&self) {
172        self.cache.invalidate_all();
173    }
174
175    /// Get current cache size
176    pub fn len(&self) -> u64 {
177        self.cache.entry_count()
178    }
179
180    /// Check if cache is empty
181    pub fn is_empty(&self) -> bool {
182        self.cache.entry_count() == 0
183    }
184
185    /// Get cache statistics
186    pub fn stats(&self) -> CacheStats {
187        CacheStats {
188            hits: 0, // Moka doesn't expose stats in the same way
189            misses: 0,
190            evictions: 0,
191            entry_count: self.cache.entry_count(),
192        }
193    }
194}
195
/// Snapshot of cache counters as returned by `AstCacheV2::stats`.
#[derive(Clone, Debug)]
pub struct CacheStats {
    /// Number of lookups served from the cache
    pub hits: u64,
    /// Number of lookups that were not served from the cache
    pub misses: u64,
    /// Number of entries evicted from the cache
    pub evictions: u64,
    /// Number of entries held when the snapshot was taken
    pub entry_count: u64,
}
204
#[cfg(test)]
mod tests {
    use super::*;

    /// Lifecycle: empty on creation, parse, re-use, then clear.
    #[tokio::test]
    async fn test_cache_operations() {
        let cache = AstCacheV2::new(10);

        assert_eq!(cache.len(), 0);
        assert!(cache.is_empty());

        // Parse and cache a file
        let path = Path::new("test.rs");
        let content = "fn main() {}";
        let before_clear = cache
            .get_or_parse(path, content, "rust")
            .await
            .expect("parsing valid Rust should succeed");

        // The entry count is updated lazily, so check retrieval rather than len()
        let result2 = cache.get_or_parse(path, content, "rust").await;
        assert!(result2.is_ok());

        // Clear cache
        cache.clear().await;

        // An invalidated entry must never be served again: parsing the same
        // input after clear() has to yield a freshly built tree.
        let after_clear = cache
            .get_or_parse(path, content, "rust")
            .await
            .expect("re-parsing after clear should succeed");
        assert!(!Arc::ptr_eq(&before_clear, &after_clear));
    }

    /// Identical input must be served from the cache on the second call.
    #[tokio::test]
    async fn test_cache_hit() {
        let cache = AstCacheV2::new(10);

        let path = Path::new("test.py");
        let content = "def test(): pass";

        // First call - cache miss
        let result1 = cache.get_or_parse(path, content, "python").await;
        assert!(result1.is_ok());

        // Second call - cache hit (same content)
        let result2 = cache.get_or_parse(path, content, "python").await;
        assert!(result2.is_ok());

        // Trees should be the same (Arc comparison)
        assert!(Arc::ptr_eq(&result1.unwrap(), &result2.unwrap()));
    }

    /// Changed content under the same path must produce a separate entry.
    #[tokio::test]
    async fn test_cache_invalidation_on_content_change() {
        let cache = AstCacheV2::new(10);

        let path = Path::new("test.js");
        let content1 = "function test() {}";
        let content2 = "function test2() {}";

        // Parse with first content
        let result1 = cache.get_or_parse(path, content1, "javascript").await;
        assert!(result1.is_ok());

        // Parse with different content - should not hit cache
        let result2 = cache.get_or_parse(path, content2, "javascript").await;
        assert!(result2.is_ok());

        // Trees should be different
        assert!(!Arc::ptr_eq(&result1.unwrap(), &result2.unwrap()));
    }

    /// Many tasks asking for the same file must all share one parsed tree.
    #[tokio::test]
    async fn test_concurrent_parsing() {
        let cache = Arc::new(AstCacheV2::new(100));

        // Spawn multiple tasks parsing the same file
        let handles: Vec<_> = (0..10)
            .map(|_| {
                let cache = Arc::clone(&cache);
                tokio::spawn(async move {
                    let path = Path::new("concurrent.rs");
                    let content = "fn main() { println!(\"test\"); }";
                    cache.get_or_parse(path, content, "rust").await
                })
            })
            .collect();

        // All should succeed
        let mut trees = Vec::with_capacity(handles.len());
        for handle in handles {
            let tree = handle
                .await
                .expect("task should not panic")
                .expect("parsing should succeed");
            trees.push(tree);
        }

        // Every task used the same key, so every task must have received the
        // same cached tree; a different Arc would mean the file was parsed
        // more than once.
        let (first, rest) = trees.split_first().expect("ten tasks were spawned");
        assert!(rest.iter().all(|tree| Arc::ptr_eq(first, tree)));

        // A single key can never account for more than one entry
        assert!(cache.len() <= 1);
    }

    /// A custom (very short) timeout must not break parsing of trivial input.
    #[tokio::test]
    async fn test_timeout_configuration() {
        // Create cache with very short timeout
        let cache =
            AstCacheV2::with_config(10, Duration::from_secs(3600), Duration::from_millis(1));

        // This should complete quickly even with short timeout
        let path = Path::new("test.rs");
        let content = "fn main() {}";
        let result = cache.get_or_parse(path, content, "rust").await;

        // Should still succeed as parsing is fast
        assert!(result.is_ok());
    }
}
317}