Skip to main content

tldr_cli/commands/remaining/
ast_cache.rs

1//! AST Cache for efficient multi-analysis
2//!
3//! Provides a caching layer for parsed ASTs to prevent redundant parsing
4//! when running multiple sub-analyses on the same file (TIGER-03 mitigation).
5//!
6//! # Usage
7//!
8//! ```ignore
9//! let mut cache = AstCache::new(100);
10//! let tree = cache.get_or_parse(&path, &source)?;
11//! // Tree is cached for subsequent calls
12//! ```
13
14use std::collections::HashMap;
15use std::path::{Path, PathBuf};
16use std::time::SystemTime;
17
18use tree_sitter::{Parser, Tree};
19
20use super::error::{RemainingError, RemainingResult};
21
22/// Maximum cache size (number of ASTs to cache)
23pub const MAX_CACHE_SIZE: usize = 100;
24
25/// Cache key combining path and modification time
26#[derive(Debug, Clone, Hash, Eq, PartialEq)]
27pub struct AstCacheKey {
28    pub path: PathBuf,
29    pub mtime: Option<SystemTime>,
30}
31
32impl AstCacheKey {
33    /// Create a new cache key
34    pub fn new(path: impl Into<PathBuf>) -> Self {
35        let path = path.into();
36        let mtime = std::fs::metadata(&path).and_then(|m| m.modified()).ok();
37        Self { path, mtime }
38    }
39}
40
41/// Cache statistics
42#[derive(Debug, Clone, Default)]
43pub struct CacheStats {
44    pub hits: u64,
45    pub misses: u64,
46    pub evictions: u64,
47}
48
49/// AST cache with LRU eviction
50pub struct AstCache {
51    /// Cached trees indexed by path/mtime key
52    cache: HashMap<PathBuf, (Option<SystemTime>, Tree)>,
53    /// Maximum number of entries
54    capacity: usize,
55    /// Access order for LRU (most recent last)
56    access_order: Vec<PathBuf>,
57    /// Statistics
58    stats: CacheStats,
59}
60
61impl AstCache {
62    /// Create a new cache with specified capacity
63    pub fn new(capacity: usize) -> Self {
64        Self {
65            cache: HashMap::new(),
66            capacity,
67            access_order: Vec::new(),
68            stats: CacheStats::default(),
69        }
70    }
71
72    /// Get or parse a file, caching the result
73    pub fn get_or_parse(&mut self, path: &Path, source: &str) -> RemainingResult<&Tree> {
74        let key = AstCacheKey::new(path);
75
76        // Check if we have a valid cached entry
77        if let Some((cached_mtime, _)) = self.cache.get(path) {
78            if *cached_mtime == key.mtime {
79                self.stats.hits += 1;
80                self.update_access_order(path);
81                return Ok(&self.cache.get(path).unwrap().1);
82            }
83        }
84
85        self.stats.misses += 1;
86
87        // Parse the file using extension-aware language selection.
88        let tree = self.parse_source(source, path)?;
89
90        // Evict if at capacity
91        while self.cache.len() >= self.capacity {
92            self.evict_lru();
93        }
94
95        // Insert into cache
96        self.cache.insert(path.to_path_buf(), (key.mtime, tree));
97        self.access_order.push(path.to_path_buf());
98
99        Ok(&self.cache.get(path).unwrap().1)
100    }
101
102    /// Parse source code using a language selected from file extension.
103    fn parse_source(&self, source: &str, path: &Path) -> RemainingResult<Tree> {
104        let ext = path
105            .extension()
106            .and_then(|e| e.to_str())
107            .unwrap_or_default();
108        match ext {
109            "rs" => self.parse_rust(source, path),
110            _ => self.parse_python(source, path),
111        }
112    }
113
114    /// Parse Python source code.
115    fn parse_python(&self, source: &str, path: &Path) -> RemainingResult<Tree> {
116        let mut parser = Parser::new();
117        parser
118            .set_language(&tree_sitter_python::LANGUAGE.into())
119            .map_err(|e| RemainingError::parse_error(path, e.to_string()))?;
120
121        parser
122            .parse(source, None)
123            .ok_or_else(|| RemainingError::parse_error(path, "Failed to parse"))
124    }
125
126    /// Parse Rust source code.
127    fn parse_rust(&self, source: &str, path: &Path) -> RemainingResult<Tree> {
128        let mut parser = Parser::new();
129        parser
130            .set_language(&tree_sitter_rust::LANGUAGE.into())
131            .map_err(|e| RemainingError::parse_error(path, e.to_string()))?;
132
133        parser
134            .parse(source, None)
135            .ok_or_else(|| RemainingError::parse_error(path, "Failed to parse"))
136    }
137
138    /// Update access order for LRU
139    fn update_access_order(&mut self, path: &Path) {
140        if let Some(pos) = self.access_order.iter().position(|p| p == path) {
141            self.access_order.remove(pos);
142        }
143        self.access_order.push(path.to_path_buf());
144    }
145
146    /// Evict least recently used entry
147    fn evict_lru(&mut self) {
148        if let Some(path) = self.access_order.first().cloned() {
149            self.cache.remove(&path);
150            self.access_order.remove(0);
151            self.stats.evictions += 1;
152        }
153    }
154
155    /// Invalidate a specific path
156    pub fn invalidate(&mut self, path: &Path) {
157        self.cache.remove(path);
158        self.access_order.retain(|p| p != path);
159    }
160
161    /// Clear the entire cache
162    pub fn clear(&mut self) {
163        self.cache.clear();
164        self.access_order.clear();
165    }
166
167    /// Get cache statistics
168    pub fn stats(&self) -> &CacheStats {
169        &self.stats
170    }
171
172    /// Get current cache size
173    pub fn len(&self) -> usize {
174        self.cache.len()
175    }
176
177    /// Check if cache is empty
178    pub fn is_empty(&self) -> bool {
179        self.cache.is_empty()
180    }
181}
182
183impl Default for AstCache {
184    fn default() -> Self {
185        Self::new(MAX_CACHE_SIZE)
186    }
187}
188
189#[cfg(test)]
190mod tests {
191    use super::*;
192    use std::fs;
193    use tempfile::TempDir;
194
195    fn create_test_file(dir: &TempDir, name: &str, content: &str) -> PathBuf {
196        let path = dir.path().join(name);
197        fs::write(&path, content).unwrap();
198        path
199    }
200
201    #[test]
202    fn test_cache_hit() {
203        let temp = TempDir::new().unwrap();
204        let path = create_test_file(&temp, "test.py", "def foo(): pass");
205        let source = fs::read_to_string(&path).unwrap();
206
207        let mut cache = AstCache::new(10);
208
209        // First access - miss
210        let _ = cache.get_or_parse(&path, &source).unwrap();
211        assert_eq!(cache.stats().misses, 1);
212        assert_eq!(cache.stats().hits, 0);
213
214        // Second access - hit
215        let _ = cache.get_or_parse(&path, &source).unwrap();
216        assert_eq!(cache.stats().misses, 1);
217        assert_eq!(cache.stats().hits, 1);
218    }
219
220    #[test]
221    fn test_cache_invalidation() {
222        let temp = TempDir::new().unwrap();
223        let path = create_test_file(&temp, "test.py", "def foo(): pass");
224        let source = fs::read_to_string(&path).unwrap();
225
226        let mut cache = AstCache::new(10);
227
228        let _ = cache.get_or_parse(&path, &source).unwrap();
229        assert_eq!(cache.len(), 1);
230
231        cache.invalidate(&path);
232        assert_eq!(cache.len(), 0);
233    }
234
235    #[test]
236    fn test_cache_eviction() {
237        let temp = TempDir::new().unwrap();
238
239        let mut cache = AstCache::new(2);
240
241        for i in 0..3 {
242            let path = create_test_file(&temp, &format!("test{}.py", i), "def foo(): pass");
243            let source = fs::read_to_string(&path).unwrap();
244            let _ = cache.get_or_parse(&path, &source).unwrap();
245        }
246
247        // Should have evicted one entry
248        assert_eq!(cache.len(), 2);
249        assert_eq!(cache.stats().evictions, 1);
250    }
251
252    #[test]
253    fn test_cache_parses_rust_source() {
254        let temp = TempDir::new().unwrap();
255        let path = create_test_file(&temp, "lib.rs", "fn main() { println!(\"ok\"); }");
256        let source = fs::read_to_string(&path).unwrap();
257
258        let mut cache = AstCache::new(10);
259        let tree = cache.get_or_parse(&path, &source).unwrap();
260        assert_eq!(tree.root_node().kind(), "source_file");
261    }
262}