probe_code/language/
tree_cache.rs

1use anyhow::{Context, Result};
2use std::collections::hash_map::DefaultHasher;
3use std::collections::HashMap;
4use std::hash::{Hash, Hasher};
5use std::sync::Mutex;
6use tree_sitter::Tree;
7
8lazy_static::lazy_static! {
9    /// A cache for parsed syntax trees to avoid redundant parsing
10    ///
11    /// This cache stores parsed ASTs keyed by file path and content hash.
12    /// When the same file is parsed multiple times, this avoids the overhead
13    /// of re-parsing unchanged files.
14    static ref TREE_CACHE: Mutex<HashMap<String, (Tree, u64)>> = Mutex::new(HashMap::new());
15
16    /// A counter for cache hits, used for testing
17    static ref CACHE_HITS: Mutex<usize> = Mutex::new(0);
18
19    /// A mutex for test synchronization to prevent concurrent test execution
20    static ref TEST_MUTEX: Mutex<()> = Mutex::new(());
21}
22
/// Hash `content` with the standard library's default hasher.
///
/// The resulting value is stored next to a cached tree and compared on later
/// lookups to decide whether the cached tree still matches the content.
fn compute_content_hash(content: &str) -> u64 {
    let mut state = DefaultHasher::default();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
29
30/// Get a cached tree if available, otherwise parse and cache the result
31///
32/// This function checks if a valid cached tree exists for the given file path
33/// and content. If found and the content hash matches, it returns the cached tree.
34/// Otherwise, it parses the content, caches the result, and returns the new tree.
35///
36/// # Arguments
37///
38/// * `file_path` - The path of the file being parsed
39/// * `content` - The content to parse
40/// * `parser` - The tree-sitter parser to use if parsing is needed
41///
42/// # Returns
43///
44/// A Result containing the parsed tree, either from cache or freshly parsed
45pub fn get_or_parse_tree(
46    file_path: &str,
47    content: &str,
48    parser: &mut tree_sitter::Parser,
49) -> Result<Tree> {
50    let content_hash = compute_content_hash(content);
51
52    // Check if debug mode is enabled
53    let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
54
55    // Try to get from cache first
56    {
57        let mut cache = TREE_CACHE
58            .lock()
59            .unwrap_or_else(|poisoned| poisoned.into_inner());
60        if let Some((cached_tree, cached_hash)) = cache.get(file_path) {
61            if cached_hash == &content_hash {
62                // Increment cache hit counter
63                {
64                    let mut hits = CACHE_HITS
65                        .lock()
66                        .unwrap_or_else(|poisoned| poisoned.into_inner());
67                    *hits += 1;
68                }
69
70                if debug_mode {
71                    println!("[DEBUG] Cache hit for file: {file_path}");
72                }
73                return Ok(cached_tree.clone());
74            } else {
75                // Content changed, explicitly remove the old entry
76                cache.remove(file_path);
77                if debug_mode {
78                    println!("[DEBUG] Cache invalidated for file: {file_path} (content changed)");
79                }
80            }
81        } else if debug_mode {
82            println!("[DEBUG] Cache miss for file: {file_path}");
83        }
84    }
85
86    // Not in cache or content changed, parse and store
87    let tree = parser
88        .parse(content, None)
89        .context(format!("Failed to parse file: {file_path}"))?;
90
91    // Store in cache
92    {
93        let mut cache = TREE_CACHE
94            .lock()
95            .unwrap_or_else(|poisoned| poisoned.into_inner());
96        cache.insert(file_path.to_string(), (tree.clone(), content_hash));
97
98        if debug_mode {
99            println!("[DEBUG] Cached parsed tree for file: {file_path}");
100            println!("[DEBUG] Current cache size: {} entries", cache.len());
101        }
102    }
103
104    Ok(tree)
105}
106
107/// Clear the entire tree cache
108///
109/// This function can be used to free memory or force re-parsing of all files.
110#[allow(dead_code)]
111pub fn clear_tree_cache() {
112    let mut cache = TREE_CACHE
113        .lock()
114        .unwrap_or_else(|poisoned| poisoned.into_inner());
115    let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
116
117    if debug_mode {
118        println!("[DEBUG] Clearing tree cache ({} entries)", cache.len());
119    }
120
121    cache.clear();
122
123    // Also reset the cache hit counter
124    let mut hits = CACHE_HITS
125        .lock()
126        .unwrap_or_else(|poisoned| poisoned.into_inner());
127    *hits = 0;
128}
129
130/// Remove a specific file from the tree cache
131///
132/// # Arguments
133///
134/// * `file_path` - The path of the file to remove from the cache
135#[allow(dead_code)]
136pub fn invalidate_cache_entry(file_path: &str) {
137    let mut cache = TREE_CACHE
138        .lock()
139        .unwrap_or_else(|poisoned| poisoned.into_inner());
140    let debug_mode = std::env::var("DEBUG").unwrap_or_default() == "1";
141
142    if cache.remove(file_path).is_some() && debug_mode {
143        println!("[DEBUG] Removed file from cache: {file_path}");
144    }
145}
146
147/// Acquire the test mutex for test synchronization
148///
149/// This function is used by tests to prevent concurrent access to the cache
150/// during test execution, which can lead to flaky tests.
151#[allow(dead_code)]
152pub fn acquire_test_mutex() -> std::sync::MutexGuard<'static, ()> {
153    TEST_MUTEX
154        .lock()
155        .unwrap_or_else(|poisoned| poisoned.into_inner())
156}
157
158/// Get the current size of the tree cache
159#[allow(dead_code)]
160pub fn get_cache_size() -> usize {
161    let cache = TREE_CACHE
162        .lock()
163        .unwrap_or_else(|poisoned| poisoned.into_inner());
164    cache.len()
165}
166
167/// Check if a specific file exists in the cache
168#[allow(dead_code)]
169pub fn is_in_cache(file_path: &str) -> bool {
170    let cache = TREE_CACHE
171        .lock()
172        .unwrap_or_else(|poisoned| poisoned.into_inner());
173    cache.contains_key(file_path)
174}
175
176/// Reset the cache hit counter (for testing)
177#[allow(dead_code)]
178pub fn reset_cache_hit_counter() {
179    let mut hits = CACHE_HITS
180        .lock()
181        .unwrap_or_else(|poisoned| poisoned.into_inner());
182    *hits = 0;
183}
184
185/// Get the current cache hit count (for testing)
186#[allow(dead_code)]
187pub fn get_cache_hit_count() -> usize {
188    let hits = CACHE_HITS
189        .lock()
190        .unwrap_or_else(|poisoned| poisoned.into_inner());
191    *hits
192}