Skip to main content

perl_lsp_performance/
lib.rs

1#![warn(missing_docs)]
2//! Performance optimizations for large projects.
3//!
4//! This module is designed for large workspace scaling, including repositories
5//! with tens of thousands of files where cache hit rates and bounded memory
6//! usage are required to keep indexing and analysis responsive for enterprise
7//! and large-file workloads.
8
9use moka::sync::Cache;
10use perl_parser_core::Node;
11use std::sync::{Arc, Mutex};
12use std::time::Duration;
13
14pub use perl_symbol_index::SymbolIndex;
15
/// Cache for parsed ASTs with TTL.
///
/// Stores parsed ASTs with content hashing to avoid re-parsing unchanged files.
/// Uses a high-performance concurrent cache with automatic size-based eviction
/// (moka's policy is TinyLFU-based, not strict LRU, and eviction may be deferred
/// until maintenance runs — see [`AstCache::cleanup`]).
pub struct AstCache {
    /// Concurrent cache storage, bounded by entry count and per-entry TTL;
    /// keys are document URIs.
    cache: Cache<String, CachedAst>,
}
24
/// A cached AST entry with metadata.
///
/// Cloning is cheap: the AST is behind an [`Arc`], so a clone only bumps a
/// refcount (moka requires `Clone` values).
#[derive(Clone)]
struct CachedAst {
    /// The cached AST node, shared with callers of [`AstCache::get`]
    ast: Arc<Node>,
    /// Hash of the source content at insertion time, used to detect that the
    /// document changed since this AST was cached
    content_hash: u64,
}
33
34impl AstCache {
35    /// Create a new AST cache with the given size limit and TTL
36    pub fn new(max_size: usize, ttl_seconds: u64) -> Self {
37        let cache = Cache::builder()
38            .max_capacity(max_size as u64)
39            .time_to_live(Duration::from_secs(ttl_seconds))
40            .build();
41
42        Self { cache }
43    }
44
45    /// Get cached AST if still valid
46    pub fn get(&self, uri: &str, content: &str) -> Option<Arc<Node>> {
47        let content_hash = Self::hash_content(content);
48
49        if let Some(cached) = self.cache.get(uri) {
50            // Check if content hash matches (skip if content changed)
51            if cached.content_hash == content_hash {
52                return Some(Arc::clone(&cached.ast));
53            } else {
54                // Remove stale entry
55                self.cache.remove(uri);
56            }
57        }
58        None
59    }
60
61    /// Store AST in cache.
62    ///
63    /// Moka handles eviction automatically when capacity is reached.
64    pub fn put(&self, uri: String, content: &str, ast: Arc<Node>) {
65        let content_hash = Self::hash_content(content);
66        self.cache.insert(uri, CachedAst { ast, content_hash });
67    }
68
69    /// Clear expired entries.
70    ///
71    /// Moka handles expiration automatically, but this method is kept for API compatibility.
72    pub fn cleanup(&self) {
73        self.cache.run_pending_tasks();
74    }
75
76    fn hash_content(content: &str) -> u64 {
77        use std::collections::hash_map::DefaultHasher;
78        use std::hash::{Hash, Hasher};
79
80        let mut hasher = DefaultHasher::new();
81        content.hash(&mut hasher);
82        hasher.finish()
83    }
84}
85
/// Incremental parsing optimizer.
///
/// Tracks changed regions to determine which AST nodes need reparsing.
pub struct IncrementalParser {
    /// Changed regions as `(start, end)` byte offsets, kept sorted and
    /// non-overlapping by `merge_overlapping_regions`
    changed_regions: Vec<(usize, usize)>,
}

impl Default for IncrementalParser {
    fn default() -> Self {
        Self::new()
    }
}

impl IncrementalParser {
    /// Create a new incremental parser with no changed regions recorded.
    pub fn new() -> Self {
        IncrementalParser { changed_regions: Vec::new() }
    }

    /// Mark the byte range `start..end` as changed.
    ///
    /// Overlapping and touching regions are automatically merged.
    pub fn mark_changed(&mut self, start: usize, end: usize) {
        self.changed_regions.push((start, end));
        self.merge_overlapping_regions();
    }

    /// Check if a node needs reparsing based on changed regions.
    ///
    /// Returns true if the node's span overlaps with any changed region.
    pub fn needs_reparse(&self, node_start: usize, node_end: usize) -> bool {
        self.changed_regions
            .iter()
            .any(|&(start, end)| node_start < end && node_end > start)
    }

    /// Forget all tracked changes.
    ///
    /// Call after reparsing to reset the change tracking.
    pub fn clear(&mut self) {
        self.changed_regions.clear();
    }

    /// Normalize `changed_regions` to a sorted list of disjoint spans.
    fn merge_overlapping_regions(&mut self) {
        if self.changed_regions.len() < 2 {
            return;
        }

        // Equal start offsets merge immediately below, so an unstable sort
        // is safe and avoids the stable sort's allocation.
        self.changed_regions.sort_unstable_by_key(|&(start, _)| start);

        let mut merged: Vec<(usize, usize)> = Vec::with_capacity(self.changed_regions.len());
        for &(start, end) in self.changed_regions.iter() {
            match merged.last_mut() {
                // Overlapping or touching: widen the previous span.
                Some(prev) if start <= prev.1 => prev.1 = prev.1.max(end),
                _ => merged.push((start, end)),
            }
        }

        self.changed_regions = merged;
    }
}
156
/// Parallel processing utilities for large workspaces
pub mod parallel {
    use std::sync::mpsc;
    use std::sync::{Arc, Mutex};
    use std::thread;

    /// Process files in parallel with a worker pool.
    ///
    /// Distributes file processing across multiple threads for faster
    /// indexing and returns one result per input file, in no particular
    /// order. A `num_workers` of zero is clamped to one worker so the work
    /// queue always drains instead of silently dropping the input.
    ///
    /// NOTE(review): if a worker observes a poisoned queue lock (another
    /// worker panicked while holding it), it exits early and the remaining
    /// files are skipped; callers should treat a short result vector as a
    /// degraded run.
    pub fn process_files_parallel<T, F>(
        files: Vec<String>,
        num_workers: usize,
        processor: F,
    ) -> Vec<T>
    where
        T: Send + 'static,
        F: Fn(String) -> T + Send + Sync + 'static,
    {
        // Guard the degenerate case: zero workers would spawn no threads,
        // leave every file unprocessed, and return an empty result set.
        let num_workers = num_workers.max(1);

        let (tx, rx) = mpsc::channel();
        let work_queue = Arc::new(Mutex::new(files));
        let processor = Arc::new(processor);

        let mut handles = Vec::with_capacity(num_workers);

        for _ in 0..num_workers {
            let tx = tx.clone();
            let work_queue = Arc::clone(&work_queue);
            let processor = Arc::clone(&processor);

            handles.push(thread::spawn(move || {
                loop {
                    // Pop one unit of work, holding the lock only for the pop
                    // so workers don't serialize on `processor`.
                    let file = {
                        let Ok(mut queue) = work_queue.lock() else {
                            break; // Exit if lock is poisoned
                        };
                        queue.pop()
                    };

                    match file {
                        Some(f) => {
                            let result = processor(f);
                            if tx.send(result).is_err() {
                                break; // Exit if receiver is dropped
                            }
                        }
                        None => break, // Queue drained
                    }
                }
            }));
        }

        // Drop the original sender so `rx` sees disconnect once every worker
        // (each holding a clone) has exited.
        drop(tx);

        for handle in handles {
            let _ = handle.join(); // Ignore join errors - worker threads handle errors internally
        }

        rx.into_iter().collect()
    }
}
220
#[cfg(test)]
mod tests {
    use super::*;

    // Cache hit on identical content, miss on changed content, and bounded
    // size once capacity is exceeded.
    #[test]
    fn test_ast_cache() {
        let cache = AstCache::new(2, 60);
        let ast1 = Arc::new(Node::new(
            perl_parser_core::NodeKind::Program { statements: vec![] },
            perl_parser_core::SourceLocation { start: 0, end: 0 },
        ));
        let ast2 = Arc::new(Node::new(
            perl_parser_core::NodeKind::Program { statements: vec![] },
            perl_parser_core::SourceLocation { start: 0, end: 0 },
        ));

        cache.put("file1.pl".to_string(), "content1", ast1.clone());
        cache.put("file2.pl".to_string(), "content2", ast2.clone());

        // Should retrieve cached AST
        assert!(cache.get("file1.pl", "content1").is_some());

        // Different content should miss the cache — and, as a side effect,
        // remove the now-stale file1 entry.
        assert!(cache.get("file1.pl", "different").is_none());
        assert!(cache.get("file1.pl", "content1").is_none());

        // Exceed capacity with a third entry.
        let ast3 = Arc::new(Node::new(
            perl_parser_core::NodeKind::Program { statements: vec![] },
            perl_parser_core::SourceLocation { start: 0, end: 0 },
        ));
        cache.put("file3.pl".to_string(), "content3", ast3);

        // Moka's eviction is deferred and its TinyLFU-based policy does not
        // guarantee which entry is removed, so flush pending maintenance and
        // only assert that capacity is respected rather than naming a victim.
        cache.cleanup();
        let survivors = [
            ("file1.pl", "content1"),
            ("file2.pl", "content2"),
            ("file3.pl", "content3"),
        ]
        .iter()
        .filter(|(uri, content)| cache.get(uri, content).is_some())
        .count();
        assert!(survivors <= 2, "cache must not exceed its max capacity");
    }

    // Overlap detection and merging of changed regions.
    #[test]
    fn test_incremental_parser() {
        let mut parser = IncrementalParser::new();

        parser.mark_changed(10, 20);
        parser.mark_changed(30, 40);

        // Node overlapping with changed region
        assert!(parser.needs_reparse(15, 25));
        assert!(parser.needs_reparse(35, 45));

        // Node not overlapping
        assert!(!parser.needs_reparse(0, 5));
        assert!(!parser.needs_reparse(50, 60));

        // A region spanning the gap merges everything into one.
        parser.mark_changed(18, 35);
        assert_eq!(parser.changed_regions.len(), 1);
        assert_eq!(parser.changed_regions[0], (10, 40));
    }

    // Prefix and fuzzy lookup on the re-exported symbol index.
    #[test]
    fn test_symbol_index() {
        let mut index = SymbolIndex::new();

        index.add_symbol("calculate_total".to_string());
        index.add_symbol("calculate_average".to_string());
        index.add_symbol("get_user_name".to_string());

        // Prefix search
        let results = index.search_prefix("calc");
        assert_eq!(results.len(), 2);
        assert!(results.contains(&"calculate_total".to_string()));
        assert!(results.contains(&"calculate_average".to_string()));

        // Fuzzy search
        let results = index.search_fuzzy("user name");
        assert!(results.contains(&"get_user_name".to_string()));
    }

    // The cache must survive concurrent put/get/cleanup from many threads.
    #[test]
    fn test_cache_concurrent_access() {
        use std::thread;

        let cache = Arc::new(AstCache::new(100, 60));
        let mut handles = vec![];

        // Spawn multiple threads that access the cache concurrently
        for i in 0..10 {
            let cache_clone = Arc::clone(&cache);
            let handle = thread::spawn(move || {
                let ast = Arc::new(Node::new(
                    perl_parser_core::NodeKind::Program { statements: vec![] },
                    perl_parser_core::SourceLocation { start: 0, end: 0 },
                ));

                // Perform multiple operations
                for j in 0..50 {
                    let key = format!("file_{}_{}.pl", i, j);
                    let content = format!("content_{}", j);

                    // Put
                    cache_clone.put(key.clone(), &content, ast.clone());

                    // Get
                    let _ = cache_clone.get(&key, &content);

                    // Cleanup (occasionally)
                    if j % 10 == 0 {
                        cache_clone.cleanup();
                    }
                }
            });
            handles.push(handle);
        }

        // Wait for all threads to complete
        for handle in handles {
            let res = handle.join();
            assert!(res.is_ok(), "Thread should not panic");
        }

        // Cache should still be functional
        let ast = Arc::new(Node::new(
            perl_parser_core::NodeKind::Program { statements: vec![] },
            perl_parser_core::SourceLocation { start: 0, end: 0 },
        ));
        cache.put("final.pl".to_string(), "final", ast.clone());
        assert!(cache.get("final.pl", "final").is_some());
    }

    // Entries become unreadable once their TTL elapses.
    #[test]
    fn test_cache_ttl_expiration() {
        use std::thread;
        use std::time::Duration;

        // Create cache with 1 second TTL
        let cache = AstCache::new(10, 1);
        let ast = Arc::new(Node::new(
            perl_parser_core::NodeKind::Program { statements: vec![] },
            perl_parser_core::SourceLocation { start: 0, end: 0 },
        ));

        cache.put("test.pl".to_string(), "content", ast.clone());

        // Should be cached immediately
        assert!(cache.get("test.pl", "content").is_some());

        // Wait for TTL to expire (slightly over the 1s TTL to avoid flakiness)
        thread::sleep(Duration::from_millis(1100));

        // Should be expired now
        assert!(cache.get("test.pl", "content").is_none());
    }

    // A hash mismatch invalidates the entry until it is re-put.
    #[test]
    fn test_cache_content_hash_validation() {
        let cache = AstCache::new(10, 60);
        let ast1 = Arc::new(Node::new(
            perl_parser_core::NodeKind::Program { statements: vec![] },
            perl_parser_core::SourceLocation { start: 0, end: 0 },
        ));
        let ast2 = Arc::new(Node::new(
            perl_parser_core::NodeKind::Program { statements: vec![] },
            perl_parser_core::SourceLocation { start: 1, end: 1 },
        ));

        // Cache with original content
        cache.put("test.pl".to_string(), "original content", ast1.clone());
        assert!(cache.get("test.pl", "original content").is_some());

        // Different content should invalidate cache
        assert!(cache.get("test.pl", "modified content").is_none());

        // Update with new content and AST
        cache.put("test.pl".to_string(), "modified content", ast2.clone());
        assert!(cache.get("test.pl", "modified content").is_some());
        assert!(cache.get("test.pl", "original content").is_none());
    }

    // Every file is processed exactly once across the worker pool.
    #[test]
    fn test_parallel_processing_graceful_degradation() {
        use std::sync::atomic::{AtomicUsize, Ordering};

        let processed = Arc::new(AtomicUsize::new(0));
        let files = vec!["file1.pl".to_string(), "file2.pl".to_string(), "file3.pl".to_string()];

        let processed_clone = Arc::clone(&processed);
        let results = parallel::process_files_parallel(files, 2, move |_file| {
            processed_clone.fetch_add(1, Ordering::SeqCst);
            42 // Return a value
        });

        // All files should be processed
        assert_eq!(results.len(), 3);
        assert_eq!(processed.load(Ordering::SeqCst), 3);
        assert!(results.iter().all(|&x| x == 42));
    }
}