Skip to main content

dissolve_python/
performance.rs

1// Copyright (C) 2024 Jelmer Vernooij <jelmer@samba.org>
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Performance optimizations including caching and string interning
16
17use rustpython_ast as ast;
18use std::collections::HashMap;
19use std::hash::Hash;
20use std::sync::{Arc, Mutex, RwLock};
21
/// String interning system for reducing memory usage.
///
/// Hands out `Arc<str>` handles so repeated occurrences of the same
/// text share one allocation.
pub struct StringInterner {
    strings: RwLock<HashMap<String, Arc<str>>>,
}

impl StringInterner {
    pub fn new() -> Self {
        StringInterner {
            strings: RwLock::new(HashMap::new()),
        }
    }

    /// Intern a string, returning an `Arc` to the shared copy.
    pub fn intern(&self, s: &str) -> Arc<str> {
        // Fast path: a shared read lock suffices when the string is
        // already interned.
        if let Ok(known) = self.strings.read() {
            if let Some(existing) = known.get(s) {
                return Arc::clone(existing);
            }
        }

        // Slow path: take the write lock and re-check, since another
        // thread may have interned the same string while we waited.
        let mut known = self.strings.write().unwrap();
        if let Some(existing) = known.get(s) {
            return Arc::clone(existing);
        }
        let shared: Arc<str> = Arc::from(s);
        known.insert(s.to_string(), Arc::clone(&shared));
        shared
    }

    /// Get statistics about the interner.
    pub fn stats(&self) -> InternerStats {
        let known = self.strings.read().unwrap();
        let key_bytes: usize = known.keys().map(String::len).sum();
        InternerStats {
            unique_strings: known.len(),
            // NOTE(review): rough estimate — multiplies total key length
            // by the per-value size difference between `String` and
            // `Arc<str>`; confirm this is the intended formula.
            total_bytes_saved: key_bytes
                * (std::mem::size_of::<String>() - std::mem::size_of::<Arc<str>>()),
        }
    }
}

impl Default for StringInterner {
    fn default() -> Self {
        Self::new()
    }
}

/// Summary numbers reported by `StringInterner::stats`.
#[derive(Debug, Clone)]
pub struct InternerStats {
    pub unique_strings: usize,
    pub total_bytes_saved: usize,
}
77
/// Cache for parsed ASTs to avoid re-parsing the same files
pub struct AstCache {
    // Keyed by file path; RwLock allows shared access across threads.
    cache: RwLock<HashMap<String, CacheEntry>>,
    // Hard cap on the number of cached entries; see `insert` for eviction.
    max_entries: usize,
}

/// One cached parse result.
#[derive(Clone)]
struct CacheEntry {
    // The parsed module returned (cloned) on cache hits.
    ast: ast::Mod,
    // File mtime at parse time; used by `get` to detect staleness.
    last_modified: std::time::SystemTime,
    // Hit counter; entries with the lowest count are evicted first.
    hit_count: usize,
}
90
91impl AstCache {
92    pub fn new(max_entries: usize) -> Self {
93        Self {
94            cache: RwLock::new(HashMap::new()),
95            max_entries,
96        }
97    }
98
99    /// Get an AST from cache if it exists and is up to date
100    pub fn get(&self, file_path: &str, file_modified: std::time::SystemTime) -> Option<ast::Mod> {
101        let mut cache = self.cache.write().ok()?;
102
103        if let Some(entry) = cache.get_mut(file_path) {
104            if entry.last_modified >= file_modified {
105                entry.hit_count += 1;
106                return Some(entry.ast.clone());
107            } else {
108                // File was modified, remove stale entry
109                cache.remove(file_path);
110            }
111        }
112        None
113    }
114
115    /// Store an AST in the cache
116    pub fn insert(&self, file_path: String, ast: ast::Mod, file_modified: std::time::SystemTime) {
117        let mut cache = self.cache.write().unwrap();
118
119        // If we're at capacity, remove the least recently used entry
120        if cache.len() >= self.max_entries {
121            if let Some(lru_key) = cache
122                .iter()
123                .min_by_key(|(_, entry)| entry.hit_count)
124                .map(|(k, _)| k.clone())
125            {
126                cache.remove(&lru_key);
127            }
128        }
129
130        cache.insert(
131            file_path,
132            CacheEntry {
133                ast,
134                last_modified: file_modified,
135                hit_count: 0,
136            },
137        );
138    }
139
140    /// Get cache statistics
141    pub fn stats(&self) -> CacheStats {
142        let cache = self.cache.read().unwrap();
143        let total_hits: usize = cache.values().map(|e| e.hit_count).sum();
144
145        CacheStats {
146            entries: cache.len(),
147            total_hits,
148            hit_ratio: if cache.len() > 0 {
149                total_hits as f64 / cache.len() as f64
150            } else {
151                0.0
152            },
153        }
154    }
155
156    /// Clear the cache
157    pub fn clear(&self) {
158        self.cache.write().unwrap().clear();
159    }
160}
161
/// Aggregate statistics for a cache, as returned by `stats()`.
#[derive(Debug, Clone)]
pub struct CacheStats {
    pub entries: usize,
    pub total_hits: usize,
    // Average hits per entry; 0.0 when the cache is empty or hits
    // are not tracked.
    pub hit_ratio: f64,
}

/// Cache for type information to reduce LSP queries
pub struct TypeCache {
    cache: RwLock<HashMap<TypeQuery, TypeInfo>>,
    max_entries: usize,
}

/// Identifies one "what is the type of this expression here?" lookup.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct TypeQuery {
    pub file_path: String,
    pub line: usize,
    pub column: usize,
    pub expression: String,
}

/// A cached answer to a `TypeQuery`, valid until `cached_at + ttl`.
#[derive(Debug, Clone)]
pub struct TypeInfo {
    pub type_name: String,
    pub is_callable: bool,
    pub cached_at: std::time::Instant,
    pub ttl: std::time::Duration, // Time to live
}

impl TypeCache {
    /// Create a cache holding at most `max_entries` answers.
    pub fn new(max_entries: usize) -> Self {
        Self {
            cache: RwLock::new(HashMap::new()),
            max_entries,
        }
    }

    /// Look up a cached answer, dropping it if its TTL has expired.
    pub fn get(&self, query: &TypeQuery) -> Option<TypeInfo> {
        let mut cache = self.cache.write().ok()?;

        if let Some(info) = cache.get(query) {
            // Still fresh?
            if info.cached_at.elapsed() < info.ttl {
                return Some(info.clone());
            }
            // Entry expired; remove it so it is not returned again.
            cache.remove(query);
        }
        None
    }

    /// Store an answer, evicting expired and then oldest entries when full.
    pub fn insert(&self, query: TypeQuery, info: TypeInfo) {
        let mut cache = self.cache.write().unwrap();

        if cache.len() >= self.max_entries {
            // First drop anything whose TTL has already elapsed.
            let now = std::time::Instant::now();
            cache.retain(|_, info| now.duration_since(info.cached_at) < info.ttl);

            // If still at capacity, remove the oldest entries. Collect
            // the (key, timestamp) pairs once and sort in place — the
            // previous version materialized the same Vec twice.
            if cache.len() >= self.max_entries {
                let mut by_age: Vec<(TypeQuery, std::time::Instant)> = cache
                    .iter()
                    .map(|(k, v)| (k.clone(), v.cached_at))
                    .collect();
                by_age.sort_by_key(|&(_, cached_at)| cached_at);

                let excess = cache.len() - self.max_entries + 1;
                for (key, _) in by_age.into_iter().take(excess) {
                    cache.remove(&key);
                }
            }
        }

        cache.insert(query, info);
    }

    /// Get cache statistics.
    pub fn stats(&self) -> CacheStats {
        let cache = self.cache.read().unwrap();
        CacheStats {
            entries: cache.len(),
            total_hits: 0, // We don't track hits for type cache yet
            hit_ratio: 0.0,
        }
    }
}
252
/// Performance monitoring and statistics
pub struct PerformanceMonitor {
    // Shared sub-systems, public so pipeline stages can use them directly.
    pub string_interner: Arc<StringInterner>,
    pub ast_cache: Arc<AstCache>,
    pub type_cache: Arc<TypeCache>,
    // Wall-clock start of the run; `summary` measures elapsed time from here.
    start_time: std::time::Instant,
    // Cumulative counters; Mutex-wrapped so recording works through `&self`
    // and the monitor can be shared across threads.
    files_processed: Arc<Mutex<usize>>,
    total_parse_time: Arc<Mutex<std::time::Duration>>,
    total_migration_time: Arc<Mutex<std::time::Duration>>,
}
263
264impl PerformanceMonitor {
265    pub fn new(max_ast_entries: usize, max_type_entries: usize) -> Self {
266        Self {
267            string_interner: Arc::new(StringInterner::new()),
268            ast_cache: Arc::new(AstCache::new(max_ast_entries)),
269            type_cache: Arc::new(TypeCache::new(max_type_entries)),
270            start_time: std::time::Instant::now(),
271            files_processed: Arc::new(Mutex::new(0)),
272            total_parse_time: Arc::new(Mutex::new(std::time::Duration::ZERO)),
273            total_migration_time: Arc::new(Mutex::new(std::time::Duration::ZERO)),
274        }
275    }
276
277    pub fn record_file_processed(&self) {
278        *self.files_processed.lock().unwrap() += 1;
279    }
280
281    pub fn record_parse_time(&self, duration: std::time::Duration) {
282        *self.total_parse_time.lock().unwrap() += duration;
283    }
284
285    pub fn record_migration_time(&self, duration: std::time::Duration) {
286        *self.total_migration_time.lock().unwrap() += duration;
287    }
288
289    pub fn summary(&self) -> PerformanceSummary {
290        let files_processed = *self.files_processed.lock().unwrap();
291        let total_parse_time = *self.total_parse_time.lock().unwrap();
292        let total_migration_time = *self.total_migration_time.lock().unwrap();
293        let total_time = self.start_time.elapsed();
294
295        PerformanceSummary {
296            total_time,
297            files_processed,
298            files_per_second: if total_time.as_secs() > 0 {
299                files_processed as f64 / total_time.as_secs_f64()
300            } else {
301                0.0
302            },
303            total_parse_time,
304            total_migration_time,
305            average_parse_time: if files_processed > 0 {
306                total_parse_time / files_processed as u32
307            } else {
308                std::time::Duration::ZERO
309            },
310            average_migration_time: if files_processed > 0 {
311                total_migration_time / files_processed as u32
312            } else {
313                std::time::Duration::ZERO
314            },
315            interner_stats: self.string_interner.stats(),
316            ast_cache_stats: self.ast_cache.stats(),
317            type_cache_stats: self.type_cache.stats(),
318        }
319    }
320}
321
/// Snapshot of all counters, produced by `PerformanceMonitor::summary`.
#[derive(Debug, Clone)]
pub struct PerformanceSummary {
    // Wall-clock time since the monitor was created.
    pub total_time: std::time::Duration,
    pub files_processed: usize,
    pub files_per_second: f64,
    pub total_parse_time: std::time::Duration,
    pub total_migration_time: std::time::Duration,
    // Per-file averages; zero when no files were processed.
    pub average_parse_time: std::time::Duration,
    pub average_migration_time: std::time::Duration,
    pub interner_stats: InternerStats,
    pub ast_cache_stats: CacheStats,
    pub type_cache_stats: CacheStats,
}
335
336impl std::fmt::Display for PerformanceSummary {
337    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
338        writeln!(f, "Performance Summary:")?;
339        writeln!(f, "  Total time: {:.2?}", self.total_time)?;
340        writeln!(f, "  Files processed: {}", self.files_processed)?;
341        writeln!(f, "  Files per second: {:.2}", self.files_per_second)?;
342        writeln!(f, "  Total parse time: {:.2?}", self.total_parse_time)?;
343        writeln!(
344            f,
345            "  Total migration time: {:.2?}",
346            self.total_migration_time
347        )?;
348        writeln!(f, "  Average parse time: {:.2?}", self.average_parse_time)?;
349        writeln!(
350            f,
351            "  Average migration time: {:.2?}",
352            self.average_migration_time
353        )?;
354        writeln!(
355            f,
356            "  String interner: {} unique strings",
357            self.interner_stats.unique_strings
358        )?;
359        writeln!(
360            f,
361            "  AST cache: {} entries, {:.1}% hit rate",
362            self.ast_cache_stats.entries,
363            self.ast_cache_stats.hit_ratio * 100.0
364        )?;
365        writeln!(f, "  Type cache: {} entries", self.type_cache_stats.entries)?;
366        Ok(())
367    }
368}
369
/// Helper trait for sorting - we need this because we can't use itertools
trait SortedIterator: Iterator {
    /// Collect the iterator, stably sort by `key_fn`, and iterate the result.
    fn sorted_by_key<K, F>(self, key_fn: F) -> std::vec::IntoIter<Self::Item>
    where
        Self: Sized,
        F: FnMut(&Self::Item) -> K,
        K: Ord,
    {
        let mut items: Vec<Self::Item> = self.collect();
        items.sort_by_key(key_fn);
        items.into_iter()
    }
}

impl<I: Iterator> SortedIterator for I {}
385
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_string_interner() {
        let interner = StringInterner::new();

        let first = interner.intern("hello");
        let second = interner.intern("hello");
        let other = interner.intern("world");

        // Interning the same text must yield the same shared allocation.
        assert!(Arc::ptr_eq(&first, &second));
        assert!(!Arc::ptr_eq(&first, &other));

        assert_eq!(interner.stats().unique_strings, 2);
    }

    #[test]
    fn test_ast_cache() {
        let cache = AstCache::new(2);
        let parsed_at = std::time::SystemTime::now();

        // Minimal empty module to act as a cached value.
        let module = ast::Mod::Module(ast::ModModule {
            body: vec![],
            type_ignores: vec![],
            range: Default::default(),
        });

        // Round-trip: insert then hit with the same mtime.
        cache.insert("test.py".to_string(), module.clone(), parsed_at);
        assert!(cache.get("test.py", parsed_at).is_some());

        // A newer on-disk mtime must invalidate the entry.
        let newer = parsed_at + std::time::Duration::from_secs(1);
        assert!(cache.get("test.py", newer).is_none());
    }

    #[test]
    fn test_type_cache() {
        let cache = TypeCache::new(10);

        let query = TypeQuery {
            file_path: "test.py".to_string(),
            line: 10,
            column: 5,
            expression: "func()".to_string(),
        };
        let info = TypeInfo {
            type_name: "str".to_string(),
            is_callable: false,
            cached_at: std::time::Instant::now(),
            ttl: std::time::Duration::from_secs(60),
        };

        cache.insert(query.clone(), info);
        assert!(cache.get(&query).is_some());
    }
}