1use rustpython_ast as ast;
18use std::collections::HashMap;
19use std::hash::Hash;
20use std::sync::{Arc, Mutex, RwLock};
21
22pub struct StringInterner {
24 strings: RwLock<HashMap<String, Arc<str>>>,
25}
26
27impl StringInterner {
28 pub fn new() -> Self {
29 Self {
30 strings: RwLock::new(HashMap::new()),
31 }
32 }
33
34 pub fn intern(&self, s: &str) -> Arc<str> {
36 if let Ok(map) = self.strings.read() {
38 if let Some(interned) = map.get(s) {
39 return Arc::clone(interned);
40 }
41 }
42
43 let mut map = self.strings.write().unwrap();
45 if let Some(interned) = map.get(s) {
46 Arc::clone(interned)
48 } else {
49 let arc: Arc<str> = Arc::from(s);
50 map.insert(s.to_string(), Arc::clone(&arc));
51 arc
52 }
53 }
54
55 pub fn stats(&self) -> InternerStats {
57 let map = self.strings.read().unwrap();
58 InternerStats {
59 unique_strings: map.len(),
60 total_bytes_saved: map.iter().map(|(k, _)| k.len()).sum::<usize>()
61 * (std::mem::size_of::<String>() - std::mem::size_of::<Arc<str>>()),
62 }
63 }
64}
65
66impl Default for StringInterner {
67 fn default() -> Self {
68 Self::new()
69 }
70}
71
72#[derive(Debug, Clone)]
73pub struct InternerStats {
74 pub unique_strings: usize,
75 pub total_bytes_saved: usize,
76}
77
78pub struct AstCache {
80 cache: RwLock<HashMap<String, CacheEntry>>,
81 max_entries: usize,
82}
83
84#[derive(Clone)]
85struct CacheEntry {
86 ast: ast::Mod,
87 last_modified: std::time::SystemTime,
88 hit_count: usize,
89}
90
91impl AstCache {
92 pub fn new(max_entries: usize) -> Self {
93 Self {
94 cache: RwLock::new(HashMap::new()),
95 max_entries,
96 }
97 }
98
99 pub fn get(&self, file_path: &str, file_modified: std::time::SystemTime) -> Option<ast::Mod> {
101 let mut cache = self.cache.write().ok()?;
102
103 if let Some(entry) = cache.get_mut(file_path) {
104 if entry.last_modified >= file_modified {
105 entry.hit_count += 1;
106 return Some(entry.ast.clone());
107 } else {
108 cache.remove(file_path);
110 }
111 }
112 None
113 }
114
115 pub fn insert(&self, file_path: String, ast: ast::Mod, file_modified: std::time::SystemTime) {
117 let mut cache = self.cache.write().unwrap();
118
119 if cache.len() >= self.max_entries {
121 if let Some(lru_key) = cache
122 .iter()
123 .min_by_key(|(_, entry)| entry.hit_count)
124 .map(|(k, _)| k.clone())
125 {
126 cache.remove(&lru_key);
127 }
128 }
129
130 cache.insert(
131 file_path,
132 CacheEntry {
133 ast,
134 last_modified: file_modified,
135 hit_count: 0,
136 },
137 );
138 }
139
140 pub fn stats(&self) -> CacheStats {
142 let cache = self.cache.read().unwrap();
143 let total_hits: usize = cache.values().map(|e| e.hit_count).sum();
144
145 CacheStats {
146 entries: cache.len(),
147 total_hits,
148 hit_ratio: if cache.len() > 0 {
149 total_hits as f64 / cache.len() as f64
150 } else {
151 0.0
152 },
153 }
154 }
155
156 pub fn clear(&self) {
158 self.cache.write().unwrap().clear();
159 }
160}
161
162#[derive(Debug, Clone)]
163pub struct CacheStats {
164 pub entries: usize,
165 pub total_hits: usize,
166 pub hit_ratio: f64,
167}
168
169pub struct TypeCache {
171 cache: RwLock<HashMap<TypeQuery, TypeInfo>>,
172 max_entries: usize,
173}
174
175#[derive(Debug, Clone, Hash, PartialEq, Eq)]
176pub struct TypeQuery {
177 pub file_path: String,
178 pub line: usize,
179 pub column: usize,
180 pub expression: String,
181}
182
183#[derive(Debug, Clone)]
184pub struct TypeInfo {
185 pub type_name: String,
186 pub is_callable: bool,
187 pub cached_at: std::time::Instant,
188 pub ttl: std::time::Duration, }
190
191impl TypeCache {
192 pub fn new(max_entries: usize) -> Self {
193 Self {
194 cache: RwLock::new(HashMap::new()),
195 max_entries,
196 }
197 }
198
199 pub fn get(&self, query: &TypeQuery) -> Option<TypeInfo> {
200 let mut cache = self.cache.write().ok()?;
201
202 if let Some(info) = cache.get(query) {
203 if info.cached_at.elapsed() < info.ttl {
205 return Some(info.clone());
206 } else {
207 cache.remove(query);
209 }
210 }
211 None
212 }
213
214 pub fn insert(&self, query: TypeQuery, info: TypeInfo) {
215 let mut cache = self.cache.write().unwrap();
216
217 if cache.len() >= self.max_entries {
219 let now = std::time::Instant::now();
220 cache.retain(|_, info| now.duration_since(info.cached_at) < info.ttl);
221
222 if cache.len() >= self.max_entries {
224 let oldest_keys: Vec<_> = cache
225 .iter()
226 .map(|(k, v)| (k.clone(), v.cached_at))
227 .collect::<Vec<_>>()
228 .into_iter()
229 .sorted_by_key(|(_, cached_at)| *cached_at)
230 .take(cache.len() - self.max_entries + 1)
231 .map(|(k, _)| k)
232 .collect();
233
234 for key in oldest_keys {
235 cache.remove(&key);
236 }
237 }
238 }
239
240 cache.insert(query, info);
241 }
242
243 pub fn stats(&self) -> CacheStats {
244 let cache = self.cache.read().unwrap();
245 CacheStats {
246 entries: cache.len(),
247 total_hits: 0, hit_ratio: 0.0,
249 }
250 }
251}
252
253pub struct PerformanceMonitor {
255 pub string_interner: Arc<StringInterner>,
256 pub ast_cache: Arc<AstCache>,
257 pub type_cache: Arc<TypeCache>,
258 start_time: std::time::Instant,
259 files_processed: Arc<Mutex<usize>>,
260 total_parse_time: Arc<Mutex<std::time::Duration>>,
261 total_migration_time: Arc<Mutex<std::time::Duration>>,
262}
263
264impl PerformanceMonitor {
265 pub fn new(max_ast_entries: usize, max_type_entries: usize) -> Self {
266 Self {
267 string_interner: Arc::new(StringInterner::new()),
268 ast_cache: Arc::new(AstCache::new(max_ast_entries)),
269 type_cache: Arc::new(TypeCache::new(max_type_entries)),
270 start_time: std::time::Instant::now(),
271 files_processed: Arc::new(Mutex::new(0)),
272 total_parse_time: Arc::new(Mutex::new(std::time::Duration::ZERO)),
273 total_migration_time: Arc::new(Mutex::new(std::time::Duration::ZERO)),
274 }
275 }
276
277 pub fn record_file_processed(&self) {
278 *self.files_processed.lock().unwrap() += 1;
279 }
280
281 pub fn record_parse_time(&self, duration: std::time::Duration) {
282 *self.total_parse_time.lock().unwrap() += duration;
283 }
284
285 pub fn record_migration_time(&self, duration: std::time::Duration) {
286 *self.total_migration_time.lock().unwrap() += duration;
287 }
288
289 pub fn summary(&self) -> PerformanceSummary {
290 let files_processed = *self.files_processed.lock().unwrap();
291 let total_parse_time = *self.total_parse_time.lock().unwrap();
292 let total_migration_time = *self.total_migration_time.lock().unwrap();
293 let total_time = self.start_time.elapsed();
294
295 PerformanceSummary {
296 total_time,
297 files_processed,
298 files_per_second: if total_time.as_secs() > 0 {
299 files_processed as f64 / total_time.as_secs_f64()
300 } else {
301 0.0
302 },
303 total_parse_time,
304 total_migration_time,
305 average_parse_time: if files_processed > 0 {
306 total_parse_time / files_processed as u32
307 } else {
308 std::time::Duration::ZERO
309 },
310 average_migration_time: if files_processed > 0 {
311 total_migration_time / files_processed as u32
312 } else {
313 std::time::Duration::ZERO
314 },
315 interner_stats: self.string_interner.stats(),
316 ast_cache_stats: self.ast_cache.stats(),
317 type_cache_stats: self.type_cache.stats(),
318 }
319 }
320}
321
322#[derive(Debug, Clone)]
323pub struct PerformanceSummary {
324 pub total_time: std::time::Duration,
325 pub files_processed: usize,
326 pub files_per_second: f64,
327 pub total_parse_time: std::time::Duration,
328 pub total_migration_time: std::time::Duration,
329 pub average_parse_time: std::time::Duration,
330 pub average_migration_time: std::time::Duration,
331 pub interner_stats: InternerStats,
332 pub ast_cache_stats: CacheStats,
333 pub type_cache_stats: CacheStats,
334}
335
336impl std::fmt::Display for PerformanceSummary {
337 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
338 writeln!(f, "Performance Summary:")?;
339 writeln!(f, " Total time: {:.2?}", self.total_time)?;
340 writeln!(f, " Files processed: {}", self.files_processed)?;
341 writeln!(f, " Files per second: {:.2}", self.files_per_second)?;
342 writeln!(f, " Total parse time: {:.2?}", self.total_parse_time)?;
343 writeln!(
344 f,
345 " Total migration time: {:.2?}",
346 self.total_migration_time
347 )?;
348 writeln!(f, " Average parse time: {:.2?}", self.average_parse_time)?;
349 writeln!(
350 f,
351 " Average migration time: {:.2?}",
352 self.average_migration_time
353 )?;
354 writeln!(
355 f,
356 " String interner: {} unique strings",
357 self.interner_stats.unique_strings
358 )?;
359 writeln!(
360 f,
361 " AST cache: {} entries, {:.1}% hit rate",
362 self.ast_cache_stats.entries,
363 self.ast_cache_stats.hit_ratio * 100.0
364 )?;
365 writeln!(f, " Type cache: {} entries", self.type_cache_stats.entries)?;
366 Ok(())
367 }
368}
369
370trait SortedIterator: Iterator {
372 fn sorted_by_key<K, F>(self, f: F) -> std::vec::IntoIter<Self::Item>
373 where
374 Self: Sized,
375 F: FnMut(&Self::Item) -> K,
376 K: Ord,
377 {
378 let mut v: Vec<Self::Item> = self.collect();
379 v.sort_by_key(f);
380 v.into_iter()
381 }
382}
383
384impl<I: Iterator> SortedIterator for I {}
385
386#[cfg(test)]
387mod tests {
388 use super::*;
389
390 #[test]
391 fn test_string_interner() {
392 let interner = StringInterner::new();
393
394 let s1 = interner.intern("hello");
395 let s2 = interner.intern("hello");
396 let s3 = interner.intern("world");
397
398 assert!(Arc::ptr_eq(&s1, &s2));
400 assert!(!Arc::ptr_eq(&s1, &s3));
401
402 let stats = interner.stats();
403 assert_eq!(stats.unique_strings, 2);
404 }
405
406 #[test]
407 fn test_ast_cache() {
408 let cache = AstCache::new(2);
409 let now = std::time::SystemTime::now();
410
411 let module = ast::Mod::Module(ast::ModModule {
413 body: vec![],
414 type_ignores: vec![],
415 range: Default::default(),
416 });
417
418 cache.insert("test.py".to_string(), module.clone(), now);
420 let retrieved = cache.get("test.py", now);
421 assert!(retrieved.is_some());
422
423 let later = now + std::time::Duration::from_secs(1);
425 let missed = cache.get("test.py", later);
426 assert!(missed.is_none());
427 }
428
429 #[test]
430 fn test_type_cache() {
431 let cache = TypeCache::new(10);
432
433 let query = TypeQuery {
434 file_path: "test.py".to_string(),
435 line: 10,
436 column: 5,
437 expression: "func()".to_string(),
438 };
439
440 let info = TypeInfo {
441 type_name: "str".to_string(),
442 is_callable: false,
443 cached_at: std::time::Instant::now(),
444 ttl: std::time::Duration::from_secs(60),
445 };
446
447 cache.insert(query.clone(), info);
448 let retrieved = cache.get(&query);
449 assert!(retrieved.is_some());
450 }
451}