use rustpython_ast as ast;
use std::collections::HashMap;
use std::hash::Hash;
use std::sync::{Arc, Mutex, RwLock};
/// Thread-safe string interner that hands out shared `Arc<str>` handles.
///
/// Interning the same text twice yields handles that point at the same heap
/// allocation, so equal interned strings can be compared with `Arc::ptr_eq`.
pub struct StringInterner {
    /// Every interned string, stored exactly once. `Arc<str>` implements
    /// `Borrow<str>`, so the set can be probed with a plain `&str` without
    /// allocating a temporary key.
    strings: RwLock<std::collections::HashSet<Arc<str>>>,
}

impl StringInterner {
    /// Creates an empty interner.
    pub fn new() -> Self {
        Self {
            strings: RwLock::new(std::collections::HashSet::new()),
        }
    }

    /// Returns the shared handle for `s`, allocating it the first time `s` is seen.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock was poisoned by a panic in another thread.
    pub fn intern(&self, s: &str) -> Arc<str> {
        // Fast path: an already-interned string only needs the read lock.
        // A poisoned read lock falls through to the write path below.
        if let Ok(set) = self.strings.read() {
            if let Some(existing) = set.get(s) {
                return Arc::clone(existing);
            }
        }

        // Slow path: look again under the write lock, because another thread
        // may have interned `s` between releasing the read lock and getting here.
        let mut set = self.strings.write().unwrap();
        if let Some(existing) = set.get(s) {
            return Arc::clone(existing);
        }
        let interned: Arc<str> = Arc::from(s);
        set.insert(Arc::clone(&interned));
        interned
    }

    /// Returns a point-in-time snapshot of the interner's contents.
    ///
    /// `total_bytes_saved` is an estimate: for every interned string it counts
    /// the string's byte length once for each live handle beyond the first,
    /// i.e. the heap bytes that would exist if every holder owned its own copy.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock was poisoned by a panic in another thread.
    pub fn stats(&self) -> InternerStats {
        let set = self.strings.read().unwrap();
        let total_bytes_saved = set
            .iter()
            .map(|interned| {
                // The strong count includes the interner's own handle; the
                // remaining handles belong to callers.
                let external_handles = Arc::strong_count(interned).saturating_sub(1);
                // The first external holder would have needed an allocation
                // anyway, so only the additional ones represent a saving.
                interned.len() * external_handles.saturating_sub(1)
            })
            .sum();
        InternerStats {
            unique_strings: set.len(),
            total_bytes_saved,
        }
    }
}

impl Default for StringInterner {
    /// Equivalent to [`StringInterner::new`].
    fn default() -> Self {
        Self::new()
    }
}

/// Snapshot of a [`StringInterner`]'s contents.
#[derive(Debug, Clone)]
pub struct InternerStats {
    /// Number of distinct strings currently interned.
    pub unique_strings: usize,
    /// Estimated heap bytes avoided by sharing allocations between the
    /// handles that were alive when the snapshot was taken.
    pub total_bytes_saved: usize,
}
/// Bounded, thread-safe cache of parsed modules keyed by file path.
///
/// When the cache is at capacity and a new path is inserted, the entry with
/// the fewest recorded hits is evicted. Limits of 0 and 1 both result in a
/// single retained entry.
pub struct AstCache {
    cache: RwLock<HashMap<String, CacheEntry>>,
    max_entries: usize,
    /// Lookups answered from the cache since construction or the last `clear`.
    hits: std::sync::atomic::AtomicUsize,
    /// Lookups that found no entry, or only a stale one, since construction
    /// or the last `clear`.
    misses: std::sync::atomic::AtomicUsize,
}

/// A cached module together with the bookkeeping used for freshness checks
/// and eviction.
#[derive(Clone)]
struct CacheEntry {
    ast: ast::Mod,
    /// Modification time of the file when this entry was stored.
    last_modified: std::time::SystemTime,
    /// Number of times this entry has been served; drives eviction.
    hit_count: usize,
}

impl AstCache {
    /// Creates an empty cache that starts evicting once it holds `max_entries` entries.
    pub fn new(max_entries: usize) -> Self {
        Self {
            cache: RwLock::new(HashMap::new()),
            max_entries,
            hits: std::sync::atomic::AtomicUsize::new(0),
            misses: std::sync::atomic::AtomicUsize::new(0),
        }
    }

    /// Returns a clone of the cached module for `file_path` if the entry is at
    /// least as new as `file_modified`.
    ///
    /// A stale entry is removed as a side effect. Returns `None` (without
    /// recording a miss) if the internal lock is poisoned.
    pub fn get(&self, file_path: &str, file_modified: std::time::SystemTime) -> Option<ast::Mod> {
        // A write lock is needed even for lookups: hits bump the entry's
        // counter and stale entries are removed.
        let mut cache = self.cache.write().ok()?;
        if let Some(entry) = cache.get_mut(file_path) {
            if entry.last_modified >= file_modified {
                entry.hit_count += 1;
                // Counters are plain statistics, so relaxed ordering suffices.
                self.hits
                    .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
                return Some(entry.ast.clone());
            }
            // The file changed after it was cached; drop the outdated module.
            cache.remove(file_path);
        }
        self.misses
            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
        None
    }

    /// Stores `ast` for `file_path`, replacing any previous entry for that path.
    ///
    /// If the path is new and the cache is at capacity, the entry with the
    /// fewest hits is evicted first. Replacing an existing path never evicts,
    /// because it does not grow the cache.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock was poisoned by a panic in another thread.
    pub fn insert(&self, file_path: String, ast: ast::Mod, file_modified: std::time::SystemTime) {
        let mut cache = self.cache.write().unwrap();
        let grows_cache = !cache.contains_key(&file_path);
        if grows_cache && cache.len() >= self.max_entries {
            let least_used = cache
                .iter()
                .min_by_key(|(_, entry)| entry.hit_count)
                .map(|(path, _)| path.clone());
            if let Some(path) = least_used {
                cache.remove(&path);
            }
        }
        cache.insert(
            file_path,
            CacheEntry {
                ast,
                last_modified: file_modified,
                hit_count: 0,
            },
        );
    }

    /// Returns the current entry count together with hit statistics.
    ///
    /// `total_hits` counts every successful lookup since construction or the
    /// last `clear`, including hits on entries that have since been evicted.
    /// `hit_ratio` is `hits / (hits + misses)` in the range `0.0..=1.0`, or
    /// `0.0` when no lookups have been recorded.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock was poisoned by a panic in another thread.
    pub fn stats(&self) -> CacheStats {
        let entries = self.cache.read().unwrap().len();
        let hits = self.hits.load(std::sync::atomic::Ordering::Relaxed);
        let misses = self.misses.load(std::sync::atomic::Ordering::Relaxed);
        let lookups = hits + misses;
        CacheStats {
            entries,
            total_hits: hits,
            hit_ratio: if lookups > 0 {
                hits as f64 / lookups as f64
            } else {
                0.0
            },
        }
    }

    /// Removes every entry and resets the hit/miss counters.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock was poisoned by a panic in another thread.
    pub fn clear(&self) {
        let mut cache = self.cache.write().unwrap();
        cache.clear();
        self.hits.store(0, std::sync::atomic::Ordering::Relaxed);
        self.misses.store(0, std::sync::atomic::Ordering::Relaxed);
    }
}
/// Snapshot of a cache's counters, as returned by `AstCache::stats` and
/// `TypeCache::stats`.
#[derive(Debug, Clone)]
pub struct CacheStats {
/// Number of entries held by the cache when the snapshot was taken.
pub entries: usize,
/// Hit total reported by the producing cache; `TypeCache::stats` always
/// reports 0 here.
pub total_hits: usize,
/// Ratio figure reported by the producing cache; `TypeCache::stats` always
/// reports 0.0. The `Display` impl of `PerformanceSummary` multiplies the
/// AST cache's value by 100 and prints it as a percentage.
pub hit_ratio: f64,
}
/// Cache of `TypeInfo` values keyed by `TypeQuery`, guarded by an `RwLock`.
pub struct TypeCache {
// Cached results; each entry carries its own creation time and TTL.
cache: RwLock<HashMap<TypeQuery, TypeInfo>>,
// Entry count at which `insert` starts purging and evicting.
max_entries: usize,
}
/// Key used to look up a `TypeInfo` in `TypeCache`.
///
/// Equality and hashing are derived, so all four fields take part in both.
#[derive(Debug, Clone, Hash, PartialEq, Eq)]
pub struct TypeQuery {
/// Path of the file the query refers to.
pub file_path: String,
/// Line of the queried position. NOTE(review): 0- vs 1-based is not
/// established in this file; confirm with callers.
pub line: usize,
/// Column of the queried position. NOTE(review): indexing base and unit
/// (bytes vs characters) are not established in this file.
pub column: usize,
/// The queried expression; presumably its source text (confirm with callers).
pub expression: String,
}
/// Value stored in `TypeCache` for a `TypeQuery`.
#[derive(Debug, Clone)]
pub struct TypeInfo {
/// Name of the resolved type.
pub type_name: String,
/// Whether the resolved type is callable.
pub is_callable: bool,
/// Creation instant supplied by the caller; `TypeCache` measures the
/// entry's age from this point.
pub cached_at: std::time::Instant,
/// Maximum age of the entry: `TypeCache` treats it as valid only while the
/// time elapsed since `cached_at` is strictly less than this.
pub ttl: std::time::Duration, }
impl TypeCache {
    /// Creates an empty cache that starts purging and evicting once it holds
    /// `max_entries` entries.
    pub fn new(max_entries: usize) -> Self {
        Self {
            cache: RwLock::new(HashMap::new()),
            max_entries,
        }
    }

    /// Returns a clone of the cached info for `query` if it is still within
    /// its TTL.
    ///
    /// An expired entry is removed as a side effect. Returns `None` if the
    /// internal lock is poisoned.
    pub fn get(&self, query: &TypeQuery) -> Option<TypeInfo> {
        // A write lock is taken up front because an expired entry is removed
        // during the lookup.
        let mut cache = self.cache.write().ok()?;
        let info = cache.get(query)?;
        if info.cached_at.elapsed() < info.ttl {
            return Some(info.clone());
        }
        cache.remove(query);
        None
    }

    /// Stores `info` for `query`, replacing any previous entry for that key.
    ///
    /// If the key is new and the cache is at capacity, expired entries are
    /// purged first; if that does not free a slot, the entries with the
    /// oldest `cached_at` are evicted until there is room. Replacing an
    /// existing key never evicts, because it does not grow the cache.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock was poisoned by a panic in another thread.
    pub fn insert(&self, query: TypeQuery, info: TypeInfo) {
        let mut cache = self.cache.write().unwrap();
        let grows_cache = !cache.contains_key(&query);
        if grows_cache && cache.len() >= self.max_entries {
            // Cheapest way to make room: drop everything that has already expired.
            let now = std::time::Instant::now();
            cache.retain(|_, cached| now.duration_since(cached.cached_at) < cached.ttl);

            if cache.len() >= self.max_entries {
                // Still full: evict the oldest entries, leaving one free slot.
                let excess = cache.len() - self.max_entries + 1;
                // Sort borrowed keys and clone only the ones being evicted.
                let victims: Vec<TypeQuery> = cache
                    .iter()
                    .map(|(key, cached)| (key, cached.cached_at))
                    .sorted_by_key(|(_, cached_at)| *cached_at)
                    .take(excess)
                    .map(|(key, _)| key.clone())
                    .collect();
                for key in victims {
                    cache.remove(&key);
                }
            }
        }
        cache.insert(query, info);
    }

    /// Returns the current entry count.
    ///
    /// This cache does not track hits, so `total_hits` and `hit_ratio` are
    /// always reported as zero.
    ///
    /// # Panics
    ///
    /// Panics if the internal lock was poisoned by a panic in another thread.
    pub fn stats(&self) -> CacheStats {
        let cache = self.cache.read().unwrap();
        CacheStats {
            entries: cache.len(),
            total_hits: 0,
            hit_ratio: 0.0,
        }
    }
}
/// Bundles the shared caches with run-wide timing counters.
pub struct PerformanceMonitor {
/// Shared string interner; its stats are included in `summary`.
pub string_interner: Arc<StringInterner>,
/// Shared AST cache; its stats are included in `summary`.
pub ast_cache: Arc<AstCache>,
/// Shared type cache; its stats are included in `summary`.
pub type_cache: Arc<TypeCache>,
// Captured in `new`; `summary` reports the time elapsed since this instant.
start_time: std::time::Instant,
// Incremented by one on every `record_file_processed` call.
files_processed: Arc<Mutex<usize>>,
// Sum of all durations passed to `record_parse_time`.
total_parse_time: Arc<Mutex<std::time::Duration>>,
// Sum of all durations passed to `record_migration_time`.
total_migration_time: Arc<Mutex<std::time::Duration>>,
}
impl PerformanceMonitor {
    /// Creates a monitor with fresh caches and zeroed counters.
    ///
    /// `max_ast_entries` and `max_type_entries` are passed to `AstCache::new`
    /// and `TypeCache::new` respectively. The wall clock reported by
    /// [`summary`](Self::summary) starts now.
    pub fn new(max_ast_entries: usize, max_type_entries: usize) -> Self {
        Self {
            string_interner: Arc::new(StringInterner::new()),
            ast_cache: Arc::new(AstCache::new(max_ast_entries)),
            type_cache: Arc::new(TypeCache::new(max_type_entries)),
            start_time: std::time::Instant::now(),
            files_processed: Arc::new(Mutex::new(0)),
            total_parse_time: Arc::new(Mutex::new(std::time::Duration::ZERO)),
            total_migration_time: Arc::new(Mutex::new(std::time::Duration::ZERO)),
        }
    }

    /// Counts one more processed file.
    ///
    /// # Panics
    ///
    /// Panics if the counter's lock was poisoned by a panic in another thread.
    pub fn record_file_processed(&self) {
        *self.files_processed.lock().unwrap() += 1;
    }

    /// Adds `duration` to the accumulated parse time.
    ///
    /// # Panics
    ///
    /// Panics if the counter's lock was poisoned by a panic in another thread.
    pub fn record_parse_time(&self, duration: std::time::Duration) {
        *self.total_parse_time.lock().unwrap() += duration;
    }

    /// Adds `duration` to the accumulated migration time.
    ///
    /// # Panics
    ///
    /// Panics if the counter's lock was poisoned by a panic in another thread.
    pub fn record_migration_time(&self, duration: std::time::Duration) {
        *self.total_migration_time.lock().unwrap() += duration;
    }

    /// Builds a snapshot of all counters and cache statistics.
    ///
    /// Throughput is computed from the fractional elapsed time, so runs
    /// shorter than one second still report a meaningful rate. Averages are
    /// zero when no files have been processed.
    ///
    /// # Panics
    ///
    /// Panics if any of the internal locks was poisoned by a panic in another
    /// thread.
    pub fn summary(&self) -> PerformanceSummary {
        let files_processed = *self.files_processed.lock().unwrap();
        let total_parse_time = *self.total_parse_time.lock().unwrap();
        let total_migration_time = *self.total_migration_time.lock().unwrap();
        let total_time = self.start_time.elapsed();

        let elapsed_secs = total_time.as_secs_f64();
        let files_per_second = if elapsed_secs > 0.0 {
            files_processed as f64 / elapsed_secs
        } else {
            0.0
        };

        // `Duration` can only be divided by a `u32`. Saturate instead of
        // truncating so a huge count can never wrap around to a zero divisor.
        let divisor = files_processed.min(u32::MAX as usize) as u32;
        // `checked_div` yields `None` for a zero divisor (no files processed).
        let average_parse_time = total_parse_time
            .checked_div(divisor)
            .unwrap_or(std::time::Duration::ZERO);
        let average_migration_time = total_migration_time
            .checked_div(divisor)
            .unwrap_or(std::time::Duration::ZERO);

        PerformanceSummary {
            total_time,
            files_processed,
            files_per_second,
            total_parse_time,
            total_migration_time,
            average_parse_time,
            average_migration_time,
            interner_stats: self.string_interner.stats(),
            ast_cache_stats: self.ast_cache.stats(),
            type_cache_stats: self.type_cache.stats(),
        }
    }
}
/// Snapshot produced by `PerformanceMonitor::summary`.
#[derive(Debug, Clone)]
pub struct PerformanceSummary {
/// Time elapsed since the monitor was created.
pub total_time: std::time::Duration,
/// Number of `record_file_processed` calls so far.
pub files_processed: usize,
/// `files_processed` divided by the elapsed seconds; 0.0 when the monitor
/// considers the elapsed time too short to compute a rate.
pub files_per_second: f64,
/// Sum of all recorded parse durations.
pub total_parse_time: std::time::Duration,
/// Sum of all recorded migration durations.
pub total_migration_time: std::time::Duration,
/// `total_parse_time` divided by `files_processed`; zero when no files
/// have been processed.
pub average_parse_time: std::time::Duration,
/// `total_migration_time` divided by `files_processed`; zero when no files
/// have been processed.
pub average_migration_time: std::time::Duration,
/// Statistics reported by the monitor's string interner.
pub interner_stats: InternerStats,
/// Statistics reported by the monitor's AST cache.
pub ast_cache_stats: CacheStats,
/// Statistics reported by the monitor's type cache.
pub type_cache_stats: CacheStats,
}
impl std::fmt::Display for PerformanceSummary {
    /// Writes the summary as a multi-line, human-readable report, one metric
    /// per line, each line terminated by a newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let interner = &self.interner_stats;
        let ast_cache = &self.ast_cache_stats;
        let type_cache = &self.type_cache_stats;
        // The ratio is scaled by 100 so it can be printed with a `%` suffix.
        let ast_hit_percent = ast_cache.hit_ratio * 100.0;

        writeln!(f, "Performance Summary:")?;

        // Wall-clock and throughput figures.
        writeln!(f, " Total time: {:.2?}", self.total_time)?;
        writeln!(f, " Files processed: {}", self.files_processed)?;
        writeln!(f, " Files per second: {:.2}", self.files_per_second)?;

        // Accumulated and per-file phase timings.
        writeln!(f, " Total parse time: {:.2?}", self.total_parse_time)?;
        writeln!(f, " Total migration time: {:.2?}", self.total_migration_time)?;
        writeln!(f, " Average parse time: {:.2?}", self.average_parse_time)?;
        writeln!(f, " Average migration time: {:.2?}", self.average_migration_time)?;

        // Interner and cache figures.
        writeln!(f, " String interner: {} unique strings", interner.unique_strings)?;
        writeln!(
            f,
            " AST cache: {} entries, {:.1}% hit rate",
            ast_cache.entries, ast_hit_percent
        )?;
        writeln!(f, " Type cache: {} entries", type_cache.entries)
    }
}
/// Extension trait that adds an eager, key-based sort to every iterator.
trait SortedIterator: Iterator {
    /// Drains the iterator into a buffer, sorts the buffer by the key that
    /// `f` extracts, and returns an owning iterator over the sorted items.
    ///
    /// The sort is stable: items with equal keys keep their original order.
    fn sorted_by_key<K, F>(self, f: F) -> std::vec::IntoIter<Self::Item>
    where
        Self: Sized,
        F: FnMut(&Self::Item) -> K,
        K: Ord,
    {
        let mut key_of = f;
        let mut buffer: Vec<Self::Item> = Vec::new();
        buffer.extend(self);
        // Comparing freshly extracted keys is what a key-based sort does
        // internally; spelling it out keeps the left-then-right call order.
        buffer.sort_by(|lhs, rhs| {
            let left_key = key_of(lhs);
            let right_key = key_of(rhs);
            left_key.cmp(&right_key)
        });
        buffer.into_iter()
    }
}

/// Blanket implementation: every iterator gets `sorted_by_key`.
impl<I: Iterator> SortedIterator for I {}
#[cfg(test)]
mod tests {
    use super::*;

    /// Equal strings share one allocation; distinct strings do not.
    #[test]
    fn test_string_interner() {
        let pool = StringInterner::new();
        let first_hello = pool.intern("hello");
        let second_hello = pool.intern("hello");
        let world = pool.intern("world");

        assert!(Arc::ptr_eq(&first_hello, &second_hello));
        assert!(!Arc::ptr_eq(&first_hello, &world));
        assert_eq!(pool.stats().unique_strings, 2);
    }

    /// An entry is served while the file is unchanged and dropped once the
    /// file's modification time moves past the cached one.
    #[test]
    fn test_ast_cache() {
        let cache = AstCache::new(2);
        let cached_at = std::time::SystemTime::now();
        let empty_module = ast::Mod::Module(ast::ModModule {
            body: vec![],
            type_ignores: vec![],
            range: Default::default(),
        });
        cache.insert(String::from("test.py"), empty_module, cached_at);

        // Same modification time: the cached module is still valid.
        assert!(cache.get("test.py", cached_at).is_some());

        // The file was touched one second later: the entry is stale.
        let touched_later = cached_at + std::time::Duration::from_secs(1);
        assert!(cache.get("test.py", touched_later).is_none());
    }

    /// A freshly inserted entry with a generous TTL can be read back.
    #[test]
    fn test_type_cache() {
        let cache = TypeCache::new(10);
        let key = TypeQuery {
            file_path: String::from("test.py"),
            line: 10,
            column: 5,
            expression: String::from("func()"),
        };
        let value = TypeInfo {
            type_name: String::from("str"),
            is_callable: false,
            cached_at: std::time::Instant::now(),
            ttl: std::time::Duration::from_secs(60),
        };
        cache.insert(key.clone(), value);

        assert!(cache.get(&key).is_some());
    }
}