pytest_language_server/fixtures/mod.rs
1//! Fixture database and analysis module.
2//!
3//! This module provides the core functionality for managing pytest fixtures:
4//! - Scanning workspaces for fixture definitions
5//! - Analyzing Python files for fixtures and their usages
6//! - Resolving fixture definitions based on pytest's priority rules
7//! - Providing completion context for fixture suggestions
8
9mod analyzer;
10pub(crate) mod cli;
11pub mod decorators; // Public for testing
12mod docstring;
13mod imports;
14mod resolver;
15mod scanner;
16pub(crate) mod string_utils; // pub(crate) for inlay_hint provider access
17pub mod types;
18mod undeclared;
19
20#[allow(unused_imports)] // ParamInsertionInfo re-exported for public API via lib.rs
21pub use types::{
22 CompletionContext, FixtureCycle, FixtureDefinition, FixtureScope, FixtureUsage,
23 ParamInsertionInfo, ScopeMismatch, UndeclaredFixture,
24};
25
26use dashmap::DashMap;
27use std::collections::hash_map::DefaultHasher;
28use std::collections::HashSet;
29use std::hash::{Hash, Hasher};
30use std::path::{Path, PathBuf};
31use std::sync::Arc;
32use tracing::debug;
33
/// A package installed in editable (development) mode.
///
/// Such installs are discovered through `direct_url.json` and `.pth` files
/// found in a site-packages directory.
#[derive(Debug, Clone)]
#[allow(dead_code)] // Fields read in tests and used for debug logging
pub struct EditableInstall {
    /// Package name (presumably the normalized form of `raw_package_name` — confirm at the producer).
    pub package_name: String,
    /// Package name as originally written in the install metadata.
    pub raw_package_name: String,
    /// Directory holding the package's sources; paths under it belong to this install.
    pub source_root: PathBuf,
    /// The site-packages directory in which the install was discovered.
    pub site_packages: PathBuf,
}
43
/// Line-index cache entry: `(content_hash, line_index)`.
///
/// A cached line index is only reused while the stored hash equals the hash of
/// the file's current content.
type LineIndexCacheEntry = (u64, Arc<Vec<usize>>);
47
48/// Cache entry for parsed AST: (content_hash, ast).
49/// The content hash is used to invalidate the cache when file content changes.
50type AstCacheEntry = (u64, Arc<rustpython_parser::ast::Mod>);
51
52/// Cache entry for fixture cycles: (definitions_version, cycles).
53/// The version is incremented when definitions change to invalidate the cache.
54type CycleCacheEntry = (u64, Arc<Vec<types::FixtureCycle>>);
55
56/// Cache entry for available fixtures: (definitions_version, fixtures).
57/// The version is incremented when definitions change to invalidate the cache.
58type AvailableFixturesCacheEntry = (u64, Arc<Vec<FixtureDefinition>>);
59
/// Imported-fixtures cache entry:
/// `(content_hash, definitions_version, imported_fixture_names)`.
///
/// The entry becomes stale when either the file content or the fixture
/// definitions change.
type ImportedFixturesCacheEntry = (u64, u64, Arc<HashSet<String>>);
63
/// Maximum number of files to keep in the file content cache.
///
/// When the cache grows beyond this limit, roughly a quarter of its entries are
/// evicted to prevent unbounded memory growth. Entries are picked in the map's
/// iteration order; no access or insertion time is tracked, so the evicted
/// entries are not necessarily the oldest ones.
const MAX_FILE_CACHE_SIZE: usize = 2000;
67
68/// The central database for fixture definitions and usages.
69///
70/// Uses `DashMap` for lock-free concurrent access during workspace scanning.
71#[derive(Debug)]
72pub struct FixtureDatabase {
73 /// Map from fixture name to all its definitions (can be in multiple conftest.py files).
74 pub definitions: Arc<DashMap<String, Vec<FixtureDefinition>>>,
75 /// Reverse index: file path -> fixture names defined in that file.
76 /// Used for efficient cleanup when a file is re-analyzed.
77 pub file_definitions: Arc<DashMap<PathBuf, HashSet<String>>>,
78 /// Map from file path to fixtures used in that file.
79 pub usages: Arc<DashMap<PathBuf, Vec<FixtureUsage>>>,
80 /// Reverse index: fixture name -> (file_path, usage) pairs.
81 /// Used for efficient O(1) lookup in find_references_for_definition.
82 pub usage_by_fixture: Arc<DashMap<String, Vec<(PathBuf, FixtureUsage)>>>,
83 /// Cache of file contents for analyzed files (uses Arc for efficient sharing).
84 pub file_cache: Arc<DashMap<PathBuf, Arc<String>>>,
85 /// Map from file path to undeclared fixtures used in function bodies.
86 pub undeclared_fixtures: Arc<DashMap<PathBuf, Vec<UndeclaredFixture>>>,
87 /// Map from file path to imported names in that file.
88 pub imports: Arc<DashMap<PathBuf, HashSet<String>>>,
89 /// Cache of canonical paths to avoid repeated filesystem calls.
90 pub canonical_path_cache: Arc<DashMap<PathBuf, PathBuf>>,
91 /// Cache of line indices (byte offsets) for files to avoid recomputation.
92 /// Stores (content_hash, line_index) to invalidate when content changes.
93 pub line_index_cache: Arc<DashMap<PathBuf, LineIndexCacheEntry>>,
94 /// Cache of parsed AST for files to avoid re-parsing.
95 /// Stores (content_hash, ast) to invalidate when content changes.
96 pub ast_cache: Arc<DashMap<PathBuf, AstCacheEntry>>,
97 /// Version counter for definitions, incremented on each change.
98 /// Used to invalidate cycle detection cache and available fixtures cache.
99 pub definitions_version: Arc<std::sync::atomic::AtomicU64>,
100 /// Cache of detected fixture cycles.
101 /// Stores (definitions_version, cycles) to invalidate when definitions change.
102 pub cycle_cache: Arc<DashMap<(), CycleCacheEntry>>,
103 /// Cache of available fixtures per file.
104 /// Stores (definitions_version, fixtures) to invalidate when definitions change.
105 pub available_fixtures_cache: Arc<DashMap<PathBuf, AvailableFixturesCacheEntry>>,
106 /// Cache of imported fixtures per file.
107 /// Stores (content_hash, definitions_version, fixture_names) for invalidation.
108 pub imported_fixtures_cache: Arc<DashMap<PathBuf, ImportedFixturesCacheEntry>>,
109 /// Discovered site-packages paths from venv scanning.
110 /// Used for resolving absolute imports in venv plugin modules.
111 pub site_packages_paths: Arc<std::sync::Mutex<Vec<PathBuf>>>,
112 /// Discovered editable installs from venv scanning.
113 pub editable_install_roots: Arc<std::sync::Mutex<Vec<EditableInstall>>>,
114 /// Workspace root path, set during scan. Used to distinguish in-workspace editables.
115 pub workspace_root: Arc<std::sync::Mutex<Option<PathBuf>>>,
116}
117
118impl Default for FixtureDatabase {
119 fn default() -> Self {
120 Self::new()
121 }
122}
123
124impl FixtureDatabase {
125 /// Create a new empty fixture database.
126 pub fn new() -> Self {
127 Self {
128 definitions: Arc::new(DashMap::new()),
129 file_definitions: Arc::new(DashMap::new()),
130 usages: Arc::new(DashMap::new()),
131 usage_by_fixture: Arc::new(DashMap::new()),
132 file_cache: Arc::new(DashMap::new()),
133 undeclared_fixtures: Arc::new(DashMap::new()),
134 imports: Arc::new(DashMap::new()),
135 canonical_path_cache: Arc::new(DashMap::new()),
136 line_index_cache: Arc::new(DashMap::new()),
137 ast_cache: Arc::new(DashMap::new()),
138 definitions_version: Arc::new(std::sync::atomic::AtomicU64::new(0)),
139 cycle_cache: Arc::new(DashMap::new()),
140 available_fixtures_cache: Arc::new(DashMap::new()),
141 imported_fixtures_cache: Arc::new(DashMap::new()),
142 site_packages_paths: Arc::new(std::sync::Mutex::new(Vec::new())),
143 editable_install_roots: Arc::new(std::sync::Mutex::new(Vec::new())),
144 workspace_root: Arc::new(std::sync::Mutex::new(None)),
145 }
146 }
147
148 /// Increment the definitions version to invalidate cycle cache.
149 /// Called whenever fixture definitions are modified.
150 pub(crate) fn invalidate_cycle_cache(&self) {
151 self.definitions_version
152 .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
153 }
154
155 /// Get canonical path with caching to avoid repeated filesystem calls.
156 /// Falls back to original path if canonicalization fails.
157 pub(crate) fn get_canonical_path(&self, path: PathBuf) -> PathBuf {
158 // Check cache first
159 if let Some(cached) = self.canonical_path_cache.get(&path) {
160 return cached.value().clone();
161 }
162
163 // Attempt canonicalization
164 let canonical = path.canonicalize().unwrap_or_else(|_| {
165 debug!("Could not canonicalize path {:?}, using as-is", path);
166 path.clone()
167 });
168
169 // Store in cache for future lookups
170 self.canonical_path_cache.insert(path, canonical.clone());
171 canonical
172 }
173
174 /// Get file content from cache or read from filesystem.
175 /// Returns None if file cannot be read.
176 pub(crate) fn get_file_content(&self, file_path: &Path) -> Option<Arc<String>> {
177 if let Some(cached) = self.file_cache.get(file_path) {
178 Some(Arc::clone(cached.value()))
179 } else {
180 std::fs::read_to_string(file_path).ok().map(Arc::new)
181 }
182 }
183
184 /// Get or compute line index for a file, with content-hash-based caching.
185 /// Returns Arc to avoid cloning the potentially large Vec.
186 /// The cache is invalidated when the content hash changes.
187 pub(crate) fn get_line_index(&self, file_path: &Path, content: &str) -> Arc<Vec<usize>> {
188 let content_hash = Self::hash_content(content);
189
190 // Check cache first - only use if content hash matches
191 if let Some(cached) = self.line_index_cache.get(file_path) {
192 let (cached_hash, cached_index) = cached.value();
193 if *cached_hash == content_hash {
194 return Arc::clone(cached_index);
195 }
196 }
197
198 // Build line index
199 let line_index = Self::build_line_index(content);
200 let arc_index = Arc::new(line_index);
201
202 // Store in cache with content hash
203 self.line_index_cache.insert(
204 file_path.to_path_buf(),
205 (content_hash, Arc::clone(&arc_index)),
206 );
207
208 arc_index
209 }
210
211 /// Get or parse AST for a file, with content-hash-based caching.
212 /// Returns Arc to avoid cloning the potentially large AST.
213 /// The cache is invalidated when the content hash changes.
214 pub(crate) fn get_parsed_ast(
215 &self,
216 file_path: &Path,
217 content: &str,
218 ) -> Option<Arc<rustpython_parser::ast::Mod>> {
219 let content_hash = Self::hash_content(content);
220
221 // Check cache first - only use if content hash matches
222 if let Some(cached) = self.ast_cache.get(file_path) {
223 let (cached_hash, cached_ast) = cached.value();
224 if *cached_hash == content_hash {
225 return Some(Arc::clone(cached_ast));
226 }
227 }
228
229 // Parse the content
230 let parsed = rustpython_parser::parse(content, rustpython_parser::Mode::Module, "").ok()?;
231 let arc_ast = Arc::new(parsed);
232
233 // Store in cache with content hash
234 self.ast_cache.insert(
235 file_path.to_path_buf(),
236 (content_hash, Arc::clone(&arc_ast)),
237 );
238
239 Some(arc_ast)
240 }
241
242 /// Compute a hash of the content for cache invalidation.
243 fn hash_content(content: &str) -> u64 {
244 let mut hasher = DefaultHasher::new();
245 content.hash(&mut hasher);
246 hasher.finish()
247 }
248
249 /// Check if a file path is inside an editable install that is NOT within the workspace.
250 /// Returns true if the file is from an external editable install (third-party).
251 pub(crate) fn is_editable_install_third_party(&self, file_path: &Path) -> bool {
252 let installs = self.editable_install_roots.lock().unwrap();
253 let workspace = self.workspace_root.lock().unwrap();
254
255 for install in installs.iter() {
256 if file_path.starts_with(&install.source_root) {
257 if let Some(ref ws) = *workspace {
258 // Not third-party if editable source is inside workspace
259 if install.source_root.starts_with(ws) {
260 return false;
261 }
262 // Not third-party if workspace is inside editable source
263 // (project installed editable in its own venv)
264 if ws.starts_with(&install.source_root) {
265 return false;
266 }
267 }
268 return true;
269 }
270 }
271 false
272 }
273
274 /// Remove all cached data for a file.
275 /// Called when a file is closed or deleted to prevent unbounded memory growth.
276 pub fn cleanup_file_cache(&self, file_path: &Path) {
277 // Use canonical path for consistent cleanup
278 let canonical = file_path
279 .canonicalize()
280 .unwrap_or_else(|_| file_path.to_path_buf());
281
282 debug!("Cleaning up cache for file: {:?}", canonical);
283
284 // Remove from line_index_cache
285 self.line_index_cache.remove(&canonical);
286
287 // Remove from ast_cache
288 self.ast_cache.remove(&canonical);
289
290 // Remove from file_cache
291 self.file_cache.remove(&canonical);
292
293 // Remove from available_fixtures_cache (this file's cached available fixtures)
294 self.available_fixtures_cache.remove(&canonical);
295
296 // Remove from imported_fixtures_cache
297 self.imported_fixtures_cache.remove(&canonical);
298
299 // Note: We don't remove from canonical_path_cache because:
300 // 1. It's keyed by original path, not canonical path
301 // 2. Path->canonical mappings are stable and small
302 // 3. They may be needed again if file is reopened
303
304 // Note: We don't remove definitions/usages here because:
305 // 1. They might be needed for cross-file references
306 // 2. They're cleaned up on next analyze_file call anyway
307 }
308
309 /// Evict entries from caches if they exceed the maximum size.
310 /// Called periodically to prevent unbounded memory growth in very large workspaces.
311 /// Most LSPs rely on did_close cleanup for open files; this is a safety net for
312 /// workspace scan files that accumulate over time.
313 pub(crate) fn evict_cache_if_needed(&self) {
314 // Only evict if significantly over limit to avoid frequent eviction
315 if self.file_cache.len() > MAX_FILE_CACHE_SIZE {
316 debug!(
317 "File cache size ({}) exceeds limit ({}), evicting entries",
318 self.file_cache.len(),
319 MAX_FILE_CACHE_SIZE
320 );
321
322 // Remove ~25% of entries to avoid frequent re-eviction
323 let to_remove_count = self.file_cache.len() / 4;
324 let to_remove: Vec<PathBuf> = self
325 .file_cache
326 .iter()
327 .take(to_remove_count)
328 .map(|entry| entry.key().clone())
329 .collect();
330
331 for path in to_remove {
332 self.file_cache.remove(&path);
333 // Also clean related caches for consistency
334 self.line_index_cache.remove(&path);
335 self.ast_cache.remove(&path);
336 self.available_fixtures_cache.remove(&path);
337 self.imported_fixtures_cache.remove(&path);
338 }
339
340 debug!(
341 "Cache eviction complete, new size: {}",
342 self.file_cache.len()
343 );
344 }
345 }
346}