pytest_language_server/fixtures/mod.rs
1//! Fixture database and analysis module.
2//!
3//! This module provides the core functionality for managing pytest fixtures:
4//! - Scanning workspaces for fixture definitions
5//! - Analyzing Python files for fixtures and their usages
6//! - Resolving fixture definitions based on pytest's priority rules
7//! - Providing completion context for fixture suggestions
8
9mod analyzer;
10pub(crate) mod cli;
11pub mod decorators; // Public for testing
12mod docstring;
13pub mod import_analysis;
14mod imports;
15mod resolver;
16mod scanner;
17pub(crate) mod string_utils; // pub(crate) for inlay_hint provider access
18pub mod types;
19mod undeclared;
20
21#[allow(unused_imports)] // ParamInsertionInfo re-exported for public API via lib.rs
22pub use types::{
23 CompletionContext, FixtureCycle, FixtureDefinition, FixtureScope, FixtureUsage,
24 ParamInsertionInfo, ScopeMismatch, TypeImportSpec, UndeclaredFixture,
25};
26
27use dashmap::DashMap;
28use std::collections::hash_map::DefaultHasher;
29use std::collections::{HashMap, HashSet};
30use std::hash::{Hash, Hasher};
31use std::path::{Path, PathBuf};
32use std::sync::Arc;
33use tracing::debug;
34
/// One editable ("development mode") install found while scanning a venv.
///
/// Editable installs are discovered through `direct_url.json` and `.pth`
/// files inside a site-packages directory.
#[allow(dead_code)] // Fields read in tests and used for debug logging
#[derive(Clone, Debug)]
pub struct EditableInstall {
    /// Name of the installed package.
    /// NOTE(review): presumably the normalized form of `raw_package_name` —
    /// confirm against the venv scanner.
    pub package_name: String,
    /// Package name as originally recorded.
    /// NOTE(review): presumably the un-normalized spelling — confirm against
    /// the venv scanner.
    pub raw_package_name: String,
    /// Root of the package's source tree. A file whose path starts with this
    /// directory is considered part of the editable install.
    pub source_root: PathBuf,
    /// The site-packages directory in which this install was discovered.
    pub site_packages: PathBuf,
}
44
/// Cache entry for line indices: `(content_hash, line_index)`.
///
/// The content hash is the 64-bit value produced by
/// `FixtureDatabase::hash_content`; an entry is only reused when the hash of
/// the current content equals the stored one. The index itself is shared via
/// `Arc` so a cache hit does not copy the `Vec`.
type LineIndexCacheEntry = (u64, Arc<Vec<usize>>);

/// Cache entry for a parsed AST: `(content_hash, ast)`.
///
/// Invalidated the same way as [`LineIndexCacheEntry`]: a stored entry is
/// ignored (and later overwritten) when the content hash no longer matches.
type AstCacheEntry = (u64, Arc<rustpython_parser::ast::Mod>);

/// Cache entry for fixture cycles: `(definitions_version, cycles)`.
///
/// The version is the value of `FixtureDatabase::definitions_version` at the
/// time the cycles were computed; it is incremented when definitions change,
/// which makes older entries stale.
type CycleCacheEntry = (u64, Arc<Vec<types::FixtureCycle>>);

/// Cache entry for available fixtures: `(definitions_version, fixtures)`.
///
/// The version is incremented when definitions change to invalidate the cache.
type AvailableFixturesCacheEntry = (u64, Arc<Vec<FixtureDefinition>>);

/// Cache entry for imported fixtures:
/// `(content_hash, definitions_version, imported_fixture_names)`.
///
/// Invalidated when either the file content or the fixture definitions change.
type ImportedFixturesCacheEntry = (u64, u64, Arc<HashSet<String>>);

/// Cache entry for the name→`TypeImportSpec` map: `(content_hash, Arc<map>)`.
///
/// Invalidated when the file content changes (same strategy as `ast_cache`).
///
/// The map is wrapped in `Arc` so a cache hit is an O(1) refcount bump rather
/// than a full `HashMap` clone.
///
/// **Size bound**: this cache is only populated by `get_name_to_import_map`.
/// Entries are removed alongside `file_cache` entries in both
/// `cleanup_file_cache` (per-file, on close/delete) and
/// `evict_cache_if_needed` (bulk, when `file_cache` exceeds
/// `MAX_FILE_CACHE_SIZE`), so no independent size constant is defined.
/// NOTE(review): this bound relies on callers only requesting maps for files
/// that are also present in `file_cache` — confirm against the code-action
/// and inlay-hint providers.
type NameImportMapCacheEntry = (
    u64,
    Arc<HashMap<String, crate::fixtures::types::TypeImportSpec>>,
);
81
/// Soft cap on the number of files kept in the file content cache.
///
/// `evict_cache_if_needed` only acts once `file_cache` holds strictly more
/// entries than this. It then removes roughly a quarter of the entries,
/// picked in the map's iteration order — not by age or recency of use — to
/// prevent unbounded memory growth.
const MAX_FILE_CACHE_SIZE: usize = 2000;
85
/// The central database for fixture definitions and usages.
///
/// Every map is a `DashMap` behind an `Arc`, so the database can be read and
/// updated concurrently (for example during workspace scanning) through a
/// shared `&self`.
#[derive(Debug)]
pub struct FixtureDatabase {
    /// Map from fixture name to all its definitions (can be in multiple conftest.py files).
    pub definitions: Arc<DashMap<String, Vec<FixtureDefinition>>>,
    /// Reverse index: file path -> fixture names defined in that file.
    /// Used for efficient cleanup when a file is re-analyzed.
    pub file_definitions: Arc<DashMap<PathBuf, HashSet<String>>>,
    /// Map from file path to fixtures used in that file.
    pub usages: Arc<DashMap<PathBuf, Vec<FixtureUsage>>>,
    /// Reverse index: fixture name -> (file_path, usage) pairs.
    /// Lets `find_references_for_definition` look usages up by fixture name
    /// instead of scanning every file.
    pub usage_by_fixture: Arc<DashMap<String, Vec<(PathBuf, FixtureUsage)>>>,
    /// Cache of file contents for analyzed files (uses Arc for efficient sharing).
    /// Its size drives bulk eviction in `evict_cache_if_needed`.
    pub file_cache: Arc<DashMap<PathBuf, Arc<String>>>,
    /// Map from file path to undeclared fixtures used in function bodies.
    pub undeclared_fixtures: Arc<DashMap<PathBuf, Vec<UndeclaredFixture>>>,
    /// Map from file path to imported names in that file.
    pub imports: Arc<DashMap<PathBuf, HashSet<String>>>,
    /// Cache of canonical paths to avoid repeated filesystem calls.
    /// Keyed by the path as originally supplied; the value is the
    /// canonicalized path, or the original path when canonicalization failed.
    pub canonical_path_cache: Arc<DashMap<PathBuf, PathBuf>>,
    /// Cache of line indices (byte offsets) for files to avoid recomputation.
    /// Stores (content_hash, line_index) to invalidate when content changes.
    pub line_index_cache: Arc<DashMap<PathBuf, LineIndexCacheEntry>>,
    /// Cache of parsed AST for files to avoid re-parsing.
    /// Stores (content_hash, ast) to invalidate when content changes.
    pub ast_cache: Arc<DashMap<PathBuf, AstCacheEntry>>,
    /// Version counter for definitions, incremented on each change.
    /// Used to invalidate the cycle detection cache, the available fixtures
    /// cache and the imported fixtures cache, all of which store the version
    /// they were computed against.
    pub definitions_version: Arc<std::sync::atomic::AtomicU64>,
    /// Cache of detected fixture cycles.
    /// Stores (definitions_version, cycles) to invalidate when definitions change.
    /// The key type is `()`, so the map holds at most one entry.
    pub cycle_cache: Arc<DashMap<(), CycleCacheEntry>>,
    /// Cache of available fixtures per file.
    /// Stores (definitions_version, fixtures) to invalidate when definitions change.
    pub available_fixtures_cache: Arc<DashMap<PathBuf, AvailableFixturesCacheEntry>>,
    /// Cache of imported fixtures per file.
    /// Stores (content_hash, definitions_version, fixture_names) for invalidation.
    pub imported_fixtures_cache: Arc<DashMap<PathBuf, ImportedFixturesCacheEntry>>,
    /// Discovered site-packages paths from venv scanning.
    /// Used for resolving absolute imports in venv plugin modules.
    pub site_packages_paths: Arc<std::sync::Mutex<Vec<PathBuf>>>,
    /// Discovered editable installs from venv scanning.
    /// Read by `is_editable_install_third_party`.
    pub editable_install_roots: Arc<std::sync::Mutex<Vec<EditableInstall>>>,
    /// Workspace root path, set during scan. Used to distinguish in-workspace editables.
    /// `None` until a scan has recorded a root.
    pub workspace_root: Arc<std::sync::Mutex<Option<PathBuf>>>,
    /// Files discovered via pytest11 entry point plugins.
    /// Used to mark fixtures from these files as `is_plugin` so the resolver
    /// can find them even when they are not in conftest.py or site-packages.
    /// The `()` value means the map is used as a concurrent set of paths.
    pub plugin_fixture_files: Arc<DashMap<PathBuf, ()>>,
    /// Cache of the name→TypeImportSpec map per file.
    /// Stores (content_hash, map) so the result of `build_name_to_import_map`
    /// is reused across code-action and inlay-hint requests without re-parsing.
    ///
    /// Bounded implicitly: see [`NameImportMapCacheEntry`] for the eviction strategy.
    pub name_import_map_cache: Arc<DashMap<PathBuf, NameImportMapCacheEntry>>,
}
145
146impl Default for FixtureDatabase {
147 fn default() -> Self {
148 Self::new()
149 }
150}
151
152impl FixtureDatabase {
153 /// Create a new empty fixture database.
154 pub fn new() -> Self {
155 Self {
156 definitions: Arc::new(DashMap::new()),
157 file_definitions: Arc::new(DashMap::new()),
158 usages: Arc::new(DashMap::new()),
159 usage_by_fixture: Arc::new(DashMap::new()),
160 file_cache: Arc::new(DashMap::new()),
161 undeclared_fixtures: Arc::new(DashMap::new()),
162 imports: Arc::new(DashMap::new()),
163 canonical_path_cache: Arc::new(DashMap::new()),
164 line_index_cache: Arc::new(DashMap::new()),
165 ast_cache: Arc::new(DashMap::new()),
166 definitions_version: Arc::new(std::sync::atomic::AtomicU64::new(0)),
167 cycle_cache: Arc::new(DashMap::new()),
168 available_fixtures_cache: Arc::new(DashMap::new()),
169 imported_fixtures_cache: Arc::new(DashMap::new()),
170 site_packages_paths: Arc::new(std::sync::Mutex::new(Vec::new())),
171 editable_install_roots: Arc::new(std::sync::Mutex::new(Vec::new())),
172 workspace_root: Arc::new(std::sync::Mutex::new(None)),
173 plugin_fixture_files: Arc::new(DashMap::new()),
174 name_import_map_cache: Arc::new(DashMap::new()),
175 }
176 }
177
178 /// Increment the definitions version to invalidate cycle cache.
179 /// Called whenever fixture definitions are modified.
180 pub(crate) fn invalidate_cycle_cache(&self) {
181 self.definitions_version
182 .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
183 }
184
185 /// Get canonical path with caching to avoid repeated filesystem calls.
186 /// Falls back to original path if canonicalization fails.
187 pub(crate) fn get_canonical_path(&self, path: PathBuf) -> PathBuf {
188 // Check cache first
189 if let Some(cached) = self.canonical_path_cache.get(&path) {
190 return cached.value().clone();
191 }
192
193 // Attempt canonicalization
194 let canonical = path.canonicalize().unwrap_or_else(|_| {
195 debug!("Could not canonicalize path {:?}, using as-is", path);
196 path.clone()
197 });
198
199 // Store in cache for future lookups
200 self.canonical_path_cache.insert(path, canonical.clone());
201 canonical
202 }
203
204 /// Get file content from cache or read from filesystem.
205 /// Returns None if file cannot be read.
206 pub(crate) fn get_file_content(&self, file_path: &Path) -> Option<Arc<String>> {
207 if let Some(cached) = self.file_cache.get(file_path) {
208 Some(Arc::clone(cached.value()))
209 } else {
210 std::fs::read_to_string(file_path).ok().map(Arc::new)
211 }
212 }
213
214 /// Get or compute line index for a file, with content-hash-based caching.
215 /// Returns Arc to avoid cloning the potentially large Vec.
216 /// The cache is invalidated when the content hash changes.
217 pub(crate) fn get_line_index(&self, file_path: &Path, content: &str) -> Arc<Vec<usize>> {
218 let content_hash = Self::hash_content(content);
219
220 // Check cache first - only use if content hash matches
221 if let Some(cached) = self.line_index_cache.get(file_path) {
222 let (cached_hash, cached_index) = cached.value();
223 if *cached_hash == content_hash {
224 return Arc::clone(cached_index);
225 }
226 }
227
228 // Build line index
229 let line_index = Self::build_line_index(content);
230 let arc_index = Arc::new(line_index);
231
232 // Store in cache with content hash
233 self.line_index_cache.insert(
234 file_path.to_path_buf(),
235 (content_hash, Arc::clone(&arc_index)),
236 );
237
238 arc_index
239 }
240
241 /// Get or parse AST for a file, with content-hash-based caching.
242 /// Returns Arc to avoid cloning the potentially large AST.
243 /// The cache is invalidated when the content hash changes.
244 pub(crate) fn get_parsed_ast(
245 &self,
246 file_path: &Path,
247 content: &str,
248 ) -> Option<Arc<rustpython_parser::ast::Mod>> {
249 let content_hash = Self::hash_content(content);
250
251 // Check cache first - only use if content hash matches
252 if let Some(cached) = self.ast_cache.get(file_path) {
253 let (cached_hash, cached_ast) = cached.value();
254 if *cached_hash == content_hash {
255 return Some(Arc::clone(cached_ast));
256 }
257 }
258
259 // Parse the content
260 let parsed = rustpython_parser::parse(content, rustpython_parser::Mode::Module, "").ok()?;
261 let arc_ast = Arc::new(parsed);
262
263 // Store in cache with content hash
264 self.ast_cache.insert(
265 file_path.to_path_buf(),
266 (content_hash, Arc::clone(&arc_ast)),
267 );
268
269 Some(arc_ast)
270 }
271
272 /// Get or compute the name→[`TypeImportSpec`] map for a file, with
273 /// content-hash-based caching.
274 ///
275 /// This is the preferred way for providers to obtain a consumer-file's
276 /// import map without re-parsing on every request. The result is
277 /// recomputed only when the file content changes.
278 pub fn get_name_to_import_map(
279 &self,
280 file_path: &Path,
281 content: &str,
282 ) -> Arc<HashMap<String, crate::fixtures::types::TypeImportSpec>> {
283 let hash = Self::hash_content(content);
284
285 // Return cached value when content hasn't changed.
286 // Arc::clone is an O(1) refcount bump — no HashMap data is copied.
287 if let Some(entry) = self.name_import_map_cache.get(file_path) {
288 let (cached_hash, arc_map) = entry.value();
289 if *cached_hash == hash {
290 return Arc::clone(arc_map);
291 }
292 }
293
294 // Compute from AST (reuses ast_cache internally).
295 let map = match self.get_parsed_ast(file_path, content) {
296 Some(ast) => {
297 if let rustpython_parser::ast::Mod::Module(module) = ast.as_ref() {
298 self.build_name_to_import_map(&module.body, file_path)
299 } else {
300 HashMap::new()
301 }
302 }
303 None => HashMap::new(),
304 };
305
306 let arc_map = Arc::new(map);
307 self.name_import_map_cache
308 .insert(file_path.to_path_buf(), (hash, Arc::clone(&arc_map)));
309 arc_map
310 }
311
/// Hash `content` into the 64-bit fingerprint used as a cache key.
///
/// Uses the standard library's `DefaultHasher`. Equal content always gives
/// the same value within one run of the program, which is all the in-memory
/// caches need; the value is not meant to be persisted.
fn hash_content(content: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(content, &mut state);
    Hasher::finish(&state)
}
318
319 /// Check if a file path is inside an editable install that is NOT within the workspace.
320 /// Returns true if the file is from an external editable install (third-party).
321 pub(crate) fn is_editable_install_third_party(&self, file_path: &Path) -> bool {
322 let installs = self.editable_install_roots.lock().unwrap();
323 let workspace = self.workspace_root.lock().unwrap();
324
325 for install in installs.iter() {
326 if file_path.starts_with(&install.source_root) {
327 if let Some(ref ws) = *workspace {
328 // Not third-party if editable source is inside workspace
329 if install.source_root.starts_with(ws) {
330 return false;
331 }
332 // Not third-party if workspace is inside editable source
333 // (project installed editable in its own venv)
334 if ws.starts_with(&install.source_root) {
335 return false;
336 }
337 }
338 return true;
339 }
340 }
341 false
342 }
343
344 /// Remove all cached data for a file.
345 /// Called when a file is closed or deleted to prevent unbounded memory growth.
346 pub fn cleanup_file_cache(&self, file_path: &Path) {
347 // Use canonical path for consistent cleanup
348 let canonical = file_path
349 .canonicalize()
350 .unwrap_or_else(|_| file_path.to_path_buf());
351
352 debug!("Cleaning up cache for file: {:?}", canonical);
353
354 // Remove from line_index_cache
355 self.line_index_cache.remove(&canonical);
356
357 // Remove from ast_cache
358 self.ast_cache.remove(&canonical);
359
360 // Remove from name_import_map_cache
361 self.name_import_map_cache.remove(&canonical);
362
363 // Remove from file_cache
364 self.file_cache.remove(&canonical);
365
366 // Remove from available_fixtures_cache (this file's cached available fixtures)
367 self.available_fixtures_cache.remove(&canonical);
368
369 // Remove from imported_fixtures_cache
370 self.imported_fixtures_cache.remove(&canonical);
371
372 // Note: We don't remove from canonical_path_cache because:
373 // 1. It's keyed by original path, not canonical path
374 // 2. Path->canonical mappings are stable and small
375 // 3. They may be needed again if file is reopened
376
377 // Note: We don't remove definitions/usages here because:
378 // 1. They might be needed for cross-file references
379 // 2. They're cleaned up on next analyze_file call anyway
380 }
381
382 /// Evict entries from caches if they exceed the maximum size.
383 /// Called periodically to prevent unbounded memory growth in very large workspaces.
384 /// Most LSPs rely on did_close cleanup for open files; this is a safety net for
385 /// workspace scan files that accumulate over time.
386 pub(crate) fn evict_cache_if_needed(&self) {
387 // Only evict if significantly over limit to avoid frequent eviction
388 if self.file_cache.len() > MAX_FILE_CACHE_SIZE {
389 debug!(
390 "File cache size ({}) exceeds limit ({}), evicting entries",
391 self.file_cache.len(),
392 MAX_FILE_CACHE_SIZE
393 );
394
395 // Remove ~25% of entries to avoid frequent re-eviction
396 let to_remove_count = self.file_cache.len() / 4;
397 let to_remove: Vec<PathBuf> = self
398 .file_cache
399 .iter()
400 .take(to_remove_count)
401 .map(|entry| entry.key().clone())
402 .collect();
403
404 for path in to_remove {
405 self.file_cache.remove(&path);
406 // Also clean related caches for consistency
407 self.line_index_cache.remove(&path);
408 self.ast_cache.remove(&path);
409 self.available_fixtures_cache.remove(&path);
410 self.imported_fixtures_cache.remove(&path);
411 self.name_import_map_cache.remove(&path);
412 }
413
414 debug!(
415 "Cache eviction complete, new size: {}",
416 self.file_cache.len()
417 );
418 }
419 }
420}