pytest_language_server/fixtures/mod.rs
1//! Fixture database and analysis module.
2//!
3//! This module provides the core functionality for managing pytest fixtures:
4//! - Scanning workspaces for fixture definitions
5//! - Analyzing Python files for fixtures and their usages
6//! - Resolving fixture definitions based on pytest's priority rules
7//! - Providing completion context for fixture suggestions
8
9mod analyzer;
10pub(crate) mod cli;
11pub mod decorators; // Public for testing
12mod docstring;
13mod imports;
14mod resolver;
15mod scanner;
16pub(crate) mod string_utils; // pub(crate) for inlay_hint provider access
17pub mod types;
18mod undeclared;
19
20#[allow(unused_imports)] // ParamInsertionInfo re-exported for public API via lib.rs
21pub use types::{
22 CompletionContext, FixtureCycle, FixtureDefinition, FixtureScope, FixtureUsage,
23 ParamInsertionInfo, ScopeMismatch, UndeclaredFixture,
24};
25
26use dashmap::DashMap;
27use std::collections::hash_map::DefaultHasher;
28use std::collections::HashSet;
29use std::hash::{Hash, Hasher};
30use std::path::{Path, PathBuf};
31use std::sync::Arc;
32use tracing::debug;
33
/// Record of a package that is installed in editable (development) mode.
///
/// Instances are discovered from `direct_url.json` together with `.pth` files
/// located in a site-packages directory.
#[derive(Debug, Clone)]
#[allow(dead_code)] // Fields read in tests and used for debug logging
pub struct EditableInstall {
    /// Name of the package.
    /// NOTE(review): presumably the normalized form of `raw_package_name` — confirm in the scanner.
    pub package_name: String,
    /// Package name as originally found.
    /// NOTE(review): presumably un-normalized — confirm in the scanner.
    pub raw_package_name: String,
    /// Root directory of the editable source tree; a file whose path starts
    /// with this directory is treated as belonging to this install.
    pub source_root: PathBuf,
    /// The site-packages directory associated with this install.
    pub site_packages: PathBuf,
}
43
44/// Cache entry for line indices: (content_hash, line_index).
45/// The content hash is used to invalidate the cache when file content changes.
46type LineIndexCacheEntry = (u64, Arc<Vec<usize>>);
47
48/// Cache entry for parsed AST: (content_hash, ast).
49/// The content hash is used to invalidate the cache when file content changes.
50type AstCacheEntry = (u64, Arc<rustpython_parser::ast::Mod>);
51
52/// Cache entry for fixture cycles: (definitions_version, cycles).
53/// The version is incremented when definitions change to invalidate the cache.
54type CycleCacheEntry = (u64, Arc<Vec<types::FixtureCycle>>);
55
56/// Cache entry for available fixtures: (definitions_version, fixtures).
57/// The version is incremented when definitions change to invalidate the cache.
58type AvailableFixturesCacheEntry = (u64, Arc<Vec<FixtureDefinition>>);
59
60/// Cache entry for imported fixtures: (content_hash, definitions_version, imported_fixture_names).
61/// Invalidated when either the file content or fixture definitions change.
62type ImportedFixturesCacheEntry = (u64, u64, Arc<HashSet<String>>);
63
/// Upper bound on the number of entries kept in the file content cache.
/// Once the cache grows past this size, a share of its entries is evicted
/// so that memory use does not grow without limit.
const MAX_FILE_CACHE_SIZE: usize = 2_000;
67
68/// The central database for fixture definitions and usages.
69///
70/// Uses `DashMap` for lock-free concurrent access during workspace scanning.
71#[derive(Debug)]
72pub struct FixtureDatabase {
73 /// Map from fixture name to all its definitions (can be in multiple conftest.py files).
74 pub definitions: Arc<DashMap<String, Vec<FixtureDefinition>>>,
75 /// Reverse index: file path -> fixture names defined in that file.
76 /// Used for efficient cleanup when a file is re-analyzed.
77 pub file_definitions: Arc<DashMap<PathBuf, HashSet<String>>>,
78 /// Map from file path to fixtures used in that file.
79 pub usages: Arc<DashMap<PathBuf, Vec<FixtureUsage>>>,
80 /// Reverse index: fixture name -> (file_path, usage) pairs.
81 /// Used for efficient O(1) lookup in find_references_for_definition.
82 pub usage_by_fixture: Arc<DashMap<String, Vec<(PathBuf, FixtureUsage)>>>,
83 /// Cache of file contents for analyzed files (uses Arc for efficient sharing).
84 pub file_cache: Arc<DashMap<PathBuf, Arc<String>>>,
85 /// Map from file path to undeclared fixtures used in function bodies.
86 pub undeclared_fixtures: Arc<DashMap<PathBuf, Vec<UndeclaredFixture>>>,
87 /// Map from file path to imported names in that file.
88 pub imports: Arc<DashMap<PathBuf, HashSet<String>>>,
89 /// Cache of canonical paths to avoid repeated filesystem calls.
90 pub canonical_path_cache: Arc<DashMap<PathBuf, PathBuf>>,
91 /// Cache of line indices (byte offsets) for files to avoid recomputation.
92 /// Stores (content_hash, line_index) to invalidate when content changes.
93 pub line_index_cache: Arc<DashMap<PathBuf, LineIndexCacheEntry>>,
94 /// Cache of parsed AST for files to avoid re-parsing.
95 /// Stores (content_hash, ast) to invalidate when content changes.
96 pub ast_cache: Arc<DashMap<PathBuf, AstCacheEntry>>,
97 /// Version counter for definitions, incremented on each change.
98 /// Used to invalidate cycle detection cache and available fixtures cache.
99 pub definitions_version: Arc<std::sync::atomic::AtomicU64>,
100 /// Cache of detected fixture cycles.
101 /// Stores (definitions_version, cycles) to invalidate when definitions change.
102 pub cycle_cache: Arc<DashMap<(), CycleCacheEntry>>,
103 /// Cache of available fixtures per file.
104 /// Stores (definitions_version, fixtures) to invalidate when definitions change.
105 pub available_fixtures_cache: Arc<DashMap<PathBuf, AvailableFixturesCacheEntry>>,
106 /// Cache of imported fixtures per file.
107 /// Stores (content_hash, definitions_version, fixture_names) for invalidation.
108 pub imported_fixtures_cache: Arc<DashMap<PathBuf, ImportedFixturesCacheEntry>>,
109 /// Discovered site-packages paths from venv scanning.
110 /// Used for resolving absolute imports in venv plugin modules.
111 pub site_packages_paths: Arc<std::sync::Mutex<Vec<PathBuf>>>,
112 /// Discovered editable installs from venv scanning.
113 pub editable_install_roots: Arc<std::sync::Mutex<Vec<EditableInstall>>>,
114 /// Workspace root path, set during scan. Used to distinguish in-workspace editables.
115 pub workspace_root: Arc<std::sync::Mutex<Option<PathBuf>>>,
116 /// Files discovered via pytest11 entry point plugins.
117 /// Used to mark fixtures from these files as `is_plugin` so the resolver
118 /// can find them even when they are not in conftest.py or site-packages.
119 pub plugin_fixture_files: Arc<DashMap<PathBuf, ()>>,
120}
121
122impl Default for FixtureDatabase {
123 fn default() -> Self {
124 Self::new()
125 }
126}
127
128impl FixtureDatabase {
129 /// Create a new empty fixture database.
130 pub fn new() -> Self {
131 Self {
132 definitions: Arc::new(DashMap::new()),
133 file_definitions: Arc::new(DashMap::new()),
134 usages: Arc::new(DashMap::new()),
135 usage_by_fixture: Arc::new(DashMap::new()),
136 file_cache: Arc::new(DashMap::new()),
137 undeclared_fixtures: Arc::new(DashMap::new()),
138 imports: Arc::new(DashMap::new()),
139 canonical_path_cache: Arc::new(DashMap::new()),
140 line_index_cache: Arc::new(DashMap::new()),
141 ast_cache: Arc::new(DashMap::new()),
142 definitions_version: Arc::new(std::sync::atomic::AtomicU64::new(0)),
143 cycle_cache: Arc::new(DashMap::new()),
144 available_fixtures_cache: Arc::new(DashMap::new()),
145 imported_fixtures_cache: Arc::new(DashMap::new()),
146 site_packages_paths: Arc::new(std::sync::Mutex::new(Vec::new())),
147 editable_install_roots: Arc::new(std::sync::Mutex::new(Vec::new())),
148 workspace_root: Arc::new(std::sync::Mutex::new(None)),
149 plugin_fixture_files: Arc::new(DashMap::new()),
150 }
151 }
152
153 /// Increment the definitions version to invalidate cycle cache.
154 /// Called whenever fixture definitions are modified.
155 pub(crate) fn invalidate_cycle_cache(&self) {
156 self.definitions_version
157 .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
158 }
159
160 /// Get canonical path with caching to avoid repeated filesystem calls.
161 /// Falls back to original path if canonicalization fails.
162 pub(crate) fn get_canonical_path(&self, path: PathBuf) -> PathBuf {
163 // Check cache first
164 if let Some(cached) = self.canonical_path_cache.get(&path) {
165 return cached.value().clone();
166 }
167
168 // Attempt canonicalization
169 let canonical = path.canonicalize().unwrap_or_else(|_| {
170 debug!("Could not canonicalize path {:?}, using as-is", path);
171 path.clone()
172 });
173
174 // Store in cache for future lookups
175 self.canonical_path_cache.insert(path, canonical.clone());
176 canonical
177 }
178
179 /// Get file content from cache or read from filesystem.
180 /// Returns None if file cannot be read.
181 pub(crate) fn get_file_content(&self, file_path: &Path) -> Option<Arc<String>> {
182 if let Some(cached) = self.file_cache.get(file_path) {
183 Some(Arc::clone(cached.value()))
184 } else {
185 std::fs::read_to_string(file_path).ok().map(Arc::new)
186 }
187 }
188
189 /// Get or compute line index for a file, with content-hash-based caching.
190 /// Returns Arc to avoid cloning the potentially large Vec.
191 /// The cache is invalidated when the content hash changes.
192 pub(crate) fn get_line_index(&self, file_path: &Path, content: &str) -> Arc<Vec<usize>> {
193 let content_hash = Self::hash_content(content);
194
195 // Check cache first - only use if content hash matches
196 if let Some(cached) = self.line_index_cache.get(file_path) {
197 let (cached_hash, cached_index) = cached.value();
198 if *cached_hash == content_hash {
199 return Arc::clone(cached_index);
200 }
201 }
202
203 // Build line index
204 let line_index = Self::build_line_index(content);
205 let arc_index = Arc::new(line_index);
206
207 // Store in cache with content hash
208 self.line_index_cache.insert(
209 file_path.to_path_buf(),
210 (content_hash, Arc::clone(&arc_index)),
211 );
212
213 arc_index
214 }
215
216 /// Get or parse AST for a file, with content-hash-based caching.
217 /// Returns Arc to avoid cloning the potentially large AST.
218 /// The cache is invalidated when the content hash changes.
219 pub(crate) fn get_parsed_ast(
220 &self,
221 file_path: &Path,
222 content: &str,
223 ) -> Option<Arc<rustpython_parser::ast::Mod>> {
224 let content_hash = Self::hash_content(content);
225
226 // Check cache first - only use if content hash matches
227 if let Some(cached) = self.ast_cache.get(file_path) {
228 let (cached_hash, cached_ast) = cached.value();
229 if *cached_hash == content_hash {
230 return Some(Arc::clone(cached_ast));
231 }
232 }
233
234 // Parse the content
235 let parsed = rustpython_parser::parse(content, rustpython_parser::Mode::Module, "").ok()?;
236 let arc_ast = Arc::new(parsed);
237
238 // Store in cache with content hash
239 self.ast_cache.insert(
240 file_path.to_path_buf(),
241 (content_hash, Arc::clone(&arc_ast)),
242 );
243
244 Some(arc_ast)
245 }
246
/// Hash `content` with the standard library's `DefaultHasher`.
/// The resulting value is what the content-keyed caches compare to detect changes.
fn hash_content(content: &str) -> u64 {
    let mut state = DefaultHasher::default();
    Hash::hash(content, &mut state);
    state.finish()
}
253
254 /// Check if a file path is inside an editable install that is NOT within the workspace.
255 /// Returns true if the file is from an external editable install (third-party).
256 pub(crate) fn is_editable_install_third_party(&self, file_path: &Path) -> bool {
257 let installs = self.editable_install_roots.lock().unwrap();
258 let workspace = self.workspace_root.lock().unwrap();
259
260 for install in installs.iter() {
261 if file_path.starts_with(&install.source_root) {
262 if let Some(ref ws) = *workspace {
263 // Not third-party if editable source is inside workspace
264 if install.source_root.starts_with(ws) {
265 return false;
266 }
267 // Not third-party if workspace is inside editable source
268 // (project installed editable in its own venv)
269 if ws.starts_with(&install.source_root) {
270 return false;
271 }
272 }
273 return true;
274 }
275 }
276 false
277 }
278
279 /// Remove all cached data for a file.
280 /// Called when a file is closed or deleted to prevent unbounded memory growth.
281 pub fn cleanup_file_cache(&self, file_path: &Path) {
282 // Use canonical path for consistent cleanup
283 let canonical = file_path
284 .canonicalize()
285 .unwrap_or_else(|_| file_path.to_path_buf());
286
287 debug!("Cleaning up cache for file: {:?}", canonical);
288
289 // Remove from line_index_cache
290 self.line_index_cache.remove(&canonical);
291
292 // Remove from ast_cache
293 self.ast_cache.remove(&canonical);
294
295 // Remove from file_cache
296 self.file_cache.remove(&canonical);
297
298 // Remove from available_fixtures_cache (this file's cached available fixtures)
299 self.available_fixtures_cache.remove(&canonical);
300
301 // Remove from imported_fixtures_cache
302 self.imported_fixtures_cache.remove(&canonical);
303
304 // Note: We don't remove from canonical_path_cache because:
305 // 1. It's keyed by original path, not canonical path
306 // 2. Path->canonical mappings are stable and small
307 // 3. They may be needed again if file is reopened
308
309 // Note: We don't remove definitions/usages here because:
310 // 1. They might be needed for cross-file references
311 // 2. They're cleaned up on next analyze_file call anyway
312 }
313
314 /// Evict entries from caches if they exceed the maximum size.
315 /// Called periodically to prevent unbounded memory growth in very large workspaces.
316 /// Most LSPs rely on did_close cleanup for open files; this is a safety net for
317 /// workspace scan files that accumulate over time.
318 pub(crate) fn evict_cache_if_needed(&self) {
319 // Only evict if significantly over limit to avoid frequent eviction
320 if self.file_cache.len() > MAX_FILE_CACHE_SIZE {
321 debug!(
322 "File cache size ({}) exceeds limit ({}), evicting entries",
323 self.file_cache.len(),
324 MAX_FILE_CACHE_SIZE
325 );
326
327 // Remove ~25% of entries to avoid frequent re-eviction
328 let to_remove_count = self.file_cache.len() / 4;
329 let to_remove: Vec<PathBuf> = self
330 .file_cache
331 .iter()
332 .take(to_remove_count)
333 .map(|entry| entry.key().clone())
334 .collect();
335
336 for path in to_remove {
337 self.file_cache.remove(&path);
338 // Also clean related caches for consistency
339 self.line_index_cache.remove(&path);
340 self.ast_cache.remove(&path);
341 self.available_fixtures_cache.remove(&path);
342 self.imported_fixtures_cache.remove(&path);
343 }
344
345 debug!(
346 "Cache eviction complete, new size: {}",
347 self.file_cache.len()
348 );
349 }
350 }
351}