Skip to main content

pytest_language_server/fixtures/
mod.rs

1//! Fixture database and analysis module.
2//!
3//! This module provides the core functionality for managing pytest fixtures:
4//! - Scanning workspaces for fixture definitions
5//! - Analyzing Python files for fixtures and their usages
6//! - Resolving fixture definitions based on pytest's priority rules
7//! - Providing completion context for fixture suggestions
8
9mod analyzer;
10pub(crate) mod cli;
11pub mod decorators; // Public for testing
12mod docstring;
13pub mod import_analysis;
14mod imports;
15mod resolver;
16mod scanner;
17pub(crate) mod string_utils; // pub(crate) for inlay_hint provider access
18pub mod types;
19mod undeclared;
20
21#[allow(unused_imports)] // ParamInsertionInfo re-exported for public API via lib.rs
22pub use types::{
23    CompletionContext, FixtureCycle, FixtureDefinition, FixtureScope, FixtureUsage,
24    ParamInsertionInfo, ScopeMismatch, TypeImportSpec, UndeclaredFixture,
25};
26
27use dashmap::DashMap;
28use std::collections::hash_map::DefaultHasher;
29use std::collections::{HashMap, HashSet};
30use std::hash::{Hash, Hasher};
31use std::path::{Path, PathBuf};
32use std::sync::Arc;
33use tracing::debug;
34
/// An editable install discovered via `direct_url.json` + `.pth` files in site-packages.
///
/// Instances are kept in `FixtureDatabase::editable_install_roots`. Of the fields
/// below, `source_root` is the one `is_editable_install_third_party` compares
/// file paths and the workspace root against.
#[derive(Debug, Clone)]
#[allow(dead_code)] // Fields read in tests and used for debug logging
pub struct EditableInstall {
    /// Name of the installed package.
    /// NOTE(review): presumably a normalized form of `raw_package_name` — confirm
    /// against the code that constructs this struct.
    pub package_name: String,
    /// NOTE(review): presumably the package name exactly as it appears in the
    /// install metadata — confirm against the code that constructs this struct.
    pub raw_package_name: String,
    /// Root directory of the package's source tree. A file whose path starts
    /// with this root is treated as belonging to this editable install.
    pub source_root: PathBuf,
    /// NOTE(review): presumably the site-packages directory in which this
    /// install was discovered — confirm against the scanner.
    pub site_packages: PathBuf,
}
44
/// Cache entry for line indices: (content_hash, line_index).
/// The content hash is used to invalidate the cache when file content changes.
///
/// The hash is the 64-bit value produced by `FixtureDatabase::hash_content`;
/// the index is shared behind an `Arc` so cache hits do not copy the `Vec`.
type LineIndexCacheEntry = (u64, Arc<Vec<usize>>);

/// Cache entry for parsed AST: (content_hash, ast).
/// The content hash is used to invalidate the cache when file content changes.
///
/// The AST is shared behind an `Arc` so cache hits do not clone the tree.
type AstCacheEntry = (u64, Arc<rustpython_parser::ast::Mod>);

/// Cache entry for fixture cycles: (definitions_version, cycles).
/// The version is incremented when definitions change to invalidate the cache.
///
/// The first element is a snapshot of `FixtureDatabase::definitions_version`.
type CycleCacheEntry = (u64, Arc<Vec<types::FixtureCycle>>);

/// Cache entry for available fixtures: (definitions_version, fixtures).
/// The version is incremented when definitions change to invalidate the cache.
///
/// The first element is a snapshot of `FixtureDatabase::definitions_version`.
type AvailableFixturesCacheEntry = (u64, Arc<Vec<FixtureDefinition>>);

/// Cache entry for imported fixtures: (content_hash, definitions_version, imported_fixture_names).
/// Invalidated when either the file content or fixture definitions change.
type ImportedFixturesCacheEntry = (u64, u64, Arc<HashSet<String>>);

/// Cache entry for the name→TypeImportSpec map: (content_hash, Arc<map>).
/// Invalidated when the file content changes (same strategy as ast_cache).
///
/// The map is wrapped in `Arc` so a cache hit is an O(1) refcount bump rather
/// than a full `HashMap` clone.
///
/// **Size bound**: this cache is only populated by `get_name_to_import_map`, which
/// is called from code-action and inlay-hint providers — i.e. only for files that
/// are already in `file_cache`.  Entries are evicted alongside `file_cache` entries
/// in both `cleanup_file_cache` (per-file, on close/delete) and
/// `evict_cache_if_needed` (bulk, when `file_cache` exceeds `MAX_FILE_CACHE_SIZE`).
/// No independent size constant is needed.
type NameImportMapCacheEntry = (
    u64,
    Arc<HashMap<String, crate::fixtures::types::TypeImportSpec>>,
);
81
/// Maximum number of files to keep in the file content cache.
///
/// When `file_cache` grows beyond this, `evict_cache_if_needed` removes roughly
/// a quarter of its entries (plus the matching per-file cache entries) to
/// prevent unbounded memory growth. The evicted entries are simply the first
/// ones yielded by iterating the map; no access time or insertion order is
/// tracked, so they are not necessarily the oldest.
const MAX_FILE_CACHE_SIZE: usize = 2000;
85
/// The central database for fixture definitions and usages.
///
/// Every map is a `DashMap` behind an `Arc`, so the database can be read and
/// updated through `&self` from several threads during workspace scanning.
/// NOTE(review): `DashMap` is a sharded, lock-based concurrent map rather than
/// a strictly lock-free structure; per its documentation, mutating a map while
/// holding a reference into the same map can deadlock — keep guards short-lived.
#[derive(Debug)]
pub struct FixtureDatabase {
    /// Map from fixture name to all its definitions (can be in multiple conftest.py files).
    pub definitions: Arc<DashMap<String, Vec<FixtureDefinition>>>,
    /// Reverse index: file path -> fixture names defined in that file.
    /// Used for efficient cleanup when a file is re-analyzed.
    pub file_definitions: Arc<DashMap<PathBuf, HashSet<String>>>,
    /// Map from file path to fixtures used in that file.
    pub usages: Arc<DashMap<PathBuf, Vec<FixtureUsage>>>,
    /// Reverse index: fixture name -> (file_path, usage) pairs.
    /// Used for efficient O(1) lookup in find_references_for_definition.
    pub usage_by_fixture: Arc<DashMap<String, Vec<(PathBuf, FixtureUsage)>>>,
    /// Cache of file contents for analyzed files (uses Arc for efficient sharing).
    /// Its size drives bulk eviction: see `MAX_FILE_CACHE_SIZE`.
    pub file_cache: Arc<DashMap<PathBuf, Arc<String>>>,
    /// Map from file path to undeclared fixtures used in function bodies.
    pub undeclared_fixtures: Arc<DashMap<PathBuf, Vec<UndeclaredFixture>>>,
    /// Map from file path to imported names in that file.
    pub imports: Arc<DashMap<PathBuf, HashSet<String>>>,
    /// Cache of canonical paths to avoid repeated filesystem calls.
    /// Keyed by the path as originally supplied; the value is its canonical
    /// form, or the same path when canonicalization failed.
    pub canonical_path_cache: Arc<DashMap<PathBuf, PathBuf>>,
    /// Cache of line indices (byte offsets) for files to avoid recomputation.
    /// Stores (content_hash, line_index) to invalidate when content changes.
    pub line_index_cache: Arc<DashMap<PathBuf, LineIndexCacheEntry>>,
    /// Cache of parsed AST for files to avoid re-parsing.
    /// Stores (content_hash, ast) to invalidate when content changes.
    pub ast_cache: Arc<DashMap<PathBuf, AstCacheEntry>>,
    /// Version counter for definitions, incremented on each change.
    /// Used to invalidate the cycle detection cache, the available fixtures
    /// cache, and (together with a content hash) the imported fixtures cache.
    pub definitions_version: Arc<std::sync::atomic::AtomicU64>,
    /// Cache of detected fixture cycles.
    /// Stores (definitions_version, cycles) to invalidate when definitions change.
    /// The unit key means the map holds at most one entry.
    pub cycle_cache: Arc<DashMap<(), CycleCacheEntry>>,
    /// Cache of available fixtures per file.
    /// Stores (definitions_version, fixtures) to invalidate when definitions change.
    pub available_fixtures_cache: Arc<DashMap<PathBuf, AvailableFixturesCacheEntry>>,
    /// Cache of imported fixtures per file.
    /// Stores (content_hash, definitions_version, fixture_names) for invalidation.
    pub imported_fixtures_cache: Arc<DashMap<PathBuf, ImportedFixturesCacheEntry>>,
    /// Discovered site-packages paths from venv scanning.
    /// Used for resolving absolute imports in venv plugin modules.
    pub site_packages_paths: Arc<std::sync::Mutex<Vec<PathBuf>>>,
    /// Discovered editable installs from venv scanning.
    pub editable_install_roots: Arc<std::sync::Mutex<Vec<EditableInstall>>>,
    /// Workspace root path, set during scan. Used to distinguish in-workspace editables.
    /// `None` until a scan has recorded a root.
    pub workspace_root: Arc<std::sync::Mutex<Option<PathBuf>>>,
    /// Files discovered via pytest11 entry point plugins.
    /// Used to mark fixtures from these files as `is_plugin` so the resolver
    /// can find them even when they are not in conftest.py or site-packages.
    /// The unit value makes this map act as a concurrent set of paths.
    pub plugin_fixture_files: Arc<DashMap<PathBuf, ()>>,
    /// Cache of the name→TypeImportSpec map per file.
    /// Stores (content_hash, map) so the result of `build_name_to_import_map`
    /// is reused across code-action and inlay-hint requests without re-parsing.
    ///
    /// Bounded implicitly: see [`NameImportMapCacheEntry`] for the eviction strategy.
    pub name_import_map_cache: Arc<DashMap<PathBuf, NameImportMapCacheEntry>>,
}
145
146impl Default for FixtureDatabase {
147    fn default() -> Self {
148        Self::new()
149    }
150}
151
152impl FixtureDatabase {
153    /// Create a new empty fixture database.
154    pub fn new() -> Self {
155        Self {
156            definitions: Arc::new(DashMap::new()),
157            file_definitions: Arc::new(DashMap::new()),
158            usages: Arc::new(DashMap::new()),
159            usage_by_fixture: Arc::new(DashMap::new()),
160            file_cache: Arc::new(DashMap::new()),
161            undeclared_fixtures: Arc::new(DashMap::new()),
162            imports: Arc::new(DashMap::new()),
163            canonical_path_cache: Arc::new(DashMap::new()),
164            line_index_cache: Arc::new(DashMap::new()),
165            ast_cache: Arc::new(DashMap::new()),
166            definitions_version: Arc::new(std::sync::atomic::AtomicU64::new(0)),
167            cycle_cache: Arc::new(DashMap::new()),
168            available_fixtures_cache: Arc::new(DashMap::new()),
169            imported_fixtures_cache: Arc::new(DashMap::new()),
170            site_packages_paths: Arc::new(std::sync::Mutex::new(Vec::new())),
171            editable_install_roots: Arc::new(std::sync::Mutex::new(Vec::new())),
172            workspace_root: Arc::new(std::sync::Mutex::new(None)),
173            plugin_fixture_files: Arc::new(DashMap::new()),
174            name_import_map_cache: Arc::new(DashMap::new()),
175        }
176    }
177
178    /// Increment the definitions version to invalidate cycle cache.
179    /// Called whenever fixture definitions are modified.
180    pub(crate) fn invalidate_cycle_cache(&self) {
181        self.definitions_version
182            .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
183    }
184
185    /// Get canonical path with caching to avoid repeated filesystem calls.
186    /// Falls back to original path if canonicalization fails.
187    pub(crate) fn get_canonical_path(&self, path: PathBuf) -> PathBuf {
188        // Check cache first
189        if let Some(cached) = self.canonical_path_cache.get(&path) {
190            return cached.value().clone();
191        }
192
193        // Attempt canonicalization
194        let canonical = path.canonicalize().unwrap_or_else(|_| {
195            debug!("Could not canonicalize path {:?}, using as-is", path);
196            path.clone()
197        });
198
199        // Store in cache for future lookups
200        self.canonical_path_cache.insert(path, canonical.clone());
201        canonical
202    }
203
204    /// Get file content from cache or read from filesystem.
205    /// Returns None if file cannot be read.
206    pub(crate) fn get_file_content(&self, file_path: &Path) -> Option<Arc<String>> {
207        if let Some(cached) = self.file_cache.get(file_path) {
208            Some(Arc::clone(cached.value()))
209        } else {
210            std::fs::read_to_string(file_path).ok().map(Arc::new)
211        }
212    }
213
214    /// Get or compute line index for a file, with content-hash-based caching.
215    /// Returns Arc to avoid cloning the potentially large Vec.
216    /// The cache is invalidated when the content hash changes.
217    pub(crate) fn get_line_index(&self, file_path: &Path, content: &str) -> Arc<Vec<usize>> {
218        let content_hash = Self::hash_content(content);
219
220        // Check cache first - only use if content hash matches
221        if let Some(cached) = self.line_index_cache.get(file_path) {
222            let (cached_hash, cached_index) = cached.value();
223            if *cached_hash == content_hash {
224                return Arc::clone(cached_index);
225            }
226        }
227
228        // Build line index
229        let line_index = Self::build_line_index(content);
230        let arc_index = Arc::new(line_index);
231
232        // Store in cache with content hash
233        self.line_index_cache.insert(
234            file_path.to_path_buf(),
235            (content_hash, Arc::clone(&arc_index)),
236        );
237
238        arc_index
239    }
240
241    /// Get or parse AST for a file, with content-hash-based caching.
242    /// Returns Arc to avoid cloning the potentially large AST.
243    /// The cache is invalidated when the content hash changes.
244    pub(crate) fn get_parsed_ast(
245        &self,
246        file_path: &Path,
247        content: &str,
248    ) -> Option<Arc<rustpython_parser::ast::Mod>> {
249        let content_hash = Self::hash_content(content);
250
251        // Check cache first - only use if content hash matches
252        if let Some(cached) = self.ast_cache.get(file_path) {
253            let (cached_hash, cached_ast) = cached.value();
254            if *cached_hash == content_hash {
255                return Some(Arc::clone(cached_ast));
256            }
257        }
258
259        // Parse the content
260        let parsed = rustpython_parser::parse(content, rustpython_parser::Mode::Module, "").ok()?;
261        let arc_ast = Arc::new(parsed);
262
263        // Store in cache with content hash
264        self.ast_cache.insert(
265            file_path.to_path_buf(),
266            (content_hash, Arc::clone(&arc_ast)),
267        );
268
269        Some(arc_ast)
270    }
271
272    /// Get or compute the name→[`TypeImportSpec`] map for a file, with
273    /// content-hash-based caching.
274    ///
275    /// This is the preferred way for providers to obtain a consumer-file's
276    /// import map without re-parsing on every request.  The result is
277    /// recomputed only when the file content changes.
278    pub fn get_name_to_import_map(
279        &self,
280        file_path: &Path,
281        content: &str,
282    ) -> Arc<HashMap<String, crate::fixtures::types::TypeImportSpec>> {
283        let hash = Self::hash_content(content);
284
285        // Return cached value when content hasn't changed.
286        // Arc::clone is an O(1) refcount bump — no HashMap data is copied.
287        if let Some(entry) = self.name_import_map_cache.get(file_path) {
288            let (cached_hash, arc_map) = entry.value();
289            if *cached_hash == hash {
290                return Arc::clone(arc_map);
291            }
292        }
293
294        // Compute from AST (reuses ast_cache internally).
295        let map = match self.get_parsed_ast(file_path, content) {
296            Some(ast) => {
297                if let rustpython_parser::ast::Mod::Module(module) = ast.as_ref() {
298                    self.build_name_to_import_map(&module.body, file_path)
299                } else {
300                    HashMap::new()
301                }
302            }
303            None => HashMap::new(),
304        };
305
306        let arc_map = Arc::new(map);
307        self.name_import_map_cache
308            .insert(file_path.to_path_buf(), (hash, Arc::clone(&arc_map)));
309        arc_map
310    }
311
312    /// Compute a hash of the content for cache invalidation.
313    fn hash_content(content: &str) -> u64 {
314        let mut hasher = DefaultHasher::new();
315        content.hash(&mut hasher);
316        hasher.finish()
317    }
318
319    /// Check if a file path is inside an editable install that is NOT within the workspace.
320    /// Returns true if the file is from an external editable install (third-party).
321    pub(crate) fn is_editable_install_third_party(&self, file_path: &Path) -> bool {
322        let installs = self.editable_install_roots.lock().unwrap();
323        let workspace = self.workspace_root.lock().unwrap();
324
325        for install in installs.iter() {
326            if file_path.starts_with(&install.source_root) {
327                if let Some(ref ws) = *workspace {
328                    // Not third-party if editable source is inside workspace
329                    if install.source_root.starts_with(ws) {
330                        return false;
331                    }
332                    // Not third-party if workspace is inside editable source
333                    // (project installed editable in its own venv)
334                    if ws.starts_with(&install.source_root) {
335                        return false;
336                    }
337                }
338                return true;
339            }
340        }
341        false
342    }
343
344    /// Remove all cached data for a file.
345    /// Called when a file is closed or deleted to prevent unbounded memory growth.
346    pub fn cleanup_file_cache(&self, file_path: &Path) {
347        // Use canonical path for consistent cleanup
348        let canonical = file_path
349            .canonicalize()
350            .unwrap_or_else(|_| file_path.to_path_buf());
351
352        debug!("Cleaning up cache for file: {:?}", canonical);
353
354        // Remove from line_index_cache
355        self.line_index_cache.remove(&canonical);
356
357        // Remove from ast_cache
358        self.ast_cache.remove(&canonical);
359
360        // Remove from name_import_map_cache
361        self.name_import_map_cache.remove(&canonical);
362
363        // Remove from file_cache
364        self.file_cache.remove(&canonical);
365
366        // Remove from available_fixtures_cache (this file's cached available fixtures)
367        self.available_fixtures_cache.remove(&canonical);
368
369        // Remove from imported_fixtures_cache
370        self.imported_fixtures_cache.remove(&canonical);
371
372        // Note: We don't remove from canonical_path_cache because:
373        // 1. It's keyed by original path, not canonical path
374        // 2. Path->canonical mappings are stable and small
375        // 3. They may be needed again if file is reopened
376
377        // Note: We don't remove definitions/usages here because:
378        // 1. They might be needed for cross-file references
379        // 2. They're cleaned up on next analyze_file call anyway
380    }
381
382    /// Evict entries from caches if they exceed the maximum size.
383    /// Called periodically to prevent unbounded memory growth in very large workspaces.
384    /// Most LSPs rely on did_close cleanup for open files; this is a safety net for
385    /// workspace scan files that accumulate over time.
386    pub(crate) fn evict_cache_if_needed(&self) {
387        // Only evict if significantly over limit to avoid frequent eviction
388        if self.file_cache.len() > MAX_FILE_CACHE_SIZE {
389            debug!(
390                "File cache size ({}) exceeds limit ({}), evicting entries",
391                self.file_cache.len(),
392                MAX_FILE_CACHE_SIZE
393            );
394
395            // Remove ~25% of entries to avoid frequent re-eviction
396            let to_remove_count = self.file_cache.len() / 4;
397            let to_remove: Vec<PathBuf> = self
398                .file_cache
399                .iter()
400                .take(to_remove_count)
401                .map(|entry| entry.key().clone())
402                .collect();
403
404            for path in to_remove {
405                self.file_cache.remove(&path);
406                // Also clean related caches for consistency
407                self.line_index_cache.remove(&path);
408                self.ast_cache.remove(&path);
409                self.available_fixtures_cache.remove(&path);
410                self.imported_fixtures_cache.remove(&path);
411                self.name_import_map_cache.remove(&path);
412            }
413
414            debug!(
415                "Cache eviction complete, new size: {}",
416                self.file_cache.len()
417            );
418        }
419    }
420}