Skip to main content

pytest_language_server/fixtures/
mod.rs

1//! Fixture database and analysis module.
2//!
3//! This module provides the core functionality for managing pytest fixtures:
4//! - Scanning workspaces for fixture definitions
5//! - Analyzing Python files for fixtures and their usages
6//! - Resolving fixture definitions based on pytest's priority rules
7//! - Providing completion context for fixture suggestions
8
9mod analyzer;
10pub(crate) mod cli;
11pub mod decorators; // Public for testing
12mod docstring;
13mod imports;
14mod resolver;
15mod scanner;
16pub(crate) mod string_utils; // pub(crate) for inlay_hint provider access
17pub mod types;
18mod undeclared;
19
20#[allow(unused_imports)] // ParamInsertionInfo re-exported for public API via lib.rs
21pub use types::{
22    CompletionContext, FixtureCycle, FixtureDefinition, FixtureScope, FixtureUsage,
23    ParamInsertionInfo, ScopeMismatch, UndeclaredFixture,
24};
25
26use dashmap::DashMap;
27use std::collections::hash_map::DefaultHasher;
28use std::collections::HashSet;
29use std::hash::{Hash, Hasher};
30use std::path::{Path, PathBuf};
31use std::sync::Arc;
32use tracing::debug;
33
/// An editable install discovered via `direct_url.json` + `.pth` files in site-packages.
///
/// Instances are stored in `FixtureDatabase::editable_install_roots` and consulted by
/// `FixtureDatabase::is_editable_install_third_party`, which only reads `source_root`.
#[derive(Debug, Clone)]
#[allow(dead_code)] // Fields read in tests and used for debug logging
pub struct EditableInstall {
    /// Name of the installed package.
    /// NOTE(review): presumably a normalized form of `raw_package_name` — confirm against the scanner.
    pub package_name: String,
    /// Package name as originally found, before any normalization.
    /// NOTE(review): exact source of this value is not visible here — confirm.
    pub raw_package_name: String,
    /// Directory the editable install points at; files under this root are treated as
    /// belonging to the install.
    pub source_root: PathBuf,
    /// The site-packages directory in which this install was discovered.
    pub site_packages: PathBuf,
}
43
/// Cache entry for line indices: (content_hash, line_index).
/// The line index is a shared vector of byte offsets for the file.
/// The content hash is used to invalidate the cache when file content changes:
/// an entry is only reused while the hash of the current content matches.
type LineIndexCacheEntry = (u64, Arc<Vec<usize>>);

/// Cache entry for parsed AST: (content_hash, ast).
/// The content hash is used to invalidate the cache when file content changes:
/// an entry is only reused while the hash of the current content matches.
type AstCacheEntry = (u64, Arc<rustpython_parser::ast::Mod>);

/// Cache entry for fixture cycles: (definitions_version, cycles).
/// The version is incremented when definitions change to invalidate the cache.
type CycleCacheEntry = (u64, Arc<Vec<types::FixtureCycle>>);

/// Cache entry for available fixtures: (definitions_version, fixtures).
/// The version is incremented when definitions change to invalidate the cache.
type AvailableFixturesCacheEntry = (u64, Arc<Vec<FixtureDefinition>>);

/// Cache entry for imported fixtures: (content_hash, definitions_version, imported_fixture_names).
/// Invalidated when either the file content or fixture definitions change.
type ImportedFixturesCacheEntry = (u64, u64, Arc<HashSet<String>>);
63
/// Maximum number of files to keep in the file content cache.
/// When exceeded, `evict_cache_if_needed` drops roughly a quarter of the entries to
/// prevent unbounded memory growth. Entries are picked in map iteration order; no
/// access time or age is tracked, so the evicted entries are not necessarily the oldest.
const MAX_FILE_CACHE_SIZE: usize = 2000;
67
/// The central database for fixture definitions and usages.
///
/// Uses `DashMap` for concurrent access during workspace scanning without a single
/// global lock. The few non-map fields are guarded by `std::sync::Mutex` or are atomic.
/// Every field is wrapped in `Arc`.
#[derive(Debug)]
pub struct FixtureDatabase {
    /// Map from fixture name to all its definitions (can be in multiple conftest.py files).
    pub definitions: Arc<DashMap<String, Vec<FixtureDefinition>>>,
    /// Reverse index: file path -> fixture names defined in that file.
    /// Used for efficient cleanup when a file is re-analyzed.
    pub file_definitions: Arc<DashMap<PathBuf, HashSet<String>>>,
    /// Map from file path to fixtures used in that file.
    pub usages: Arc<DashMap<PathBuf, Vec<FixtureUsage>>>,
    /// Reverse index: fixture name -> (file_path, usage) pairs.
    /// Used for efficient O(1) lookup in find_references_for_definition.
    pub usage_by_fixture: Arc<DashMap<String, Vec<(PathBuf, FixtureUsage)>>>,
    /// Cache of file contents for analyzed files (uses Arc for efficient sharing).
    /// Bounded by `MAX_FILE_CACHE_SIZE` via `evict_cache_if_needed`.
    pub file_cache: Arc<DashMap<PathBuf, Arc<String>>>,
    /// Map from file path to undeclared fixtures used in function bodies.
    pub undeclared_fixtures: Arc<DashMap<PathBuf, Vec<UndeclaredFixture>>>,
    /// Map from file path to imported names in that file.
    pub imports: Arc<DashMap<PathBuf, HashSet<String>>>,
    /// Cache of canonical paths to avoid repeated filesystem calls.
    /// Keyed by the path as originally supplied; the value is its canonical form, or the
    /// original path itself when canonicalization failed.
    pub canonical_path_cache: Arc<DashMap<PathBuf, PathBuf>>,
    /// Cache of line indices (byte offsets) for files to avoid recomputation.
    /// Stores (content_hash, line_index) to invalidate when content changes.
    pub line_index_cache: Arc<DashMap<PathBuf, LineIndexCacheEntry>>,
    /// Cache of parsed AST for files to avoid re-parsing.
    /// Stores (content_hash, ast) to invalidate when content changes.
    pub ast_cache: Arc<DashMap<PathBuf, AstCacheEntry>>,
    /// Version counter for definitions, incremented on each change.
    /// Used to invalidate cycle detection cache and available fixtures cache.
    pub definitions_version: Arc<std::sync::atomic::AtomicU64>,
    /// Cache of detected fixture cycles.
    /// Stores (definitions_version, cycles) to invalidate when definitions change.
    /// The key type is `()`, so the map holds at most one entry.
    pub cycle_cache: Arc<DashMap<(), CycleCacheEntry>>,
    /// Cache of available fixtures per file.
    /// Stores (definitions_version, fixtures) to invalidate when definitions change.
    pub available_fixtures_cache: Arc<DashMap<PathBuf, AvailableFixturesCacheEntry>>,
    /// Cache of imported fixtures per file.
    /// Stores (content_hash, definitions_version, fixture_names) for invalidation.
    pub imported_fixtures_cache: Arc<DashMap<PathBuf, ImportedFixturesCacheEntry>>,
    /// Discovered site-packages paths from venv scanning.
    /// Used for resolving absolute imports in venv plugin modules.
    pub site_packages_paths: Arc<std::sync::Mutex<Vec<PathBuf>>>,
    /// Discovered editable installs from venv scanning.
    pub editable_install_roots: Arc<std::sync::Mutex<Vec<EditableInstall>>>,
    /// Workspace root path, set during scan. Used to distinguish in-workspace editables.
    /// `None` until a root has been recorded.
    pub workspace_root: Arc<std::sync::Mutex<Option<PathBuf>>>,
    /// Files discovered via pytest11 entry point plugins.
    /// Used to mark fixtures from these files as `is_plugin` so the resolver
    /// can find them even when they are not in conftest.py or site-packages.
    /// The value type is `()`: the map serves as a concurrent set of paths.
    pub plugin_fixture_files: Arc<DashMap<PathBuf, ()>>,
}
121
122impl Default for FixtureDatabase {
123    fn default() -> Self {
124        Self::new()
125    }
126}
127
128impl FixtureDatabase {
129    /// Create a new empty fixture database.
130    pub fn new() -> Self {
131        Self {
132            definitions: Arc::new(DashMap::new()),
133            file_definitions: Arc::new(DashMap::new()),
134            usages: Arc::new(DashMap::new()),
135            usage_by_fixture: Arc::new(DashMap::new()),
136            file_cache: Arc::new(DashMap::new()),
137            undeclared_fixtures: Arc::new(DashMap::new()),
138            imports: Arc::new(DashMap::new()),
139            canonical_path_cache: Arc::new(DashMap::new()),
140            line_index_cache: Arc::new(DashMap::new()),
141            ast_cache: Arc::new(DashMap::new()),
142            definitions_version: Arc::new(std::sync::atomic::AtomicU64::new(0)),
143            cycle_cache: Arc::new(DashMap::new()),
144            available_fixtures_cache: Arc::new(DashMap::new()),
145            imported_fixtures_cache: Arc::new(DashMap::new()),
146            site_packages_paths: Arc::new(std::sync::Mutex::new(Vec::new())),
147            editable_install_roots: Arc::new(std::sync::Mutex::new(Vec::new())),
148            workspace_root: Arc::new(std::sync::Mutex::new(None)),
149            plugin_fixture_files: Arc::new(DashMap::new()),
150        }
151    }
152
153    /// Increment the definitions version to invalidate cycle cache.
154    /// Called whenever fixture definitions are modified.
155    pub(crate) fn invalidate_cycle_cache(&self) {
156        self.definitions_version
157            .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
158    }
159
160    /// Get canonical path with caching to avoid repeated filesystem calls.
161    /// Falls back to original path if canonicalization fails.
162    pub(crate) fn get_canonical_path(&self, path: PathBuf) -> PathBuf {
163        // Check cache first
164        if let Some(cached) = self.canonical_path_cache.get(&path) {
165            return cached.value().clone();
166        }
167
168        // Attempt canonicalization
169        let canonical = path.canonicalize().unwrap_or_else(|_| {
170            debug!("Could not canonicalize path {:?}, using as-is", path);
171            path.clone()
172        });
173
174        // Store in cache for future lookups
175        self.canonical_path_cache.insert(path, canonical.clone());
176        canonical
177    }
178
179    /// Get file content from cache or read from filesystem.
180    /// Returns None if file cannot be read.
181    pub(crate) fn get_file_content(&self, file_path: &Path) -> Option<Arc<String>> {
182        if let Some(cached) = self.file_cache.get(file_path) {
183            Some(Arc::clone(cached.value()))
184        } else {
185            std::fs::read_to_string(file_path).ok().map(Arc::new)
186        }
187    }
188
189    /// Get or compute line index for a file, with content-hash-based caching.
190    /// Returns Arc to avoid cloning the potentially large Vec.
191    /// The cache is invalidated when the content hash changes.
192    pub(crate) fn get_line_index(&self, file_path: &Path, content: &str) -> Arc<Vec<usize>> {
193        let content_hash = Self::hash_content(content);
194
195        // Check cache first - only use if content hash matches
196        if let Some(cached) = self.line_index_cache.get(file_path) {
197            let (cached_hash, cached_index) = cached.value();
198            if *cached_hash == content_hash {
199                return Arc::clone(cached_index);
200            }
201        }
202
203        // Build line index
204        let line_index = Self::build_line_index(content);
205        let arc_index = Arc::new(line_index);
206
207        // Store in cache with content hash
208        self.line_index_cache.insert(
209            file_path.to_path_buf(),
210            (content_hash, Arc::clone(&arc_index)),
211        );
212
213        arc_index
214    }
215
216    /// Get or parse AST for a file, with content-hash-based caching.
217    /// Returns Arc to avoid cloning the potentially large AST.
218    /// The cache is invalidated when the content hash changes.
219    pub(crate) fn get_parsed_ast(
220        &self,
221        file_path: &Path,
222        content: &str,
223    ) -> Option<Arc<rustpython_parser::ast::Mod>> {
224        let content_hash = Self::hash_content(content);
225
226        // Check cache first - only use if content hash matches
227        if let Some(cached) = self.ast_cache.get(file_path) {
228            let (cached_hash, cached_ast) = cached.value();
229            if *cached_hash == content_hash {
230                return Some(Arc::clone(cached_ast));
231            }
232        }
233
234        // Parse the content
235        let parsed = rustpython_parser::parse(content, rustpython_parser::Mode::Module, "").ok()?;
236        let arc_ast = Arc::new(parsed);
237
238        // Store in cache with content hash
239        self.ast_cache.insert(
240            file_path.to_path_buf(),
241            (content_hash, Arc::clone(&arc_ast)),
242        );
243
244        Some(arc_ast)
245    }
246
247    /// Compute a hash of the content for cache invalidation.
248    fn hash_content(content: &str) -> u64 {
249        let mut hasher = DefaultHasher::new();
250        content.hash(&mut hasher);
251        hasher.finish()
252    }
253
254    /// Check if a file path is inside an editable install that is NOT within the workspace.
255    /// Returns true if the file is from an external editable install (third-party).
256    pub(crate) fn is_editable_install_third_party(&self, file_path: &Path) -> bool {
257        let installs = self.editable_install_roots.lock().unwrap();
258        let workspace = self.workspace_root.lock().unwrap();
259
260        for install in installs.iter() {
261            if file_path.starts_with(&install.source_root) {
262                if let Some(ref ws) = *workspace {
263                    // Not third-party if editable source is inside workspace
264                    if install.source_root.starts_with(ws) {
265                        return false;
266                    }
267                    // Not third-party if workspace is inside editable source
268                    // (project installed editable in its own venv)
269                    if ws.starts_with(&install.source_root) {
270                        return false;
271                    }
272                }
273                return true;
274            }
275        }
276        false
277    }
278
279    /// Remove all cached data for a file.
280    /// Called when a file is closed or deleted to prevent unbounded memory growth.
281    pub fn cleanup_file_cache(&self, file_path: &Path) {
282        // Use canonical path for consistent cleanup
283        let canonical = file_path
284            .canonicalize()
285            .unwrap_or_else(|_| file_path.to_path_buf());
286
287        debug!("Cleaning up cache for file: {:?}", canonical);
288
289        // Remove from line_index_cache
290        self.line_index_cache.remove(&canonical);
291
292        // Remove from ast_cache
293        self.ast_cache.remove(&canonical);
294
295        // Remove from file_cache
296        self.file_cache.remove(&canonical);
297
298        // Remove from available_fixtures_cache (this file's cached available fixtures)
299        self.available_fixtures_cache.remove(&canonical);
300
301        // Remove from imported_fixtures_cache
302        self.imported_fixtures_cache.remove(&canonical);
303
304        // Note: We don't remove from canonical_path_cache because:
305        // 1. It's keyed by original path, not canonical path
306        // 2. Path->canonical mappings are stable and small
307        // 3. They may be needed again if file is reopened
308
309        // Note: We don't remove definitions/usages here because:
310        // 1. They might be needed for cross-file references
311        // 2. They're cleaned up on next analyze_file call anyway
312    }
313
314    /// Evict entries from caches if they exceed the maximum size.
315    /// Called periodically to prevent unbounded memory growth in very large workspaces.
316    /// Most LSPs rely on did_close cleanup for open files; this is a safety net for
317    /// workspace scan files that accumulate over time.
318    pub(crate) fn evict_cache_if_needed(&self) {
319        // Only evict if significantly over limit to avoid frequent eviction
320        if self.file_cache.len() > MAX_FILE_CACHE_SIZE {
321            debug!(
322                "File cache size ({}) exceeds limit ({}), evicting entries",
323                self.file_cache.len(),
324                MAX_FILE_CACHE_SIZE
325            );
326
327            // Remove ~25% of entries to avoid frequent re-eviction
328            let to_remove_count = self.file_cache.len() / 4;
329            let to_remove: Vec<PathBuf> = self
330                .file_cache
331                .iter()
332                .take(to_remove_count)
333                .map(|entry| entry.key().clone())
334                .collect();
335
336            for path in to_remove {
337                self.file_cache.remove(&path);
338                // Also clean related caches for consistency
339                self.line_index_cache.remove(&path);
340                self.ast_cache.remove(&path);
341                self.available_fixtures_cache.remove(&path);
342                self.imported_fixtures_cache.remove(&path);
343            }
344
345            debug!(
346                "Cache eviction complete, new size: {}",
347                self.file_cache.len()
348            );
349        }
350    }
351}