Skip to main content

pytest_language_server/fixtures/
mod.rs

1//! Fixture database and analysis module.
2//!
3//! This module provides the core functionality for managing pytest fixtures:
4//! - Scanning workspaces for fixture definitions
5//! - Analyzing Python files for fixtures and their usages
6//! - Resolving fixture definitions based on pytest's priority rules
7//! - Providing completion context for fixture suggestions
8
9mod analyzer;
10pub(crate) mod cli;
11pub mod decorators; // Public for testing
12mod docstring;
13mod imports;
14mod resolver;
15mod scanner;
16pub(crate) mod string_utils; // pub(crate) for inlay_hint provider access
17pub mod types;
18mod undeclared;
19
20#[allow(unused_imports)] // ParamInsertionInfo re-exported for public API via lib.rs
21pub use types::{
22    CompletionContext, FixtureCycle, FixtureDefinition, FixtureScope, FixtureUsage,
23    ParamInsertionInfo, ScopeMismatch, UndeclaredFixture,
24};
25
26use dashmap::DashMap;
27use std::collections::hash_map::DefaultHasher;
28use std::collections::HashSet;
29use std::hash::{Hash, Hasher};
30use std::path::{Path, PathBuf};
31use std::sync::Arc;
32use tracing::debug;
33
/// A package installed in editable (development) mode.
///
/// Discovered via `direct_url.json` + `.pth` files in site-packages.
#[derive(Clone, Debug)]
#[allow(dead_code)] // Fields read in tests and used for debug logging
pub struct EditableInstall {
    /// Package name. NOTE(review): presumably the normalized form of
    /// `raw_package_name` — confirm against the scanner that builds this.
    pub package_name: String,
    /// Package name as originally found (presumably un-normalized — confirm).
    pub raw_package_name: String,
    /// Root directory of the package sources; file paths are tested against
    /// this prefix to decide whether they belong to this install.
    pub source_root: PathBuf,
    /// The site-packages directory associated with this install.
    pub site_packages: PathBuf,
}
43
44/// Cache entry for line indices: (content_hash, line_index).
45/// The content hash is used to invalidate the cache when file content changes.
46type LineIndexCacheEntry = (u64, Arc<Vec<usize>>);
47
48/// Cache entry for parsed AST: (content_hash, ast).
49/// The content hash is used to invalidate the cache when file content changes.
50type AstCacheEntry = (u64, Arc<rustpython_parser::ast::Mod>);
51
52/// Cache entry for fixture cycles: (definitions_version, cycles).
53/// The version is incremented when definitions change to invalidate the cache.
54type CycleCacheEntry = (u64, Arc<Vec<types::FixtureCycle>>);
55
56/// Cache entry for available fixtures: (definitions_version, fixtures).
57/// The version is incremented when definitions change to invalidate the cache.
58type AvailableFixturesCacheEntry = (u64, Arc<Vec<FixtureDefinition>>);
59
60/// Cache entry for imported fixtures: (content_hash, definitions_version, imported_fixture_names).
61/// Invalidated when either the file content or fixture definitions change.
62type ImportedFixturesCacheEntry = (u64, u64, Arc<HashSet<String>>);
63
/// Upper bound on the number of entries kept in the file content cache.
/// Once the cache grows past this, `evict_cache_if_needed` drops a share of
/// the entries so memory use stays bounded.
const MAX_FILE_CACHE_SIZE: usize = 2_000;
67
68/// The central database for fixture definitions and usages.
69///
70/// Uses `DashMap` for lock-free concurrent access during workspace scanning.
71#[derive(Debug)]
72pub struct FixtureDatabase {
73    /// Map from fixture name to all its definitions (can be in multiple conftest.py files).
74    pub definitions: Arc<DashMap<String, Vec<FixtureDefinition>>>,
75    /// Reverse index: file path -> fixture names defined in that file.
76    /// Used for efficient cleanup when a file is re-analyzed.
77    pub file_definitions: Arc<DashMap<PathBuf, HashSet<String>>>,
78    /// Map from file path to fixtures used in that file.
79    pub usages: Arc<DashMap<PathBuf, Vec<FixtureUsage>>>,
80    /// Reverse index: fixture name -> (file_path, usage) pairs.
81    /// Used for efficient O(1) lookup in find_references_for_definition.
82    pub usage_by_fixture: Arc<DashMap<String, Vec<(PathBuf, FixtureUsage)>>>,
83    /// Cache of file contents for analyzed files (uses Arc for efficient sharing).
84    pub file_cache: Arc<DashMap<PathBuf, Arc<String>>>,
85    /// Map from file path to undeclared fixtures used in function bodies.
86    pub undeclared_fixtures: Arc<DashMap<PathBuf, Vec<UndeclaredFixture>>>,
87    /// Map from file path to imported names in that file.
88    pub imports: Arc<DashMap<PathBuf, HashSet<String>>>,
89    /// Cache of canonical paths to avoid repeated filesystem calls.
90    pub canonical_path_cache: Arc<DashMap<PathBuf, PathBuf>>,
91    /// Cache of line indices (byte offsets) for files to avoid recomputation.
92    /// Stores (content_hash, line_index) to invalidate when content changes.
93    pub line_index_cache: Arc<DashMap<PathBuf, LineIndexCacheEntry>>,
94    /// Cache of parsed AST for files to avoid re-parsing.
95    /// Stores (content_hash, ast) to invalidate when content changes.
96    pub ast_cache: Arc<DashMap<PathBuf, AstCacheEntry>>,
97    /// Version counter for definitions, incremented on each change.
98    /// Used to invalidate cycle detection cache and available fixtures cache.
99    pub definitions_version: Arc<std::sync::atomic::AtomicU64>,
100    /// Cache of detected fixture cycles.
101    /// Stores (definitions_version, cycles) to invalidate when definitions change.
102    pub cycle_cache: Arc<DashMap<(), CycleCacheEntry>>,
103    /// Cache of available fixtures per file.
104    /// Stores (definitions_version, fixtures) to invalidate when definitions change.
105    pub available_fixtures_cache: Arc<DashMap<PathBuf, AvailableFixturesCacheEntry>>,
106    /// Cache of imported fixtures per file.
107    /// Stores (content_hash, definitions_version, fixture_names) for invalidation.
108    pub imported_fixtures_cache: Arc<DashMap<PathBuf, ImportedFixturesCacheEntry>>,
109    /// Discovered site-packages paths from venv scanning.
110    /// Used for resolving absolute imports in venv plugin modules.
111    pub site_packages_paths: Arc<std::sync::Mutex<Vec<PathBuf>>>,
112    /// Discovered editable installs from venv scanning.
113    pub editable_install_roots: Arc<std::sync::Mutex<Vec<EditableInstall>>>,
114    /// Workspace root path, set during scan. Used to distinguish in-workspace editables.
115    pub workspace_root: Arc<std::sync::Mutex<Option<PathBuf>>>,
116}
117
118impl Default for FixtureDatabase {
119    fn default() -> Self {
120        Self::new()
121    }
122}
123
124impl FixtureDatabase {
125    /// Create a new empty fixture database.
126    pub fn new() -> Self {
127        Self {
128            definitions: Arc::new(DashMap::new()),
129            file_definitions: Arc::new(DashMap::new()),
130            usages: Arc::new(DashMap::new()),
131            usage_by_fixture: Arc::new(DashMap::new()),
132            file_cache: Arc::new(DashMap::new()),
133            undeclared_fixtures: Arc::new(DashMap::new()),
134            imports: Arc::new(DashMap::new()),
135            canonical_path_cache: Arc::new(DashMap::new()),
136            line_index_cache: Arc::new(DashMap::new()),
137            ast_cache: Arc::new(DashMap::new()),
138            definitions_version: Arc::new(std::sync::atomic::AtomicU64::new(0)),
139            cycle_cache: Arc::new(DashMap::new()),
140            available_fixtures_cache: Arc::new(DashMap::new()),
141            imported_fixtures_cache: Arc::new(DashMap::new()),
142            site_packages_paths: Arc::new(std::sync::Mutex::new(Vec::new())),
143            editable_install_roots: Arc::new(std::sync::Mutex::new(Vec::new())),
144            workspace_root: Arc::new(std::sync::Mutex::new(None)),
145        }
146    }
147
148    /// Increment the definitions version to invalidate cycle cache.
149    /// Called whenever fixture definitions are modified.
150    pub(crate) fn invalidate_cycle_cache(&self) {
151        self.definitions_version
152            .fetch_add(1, std::sync::atomic::Ordering::SeqCst);
153    }
154
155    /// Get canonical path with caching to avoid repeated filesystem calls.
156    /// Falls back to original path if canonicalization fails.
157    pub(crate) fn get_canonical_path(&self, path: PathBuf) -> PathBuf {
158        // Check cache first
159        if let Some(cached) = self.canonical_path_cache.get(&path) {
160            return cached.value().clone();
161        }
162
163        // Attempt canonicalization
164        let canonical = path.canonicalize().unwrap_or_else(|_| {
165            debug!("Could not canonicalize path {:?}, using as-is", path);
166            path.clone()
167        });
168
169        // Store in cache for future lookups
170        self.canonical_path_cache.insert(path, canonical.clone());
171        canonical
172    }
173
174    /// Get file content from cache or read from filesystem.
175    /// Returns None if file cannot be read.
176    pub(crate) fn get_file_content(&self, file_path: &Path) -> Option<Arc<String>> {
177        if let Some(cached) = self.file_cache.get(file_path) {
178            Some(Arc::clone(cached.value()))
179        } else {
180            std::fs::read_to_string(file_path).ok().map(Arc::new)
181        }
182    }
183
184    /// Get or compute line index for a file, with content-hash-based caching.
185    /// Returns Arc to avoid cloning the potentially large Vec.
186    /// The cache is invalidated when the content hash changes.
187    pub(crate) fn get_line_index(&self, file_path: &Path, content: &str) -> Arc<Vec<usize>> {
188        let content_hash = Self::hash_content(content);
189
190        // Check cache first - only use if content hash matches
191        if let Some(cached) = self.line_index_cache.get(file_path) {
192            let (cached_hash, cached_index) = cached.value();
193            if *cached_hash == content_hash {
194                return Arc::clone(cached_index);
195            }
196        }
197
198        // Build line index
199        let line_index = Self::build_line_index(content);
200        let arc_index = Arc::new(line_index);
201
202        // Store in cache with content hash
203        self.line_index_cache.insert(
204            file_path.to_path_buf(),
205            (content_hash, Arc::clone(&arc_index)),
206        );
207
208        arc_index
209    }
210
211    /// Get or parse AST for a file, with content-hash-based caching.
212    /// Returns Arc to avoid cloning the potentially large AST.
213    /// The cache is invalidated when the content hash changes.
214    pub(crate) fn get_parsed_ast(
215        &self,
216        file_path: &Path,
217        content: &str,
218    ) -> Option<Arc<rustpython_parser::ast::Mod>> {
219        let content_hash = Self::hash_content(content);
220
221        // Check cache first - only use if content hash matches
222        if let Some(cached) = self.ast_cache.get(file_path) {
223            let (cached_hash, cached_ast) = cached.value();
224            if *cached_hash == content_hash {
225                return Some(Arc::clone(cached_ast));
226            }
227        }
228
229        // Parse the content
230        let parsed = rustpython_parser::parse(content, rustpython_parser::Mode::Module, "").ok()?;
231        let arc_ast = Arc::new(parsed);
232
233        // Store in cache with content hash
234        self.ast_cache.insert(
235            file_path.to_path_buf(),
236            (content_hash, Arc::clone(&arc_ast)),
237        );
238
239        Some(arc_ast)
240    }
241
242    /// Compute a hash of the content for cache invalidation.
243    fn hash_content(content: &str) -> u64 {
244        let mut hasher = DefaultHasher::new();
245        content.hash(&mut hasher);
246        hasher.finish()
247    }
248
249    /// Check if a file path is inside an editable install that is NOT within the workspace.
250    /// Returns true if the file is from an external editable install (third-party).
251    pub(crate) fn is_editable_install_third_party(&self, file_path: &Path) -> bool {
252        let installs = self.editable_install_roots.lock().unwrap();
253        let workspace = self.workspace_root.lock().unwrap();
254
255        for install in installs.iter() {
256            if file_path.starts_with(&install.source_root) {
257                if let Some(ref ws) = *workspace {
258                    // Not third-party if editable source is inside workspace
259                    if install.source_root.starts_with(ws) {
260                        return false;
261                    }
262                    // Not third-party if workspace is inside editable source
263                    // (project installed editable in its own venv)
264                    if ws.starts_with(&install.source_root) {
265                        return false;
266                    }
267                }
268                return true;
269            }
270        }
271        false
272    }
273
274    /// Remove all cached data for a file.
275    /// Called when a file is closed or deleted to prevent unbounded memory growth.
276    pub fn cleanup_file_cache(&self, file_path: &Path) {
277        // Use canonical path for consistent cleanup
278        let canonical = file_path
279            .canonicalize()
280            .unwrap_or_else(|_| file_path.to_path_buf());
281
282        debug!("Cleaning up cache for file: {:?}", canonical);
283
284        // Remove from line_index_cache
285        self.line_index_cache.remove(&canonical);
286
287        // Remove from ast_cache
288        self.ast_cache.remove(&canonical);
289
290        // Remove from file_cache
291        self.file_cache.remove(&canonical);
292
293        // Remove from available_fixtures_cache (this file's cached available fixtures)
294        self.available_fixtures_cache.remove(&canonical);
295
296        // Remove from imported_fixtures_cache
297        self.imported_fixtures_cache.remove(&canonical);
298
299        // Note: We don't remove from canonical_path_cache because:
300        // 1. It's keyed by original path, not canonical path
301        // 2. Path->canonical mappings are stable and small
302        // 3. They may be needed again if file is reopened
303
304        // Note: We don't remove definitions/usages here because:
305        // 1. They might be needed for cross-file references
306        // 2. They're cleaned up on next analyze_file call anyway
307    }
308
309    /// Evict entries from caches if they exceed the maximum size.
310    /// Called periodically to prevent unbounded memory growth in very large workspaces.
311    /// Most LSPs rely on did_close cleanup for open files; this is a safety net for
312    /// workspace scan files that accumulate over time.
313    pub(crate) fn evict_cache_if_needed(&self) {
314        // Only evict if significantly over limit to avoid frequent eviction
315        if self.file_cache.len() > MAX_FILE_CACHE_SIZE {
316            debug!(
317                "File cache size ({}) exceeds limit ({}), evicting entries",
318                self.file_cache.len(),
319                MAX_FILE_CACHE_SIZE
320            );
321
322            // Remove ~25% of entries to avoid frequent re-eviction
323            let to_remove_count = self.file_cache.len() / 4;
324            let to_remove: Vec<PathBuf> = self
325                .file_cache
326                .iter()
327                .take(to_remove_count)
328                .map(|entry| entry.key().clone())
329                .collect();
330
331            for path in to_remove {
332                self.file_cache.remove(&path);
333                // Also clean related caches for consistency
334                self.line_index_cache.remove(&path);
335                self.ast_cache.remove(&path);
336                self.available_fixtures_cache.remove(&path);
337                self.imported_fixtures_cache.remove(&path);
338            }
339
340            debug!(
341                "Cache eviction complete, new size: {}",
342                self.file_cache.len()
343            );
344        }
345    }
346}