pytest_language_server/fixtures/
scanner.rs

1//! Workspace and virtual environment scanning for fixture definitions.
2
3use super::FixtureDatabase;
4use glob::Pattern;
5use rayon::prelude::*;
6use std::path::Path;
7use std::sync::atomic::{AtomicUsize, Ordering};
8use tracing::{debug, error, info, warn};
9use walkdir::WalkDir;
10
11impl FixtureDatabase {
12    /// Directories that should be skipped during workspace scanning.
13    /// These are typically large directories that don't contain test files.
14    const SKIP_DIRECTORIES: &'static [&'static str] = &[
15        // Version control
16        ".git",
17        ".hg",
18        ".svn",
19        // Virtual environments (scanned separately for plugins)
20        ".venv",
21        "venv",
22        "env",
23        ".env",
24        // Python caches and build artifacts
25        "__pycache__",
26        ".pytest_cache",
27        ".mypy_cache",
28        ".ruff_cache",
29        ".tox",
30        ".nox",
31        "build",
32        "dist",
33        ".eggs",
34        // JavaScript/Node
35        "node_modules",
36        "bower_components",
37        // Rust (for mixed projects)
38        "target",
39        // IDE and editor directories
40        ".idea",
41        ".vscode",
42        // Other common large directories
43        ".cache",
44        ".local",
45        "vendor",
46        "site-packages",
47    ];
48
49    /// Check if a directory should be skipped during scanning.
50    pub(crate) fn should_skip_directory(dir_name: &str) -> bool {
51        // Check exact matches
52        if Self::SKIP_DIRECTORIES.contains(&dir_name) {
53            return true;
54        }
55        // Also skip directories ending with .egg-info
56        if dir_name.ends_with(".egg-info") {
57            return true;
58        }
59        false
60    }
61
62    /// Scan a workspace directory for test files and conftest.py files.
63    /// Optionally accepts exclude patterns from configuration.
64    pub fn scan_workspace(&self, root_path: &Path) {
65        self.scan_workspace_with_excludes(root_path, &[]);
66    }
67
68    /// Scan a workspace directory with custom exclude patterns.
69    pub fn scan_workspace_with_excludes(&self, root_path: &Path, exclude_patterns: &[Pattern]) {
70        info!("Scanning workspace: {:?}", root_path);
71
72        // Defensive check: ensure the root path exists
73        if !root_path.exists() {
74            warn!(
75                "Workspace path does not exist, skipping scan: {:?}",
76                root_path
77            );
78            return;
79        }
80
81        // Phase 1: Collect all file paths (sequential, fast)
82        let mut files_to_process: Vec<std::path::PathBuf> = Vec::new();
83        let mut skipped_dirs = 0;
84
85        // Use WalkDir with filter to skip large/irrelevant directories
86        let walker = WalkDir::new(root_path).into_iter().filter_entry(|entry| {
87            // Allow files to pass through
88            if entry.file_type().is_file() {
89                return true;
90            }
91            // For directories, check if we should skip them
92            if let Some(dir_name) = entry.file_name().to_str() {
93                !Self::should_skip_directory(dir_name)
94            } else {
95                true
96            }
97        });
98
99        for entry in walker {
100            let entry = match entry {
101                Ok(e) => e,
102                Err(err) => {
103                    // Log directory traversal errors (permission denied, etc.)
104                    if err
105                        .io_error()
106                        .is_some_and(|e| e.kind() == std::io::ErrorKind::PermissionDenied)
107                    {
108                        warn!(
109                            "Permission denied accessing path during workspace scan: {}",
110                            err
111                        );
112                    } else {
113                        debug!("Error during workspace scan: {}", err);
114                    }
115                    continue;
116                }
117            };
118
119            let path = entry.path();
120
121            // Skip files in filtered directories (shouldn't happen with filter_entry, but just in case)
122            if path.components().any(|c| {
123                c.as_os_str()
124                    .to_str()
125                    .is_some_and(Self::should_skip_directory)
126            }) {
127                skipped_dirs += 1;
128                continue;
129            }
130
131            // Skip files matching user-configured exclude patterns
132            // Patterns are matched against paths relative to workspace root
133            if !exclude_patterns.is_empty() {
134                if let Ok(relative_path) = path.strip_prefix(root_path) {
135                    let relative_str = relative_path.to_string_lossy();
136                    if exclude_patterns.iter().any(|p| p.matches(&relative_str)) {
137                        debug!("Skipping excluded path: {:?}", path);
138                        continue;
139                    }
140                }
141            }
142
143            // Look for conftest.py or test_*.py or *_test.py files
144            if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
145                if filename == "conftest.py"
146                    || filename.starts_with("test_") && filename.ends_with(".py")
147                    || filename.ends_with("_test.py")
148                {
149                    files_to_process.push(path.to_path_buf());
150                }
151            }
152        }
153
154        if skipped_dirs > 0 {
155            debug!("Skipped {} entries in filtered directories", skipped_dirs);
156        }
157
158        let total_files = files_to_process.len();
159        info!("Found {} test/conftest files to process", total_files);
160
161        // Phase 2: Process files in parallel using rayon
162        // Use analyze_file_fresh since this is initial scan (no previous definitions to clean)
163        let error_count = AtomicUsize::new(0);
164
165        files_to_process.par_iter().for_each(|path| {
166            debug!("Found test/conftest file: {:?}", path);
167            match std::fs::read_to_string(path) {
168                Ok(content) => {
169                    self.analyze_file_fresh(path.clone(), &content);
170                }
171                Err(err) => {
172                    if err.kind() == std::io::ErrorKind::PermissionDenied {
173                        warn!("Permission denied reading file: {:?}", path);
174                    } else {
175                        error!("Failed to read file {:?}: {}", path, err);
176                        error_count.fetch_add(1, Ordering::Relaxed);
177                    }
178                }
179            }
180        });
181
182        let errors = error_count.load(Ordering::Relaxed);
183        if errors > 0 {
184            warn!("Workspace scan completed with {} errors", errors);
185        }
186
187        info!("Workspace scan complete. Processed {} files", total_files);
188
189        // Phase 3: Scan modules imported by conftest.py files
190        // This ensures fixtures defined in separate modules (imported via star import) are discovered
191        self.scan_imported_fixture_modules(root_path);
192
193        // Also scan virtual environment for pytest plugins
194        self.scan_venv_fixtures(root_path);
195
196        info!("Total fixtures defined: {}", self.definitions.len());
197        info!("Total files with fixture usages: {}", self.usages.len());
198    }
199
200    /// Scan Python modules that are imported by conftest.py files.
201    /// This discovers fixtures defined in separate modules that are re-exported via star imports.
202    /// Handles transitive imports (A imports B, B imports C) by iteratively scanning until no new modules are found.
203    fn scan_imported_fixture_modules(&self, _root_path: &Path) {
204        use std::collections::HashSet;
205
206        info!("Scanning for imported fixture modules");
207
208        // Track all files we've already processed to find imports from
209        let mut processed_files: HashSet<std::path::PathBuf> = HashSet::new();
210
211        // Start with conftest.py files
212        let mut files_to_check: Vec<std::path::PathBuf> = self
213            .file_cache
214            .iter()
215            .filter(|entry| {
216                entry
217                    .key()
218                    .file_name()
219                    .map(|n| n == "conftest.py")
220                    .unwrap_or(false)
221            })
222            .map(|entry| entry.key().clone())
223            .collect();
224
225        if files_to_check.is_empty() {
226            debug!("No conftest.py files found, skipping import scan");
227            return;
228        }
229
230        info!(
231            "Starting import scan with {} conftest.py files",
232            files_to_check.len()
233        );
234
235        // Iteratively process files until no new modules are discovered
236        let mut iteration = 0;
237        while !files_to_check.is_empty() {
238            iteration += 1;
239            debug!(
240                "Import scan iteration {}: checking {} files",
241                iteration,
242                files_to_check.len()
243            );
244
245            let mut new_modules: HashSet<std::path::PathBuf> = HashSet::new();
246
247            for file_path in &files_to_check {
248                if processed_files.contains(file_path) {
249                    continue;
250                }
251                processed_files.insert(file_path.clone());
252
253                // Get the file content
254                let Some(content) = self.get_file_content(file_path) else {
255                    continue;
256                };
257
258                // Parse the AST
259                let Some(parsed) = self.get_parsed_ast(file_path, &content) else {
260                    continue;
261                };
262
263                let line_index = self.get_line_index(file_path, &content);
264
265                // Extract imports
266                if let rustpython_parser::ast::Mod::Module(module) = parsed.as_ref() {
267                    let imports =
268                        self.extract_fixture_imports(&module.body, file_path, &line_index);
269
270                    for import in imports {
271                        // Resolve the import to a file path
272                        if let Some(resolved_path) =
273                            self.resolve_module_to_file(&import.module_path, file_path)
274                        {
275                            let canonical = self.get_canonical_path(resolved_path);
276                            // Only add if not already processed and not in file cache
277                            if !processed_files.contains(&canonical)
278                                && !self.file_cache.contains_key(&canonical)
279                            {
280                                new_modules.insert(canonical);
281                            }
282                        }
283                    }
284                }
285            }
286
287            if new_modules.is_empty() {
288                debug!("No new modules found in iteration {}", iteration);
289                break;
290            }
291
292            info!(
293                "Iteration {}: found {} new modules to analyze",
294                iteration,
295                new_modules.len()
296            );
297
298            // Analyze the new modules
299            for module_path in &new_modules {
300                if module_path.exists() {
301                    debug!("Analyzing imported module: {:?}", module_path);
302                    match std::fs::read_to_string(module_path) {
303                        Ok(content) => {
304                            self.analyze_file_fresh(module_path.clone(), &content);
305                        }
306                        Err(err) => {
307                            debug!("Failed to read imported module {:?}: {}", module_path, err);
308                        }
309                    }
310                }
311            }
312
313            // Next iteration will check the newly analyzed modules for their imports
314            files_to_check = new_modules.into_iter().collect();
315        }
316
317        info!(
318            "Imported fixture module scan complete after {} iterations",
319            iteration
320        );
321    }
322
323    /// Scan virtual environment for pytest plugin fixtures.
324    fn scan_venv_fixtures(&self, root_path: &Path) {
325        info!("Scanning for pytest plugins in virtual environment");
326
327        // Try to find virtual environment
328        let venv_paths = vec![
329            root_path.join(".venv"),
330            root_path.join("venv"),
331            root_path.join("env"),
332        ];
333
334        info!("Checking for venv in: {:?}", root_path);
335        for venv_path in &venv_paths {
336            debug!("Checking venv path: {:?}", venv_path);
337            if venv_path.exists() {
338                info!("Found virtual environment at: {:?}", venv_path);
339                self.scan_venv_site_packages(venv_path);
340                return;
341            } else {
342                debug!("  Does not exist: {:?}", venv_path);
343            }
344        }
345
346        // Also check for system-wide VIRTUAL_ENV
347        if let Ok(venv) = std::env::var("VIRTUAL_ENV") {
348            info!("Found VIRTUAL_ENV environment variable: {}", venv);
349            let venv_path = std::path::PathBuf::from(venv);
350            if venv_path.exists() {
351                info!("Using VIRTUAL_ENV: {:?}", venv_path);
352                self.scan_venv_site_packages(&venv_path);
353                return;
354            } else {
355                warn!("VIRTUAL_ENV path does not exist: {:?}", venv_path);
356            }
357        } else {
358            debug!("No VIRTUAL_ENV environment variable set");
359        }
360
361        warn!("No virtual environment found - third-party fixtures will not be available");
362    }
363
364    fn scan_venv_site_packages(&self, venv_path: &Path) {
365        info!("Scanning venv site-packages in: {:?}", venv_path);
366
367        // Find site-packages directory
368        let lib_path = venv_path.join("lib");
369        debug!("Checking lib path: {:?}", lib_path);
370
371        if lib_path.exists() {
372            // Look for python* directories
373            if let Ok(entries) = std::fs::read_dir(&lib_path) {
374                for entry in entries.flatten() {
375                    let path = entry.path();
376                    let dirname = path.file_name().unwrap_or_default().to_string_lossy();
377                    debug!("Found in lib: {:?}", dirname);
378
379                    if path.is_dir() && dirname.starts_with("python") {
380                        let site_packages = path.join("site-packages");
381                        debug!("Checking site-packages: {:?}", site_packages);
382
383                        if site_packages.exists() {
384                            info!("Found site-packages: {:?}", site_packages);
385                            self.scan_pytest_plugins(&site_packages);
386                            return;
387                        }
388                    }
389                }
390            }
391        }
392
393        // Try Windows path
394        let windows_site_packages = venv_path.join("Lib/site-packages");
395        debug!("Checking Windows path: {:?}", windows_site_packages);
396        if windows_site_packages.exists() {
397            info!("Found site-packages (Windows): {:?}", windows_site_packages);
398            self.scan_pytest_plugins(&windows_site_packages);
399            return;
400        }
401
402        warn!("Could not find site-packages in venv: {:?}", venv_path);
403    }
404
405    fn scan_pytest_plugins(&self, site_packages: &Path) {
406        info!("Scanning pytest plugins in: {:?}", site_packages);
407
408        // List of known pytest plugin prefixes/packages
409        let pytest_packages = vec![
410            // Existing plugins
411            "pytest_mock",
412            "pytest-mock",
413            "pytest_asyncio",
414            "pytest-asyncio",
415            "pytest_django",
416            "pytest-django",
417            "pytest_cov",
418            "pytest-cov",
419            "pytest_xdist",
420            "pytest-xdist",
421            "pytest_fixtures",
422            // Additional popular plugins
423            "pytest_flask",
424            "pytest-flask",
425            "pytest_httpx",
426            "pytest-httpx",
427            "pytest_postgresql",
428            "pytest-postgresql",
429            "pytest_mongodb",
430            "pytest-mongodb",
431            "pytest_redis",
432            "pytest-redis",
433            "pytest_elasticsearch",
434            "pytest-elasticsearch",
435            "pytest_rabbitmq",
436            "pytest-rabbitmq",
437            "pytest_mysql",
438            "pytest-mysql",
439            "pytest_docker",
440            "pytest-docker",
441            "pytest_kubernetes",
442            "pytest-kubernetes",
443            "pytest_celery",
444            "pytest-celery",
445            "pytest_tornado",
446            "pytest-tornado",
447            "pytest_aiohttp",
448            "pytest-aiohttp",
449            "pytest_sanic",
450            "pytest-sanic",
451            "pytest_fastapi",
452            "pytest-fastapi",
453            "pytest_alembic",
454            "pytest-alembic",
455            "pytest_sqlalchemy",
456            "pytest-sqlalchemy",
457            "pytest_factoryboy",
458            "pytest-factoryboy",
459            "pytest_freezegun",
460            "pytest-freezegun",
461            "pytest_mimesis",
462            "pytest-mimesis",
463            "pytest_lazy_fixture",
464            "pytest-lazy-fixture",
465            "pytest_cases",
466            "pytest-cases",
467            "pytest_bdd",
468            "pytest-bdd",
469            "pytest_benchmark",
470            "pytest-benchmark",
471            "pytest_timeout",
472            "pytest-timeout",
473            "pytest_retry",
474            "pytest-retry",
475            "pytest_repeat",
476            "pytest-repeat",
477            "pytest_rerunfailures",
478            "pytest-rerunfailures",
479            "pytest_ordering",
480            "pytest-ordering",
481            "pytest_dependency",
482            "pytest-dependency",
483            "pytest_random_order",
484            "pytest-random-order",
485            "pytest_picked",
486            "pytest-picked",
487            "pytest_testmon",
488            "pytest-testmon",
489            "pytest_split",
490            "pytest-split",
491            "pytest_env",
492            "pytest-env",
493            "pytest_dotenv",
494            "pytest-dotenv",
495            "pytest_html",
496            "pytest-html",
497            "pytest_json_report",
498            "pytest-json-report",
499            "pytest_metadata",
500            "pytest-metadata",
501            "pytest_instafail",
502            "pytest-instafail",
503            "pytest_clarity",
504            "pytest-clarity",
505            "pytest_sugar",
506            "pytest-sugar",
507            "pytest_emoji",
508            "pytest-emoji",
509            "pytest_play",
510            "pytest-play",
511            "pytest_selenium",
512            "pytest-selenium",
513            "pytest_playwright",
514            "pytest-playwright",
515            "pytest_splinter",
516            "pytest-splinter",
517        ];
518
519        let mut plugin_count = 0;
520
521        for entry in std::fs::read_dir(site_packages).into_iter().flatten() {
522            let entry = match entry {
523                Ok(e) => e,
524                Err(_) => continue,
525            };
526
527            let path = entry.path();
528            let filename = path.file_name().unwrap_or_default().to_string_lossy();
529
530            // Check if this is a pytest-related package
531            let is_pytest_package = pytest_packages.iter().any(|pkg| filename.contains(pkg))
532                || filename.starts_with("pytest")
533                || filename.contains("_pytest");
534
535            if is_pytest_package && path.is_dir() {
536                // Skip .dist-info directories - they don't contain code
537                if filename.ends_with(".dist-info") || filename.ends_with(".egg-info") {
538                    debug!("Skipping dist-info directory: {:?}", filename);
539                    continue;
540                }
541
542                info!("Scanning pytest plugin: {:?}", path);
543                plugin_count += 1;
544                self.scan_plugin_directory(&path);
545            } else {
546                // Log packages we're skipping for debugging
547                if filename.contains("mock") {
548                    debug!("Found mock-related package (not scanning): {:?}", filename);
549                }
550            }
551        }
552
553        info!("Scanned {} pytest plugin packages", plugin_count);
554    }
555
556    fn scan_plugin_directory(&self, plugin_dir: &Path) {
557        // Recursively scan for Python files with fixtures
558        for entry in WalkDir::new(plugin_dir)
559            .max_depth(3) // Limit depth to avoid scanning too much
560            .into_iter()
561            .filter_map(|e| e.ok())
562        {
563            let path = entry.path();
564
565            if path.extension().and_then(|s| s.to_str()) == Some("py") {
566                // Only scan files that might have fixtures (not test files)
567                if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
568                    // Skip test files and __pycache__
569                    if filename.starts_with("test_") || filename.contains("__pycache__") {
570                        continue;
571                    }
572
573                    debug!("Scanning plugin file: {:?}", path);
574                    if let Ok(content) = std::fs::read_to_string(path) {
575                        self.analyze_file(path.to_path_buf(), &content);
576                    }
577                }
578            }
579        }
580    }
581}