pytest_language_server/fixtures/
scanner.rs

1//! Workspace and virtual environment scanning for fixture definitions.
2
3use super::FixtureDatabase;
4use glob::Pattern;
5use rayon::prelude::*;
6use std::path::Path;
7use std::sync::atomic::{AtomicUsize, Ordering};
8use tracing::{debug, error, info, warn};
9use walkdir::WalkDir;
10
11impl FixtureDatabase {
12    /// Directories that should be skipped during workspace scanning.
13    /// These are typically large directories that don't contain test files.
14    const SKIP_DIRECTORIES: &'static [&'static str] = &[
15        // Version control
16        ".git",
17        ".hg",
18        ".svn",
19        // Virtual environments (scanned separately for plugins)
20        ".venv",
21        "venv",
22        "env",
23        ".env",
24        // Python caches and build artifacts
25        "__pycache__",
26        ".pytest_cache",
27        ".mypy_cache",
28        ".ruff_cache",
29        ".tox",
30        ".nox",
31        "build",
32        "dist",
33        ".eggs",
34        // JavaScript/Node
35        "node_modules",
36        "bower_components",
37        // Rust (for mixed projects)
38        "target",
39        // IDE and editor directories
40        ".idea",
41        ".vscode",
42        // Other common large directories
43        ".cache",
44        ".local",
45        "vendor",
46        "site-packages",
47    ];
48
49    /// Check if a directory should be skipped during scanning.
50    pub(crate) fn should_skip_directory(dir_name: &str) -> bool {
51        // Check exact matches
52        if Self::SKIP_DIRECTORIES.contains(&dir_name) {
53            return true;
54        }
55        // Also skip directories ending with .egg-info
56        if dir_name.ends_with(".egg-info") {
57            return true;
58        }
59        false
60    }
61
62    /// Scan a workspace directory for test files and conftest.py files.
63    /// Optionally accepts exclude patterns from configuration.
64    pub fn scan_workspace(&self, root_path: &Path) {
65        self.scan_workspace_with_excludes(root_path, &[]);
66    }
67
68    /// Scan a workspace directory with custom exclude patterns.
69    pub fn scan_workspace_with_excludes(&self, root_path: &Path, exclude_patterns: &[Pattern]) {
70        info!("Scanning workspace: {:?}", root_path);
71
72        // Defensive check: ensure the root path exists
73        if !root_path.exists() {
74            warn!(
75                "Workspace path does not exist, skipping scan: {:?}",
76                root_path
77            );
78            return;
79        }
80
81        // Phase 1: Collect all file paths (sequential, fast)
82        let mut files_to_process: Vec<std::path::PathBuf> = Vec::new();
83        let mut skipped_dirs = 0;
84
85        // Use WalkDir with filter to skip large/irrelevant directories
86        let walker = WalkDir::new(root_path).into_iter().filter_entry(|entry| {
87            // Allow files to pass through
88            if entry.file_type().is_file() {
89                return true;
90            }
91            // For directories, check if we should skip them
92            if let Some(dir_name) = entry.file_name().to_str() {
93                !Self::should_skip_directory(dir_name)
94            } else {
95                true
96            }
97        });
98
99        for entry in walker {
100            let entry = match entry {
101                Ok(e) => e,
102                Err(err) => {
103                    // Log directory traversal errors (permission denied, etc.)
104                    if err
105                        .io_error()
106                        .is_some_and(|e| e.kind() == std::io::ErrorKind::PermissionDenied)
107                    {
108                        warn!(
109                            "Permission denied accessing path during workspace scan: {}",
110                            err
111                        );
112                    } else {
113                        debug!("Error during workspace scan: {}", err);
114                    }
115                    continue;
116                }
117            };
118
119            let path = entry.path();
120
121            // Skip files in filtered directories (shouldn't happen with filter_entry, but just in case)
122            if path.components().any(|c| {
123                c.as_os_str()
124                    .to_str()
125                    .is_some_and(Self::should_skip_directory)
126            }) {
127                skipped_dirs += 1;
128                continue;
129            }
130
131            // Skip files matching user-configured exclude patterns
132            // Patterns are matched against paths relative to workspace root
133            if !exclude_patterns.is_empty() {
134                if let Ok(relative_path) = path.strip_prefix(root_path) {
135                    let relative_str = relative_path.to_string_lossy();
136                    if exclude_patterns.iter().any(|p| p.matches(&relative_str)) {
137                        debug!("Skipping excluded path: {:?}", path);
138                        continue;
139                    }
140                }
141            }
142
143            // Look for conftest.py or test_*.py or *_test.py files
144            if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
145                if filename == "conftest.py"
146                    || filename.starts_with("test_") && filename.ends_with(".py")
147                    || filename.ends_with("_test.py")
148                {
149                    files_to_process.push(path.to_path_buf());
150                }
151            }
152        }
153
154        if skipped_dirs > 0 {
155            debug!("Skipped {} entries in filtered directories", skipped_dirs);
156        }
157
158        let total_files = files_to_process.len();
159        info!("Found {} test/conftest files to process", total_files);
160
161        // Phase 2: Process files in parallel using rayon
162        // Use analyze_file_fresh since this is initial scan (no previous definitions to clean)
163        let error_count = AtomicUsize::new(0);
164        let permission_denied_count = AtomicUsize::new(0);
165
166        files_to_process.par_iter().for_each(|path| {
167            debug!("Found test/conftest file: {:?}", path);
168            match std::fs::read_to_string(path) {
169                Ok(content) => {
170                    self.analyze_file_fresh(path.clone(), &content);
171                }
172                Err(err) => {
173                    if err.kind() == std::io::ErrorKind::PermissionDenied {
174                        debug!("Permission denied reading file: {:?}", path);
175                        permission_denied_count.fetch_add(1, Ordering::Relaxed);
176                    } else {
177                        error!("Failed to read file {:?}: {}", path, err);
178                        error_count.fetch_add(1, Ordering::Relaxed);
179                    }
180                }
181            }
182        });
183
184        let errors = error_count.load(Ordering::Relaxed);
185        let permission_errors = permission_denied_count.load(Ordering::Relaxed);
186
187        if errors > 0 {
188            warn!("Workspace scan completed with {} read errors", errors);
189        }
190        if permission_errors > 0 {
191            warn!(
192                "Workspace scan: skipped {} files due to permission denied",
193                permission_errors
194            );
195        }
196
197        info!(
198            "Workspace scan complete. Processed {} files ({} permission denied, {} errors)",
199            total_files, permission_errors, errors
200        );
201
202        // Phase 3: Scan modules imported by conftest.py files
203        // This ensures fixtures defined in separate modules (imported via star import) are discovered
204        self.scan_imported_fixture_modules(root_path);
205
206        // Also scan virtual environment for pytest plugins
207        self.scan_venv_fixtures(root_path);
208
209        info!("Total fixtures defined: {}", self.definitions.len());
210        info!("Total files with fixture usages: {}", self.usages.len());
211    }
212
213    /// Scan Python modules that are imported by conftest.py files.
214    /// This discovers fixtures defined in separate modules that are re-exported via star imports.
215    /// Handles transitive imports (A imports B, B imports C) by iteratively scanning until no new modules are found.
216    fn scan_imported_fixture_modules(&self, _root_path: &Path) {
217        use std::collections::HashSet;
218
219        info!("Scanning for imported fixture modules");
220
221        // Track all files we've already processed to find imports from
222        let mut processed_files: HashSet<std::path::PathBuf> = HashSet::new();
223
224        // Start with conftest.py files
225        let mut files_to_check: Vec<std::path::PathBuf> = self
226            .file_cache
227            .iter()
228            .filter(|entry| {
229                entry
230                    .key()
231                    .file_name()
232                    .map(|n| n == "conftest.py")
233                    .unwrap_or(false)
234            })
235            .map(|entry| entry.key().clone())
236            .collect();
237
238        if files_to_check.is_empty() {
239            debug!("No conftest.py files found, skipping import scan");
240            return;
241        }
242
243        info!(
244            "Starting import scan with {} conftest.py files",
245            files_to_check.len()
246        );
247
248        // Iteratively process files until no new modules are discovered
249        let mut iteration = 0;
250        while !files_to_check.is_empty() {
251            iteration += 1;
252            debug!(
253                "Import scan iteration {}: checking {} files",
254                iteration,
255                files_to_check.len()
256            );
257
258            let mut new_modules: HashSet<std::path::PathBuf> = HashSet::new();
259
260            for file_path in &files_to_check {
261                if processed_files.contains(file_path) {
262                    continue;
263                }
264                processed_files.insert(file_path.clone());
265
266                // Get the file content
267                let Some(content) = self.get_file_content(file_path) else {
268                    continue;
269                };
270
271                // Parse the AST
272                let Some(parsed) = self.get_parsed_ast(file_path, &content) else {
273                    continue;
274                };
275
276                let line_index = self.get_line_index(file_path, &content);
277
278                // Extract imports
279                if let rustpython_parser::ast::Mod::Module(module) = parsed.as_ref() {
280                    let imports =
281                        self.extract_fixture_imports(&module.body, file_path, &line_index);
282
283                    for import in imports {
284                        // Resolve the import to a file path
285                        if let Some(resolved_path) =
286                            self.resolve_module_to_file(&import.module_path, file_path)
287                        {
288                            let canonical = self.get_canonical_path(resolved_path);
289                            // Only add if not already processed and not in file cache
290                            if !processed_files.contains(&canonical)
291                                && !self.file_cache.contains_key(&canonical)
292                            {
293                                new_modules.insert(canonical);
294                            }
295                        }
296                    }
297                }
298            }
299
300            if new_modules.is_empty() {
301                debug!("No new modules found in iteration {}", iteration);
302                break;
303            }
304
305            info!(
306                "Iteration {}: found {} new modules to analyze",
307                iteration,
308                new_modules.len()
309            );
310
311            // Analyze the new modules
312            for module_path in &new_modules {
313                if module_path.exists() {
314                    debug!("Analyzing imported module: {:?}", module_path);
315                    match std::fs::read_to_string(module_path) {
316                        Ok(content) => {
317                            self.analyze_file_fresh(module_path.clone(), &content);
318                        }
319                        Err(err) => {
320                            debug!("Failed to read imported module {:?}: {}", module_path, err);
321                        }
322                    }
323                }
324            }
325
326            // Next iteration will check the newly analyzed modules for their imports
327            files_to_check = new_modules.into_iter().collect();
328        }
329
330        info!(
331            "Imported fixture module scan complete after {} iterations",
332            iteration
333        );
334    }
335
336    /// Scan virtual environment for pytest plugin fixtures.
337    fn scan_venv_fixtures(&self, root_path: &Path) {
338        info!("Scanning for pytest plugins in virtual environment");
339
340        // Try to find virtual environment
341        let venv_paths = vec![
342            root_path.join(".venv"),
343            root_path.join("venv"),
344            root_path.join("env"),
345        ];
346
347        info!("Checking for venv in: {:?}", root_path);
348        for venv_path in &venv_paths {
349            debug!("Checking venv path: {:?}", venv_path);
350            if venv_path.exists() {
351                info!("Found virtual environment at: {:?}", venv_path);
352                self.scan_venv_site_packages(venv_path);
353                return;
354            } else {
355                debug!("  Does not exist: {:?}", venv_path);
356            }
357        }
358
359        // Also check for system-wide VIRTUAL_ENV
360        if let Ok(venv) = std::env::var("VIRTUAL_ENV") {
361            info!("Found VIRTUAL_ENV environment variable: {}", venv);
362            let venv_path = std::path::PathBuf::from(venv);
363            if venv_path.exists() {
364                info!("Using VIRTUAL_ENV: {:?}", venv_path);
365                self.scan_venv_site_packages(&venv_path);
366                return;
367            } else {
368                warn!("VIRTUAL_ENV path does not exist: {:?}", venv_path);
369            }
370        } else {
371            debug!("No VIRTUAL_ENV environment variable set");
372        }
373
374        warn!("No virtual environment found - third-party fixtures will not be available");
375    }
376
377    fn scan_venv_site_packages(&self, venv_path: &Path) {
378        info!("Scanning venv site-packages in: {:?}", venv_path);
379
380        // Find site-packages directory
381        let lib_path = venv_path.join("lib");
382        debug!("Checking lib path: {:?}", lib_path);
383
384        if lib_path.exists() {
385            // Look for python* directories
386            if let Ok(entries) = std::fs::read_dir(&lib_path) {
387                for entry in entries.flatten() {
388                    let path = entry.path();
389                    let dirname = path.file_name().unwrap_or_default().to_string_lossy();
390                    debug!("Found in lib: {:?}", dirname);
391
392                    if path.is_dir() && dirname.starts_with("python") {
393                        let site_packages = path.join("site-packages");
394                        debug!("Checking site-packages: {:?}", site_packages);
395
396                        if site_packages.exists() {
397                            info!("Found site-packages: {:?}", site_packages);
398                            self.scan_pytest_plugins(&site_packages);
399                            return;
400                        }
401                    }
402                }
403            }
404        }
405
406        // Try Windows path
407        let windows_site_packages = venv_path.join("Lib/site-packages");
408        debug!("Checking Windows path: {:?}", windows_site_packages);
409        if windows_site_packages.exists() {
410            info!("Found site-packages (Windows): {:?}", windows_site_packages);
411            self.scan_pytest_plugins(&windows_site_packages);
412            return;
413        }
414
415        warn!("Could not find site-packages in venv: {:?}", venv_path);
416    }
417
418    fn scan_pytest_plugins(&self, site_packages: &Path) {
419        info!("Scanning pytest plugins in: {:?}", site_packages);
420
421        // List of known pytest plugin prefixes/packages
422        let pytest_packages = vec![
423            // Existing plugins
424            "pytest_mock",
425            "pytest-mock",
426            "pytest_asyncio",
427            "pytest-asyncio",
428            "pytest_django",
429            "pytest-django",
430            "pytest_cov",
431            "pytest-cov",
432            "pytest_xdist",
433            "pytest-xdist",
434            "pytest_fixtures",
435            // Additional popular plugins
436            "pytest_flask",
437            "pytest-flask",
438            "pytest_httpx",
439            "pytest-httpx",
440            "pytest_postgresql",
441            "pytest-postgresql",
442            "pytest_mongodb",
443            "pytest-mongodb",
444            "pytest_redis",
445            "pytest-redis",
446            "pytest_elasticsearch",
447            "pytest-elasticsearch",
448            "pytest_rabbitmq",
449            "pytest-rabbitmq",
450            "pytest_mysql",
451            "pytest-mysql",
452            "pytest_docker",
453            "pytest-docker",
454            "pytest_kubernetes",
455            "pytest-kubernetes",
456            "pytest_celery",
457            "pytest-celery",
458            "pytest_tornado",
459            "pytest-tornado",
460            "pytest_aiohttp",
461            "pytest-aiohttp",
462            "pytest_sanic",
463            "pytest-sanic",
464            "pytest_fastapi",
465            "pytest-fastapi",
466            "pytest_alembic",
467            "pytest-alembic",
468            "pytest_sqlalchemy",
469            "pytest-sqlalchemy",
470            "pytest_factoryboy",
471            "pytest-factoryboy",
472            "pytest_freezegun",
473            "pytest-freezegun",
474            "pytest_mimesis",
475            "pytest-mimesis",
476            "pytest_lazy_fixture",
477            "pytest-lazy-fixture",
478            "pytest_cases",
479            "pytest-cases",
480            "pytest_bdd",
481            "pytest-bdd",
482            "pytest_benchmark",
483            "pytest-benchmark",
484            "pytest_timeout",
485            "pytest-timeout",
486            "pytest_retry",
487            "pytest-retry",
488            "pytest_repeat",
489            "pytest-repeat",
490            "pytest_rerunfailures",
491            "pytest-rerunfailures",
492            "pytest_ordering",
493            "pytest-ordering",
494            "pytest_dependency",
495            "pytest-dependency",
496            "pytest_random_order",
497            "pytest-random-order",
498            "pytest_picked",
499            "pytest-picked",
500            "pytest_testmon",
501            "pytest-testmon",
502            "pytest_split",
503            "pytest-split",
504            "pytest_env",
505            "pytest-env",
506            "pytest_dotenv",
507            "pytest-dotenv",
508            "pytest_html",
509            "pytest-html",
510            "pytest_json_report",
511            "pytest-json-report",
512            "pytest_metadata",
513            "pytest-metadata",
514            "pytest_instafail",
515            "pytest-instafail",
516            "pytest_clarity",
517            "pytest-clarity",
518            "pytest_sugar",
519            "pytest-sugar",
520            "pytest_emoji",
521            "pytest-emoji",
522            "pytest_play",
523            "pytest-play",
524            "pytest_selenium",
525            "pytest-selenium",
526            "pytest_playwright",
527            "pytest-playwright",
528            "pytest_splinter",
529            "pytest-splinter",
530        ];
531
532        let mut plugin_count = 0;
533
534        for entry in std::fs::read_dir(site_packages).into_iter().flatten() {
535            let entry = match entry {
536                Ok(e) => e,
537                Err(_) => continue,
538            };
539
540            let path = entry.path();
541            let filename = path.file_name().unwrap_or_default().to_string_lossy();
542
543            // Check if this is a pytest-related package
544            let is_pytest_package = pytest_packages.iter().any(|pkg| filename.contains(pkg))
545                || filename.starts_with("pytest")
546                || filename.contains("_pytest");
547
548            if is_pytest_package && path.is_dir() {
549                // Skip .dist-info directories - they don't contain code
550                if filename.ends_with(".dist-info") || filename.ends_with(".egg-info") {
551                    debug!("Skipping dist-info directory: {:?}", filename);
552                    continue;
553                }
554
555                info!("Scanning pytest plugin: {:?}", path);
556                plugin_count += 1;
557                self.scan_plugin_directory(&path);
558            } else {
559                // Log packages we're skipping for debugging
560                if filename.contains("mock") {
561                    debug!("Found mock-related package (not scanning): {:?}", filename);
562                }
563            }
564        }
565
566        info!("Scanned {} pytest plugin packages", plugin_count);
567    }
568
569    fn scan_plugin_directory(&self, plugin_dir: &Path) {
570        // Recursively scan for Python files with fixtures
571        for entry in WalkDir::new(plugin_dir)
572            .max_depth(3) // Limit depth to avoid scanning too much
573            .into_iter()
574            .filter_map(|e| e.ok())
575        {
576            let path = entry.path();
577
578            if path.extension().and_then(|s| s.to_str()) == Some("py") {
579                // Only scan files that might have fixtures (not test files)
580                if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
581                    // Skip test files and __pycache__
582                    if filename.starts_with("test_") || filename.contains("__pycache__") {
583                        continue;
584                    }
585
586                    debug!("Scanning plugin file: {:?}", path);
587                    if let Ok(content) = std::fs::read_to_string(path) {
588                        self.analyze_file(path.to_path_buf(), &content);
589                    }
590                }
591            }
592        }
593    }
594}