pytest_language_server/fixtures/
scanner.rs

1//! Workspace and virtual environment scanning for fixture definitions.
2
3use super::FixtureDatabase;
4use glob::Pattern;
5use rayon::prelude::*;
6use std::path::Path;
7use std::sync::atomic::{AtomicUsize, Ordering};
8use tracing::{debug, error, info, warn};
9use walkdir::WalkDir;
10
11impl FixtureDatabase {
12    /// Directories that should be skipped during workspace scanning.
13    /// These are typically large directories that don't contain test files.
14    const SKIP_DIRECTORIES: &'static [&'static str] = &[
15        // Version control
16        ".git",
17        ".hg",
18        ".svn",
19        // Virtual environments (scanned separately for plugins)
20        ".venv",
21        "venv",
22        "env",
23        ".env",
24        // Python caches and build artifacts
25        "__pycache__",
26        ".pytest_cache",
27        ".mypy_cache",
28        ".ruff_cache",
29        ".tox",
30        ".nox",
31        "build",
32        "dist",
33        ".eggs",
34        // JavaScript/Node
35        "node_modules",
36        "bower_components",
37        // Rust (for mixed projects)
38        "target",
39        // IDE and editor directories
40        ".idea",
41        ".vscode",
42        // Other common large directories
43        ".cache",
44        ".local",
45        "vendor",
46        "site-packages",
47    ];
48
49    /// Check if a directory should be skipped during scanning.
50    pub(crate) fn should_skip_directory(dir_name: &str) -> bool {
51        // Check exact matches
52        if Self::SKIP_DIRECTORIES.contains(&dir_name) {
53            return true;
54        }
55        // Also skip directories ending with .egg-info
56        if dir_name.ends_with(".egg-info") {
57            return true;
58        }
59        false
60    }
61
62    /// Scan a workspace directory for test files and conftest.py files.
63    /// Optionally accepts exclude patterns from configuration.
64    pub fn scan_workspace(&self, root_path: &Path) {
65        self.scan_workspace_with_excludes(root_path, &[]);
66    }
67
68    /// Scan a workspace directory with custom exclude patterns.
69    pub fn scan_workspace_with_excludes(&self, root_path: &Path, exclude_patterns: &[Pattern]) {
70        info!("Scanning workspace: {:?}", root_path);
71
72        // Defensive check: ensure the root path exists
73        if !root_path.exists() {
74            warn!(
75                "Workspace path does not exist, skipping scan: {:?}",
76                root_path
77            );
78            return;
79        }
80
81        // Phase 1: Collect all file paths (sequential, fast)
82        let mut files_to_process: Vec<std::path::PathBuf> = Vec::new();
83        let mut skipped_dirs = 0;
84
85        // Use WalkDir with filter to skip large/irrelevant directories
86        let walker = WalkDir::new(root_path).into_iter().filter_entry(|entry| {
87            // Allow files to pass through
88            if entry.file_type().is_file() {
89                return true;
90            }
91            // For directories, check if we should skip them
92            if let Some(dir_name) = entry.file_name().to_str() {
93                !Self::should_skip_directory(dir_name)
94            } else {
95                true
96            }
97        });
98
99        for entry in walker {
100            let entry = match entry {
101                Ok(e) => e,
102                Err(err) => {
103                    // Log directory traversal errors (permission denied, etc.)
104                    if err
105                        .io_error()
106                        .is_some_and(|e| e.kind() == std::io::ErrorKind::PermissionDenied)
107                    {
108                        warn!(
109                            "Permission denied accessing path during workspace scan: {}",
110                            err
111                        );
112                    } else {
113                        debug!("Error during workspace scan: {}", err);
114                    }
115                    continue;
116                }
117            };
118
119            let path = entry.path();
120
121            // Skip files in filtered directories (shouldn't happen with filter_entry, but just in case)
122            if path.components().any(|c| {
123                c.as_os_str()
124                    .to_str()
125                    .is_some_and(Self::should_skip_directory)
126            }) {
127                skipped_dirs += 1;
128                continue;
129            }
130
131            // Skip files matching user-configured exclude patterns
132            // Patterns are matched against paths relative to workspace root
133            if !exclude_patterns.is_empty() {
134                if let Ok(relative_path) = path.strip_prefix(root_path) {
135                    let relative_str = relative_path.to_string_lossy();
136                    if exclude_patterns.iter().any(|p| p.matches(&relative_str)) {
137                        debug!("Skipping excluded path: {:?}", path);
138                        continue;
139                    }
140                }
141            }
142
143            // Look for conftest.py or test_*.py or *_test.py files
144            if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
145                if filename == "conftest.py"
146                    || filename.starts_with("test_") && filename.ends_with(".py")
147                    || filename.ends_with("_test.py")
148                {
149                    files_to_process.push(path.to_path_buf());
150                }
151            }
152        }
153
154        if skipped_dirs > 0 {
155            debug!("Skipped {} entries in filtered directories", skipped_dirs);
156        }
157
158        let total_files = files_to_process.len();
159        info!("Found {} test/conftest files to process", total_files);
160
161        // Phase 2: Process files in parallel using rayon
162        // Use analyze_file_fresh since this is initial scan (no previous definitions to clean)
163        let error_count = AtomicUsize::new(0);
164
165        files_to_process.par_iter().for_each(|path| {
166            debug!("Found test/conftest file: {:?}", path);
167            match std::fs::read_to_string(path) {
168                Ok(content) => {
169                    self.analyze_file_fresh(path.clone(), &content);
170                }
171                Err(err) => {
172                    if err.kind() == std::io::ErrorKind::PermissionDenied {
173                        warn!("Permission denied reading file: {:?}", path);
174                    } else {
175                        error!("Failed to read file {:?}: {}", path, err);
176                        error_count.fetch_add(1, Ordering::Relaxed);
177                    }
178                }
179            }
180        });
181
182        let errors = error_count.load(Ordering::Relaxed);
183        if errors > 0 {
184            warn!("Workspace scan completed with {} errors", errors);
185        }
186
187        info!("Workspace scan complete. Processed {} files", total_files);
188
189        // Also scan virtual environment for pytest plugins
190        self.scan_venv_fixtures(root_path);
191
192        info!("Total fixtures defined: {}", self.definitions.len());
193        info!("Total files with fixture usages: {}", self.usages.len());
194    }
195
196    /// Scan virtual environment for pytest plugin fixtures.
197    fn scan_venv_fixtures(&self, root_path: &Path) {
198        info!("Scanning for pytest plugins in virtual environment");
199
200        // Try to find virtual environment
201        let venv_paths = vec![
202            root_path.join(".venv"),
203            root_path.join("venv"),
204            root_path.join("env"),
205        ];
206
207        info!("Checking for venv in: {:?}", root_path);
208        for venv_path in &venv_paths {
209            debug!("Checking venv path: {:?}", venv_path);
210            if venv_path.exists() {
211                info!("Found virtual environment at: {:?}", venv_path);
212                self.scan_venv_site_packages(venv_path);
213                return;
214            } else {
215                debug!("  Does not exist: {:?}", venv_path);
216            }
217        }
218
219        // Also check for system-wide VIRTUAL_ENV
220        if let Ok(venv) = std::env::var("VIRTUAL_ENV") {
221            info!("Found VIRTUAL_ENV environment variable: {}", venv);
222            let venv_path = std::path::PathBuf::from(venv);
223            if venv_path.exists() {
224                info!("Using VIRTUAL_ENV: {:?}", venv_path);
225                self.scan_venv_site_packages(&venv_path);
226                return;
227            } else {
228                warn!("VIRTUAL_ENV path does not exist: {:?}", venv_path);
229            }
230        } else {
231            debug!("No VIRTUAL_ENV environment variable set");
232        }
233
234        warn!("No virtual environment found - third-party fixtures will not be available");
235    }
236
237    fn scan_venv_site_packages(&self, venv_path: &Path) {
238        info!("Scanning venv site-packages in: {:?}", venv_path);
239
240        // Find site-packages directory
241        let lib_path = venv_path.join("lib");
242        debug!("Checking lib path: {:?}", lib_path);
243
244        if lib_path.exists() {
245            // Look for python* directories
246            if let Ok(entries) = std::fs::read_dir(&lib_path) {
247                for entry in entries.flatten() {
248                    let path = entry.path();
249                    let dirname = path.file_name().unwrap_or_default().to_string_lossy();
250                    debug!("Found in lib: {:?}", dirname);
251
252                    if path.is_dir() && dirname.starts_with("python") {
253                        let site_packages = path.join("site-packages");
254                        debug!("Checking site-packages: {:?}", site_packages);
255
256                        if site_packages.exists() {
257                            info!("Found site-packages: {:?}", site_packages);
258                            self.scan_pytest_plugins(&site_packages);
259                            return;
260                        }
261                    }
262                }
263            }
264        }
265
266        // Try Windows path
267        let windows_site_packages = venv_path.join("Lib/site-packages");
268        debug!("Checking Windows path: {:?}", windows_site_packages);
269        if windows_site_packages.exists() {
270            info!("Found site-packages (Windows): {:?}", windows_site_packages);
271            self.scan_pytest_plugins(&windows_site_packages);
272            return;
273        }
274
275        warn!("Could not find site-packages in venv: {:?}", venv_path);
276    }
277
278    fn scan_pytest_plugins(&self, site_packages: &Path) {
279        info!("Scanning pytest plugins in: {:?}", site_packages);
280
281        // List of known pytest plugin prefixes/packages
282        let pytest_packages = vec![
283            // Existing plugins
284            "pytest_mock",
285            "pytest-mock",
286            "pytest_asyncio",
287            "pytest-asyncio",
288            "pytest_django",
289            "pytest-django",
290            "pytest_cov",
291            "pytest-cov",
292            "pytest_xdist",
293            "pytest-xdist",
294            "pytest_fixtures",
295            // Additional popular plugins
296            "pytest_flask",
297            "pytest-flask",
298            "pytest_httpx",
299            "pytest-httpx",
300            "pytest_postgresql",
301            "pytest-postgresql",
302            "pytest_mongodb",
303            "pytest-mongodb",
304            "pytest_redis",
305            "pytest-redis",
306            "pytest_elasticsearch",
307            "pytest-elasticsearch",
308            "pytest_rabbitmq",
309            "pytest-rabbitmq",
310            "pytest_mysql",
311            "pytest-mysql",
312            "pytest_docker",
313            "pytest-docker",
314            "pytest_kubernetes",
315            "pytest-kubernetes",
316            "pytest_celery",
317            "pytest-celery",
318            "pytest_tornado",
319            "pytest-tornado",
320            "pytest_aiohttp",
321            "pytest-aiohttp",
322            "pytest_sanic",
323            "pytest-sanic",
324            "pytest_fastapi",
325            "pytest-fastapi",
326            "pytest_alembic",
327            "pytest-alembic",
328            "pytest_sqlalchemy",
329            "pytest-sqlalchemy",
330            "pytest_factoryboy",
331            "pytest-factoryboy",
332            "pytest_freezegun",
333            "pytest-freezegun",
334            "pytest_mimesis",
335            "pytest-mimesis",
336            "pytest_lazy_fixture",
337            "pytest-lazy-fixture",
338            "pytest_cases",
339            "pytest-cases",
340            "pytest_bdd",
341            "pytest-bdd",
342            "pytest_benchmark",
343            "pytest-benchmark",
344            "pytest_timeout",
345            "pytest-timeout",
346            "pytest_retry",
347            "pytest-retry",
348            "pytest_repeat",
349            "pytest-repeat",
350            "pytest_rerunfailures",
351            "pytest-rerunfailures",
352            "pytest_ordering",
353            "pytest-ordering",
354            "pytest_dependency",
355            "pytest-dependency",
356            "pytest_random_order",
357            "pytest-random-order",
358            "pytest_picked",
359            "pytest-picked",
360            "pytest_testmon",
361            "pytest-testmon",
362            "pytest_split",
363            "pytest-split",
364            "pytest_env",
365            "pytest-env",
366            "pytest_dotenv",
367            "pytest-dotenv",
368            "pytest_html",
369            "pytest-html",
370            "pytest_json_report",
371            "pytest-json-report",
372            "pytest_metadata",
373            "pytest-metadata",
374            "pytest_instafail",
375            "pytest-instafail",
376            "pytest_clarity",
377            "pytest-clarity",
378            "pytest_sugar",
379            "pytest-sugar",
380            "pytest_emoji",
381            "pytest-emoji",
382            "pytest_play",
383            "pytest-play",
384            "pytest_selenium",
385            "pytest-selenium",
386            "pytest_playwright",
387            "pytest-playwright",
388            "pytest_splinter",
389            "pytest-splinter",
390        ];
391
392        let mut plugin_count = 0;
393
394        for entry in std::fs::read_dir(site_packages).into_iter().flatten() {
395            let entry = match entry {
396                Ok(e) => e,
397                Err(_) => continue,
398            };
399
400            let path = entry.path();
401            let filename = path.file_name().unwrap_or_default().to_string_lossy();
402
403            // Check if this is a pytest-related package
404            let is_pytest_package = pytest_packages.iter().any(|pkg| filename.contains(pkg))
405                || filename.starts_with("pytest")
406                || filename.contains("_pytest");
407
408            if is_pytest_package && path.is_dir() {
409                // Skip .dist-info directories - they don't contain code
410                if filename.ends_with(".dist-info") || filename.ends_with(".egg-info") {
411                    debug!("Skipping dist-info directory: {:?}", filename);
412                    continue;
413                }
414
415                info!("Scanning pytest plugin: {:?}", path);
416                plugin_count += 1;
417                self.scan_plugin_directory(&path);
418            } else {
419                // Log packages we're skipping for debugging
420                if filename.contains("mock") {
421                    debug!("Found mock-related package (not scanning): {:?}", filename);
422                }
423            }
424        }
425
426        info!("Scanned {} pytest plugin packages", plugin_count);
427    }
428
429    fn scan_plugin_directory(&self, plugin_dir: &Path) {
430        // Recursively scan for Python files with fixtures
431        for entry in WalkDir::new(plugin_dir)
432            .max_depth(3) // Limit depth to avoid scanning too much
433            .into_iter()
434            .filter_map(|e| e.ok())
435        {
436            let path = entry.path();
437
438            if path.extension().and_then(|s| s.to_str()) == Some("py") {
439                // Only scan files that might have fixtures (not test files)
440                if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
441                    // Skip test files and __pycache__
442                    if filename.starts_with("test_") || filename.contains("__pycache__") {
443                        continue;
444                    }
445
446                    debug!("Scanning plugin file: {:?}", path);
447                    if let Ok(content) = std::fs::read_to_string(path) {
448                        self.analyze_file(path.to_path_buf(), &content);
449                    }
450                }
451            }
452        }
453    }
454}