pytest_language_server/fixtures/
scanner.rs

1//! Workspace and virtual environment scanning for fixture definitions.
2
3use super::FixtureDatabase;
4use rayon::prelude::*;
5use std::path::Path;
6use std::sync::atomic::{AtomicUsize, Ordering};
7use tracing::{debug, error, info, warn};
8use walkdir::WalkDir;
9
10impl FixtureDatabase {
11    /// Directories that should be skipped during workspace scanning.
12    /// These are typically large directories that don't contain test files.
13    const SKIP_DIRECTORIES: &'static [&'static str] = &[
14        // Version control
15        ".git",
16        ".hg",
17        ".svn",
18        // Virtual environments (scanned separately for plugins)
19        ".venv",
20        "venv",
21        "env",
22        ".env",
23        // Python caches and build artifacts
24        "__pycache__",
25        ".pytest_cache",
26        ".mypy_cache",
27        ".ruff_cache",
28        ".tox",
29        ".nox",
30        "build",
31        "dist",
32        ".eggs",
33        // JavaScript/Node
34        "node_modules",
35        "bower_components",
36        // Rust (for mixed projects)
37        "target",
38        // IDE and editor directories
39        ".idea",
40        ".vscode",
41        // Other common large directories
42        ".cache",
43        ".local",
44        "vendor",
45        "site-packages",
46    ];
47
48    /// Check if a directory should be skipped during scanning.
49    pub(crate) fn should_skip_directory(dir_name: &str) -> bool {
50        // Check exact matches
51        if Self::SKIP_DIRECTORIES.contains(&dir_name) {
52            return true;
53        }
54        // Also skip directories ending with .egg-info
55        if dir_name.ends_with(".egg-info") {
56            return true;
57        }
58        false
59    }
60
61    /// Scan a workspace directory for test files and conftest.py files.
62    pub fn scan_workspace(&self, root_path: &Path) {
63        info!("Scanning workspace: {:?}", root_path);
64
65        // Defensive check: ensure the root path exists
66        if !root_path.exists() {
67            warn!(
68                "Workspace path does not exist, skipping scan: {:?}",
69                root_path
70            );
71            return;
72        }
73
74        // Phase 1: Collect all file paths (sequential, fast)
75        let mut files_to_process: Vec<std::path::PathBuf> = Vec::new();
76        let mut skipped_dirs = 0;
77
78        // Use WalkDir with filter to skip large/irrelevant directories
79        let walker = WalkDir::new(root_path).into_iter().filter_entry(|entry| {
80            // Allow files to pass through
81            if entry.file_type().is_file() {
82                return true;
83            }
84            // For directories, check if we should skip them
85            if let Some(dir_name) = entry.file_name().to_str() {
86                !Self::should_skip_directory(dir_name)
87            } else {
88                true
89            }
90        });
91
92        for entry in walker {
93            let entry = match entry {
94                Ok(e) => e,
95                Err(err) => {
96                    // Log directory traversal errors (permission denied, etc.)
97                    if err
98                        .io_error()
99                        .is_some_and(|e| e.kind() == std::io::ErrorKind::PermissionDenied)
100                    {
101                        warn!(
102                            "Permission denied accessing path during workspace scan: {}",
103                            err
104                        );
105                    } else {
106                        debug!("Error during workspace scan: {}", err);
107                    }
108                    continue;
109                }
110            };
111
112            let path = entry.path();
113
114            // Skip files in filtered directories (shouldn't happen with filter_entry, but just in case)
115            if path.components().any(|c| {
116                c.as_os_str()
117                    .to_str()
118                    .is_some_and(Self::should_skip_directory)
119            }) {
120                skipped_dirs += 1;
121                continue;
122            }
123
124            // Look for conftest.py or test_*.py or *_test.py files
125            if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
126                if filename == "conftest.py"
127                    || filename.starts_with("test_") && filename.ends_with(".py")
128                    || filename.ends_with("_test.py")
129                {
130                    files_to_process.push(path.to_path_buf());
131                }
132            }
133        }
134
135        if skipped_dirs > 0 {
136            debug!("Skipped {} entries in filtered directories", skipped_dirs);
137        }
138
139        let total_files = files_to_process.len();
140        info!("Found {} test/conftest files to process", total_files);
141
142        // Phase 2: Process files in parallel using rayon
143        // Use analyze_file_fresh since this is initial scan (no previous definitions to clean)
144        let error_count = AtomicUsize::new(0);
145
146        files_to_process.par_iter().for_each(|path| {
147            debug!("Found test/conftest file: {:?}", path);
148            match std::fs::read_to_string(path) {
149                Ok(content) => {
150                    self.analyze_file_fresh(path.clone(), &content);
151                }
152                Err(err) => {
153                    if err.kind() == std::io::ErrorKind::PermissionDenied {
154                        warn!("Permission denied reading file: {:?}", path);
155                    } else {
156                        error!("Failed to read file {:?}: {}", path, err);
157                        error_count.fetch_add(1, Ordering::Relaxed);
158                    }
159                }
160            }
161        });
162
163        let errors = error_count.load(Ordering::Relaxed);
164        if errors > 0 {
165            warn!("Workspace scan completed with {} errors", errors);
166        }
167
168        info!("Workspace scan complete. Processed {} files", total_files);
169
170        // Also scan virtual environment for pytest plugins
171        self.scan_venv_fixtures(root_path);
172
173        info!("Total fixtures defined: {}", self.definitions.len());
174        info!("Total files with fixture usages: {}", self.usages.len());
175    }
176
177    /// Scan virtual environment for pytest plugin fixtures.
178    fn scan_venv_fixtures(&self, root_path: &Path) {
179        info!("Scanning for pytest plugins in virtual environment");
180
181        // Try to find virtual environment
182        let venv_paths = vec![
183            root_path.join(".venv"),
184            root_path.join("venv"),
185            root_path.join("env"),
186        ];
187
188        info!("Checking for venv in: {:?}", root_path);
189        for venv_path in &venv_paths {
190            debug!("Checking venv path: {:?}", venv_path);
191            if venv_path.exists() {
192                info!("Found virtual environment at: {:?}", venv_path);
193                self.scan_venv_site_packages(venv_path);
194                return;
195            } else {
196                debug!("  Does not exist: {:?}", venv_path);
197            }
198        }
199
200        // Also check for system-wide VIRTUAL_ENV
201        if let Ok(venv) = std::env::var("VIRTUAL_ENV") {
202            info!("Found VIRTUAL_ENV environment variable: {}", venv);
203            let venv_path = std::path::PathBuf::from(venv);
204            if venv_path.exists() {
205                info!("Using VIRTUAL_ENV: {:?}", venv_path);
206                self.scan_venv_site_packages(&venv_path);
207                return;
208            } else {
209                warn!("VIRTUAL_ENV path does not exist: {:?}", venv_path);
210            }
211        } else {
212            debug!("No VIRTUAL_ENV environment variable set");
213        }
214
215        warn!("No virtual environment found - third-party fixtures will not be available");
216    }
217
218    fn scan_venv_site_packages(&self, venv_path: &Path) {
219        info!("Scanning venv site-packages in: {:?}", venv_path);
220
221        // Find site-packages directory
222        let lib_path = venv_path.join("lib");
223        debug!("Checking lib path: {:?}", lib_path);
224
225        if lib_path.exists() {
226            // Look for python* directories
227            if let Ok(entries) = std::fs::read_dir(&lib_path) {
228                for entry in entries.flatten() {
229                    let path = entry.path();
230                    let dirname = path.file_name().unwrap_or_default().to_string_lossy();
231                    debug!("Found in lib: {:?}", dirname);
232
233                    if path.is_dir() && dirname.starts_with("python") {
234                        let site_packages = path.join("site-packages");
235                        debug!("Checking site-packages: {:?}", site_packages);
236
237                        if site_packages.exists() {
238                            info!("Found site-packages: {:?}", site_packages);
239                            self.scan_pytest_plugins(&site_packages);
240                            return;
241                        }
242                    }
243                }
244            }
245        }
246
247        // Try Windows path
248        let windows_site_packages = venv_path.join("Lib/site-packages");
249        debug!("Checking Windows path: {:?}", windows_site_packages);
250        if windows_site_packages.exists() {
251            info!("Found site-packages (Windows): {:?}", windows_site_packages);
252            self.scan_pytest_plugins(&windows_site_packages);
253            return;
254        }
255
256        warn!("Could not find site-packages in venv: {:?}", venv_path);
257    }
258
259    fn scan_pytest_plugins(&self, site_packages: &Path) {
260        info!("Scanning pytest plugins in: {:?}", site_packages);
261
262        // List of known pytest plugin prefixes/packages
263        let pytest_packages = vec![
264            // Existing plugins
265            "pytest_mock",
266            "pytest-mock",
267            "pytest_asyncio",
268            "pytest-asyncio",
269            "pytest_django",
270            "pytest-django",
271            "pytest_cov",
272            "pytest-cov",
273            "pytest_xdist",
274            "pytest-xdist",
275            "pytest_fixtures",
276            // Additional popular plugins
277            "pytest_flask",
278            "pytest-flask",
279            "pytest_httpx",
280            "pytest-httpx",
281            "pytest_postgresql",
282            "pytest-postgresql",
283            "pytest_mongodb",
284            "pytest-mongodb",
285            "pytest_redis",
286            "pytest-redis",
287            "pytest_elasticsearch",
288            "pytest-elasticsearch",
289            "pytest_rabbitmq",
290            "pytest-rabbitmq",
291            "pytest_mysql",
292            "pytest-mysql",
293            "pytest_docker",
294            "pytest-docker",
295            "pytest_kubernetes",
296            "pytest-kubernetes",
297            "pytest_celery",
298            "pytest-celery",
299            "pytest_tornado",
300            "pytest-tornado",
301            "pytest_aiohttp",
302            "pytest-aiohttp",
303            "pytest_sanic",
304            "pytest-sanic",
305            "pytest_fastapi",
306            "pytest-fastapi",
307            "pytest_alembic",
308            "pytest-alembic",
309            "pytest_sqlalchemy",
310            "pytest-sqlalchemy",
311            "pytest_factoryboy",
312            "pytest-factoryboy",
313            "pytest_freezegun",
314            "pytest-freezegun",
315            "pytest_mimesis",
316            "pytest-mimesis",
317            "pytest_lazy_fixture",
318            "pytest-lazy-fixture",
319            "pytest_cases",
320            "pytest-cases",
321            "pytest_bdd",
322            "pytest-bdd",
323            "pytest_benchmark",
324            "pytest-benchmark",
325            "pytest_timeout",
326            "pytest-timeout",
327            "pytest_retry",
328            "pytest-retry",
329            "pytest_repeat",
330            "pytest-repeat",
331            "pytest_rerunfailures",
332            "pytest-rerunfailures",
333            "pytest_ordering",
334            "pytest-ordering",
335            "pytest_dependency",
336            "pytest-dependency",
337            "pytest_random_order",
338            "pytest-random-order",
339            "pytest_picked",
340            "pytest-picked",
341            "pytest_testmon",
342            "pytest-testmon",
343            "pytest_split",
344            "pytest-split",
345            "pytest_env",
346            "pytest-env",
347            "pytest_dotenv",
348            "pytest-dotenv",
349            "pytest_html",
350            "pytest-html",
351            "pytest_json_report",
352            "pytest-json-report",
353            "pytest_metadata",
354            "pytest-metadata",
355            "pytest_instafail",
356            "pytest-instafail",
357            "pytest_clarity",
358            "pytest-clarity",
359            "pytest_sugar",
360            "pytest-sugar",
361            "pytest_emoji",
362            "pytest-emoji",
363            "pytest_play",
364            "pytest-play",
365            "pytest_selenium",
366            "pytest-selenium",
367            "pytest_playwright",
368            "pytest-playwright",
369            "pytest_splinter",
370            "pytest-splinter",
371        ];
372
373        let mut plugin_count = 0;
374
375        for entry in std::fs::read_dir(site_packages).into_iter().flatten() {
376            let entry = match entry {
377                Ok(e) => e,
378                Err(_) => continue,
379            };
380
381            let path = entry.path();
382            let filename = path.file_name().unwrap_or_default().to_string_lossy();
383
384            // Check if this is a pytest-related package
385            let is_pytest_package = pytest_packages.iter().any(|pkg| filename.contains(pkg))
386                || filename.starts_with("pytest")
387                || filename.contains("_pytest");
388
389            if is_pytest_package && path.is_dir() {
390                // Skip .dist-info directories - they don't contain code
391                if filename.ends_with(".dist-info") || filename.ends_with(".egg-info") {
392                    debug!("Skipping dist-info directory: {:?}", filename);
393                    continue;
394                }
395
396                info!("Scanning pytest plugin: {:?}", path);
397                plugin_count += 1;
398                self.scan_plugin_directory(&path);
399            } else {
400                // Log packages we're skipping for debugging
401                if filename.contains("mock") {
402                    debug!("Found mock-related package (not scanning): {:?}", filename);
403                }
404            }
405        }
406
407        info!("Scanned {} pytest plugin packages", plugin_count);
408    }
409
410    fn scan_plugin_directory(&self, plugin_dir: &Path) {
411        // Recursively scan for Python files with fixtures
412        for entry in WalkDir::new(plugin_dir)
413            .max_depth(3) // Limit depth to avoid scanning too much
414            .into_iter()
415            .filter_map(|e| e.ok())
416        {
417            let path = entry.path();
418
419            if path.extension().and_then(|s| s.to_str()) == Some("py") {
420                // Only scan files that might have fixtures (not test files)
421                if let Some(filename) = path.file_name().and_then(|n| n.to_str()) {
422                    // Skip test files and __pycache__
423                    if filename.starts_with("test_") || filename.contains("__pycache__") {
424                        continue;
425                    }
426
427                    debug!("Scanning plugin file: {:?}", path);
428                    if let Ok(content) = std::fs::read_to_string(path) {
429                        self.analyze_file(path.to_path_buf(), &content);
430                    }
431                }
432            }
433        }
434    }
435}