Skip to main content

clean_dev_dirs/
scanner.rs

1//! Directory scanning and project detection functionality.
2//!
3//! This module provides the core scanning logic that traverses directory trees
4//! to find development projects and their build artifacts. It supports parallel
5//! processing for improved performance and handles various error conditions
6//! gracefully.
7
8use std::{
9    fs,
10    path::Path,
11    sync::{Arc, Mutex},
12};
13
14use colored::Colorize;
15use indicatif::{ProgressBar, ProgressStyle};
16use rayon::prelude::*;
17use serde_json::{Value, from_str};
18use walkdir::{DirEntry, WalkDir};
19
20use crate::{
21    config::{ProjectFilter, ScanOptions},
22    project::{BuildArtifacts, Project, ProjectType},
23};
24
/// Directory scanner for detecting development projects.
///
/// A `Scanner` holds the settings that drive a scan: the general scan
/// options, the project-type filter, and the quiet flag. Detection is
/// dispatched per directory to ecosystem-specific detectors (Rust, Node.js,
/// Java, Swift, .NET, Python, Go and C/C++), each of which reports a project
/// together with the build-artifact directory that can be cleaned.
pub struct Scanner {
    /// Configuration options for scanning behavior (verbosity, skip list, ...).
    scan_options: ScanOptions,

    /// Filter to restrict scanning to specific project types.
    project_filter: ProjectFilter,

    /// When `true`, suppresses progress spinner output (used by `--json` mode).
    quiet: bool,
}
41
42impl Scanner {
43    /// Create a new scanner with the specified options.
44    ///
45    /// # Arguments
46    ///
47    /// * `scan_options` - Configuration for scanning behavior (threads, verbosity, etc.)
48    /// * `project_filter` - Filter to restrict scanning to specific project types
49    ///
50    /// # Returns
51    ///
52    /// A new `Scanner` instance configured with the provided options.
53    ///
54    /// # Examples
55    ///
56    /// ```
57    /// # use crate::{Scanner, ScanOptions, ProjectFilter};
58    /// let scan_options = ScanOptions {
59    ///     verbose: true,
60    ///     threads: 4,
61    ///     skip: vec![],
62    /// };
63    ///
64    /// let scanner = Scanner::new(scan_options, ProjectFilter::All);
65    /// ```
66    #[must_use]
67    pub const fn new(scan_options: ScanOptions, project_filter: ProjectFilter) -> Self {
68        Self {
69            scan_options,
70            project_filter,
71            quiet: false,
72        }
73    }
74
    /// Enable or disable quiet mode (suppresses progress spinner).
    ///
    /// Builder-style setter: consumes the scanner, stores `quiet`, and hands
    /// the scanner back so the call can be chained after [`Scanner::new`].
    /// When quiet mode is active `scan_directory` uses a hidden progress bar,
    /// which is required for `--json` output so that only the final JSON is
    /// printed.
    ///
    /// # Arguments
    ///
    /// * `quiet` - `true` to hide the spinner, `false` to show it
    #[must_use]
    pub const fn with_quiet(mut self, quiet: bool) -> Self {
        self.quiet = quiet;
        self
    }
84
85    /// Scan a directory tree for development projects.
86    ///
87    /// This method performs a recursive scan of the specified directory to find
88    /// development projects. It operates in two phases:
89    /// 1. Directory traversal to identify potential projects
90    /// 2. Parallel size calculation for build directories
91    ///
92    /// # Arguments
93    ///
94    /// * `root` - The root directory to start scanning from
95    ///
96    /// # Returns
97    ///
98    /// A vector of `Project` instances representing all detected projects with
99    /// non-zero build directory sizes.
100    ///
101    /// # Panics
102    ///
103    /// This method may panic if the progress bar template string is invalid,
104    /// though this should not occur under normal circumstances as the template
105    /// is hardcoded and valid.
106    ///
107    /// # Examples
108    ///
109    /// ```
110    /// # use std::path::Path;
111    /// # use crate::Scanner;
112    /// let projects = scanner.scan_directory(Path::new("/path/to/projects"));
113    /// println!("Found {} projects", projects.len());
114    /// ```
115    ///
116    /// # Performance
117    ///
118    /// This method uses parallel processing for both directory traversal and
119    /// size calculation to maximize performance on systems with multiple cores
120    /// and fast storage.
121    pub fn scan_directory(&self, root: &Path) -> Vec<Project> {
122        let errors = Arc::new(Mutex::new(Vec::<String>::new()));
123
124        let progress = if self.quiet {
125            ProgressBar::hidden()
126        } else {
127            let pb = ProgressBar::new_spinner();
128            pb.set_style(
129                ProgressStyle::default_spinner()
130                    .template("{spinner:.green} {msg}")
131                    .unwrap(),
132            );
133            pb.set_message("Scanning directories...");
134            pb
135        };
136
137        // Find all potential project directories
138        let potential_projects: Vec<_> = WalkDir::new(root)
139            .into_iter()
140            .filter_map(Result::ok)
141            .filter(|entry| self.should_scan_entry(entry))
142            .collect::<Vec<_>>()
143            .into_par_iter()
144            .filter_map(|entry| self.detect_project(&entry, &errors))
145            .collect();
146
147        progress.finish_with_message("✅ Directory scan complete");
148
149        // Process projects in parallel to calculate sizes
150        let projects_with_sizes: Vec<_> = potential_projects
151            .into_par_iter()
152            .filter_map(|mut project| {
153                let size = self.calculate_build_dir_size(&project.build_arts.path);
154                project.build_arts.size = size;
155
156                if size > 0 { Some(project) } else { None }
157            })
158            .collect();
159
160        // Print errors if verbose
161        if self.scan_options.verbose {
162            let errors = errors.lock().unwrap();
163            for error in errors.iter() {
164                eprintln!("{}", error.red());
165            }
166        }
167
168        projects_with_sizes
169    }
170
171    /// Calculate the total size of a build directory.
172    ///
173    /// This method recursively traverses the specified directory and sums up
174    /// the sizes of all files contained within it. It handles errors gracefully
175    /// and optionally reports them in verbose mode.
176    ///
177    /// # Arguments
178    ///
179    /// * `path` - Path to the build directory to measure
180    ///
181    /// # Returns
182    ///
183    /// The total size of all files in the directory, in bytes. Returns 0 if
184    /// the directory doesn't exist or cannot be accessed.
185    ///
186    /// # Performance
187    ///
188    /// This method can be CPU and I/O intensive for large directories with
189    /// many files. It's designed to be called in parallel for multiple
190    /// directories to maximize throughput.
191    fn calculate_build_dir_size(&self, path: &Path) -> u64 {
192        if !path.exists() {
193            return 0;
194        }
195
196        let mut total_size = 0u64;
197
198        for entry in WalkDir::new(path) {
199            match entry {
200                Ok(entry) => {
201                    if entry.file_type().is_file()
202                        && let Ok(metadata) = entry.metadata()
203                    {
204                        total_size += metadata.len();
205                    }
206                }
207                Err(e) => {
208                    if self.scan_options.verbose {
209                        eprintln!("Warning: {e}");
210                    }
211                }
212            }
213        }
214
215        total_size
216    }
217
218    /// Detect a Node.js project in the specified directory.
219    ///
220    /// This method checks for the presence of both `package.json` and `node_modules/`
221    /// directory to identify a Node.js project. If found, it attempts to extract
222    /// the project name from the `package.json` file.
223    ///
224    /// # Arguments
225    ///
226    /// * `path` - Directory path to check for Node.js project
227    /// * `errors` - Shared error collection for reporting parsing issues
228    ///
229    /// # Returns
230    ///
231    /// - `Some(Project)` if a valid Node.js project is detected
232    /// - `None` if the directory doesn't contain a Node.js project
233    ///
234    /// # Detection Criteria
235    ///
236    /// 1. `package.json` file exists in directory
237    /// 2. `node_modules/` subdirectory exists in directory
238    /// 3. The project name is extracted from `package.json` if possible
239    fn detect_node_project(
240        &self,
241        path: &Path,
242        errors: &Arc<Mutex<Vec<String>>>,
243    ) -> Option<Project> {
244        let package_json = path.join("package.json");
245        let node_modules = path.join("node_modules");
246
247        if package_json.exists() && node_modules.exists() {
248            let name = self.extract_node_project_name(&package_json, errors);
249
250            let build_arts = BuildArtifacts {
251                path: path.join("node_modules"),
252                size: 0, // Will be calculated later
253            };
254
255            return Some(Project::new(
256                ProjectType::Node,
257                path.to_path_buf(),
258                build_arts,
259                name,
260            ));
261        }
262
263        None
264    }
265
266    /// Detect if a directory entry represents a development project.
267    ///
268    /// This method examines a directory entry and determines if it contains
269    /// a development project based on the presence of characteristic files
270    /// and directories. It respects the project filter settings.
271    ///
272    /// # Arguments
273    ///
274    /// * `entry` - The directory entry to examine
275    /// * `errors` - Shared error collection for reporting issues
276    ///
277    /// # Returns
278    ///
279    /// - `Some(Project)` if a valid project is detected
280    /// - `None` if no project is found or the entry doesn't match filters
281    ///
282    /// # Project Detection Logic
283    ///
284    /// - **Rust projects**: Presence of both `Cargo.toml` and `target/` directory
285    /// - **Node.js projects**: Presence of both `package.json` and `node_modules/` directory
286    /// - **Python projects**: Presence of configuration files and cache directories
287    /// - **Go projects**: Presence of both `go.mod` and `vendor/` directory
288    /// - **Java/Kotlin projects**: Presence of `pom.xml` or `build.gradle` with `target/` or `build/`
289    /// - **C/C++ projects**: Presence of `CMakeLists.txt` or `Makefile` with `build/`
290    /// - **Swift projects**: Presence of `Package.swift` with `.build/`
291    /// - **.NET/C# projects**: Presence of `.csproj` files with `bin/` or `obj/`
292    fn detect_project(
293        &self,
294        entry: &DirEntry,
295        errors: &Arc<Mutex<Vec<String>>>,
296    ) -> Option<Project> {
297        let path = entry.path();
298
299        if !entry.file_type().is_dir() {
300            return None;
301        }
302
303        // Detectors are tried in order; the first match wins.
304        // More specific ecosystems are checked before more generic ones
305        // (e.g. Java before C/C++, since both can use `build/`).
306        self.try_detect(ProjectFilter::Rust, || {
307            self.detect_rust_project(path, errors)
308        })
309        .or_else(|| {
310            self.try_detect(ProjectFilter::Node, || {
311                self.detect_node_project(path, errors)
312            })
313        })
314        .or_else(|| {
315            self.try_detect(ProjectFilter::Java, || {
316                self.detect_java_project(path, errors)
317            })
318        })
319        .or_else(|| {
320            self.try_detect(ProjectFilter::Swift, || {
321                self.detect_swift_project(path, errors)
322            })
323        })
324        .or_else(|| self.try_detect(ProjectFilter::DotNet, || Self::detect_dotnet_project(path)))
325        .or_else(|| {
326            self.try_detect(ProjectFilter::Python, || {
327                self.detect_python_project(path, errors)
328            })
329        })
330        .or_else(|| self.try_detect(ProjectFilter::Go, || self.detect_go_project(path, errors)))
331        .or_else(|| self.try_detect(ProjectFilter::Cpp, || self.detect_cpp_project(path, errors)))
332    }
333
334    /// Run a detector only if the current project filter allows it.
335    ///
336    /// Returns `None` immediately (without calling `detect`) when the
337    /// active filter doesn't include `filter`.
338    fn try_detect(
339        &self,
340        filter: ProjectFilter,
341        detect: impl FnOnce() -> Option<Project>,
342    ) -> Option<Project> {
343        if self.project_filter == ProjectFilter::All || self.project_filter == filter {
344            detect()
345        } else {
346            None
347        }
348    }
349
350    /// Detect a Rust project in the specified directory.
351    ///
352    /// This method checks for the presence of both `Cargo.toml` and `target/`
353    /// directory to identify a Rust project. If found, it attempts to extract
354    /// the project name from the `Cargo.toml` file.
355    ///
356    /// # Arguments
357    ///
358    /// * `path` - Directory path to check for a Rust project
359    /// * `errors` - Shared error collection for reporting parsing issues
360    ///
361    /// # Returns
362    ///
363    /// - `Some(Project)` if a valid Rust project is detected
364    /// - `None` if the directory doesn't contain a Rust project
365    ///
366    /// # Detection Criteria
367    ///
368    /// 1. `Cargo.toml` file exists in directory
369    /// 2. `target/` subdirectory exists in directory
370    /// 3. The project name is extracted from `Cargo.toml` if possible
371    fn detect_rust_project(
372        &self,
373        path: &Path,
374        errors: &Arc<Mutex<Vec<String>>>,
375    ) -> Option<Project> {
376        let cargo_toml = path.join("Cargo.toml");
377        let target_dir = path.join("target");
378
379        if cargo_toml.exists() && target_dir.exists() {
380            let name = self.extract_rust_project_name(&cargo_toml, errors);
381
382            let build_arts = BuildArtifacts {
383                path: path.join("target"),
384                size: 0, // Will be calculated later
385            };
386
387            return Some(Project::new(
388                ProjectType::Rust,
389                path.to_path_buf(),
390                build_arts,
391                name,
392            ));
393        }
394
395        None
396    }
397
398    /// Extract the project name from a Cargo.toml file.
399    ///
400    /// This method performs simple TOML parsing to extract the project name
401    /// from a Rust project's `Cargo.toml` file. It uses a line-by-line approach
402    /// rather than a full TOML parser for simplicity and performance.
403    ///
404    /// # Arguments
405    ///
406    /// * `cargo_toml` - Path to the Cargo.toml file
407    /// * `errors` - Shared error collection for reporting parsing issues
408    ///
409    /// # Returns
410    ///
411    /// - `Some(String)` containing the project name if successfully extracted
412    /// - `None` if the name cannot be found or parsed
413    ///
414    /// # Parsing Strategy
415    ///
416    /// The method looks for lines matching the pattern `name = "project_name"`
417    /// and extracts the quoted string value. This trivial approach handles
418    /// most common cases without requiring a full TOML parser.
419    fn extract_rust_project_name(
420        &self,
421        cargo_toml: &Path,
422        errors: &Arc<Mutex<Vec<String>>>,
423    ) -> Option<String> {
424        let content = self.read_file_content(cargo_toml, errors)?;
425        Self::parse_toml_name_field(&content)
426    }
427
428    /// Extract a quoted string value from a line.
429    fn extract_quoted_value(line: &str) -> Option<String> {
430        let start = line.find('"')?;
431        let end = line.rfind('"')?;
432
433        if start == end {
434            return None;
435        }
436
437        Some(line[start + 1..end].to_string())
438    }
439
440    /// Extract the name from a single TOML line if it contains a name field.
441    fn extract_name_from_line(line: &str) -> Option<String> {
442        if !Self::is_name_line(line) {
443            return None;
444        }
445
446        Self::extract_quoted_value(line)
447    }
448
449    /// Extract the project name from a package.json file.
450    ///
451    /// This method parses a Node.js project's `package.json` file to extract
452    /// the project name. It uses full JSON parsing to handle the file format
453    /// correctly and safely.
454    ///
455    /// # Arguments
456    ///
457    /// * `package_json` - Path to the package.json file
458    /// * `errors` - Shared error collection for reporting parsing issues
459    ///
460    /// # Returns
461    ///
462    /// - `Some(String)` containing the project name if successfully extracted
463    /// - `None` if the name cannot be found, parsed, or the file is invalid
464    ///
465    /// # Error Handling
466    ///
467    /// This method handles both file I/O errors and JSON parsing errors gracefully.
468    /// Errors are optionally reported to the shared error collection in verbose mode.
469    fn extract_node_project_name(
470        &self,
471        package_json: &Path,
472        errors: &Arc<Mutex<Vec<String>>>,
473    ) -> Option<String> {
474        match fs::read_to_string(package_json) {
475            Ok(content) => match from_str::<Value>(&content) {
476                Ok(json) => json
477                    .get("name")
478                    .and_then(|v| v.as_str())
479                    .map(std::string::ToString::to_string),
480                Err(e) => {
481                    if self.scan_options.verbose {
482                        errors
483                            .lock()
484                            .unwrap()
485                            .push(format!("Error parsing {}: {e}", package_json.display()));
486                    }
487                    None
488                }
489            },
490            Err(e) => {
491                if self.scan_options.verbose {
492                    errors
493                        .lock()
494                        .unwrap()
495                        .push(format!("Error reading {}: {e}", package_json.display()));
496                }
497                None
498            }
499        }
500    }
501
502    /// Check if a line contains a name field assignment.
503    fn is_name_line(line: &str) -> bool {
504        line.starts_with("name") && line.contains('=')
505    }
506
507    /// Log a file reading error if verbose mode is enabled.
508    fn log_file_error(
509        &self,
510        file_path: &Path,
511        error: &std::io::Error,
512        errors: &Arc<Mutex<Vec<String>>>,
513    ) {
514        if self.scan_options.verbose {
515            errors
516                .lock()
517                .unwrap()
518                .push(format!("Error reading {}: {error}", file_path.display()));
519        }
520    }
521
522    /// Parse the name field from TOML content.
523    fn parse_toml_name_field(content: &str) -> Option<String> {
524        for line in content.lines() {
525            if let Some(name) = Self::extract_name_from_line(line.trim()) {
526                return Some(name);
527            }
528        }
529        None
530    }
531
532    /// Read the content of a file and handle errors appropriately.
533    fn read_file_content(
534        &self,
535        file_path: &Path,
536        errors: &Arc<Mutex<Vec<String>>>,
537    ) -> Option<String> {
538        match fs::read_to_string(file_path) {
539            Ok(content) => Some(content),
540            Err(e) => {
541                self.log_file_error(file_path, &e, errors);
542                None
543            }
544        }
545    }
546
547    /// Determine if a directory entry should be scanned for projects.
548    ///
549    /// This method implements the filtering logic to decide whether a directory
550    /// should be traversed during the scanning process. It applies various
551    /// exclusion rules to improve performance and avoid scanning irrelevant
552    /// directories.
553    ///
554    /// # Arguments
555    ///
556    /// * `entry` - The directory entry to evaluate
557    ///
558    /// # Returns
559    ///
560    /// - `true` if the directory should be scanned
561    /// - `false` if the directory should be skipped
562    ///
563    /// # Exclusion Rules
564    ///
565    /// The following directories are excluded from scanning:
566    /// - Directories in the user-specified skip list
567    /// - Any directory inside a `node_modules/` directory (to avoid deep nesting)
568    /// - Hidden directories (starting with `.`) except `.cargo`
569    /// - Common build/temporary directories: `target`, `build`, `dist`, `out`, etc.
570    /// - Version control directories: `.git`, `.svn`, `.hg`
571    /// - Python cache and virtual environment directories
572    /// - Temporary directories: `temp`, `tmp`
573    /// - Go vendor directory
574    /// - Python pytest cache
575    /// - Python tox environments
576    /// - Python setuptools
577    /// - Python coverage files
578    /// - Node.js modules (already handled above but added for completeness)
579    /// - .NET `obj/` directory
580    fn should_scan_entry(&self, entry: &DirEntry) -> bool {
581        let path = entry.path();
582
583        // Early return if path is in skip list
584        if self.is_path_in_skip_list(path) {
585            return false;
586        }
587
588        // Skip any directory inside a node_modules directory
589        if path
590            .ancestors()
591            .any(|ancestor| ancestor.file_name().and_then(|n| n.to_str()) == Some("node_modules"))
592        {
593            return false;
594        }
595
596        // Skip hidden directories (except .cargo for Rust)
597        if Self::is_hidden_directory_to_skip(path) {
598            return false;
599        }
600
601        // Skip common non-project directories
602        !Self::is_excluded_directory(path)
603    }
604
605    /// Check if a path is in the skip list
606    fn is_path_in_skip_list(&self, path: &Path) -> bool {
607        self.scan_options.skip.iter().any(|skip| {
608            path.components().any(|component| {
609                component
610                    .as_os_str()
611                    .to_str()
612                    .is_some_and(|name| name == skip.to_string_lossy())
613            })
614        })
615    }
616
617    /// Check if directory is hidden and should be skipped
618    fn is_hidden_directory_to_skip(path: &Path) -> bool {
619        path.file_name()
620            .and_then(|n| n.to_str())
621            .is_some_and(|name| name.starts_with('.') && name != ".cargo")
622    }
623
624    /// Check if directory is in the excluded list
625    fn is_excluded_directory(path: &Path) -> bool {
626        let excluded_dirs = [
627            "target",
628            "build",
629            "dist",
630            "out",
631            ".git",
632            ".svn",
633            ".hg",
634            "__pycache__",
635            "venv",
636            ".venv",
637            "env",
638            ".env",
639            "temp",
640            "tmp",
641            "vendor",
642            ".pytest_cache",
643            ".tox",
644            ".eggs",
645            ".coverage",
646            "node_modules",
647            "obj",
648        ];
649
650        path.file_name()
651            .and_then(|n| n.to_str())
652            .is_some_and(|name| excluded_dirs.contains(&name))
653    }
654
655    /// Detect a Python project in the specified directory.
656    ///
657    /// This method checks for Python configuration files and associated cache directories.
658    /// It looks for multiple build artifacts that can be cleaned.
659    ///
660    /// # Arguments
661    ///
662    /// * `path` - Directory path to check for a Python project
663    /// * `errors` - Shared error collection for reporting parsing issues
664    ///
665    /// # Returns
666    ///
667    /// - `Some(Project)` if a valid Python project is detected
668    /// - `None` if the directory doesn't contain a Python project
669    ///
670    /// # Detection Criteria
671    ///
672    /// A Python project is identified by having:
673    /// 1. At least one of: requirements.txt, setup.py, pyproject.toml, setup.cfg, Pipfile
674    /// 2. At least one of the cache/build directories: `__pycache__`, `.pytest_cache`, venv, .venv, build, dist, .eggs
675    fn detect_python_project(
676        &self,
677        path: &Path,
678        errors: &Arc<Mutex<Vec<String>>>,
679    ) -> Option<Project> {
680        let config_files = [
681            "requirements.txt",
682            "setup.py",
683            "pyproject.toml",
684            "setup.cfg",
685            "Pipfile",
686            "pipenv.lock",
687            "poetry.lock",
688        ];
689
690        let build_dirs = [
691            "__pycache__",
692            ".pytest_cache",
693            "venv",
694            ".venv",
695            "build",
696            "dist",
697            ".eggs",
698            ".tox",
699            ".coverage",
700        ];
701
702        // Check if any config file exists
703        let has_config = config_files.iter().any(|&file| path.join(file).exists());
704
705        if !has_config {
706            return None;
707        }
708
709        // Find the largest cache/build directory that exists
710        let mut largest_build_dir = None;
711        let mut largest_size = 0;
712
713        for &dir_name in &build_dirs {
714            let dir_path = path.join(dir_name);
715
716            if dir_path.exists()
717                && dir_path.is_dir()
718                && let Ok(size) = Self::calculate_directory_size(&dir_path)
719                && size > largest_size
720            {
721                largest_size = size;
722                largest_build_dir = Some(dir_path);
723            }
724        }
725
726        if let Some(build_path) = largest_build_dir {
727            let name = self.extract_python_project_name(path, errors);
728
729            let build_arts = BuildArtifacts {
730                path: build_path,
731                size: 0, // Will be calculated later
732            };
733
734            return Some(Project::new(
735                ProjectType::Python,
736                path.to_path_buf(),
737                build_arts,
738                name,
739            ));
740        }
741
742        None
743    }
744
745    /// Detect a Go project in the specified directory.
746    ///
747    /// This method checks for the presence of both `go.mod` and `vendor/`
748    /// directory to identify a Go project. If found, it attempts to extract
749    /// the project name from the `go.mod` file.
750    ///
751    /// # Arguments
752    ///
753    /// * `path` - Directory path to check for a Go project
754    /// * `errors` - Shared error collection for reporting parsing issues
755    ///
756    /// # Returns
757    ///
758    /// - `Some(Project)` if a valid Go project is detected
759    /// - `None` if the directory doesn't contain a Go project
760    ///
761    /// # Detection Criteria
762    ///
763    /// 1. `go.mod` file exists in directory
764    /// 2. `vendor/` subdirectory exists in directory
765    /// 3. The project name is extracted from `go.mod` if possible
766    fn detect_go_project(&self, path: &Path, errors: &Arc<Mutex<Vec<String>>>) -> Option<Project> {
767        let go_mod = path.join("go.mod");
768        let vendor_dir = path.join("vendor");
769
770        if go_mod.exists() && vendor_dir.exists() {
771            let name = self.extract_go_project_name(&go_mod, errors);
772
773            let build_arts = BuildArtifacts {
774                path: path.join("vendor"),
775                size: 0, // Will be calculated later
776            };
777
778            return Some(Project::new(
779                ProjectType::Go,
780                path.to_path_buf(),
781                build_arts,
782                name,
783            ));
784        }
785
786        None
787    }
788
789    /// Extract the project name from a Python project directory.
790    ///
791    /// This method attempts to extract the project name from various Python
792    /// configuration files in order of preference.
793    ///
794    /// # Arguments
795    ///
796    /// * `path` - Path to the Python project directory
797    /// * `errors` - Shared error collection for reporting parsing issues
798    ///
799    /// # Returns
800    ///
801    /// - `Some(String)` containing the project name if successfully extracted
802    /// - `None` if the name cannot be found or parsed
803    ///
804    /// # Extraction Order
805    ///
806    /// 1. pyproject.toml (from [project] name or [tool.poetry] name)
807    /// 2. setup.py (from name= parameter)
808    /// 3. setup.cfg (from [metadata] name)
809    /// 4. Use directory name as a fallback
810    fn extract_python_project_name(
811        &self,
812        path: &Path,
813        errors: &Arc<Mutex<Vec<String>>>,
814    ) -> Option<String> {
815        // Try files in order of preference
816        self.try_extract_from_pyproject_toml(path, errors)
817            .or_else(|| self.try_extract_from_setup_py(path, errors))
818            .or_else(|| self.try_extract_from_setup_cfg(path, errors))
819            .or_else(|| Self::fallback_to_directory_name(path))
820    }
821
822    /// Try to extract project name from pyproject.toml
823    fn try_extract_from_pyproject_toml(
824        &self,
825        path: &Path,
826        errors: &Arc<Mutex<Vec<String>>>,
827    ) -> Option<String> {
828        let pyproject_toml = path.join("pyproject.toml");
829        if !pyproject_toml.exists() {
830            return None;
831        }
832
833        let content = self.read_file_content(&pyproject_toml, errors)?;
834        Self::extract_name_from_toml_like_content(&content)
835    }
836
837    /// Try to extract project name from setup.py
838    fn try_extract_from_setup_py(
839        &self,
840        path: &Path,
841        errors: &Arc<Mutex<Vec<String>>>,
842    ) -> Option<String> {
843        let setup_py = path.join("setup.py");
844        if !setup_py.exists() {
845            return None;
846        }
847
848        let content = self.read_file_content(&setup_py, errors)?;
849        Self::extract_name_from_python_content(&content)
850    }
851
852    /// Try to extract project name from setup.cfg
853    fn try_extract_from_setup_cfg(
854        &self,
855        path: &Path,
856        errors: &Arc<Mutex<Vec<String>>>,
857    ) -> Option<String> {
858        let setup_cfg = path.join("setup.cfg");
859        if !setup_cfg.exists() {
860            return None;
861        }
862
863        let content = self.read_file_content(&setup_cfg, errors)?;
864        Self::extract_name_from_cfg_content(&content)
865    }
866
867    /// Extract name from TOML-like content (pyproject.toml)
868    fn extract_name_from_toml_like_content(content: &str) -> Option<String> {
869        content
870            .lines()
871            .map(str::trim)
872            .find(|line| line.starts_with("name") && line.contains('='))
873            .and_then(Self::extract_quoted_value)
874    }
875
876    /// Extract name from Python content (setup.py)
877    fn extract_name_from_python_content(content: &str) -> Option<String> {
878        content
879            .lines()
880            .map(str::trim)
881            .find(|line| line.contains("name") && line.contains('='))
882            .and_then(Self::extract_quoted_value)
883    }
884
885    /// Extract name from INI-style configuration content (setup.cfg)
886    fn extract_name_from_cfg_content(content: &str) -> Option<String> {
887        let mut in_metadata_section = false;
888
889        for line in content.lines() {
890            let line = line.trim();
891
892            if line == "[metadata]" {
893                in_metadata_section = true;
894            } else if line.starts_with('[') && line.ends_with(']') {
895                in_metadata_section = false;
896            } else if in_metadata_section && line.starts_with("name") && line.contains('=') {
897                return line.split('=').nth(1).map(|name| name.trim().to_string());
898            }
899        }
900
901        None
902    }
903
904    /// Fallback to directory name
905    fn fallback_to_directory_name(path: &Path) -> Option<String> {
906        path.file_name()
907            .and_then(|name| name.to_str())
908            .map(std::string::ToString::to_string)
909    }
910
911    /// Extract the project name from a `go.mod` file.
912    ///
913    /// This method parses a Go project's `go.mod` file to extract
914    /// the module name, which typically represents the project.
915    ///
916    /// # Arguments
917    ///
918    /// * `go_mod` - Path to the `go.mod` file
919    /// * `errors` - Shared error collection for reporting parsing issues
920    ///
921    /// # Returns
922    ///
923    /// - `Some(String)` containing the module name if successfully extracted
924    /// - `None` if the name cannot be found or parsed
925    ///
926    /// # Parsing Strategy
927    ///
928    /// The method looks for the first line starting with `module ` and extracts
929    /// the module path. For better display, it takes the last component of the path.
930    fn extract_go_project_name(
931        &self,
932        go_mod: &Path,
933        errors: &Arc<Mutex<Vec<String>>>,
934    ) -> Option<String> {
935        let content = self.read_file_content(go_mod, errors)?;
936
937        for line in content.lines() {
938            let line = line.trim();
939            if line.starts_with("module ") {
940                let module_path = line.strip_prefix("module ")?.trim();
941
942                // Take the last component of the module path for a cleaner name
943                if let Some(name) = module_path.split('/').next_back() {
944                    return Some(name.to_string());
945                }
946
947                return Some(module_path.to_string());
948            }
949        }
950
951        None
952    }
953
954    /// Detect a Java/Kotlin project in the specified directory.
955    ///
956    /// This method checks for Maven (`pom.xml`) or Gradle (`build.gradle`,
957    /// `build.gradle.kts`) configuration files and their associated build output
958    /// directories (`target/` for Maven, `build/` for Gradle).
959    ///
960    /// # Detection Criteria
961    ///
962    /// 1. `pom.xml` + `target/` directory (Maven)
963    /// 2. `build.gradle` or `build.gradle.kts` + `build/` directory (Gradle)
964    fn detect_java_project(
965        &self,
966        path: &Path,
967        errors: &Arc<Mutex<Vec<String>>>,
968    ) -> Option<Project> {
969        let pom_xml = path.join("pom.xml");
970        let target_dir = path.join("target");
971
972        // Maven project: pom.xml + target/
973        if pom_xml.exists() && target_dir.exists() {
974            let name = self.extract_java_maven_project_name(&pom_xml, errors);
975
976            let build_arts = BuildArtifacts {
977                path: target_dir,
978                size: 0,
979            };
980
981            return Some(Project::new(
982                ProjectType::Java,
983                path.to_path_buf(),
984                build_arts,
985                name,
986            ));
987        }
988
989        // Gradle project: build.gradle(.kts) + build/
990        let has_gradle =
991            path.join("build.gradle").exists() || path.join("build.gradle.kts").exists();
992        let build_dir = path.join("build");
993
994        if has_gradle && build_dir.exists() {
995            let name = self.extract_java_gradle_project_name(path, errors);
996
997            let build_arts = BuildArtifacts {
998                path: build_dir,
999                size: 0,
1000            };
1001
1002            return Some(Project::new(
1003                ProjectType::Java,
1004                path.to_path_buf(),
1005                build_arts,
1006                name,
1007            ));
1008        }
1009
1010        None
1011    }
1012
1013    /// Extract the project name from a Maven `pom.xml` file.
1014    ///
1015    /// Looks for `<artifactId>` tags and extracts the text content.
1016    fn extract_java_maven_project_name(
1017        &self,
1018        pom_xml: &Path,
1019        errors: &Arc<Mutex<Vec<String>>>,
1020    ) -> Option<String> {
1021        let content = self.read_file_content(pom_xml, errors)?;
1022
1023        for line in content.lines() {
1024            let trimmed = line.trim();
1025            if trimmed.starts_with("<artifactId>") && trimmed.ends_with("</artifactId>") {
1026                let name = trimmed
1027                    .strip_prefix("<artifactId>")?
1028                    .strip_suffix("</artifactId>")?;
1029                return Some(name.to_string());
1030            }
1031        }
1032
1033        None
1034    }
1035
1036    /// Extract the project name from a Gradle project.
1037    ///
1038    /// Looks for `settings.gradle` or `settings.gradle.kts` and extracts
1039    /// the `rootProject.name` value. Falls back to directory name.
1040    fn extract_java_gradle_project_name(
1041        &self,
1042        path: &Path,
1043        errors: &Arc<Mutex<Vec<String>>>,
1044    ) -> Option<String> {
1045        for settings_file in &["settings.gradle", "settings.gradle.kts"] {
1046            let settings_path = path.join(settings_file);
1047            if settings_path.exists()
1048                && let Some(content) = self.read_file_content(&settings_path, errors)
1049            {
1050                for line in content.lines() {
1051                    let trimmed = line.trim();
1052                    if trimmed.contains("rootProject.name") && trimmed.contains('=') {
1053                        return Self::extract_quoted_value(trimmed).or_else(|| {
1054                            trimmed
1055                                .split('=')
1056                                .nth(1)
1057                                .map(|s| s.trim().trim_matches('\'').to_string())
1058                        });
1059                    }
1060                }
1061            }
1062        }
1063
1064        Self::fallback_to_directory_name(path)
1065    }
1066
1067    /// Detect a C/C++ project in the specified directory.
1068    ///
1069    /// This method checks for `CMakeLists.txt` or `Makefile` alongside a `build/`
1070    /// directory to identify C/C++ projects.
1071    ///
1072    /// # Detection Criteria
1073    ///
1074    /// 1. `CMakeLists.txt` + `build/` directory (`CMake`)
1075    /// 2. `Makefile` + `build/` directory (`Make`)
1076    fn detect_cpp_project(&self, path: &Path, errors: &Arc<Mutex<Vec<String>>>) -> Option<Project> {
1077        let build_dir = path.join("build");
1078
1079        if !build_dir.exists() {
1080            return None;
1081        }
1082
1083        let cmake_file = path.join("CMakeLists.txt");
1084        let makefile = path.join("Makefile");
1085
1086        if cmake_file.exists() || makefile.exists() {
1087            let name = if cmake_file.exists() {
1088                self.extract_cpp_cmake_project_name(&cmake_file, errors)
1089            } else {
1090                Self::fallback_to_directory_name(path)
1091            };
1092
1093            let build_arts = BuildArtifacts {
1094                path: build_dir,
1095                size: 0,
1096            };
1097
1098            return Some(Project::new(
1099                ProjectType::Cpp,
1100                path.to_path_buf(),
1101                build_arts,
1102                name,
1103            ));
1104        }
1105
1106        None
1107    }
1108
1109    /// Extract the project name from a `CMakeLists.txt` file.
1110    ///
1111    /// Looks for `project(name` patterns and extracts the project name.
1112    fn extract_cpp_cmake_project_name(
1113        &self,
1114        cmake_file: &Path,
1115        errors: &Arc<Mutex<Vec<String>>>,
1116    ) -> Option<String> {
1117        let content = self.read_file_content(cmake_file, errors)?;
1118
1119        for line in content.lines() {
1120            let trimmed = line.trim();
1121            if trimmed.starts_with("project(") || trimmed.starts_with("PROJECT(") {
1122                let inner = trimmed
1123                    .trim_start_matches("project(")
1124                    .trim_start_matches("PROJECT(")
1125                    .trim_end_matches(')')
1126                    .trim();
1127
1128                // The project name is the first word/token
1129                let name = inner.split_whitespace().next()?;
1130                // Remove possible surrounding quotes
1131                let name = name.trim_matches('"').trim_matches('\'');
1132                if !name.is_empty() {
1133                    return Some(name.to_string());
1134                }
1135            }
1136        }
1137
1138        Self::fallback_to_directory_name(cmake_file.parent()?)
1139    }
1140
1141    /// Detect a Swift project in the specified directory.
1142    ///
1143    /// This method checks for a `Package.swift` manifest and the `.build/`
1144    /// directory to identify Swift Package Manager projects.
1145    ///
1146    /// # Detection Criteria
1147    ///
1148    /// 1. `Package.swift` file exists
1149    /// 2. `.build/` directory exists
1150    fn detect_swift_project(
1151        &self,
1152        path: &Path,
1153        errors: &Arc<Mutex<Vec<String>>>,
1154    ) -> Option<Project> {
1155        let package_swift = path.join("Package.swift");
1156        let build_dir = path.join(".build");
1157
1158        if package_swift.exists() && build_dir.exists() {
1159            let name = self.extract_swift_project_name(&package_swift, errors);
1160
1161            let build_arts = BuildArtifacts {
1162                path: build_dir,
1163                size: 0,
1164            };
1165
1166            return Some(Project::new(
1167                ProjectType::Swift,
1168                path.to_path_buf(),
1169                build_arts,
1170                name,
1171            ));
1172        }
1173
1174        None
1175    }
1176
1177    /// Extract the project name from a `Package.swift` file.
1178    ///
1179    /// Looks for `name:` inside the `Package(` initializer.
1180    fn extract_swift_project_name(
1181        &self,
1182        package_swift: &Path,
1183        errors: &Arc<Mutex<Vec<String>>>,
1184    ) -> Option<String> {
1185        let content = self.read_file_content(package_swift, errors)?;
1186
1187        for line in content.lines() {
1188            let trimmed = line.trim();
1189            if trimmed.contains("name:") {
1190                return Self::extract_quoted_value(trimmed);
1191            }
1192        }
1193
1194        Self::fallback_to_directory_name(package_swift.parent()?)
1195    }
1196
1197    /// Detect a .NET/C# project in the specified directory.
1198    ///
1199    /// This method checks for `.csproj` files alongside `bin/` and/or `obj/`
1200    /// directories to identify .NET projects.
1201    ///
1202    /// # Detection Criteria
1203    ///
1204    /// 1. At least one `.csproj` file exists in the directory
1205    /// 2. At least one of `bin/` or `obj/` directories exists
1206    fn detect_dotnet_project(path: &Path) -> Option<Project> {
1207        let bin_dir = path.join("bin");
1208        let obj_dir = path.join("obj");
1209
1210        let has_build_dir = bin_dir.exists() || obj_dir.exists();
1211        if !has_build_dir {
1212            return None;
1213        }
1214
1215        let csproj_file = Self::find_file_with_extension(path, "csproj")?;
1216
1217        // Pick the larger of bin/ and obj/ as the primary build artifact
1218        let build_path = match (bin_dir.exists(), obj_dir.exists()) {
1219            (true, true) => {
1220                let bin_size = Self::calculate_directory_size(&bin_dir).unwrap_or(0);
1221                let obj_size = Self::calculate_directory_size(&obj_dir).unwrap_or(0);
1222                if obj_size >= bin_size {
1223                    obj_dir
1224                } else {
1225                    bin_dir
1226                }
1227            }
1228            (true, false) => bin_dir,
1229            (false, true) => obj_dir,
1230            (false, false) => return None,
1231        };
1232
1233        let name = csproj_file
1234            .file_stem()
1235            .and_then(|s| s.to_str())
1236            .map(std::string::ToString::to_string);
1237
1238        let build_arts = BuildArtifacts {
1239            path: build_path,
1240            size: 0,
1241        };
1242
1243        Some(Project::new(
1244            ProjectType::DotNet,
1245            path.to_path_buf(),
1246            build_arts,
1247            name,
1248        ))
1249    }
1250
1251    /// Find the first file with a given extension in a directory.
1252    fn find_file_with_extension(dir: &Path, extension: &str) -> Option<std::path::PathBuf> {
1253        let entries = fs::read_dir(dir).ok()?;
1254        for entry in entries.flatten() {
1255            let path = entry.path();
1256            if path.is_file() && path.extension().and_then(|e| e.to_str()) == Some(extension) {
1257                return Some(path);
1258            }
1259        }
1260        None
1261    }
1262
1263    /// Calculate the size of a directory recursively.
1264    ///
1265    /// This is a helper method used for Python and .NET projects to determine which
1266    /// cache directory is the largest and should be the primary cleanup target.
1267    fn calculate_directory_size(dir_path: &Path) -> std::io::Result<u64> {
1268        let mut total_size = 0;
1269
1270        for entry in fs::read_dir(dir_path)? {
1271            let entry = entry?;
1272            let path = entry.path();
1273            if path.is_dir() {
1274                total_size += Self::calculate_directory_size(&path).unwrap_or(0);
1275            } else {
1276                total_size += entry.metadata()?.len();
1277            }
1278        }
1279
1280        Ok(total_size)
1281    }
1282}
1283
1284#[cfg(test)]
1285mod tests {
1286    use super::*;
1287    use std::path::PathBuf;
1288    use tempfile::TempDir;
1289
1290    /// Create a scanner with default options and the given filter.
1291    fn default_scanner(filter: ProjectFilter) -> Scanner {
1292        Scanner::new(
1293            ScanOptions {
1294                verbose: false,
1295                threads: 1,
1296                skip: vec![],
1297            },
1298            filter,
1299        )
1300    }
1301
1302    /// Helper to create a file with content, ensuring parent dirs exist.
1303    fn create_file(path: &Path, content: &str) {
1304        if let Some(parent) = path.parent() {
1305            fs::create_dir_all(parent).unwrap();
1306        }
1307        fs::write(path, content).unwrap();
1308    }
1309
1310    // ── Static helper method tests ──────────────────────────────────────
1311
1312    #[test]
1313    fn test_is_hidden_directory_to_skip() {
1314        // Hidden directories should be skipped
1315        assert!(Scanner::is_hidden_directory_to_skip(Path::new(
1316            "/some/.hidden"
1317        )));
1318        assert!(Scanner::is_hidden_directory_to_skip(Path::new(
1319            "/some/.git"
1320        )));
1321        assert!(Scanner::is_hidden_directory_to_skip(Path::new(
1322            "/some/.svn"
1323        )));
1324        assert!(Scanner::is_hidden_directory_to_skip(Path::new(".env")));
1325
1326        // .cargo is the special exception — should NOT be skipped
1327        assert!(!Scanner::is_hidden_directory_to_skip(Path::new(
1328            "/home/user/.cargo"
1329        )));
1330        assert!(!Scanner::is_hidden_directory_to_skip(Path::new(".cargo")));
1331
1332        // Non-hidden directories should not be skipped
1333        assert!(!Scanner::is_hidden_directory_to_skip(Path::new(
1334            "/some/visible"
1335        )));
1336        assert!(!Scanner::is_hidden_directory_to_skip(Path::new("src")));
1337    }
1338
1339    #[test]
1340    fn test_is_excluded_directory() {
1341        // Build/artifact directories should be excluded
1342        assert!(Scanner::is_excluded_directory(Path::new("/some/target")));
1343        assert!(Scanner::is_excluded_directory(Path::new(
1344            "/some/node_modules"
1345        )));
1346        assert!(Scanner::is_excluded_directory(Path::new(
1347            "/some/__pycache__"
1348        )));
1349        assert!(Scanner::is_excluded_directory(Path::new("/some/vendor")));
1350        assert!(Scanner::is_excluded_directory(Path::new("/some/build")));
1351        assert!(Scanner::is_excluded_directory(Path::new("/some/dist")));
1352        assert!(Scanner::is_excluded_directory(Path::new("/some/out")));
1353
1354        // VCS directories should be excluded
1355        assert!(Scanner::is_excluded_directory(Path::new("/some/.git")));
1356        assert!(Scanner::is_excluded_directory(Path::new("/some/.svn")));
1357        assert!(Scanner::is_excluded_directory(Path::new("/some/.hg")));
1358
1359        // Python-specific directories
1360        assert!(Scanner::is_excluded_directory(Path::new(
1361            "/some/.pytest_cache"
1362        )));
1363        assert!(Scanner::is_excluded_directory(Path::new("/some/.tox")));
1364        assert!(Scanner::is_excluded_directory(Path::new("/some/.eggs")));
1365        assert!(Scanner::is_excluded_directory(Path::new("/some/.coverage")));
1366
1367        // Virtual environments
1368        assert!(Scanner::is_excluded_directory(Path::new("/some/venv")));
1369        assert!(Scanner::is_excluded_directory(Path::new("/some/.venv")));
1370        assert!(Scanner::is_excluded_directory(Path::new("/some/env")));
1371        assert!(Scanner::is_excluded_directory(Path::new("/some/.env")));
1372
1373        // Temp directories
1374        assert!(Scanner::is_excluded_directory(Path::new("/some/temp")));
1375        assert!(Scanner::is_excluded_directory(Path::new("/some/tmp")));
1376
1377        // Non-excluded directories
1378        assert!(!Scanner::is_excluded_directory(Path::new("/some/src")));
1379        assert!(!Scanner::is_excluded_directory(Path::new("/some/lib")));
1380        assert!(!Scanner::is_excluded_directory(Path::new("/some/app")));
1381        assert!(!Scanner::is_excluded_directory(Path::new("/some/tests")));
1382    }
1383
1384    #[test]
1385    fn test_extract_quoted_value() {
1386        assert_eq!(
1387            Scanner::extract_quoted_value(r#"name = "my-project""#),
1388            Some("my-project".to_string())
1389        );
1390        assert_eq!(
1391            Scanner::extract_quoted_value(r#"name = "with spaces""#),
1392            Some("with spaces".to_string())
1393        );
1394        assert_eq!(Scanner::extract_quoted_value("no quotes here"), None);
1395        // Single quote mark is not a pair
1396        assert_eq!(Scanner::extract_quoted_value(r#"only "one"#), None);
1397    }
1398
1399    #[test]
1400    fn test_is_name_line() {
1401        assert!(Scanner::is_name_line("name = \"test\""));
1402        assert!(Scanner::is_name_line("name=\"test\""));
1403        assert!(!Scanner::is_name_line("version = \"1.0\""));
1404        assert!(!Scanner::is_name_line("# name = \"commented\""));
1405        assert!(!Scanner::is_name_line("name: \"yaml style\""));
1406    }
1407
1408    #[test]
1409    fn test_parse_toml_name_field() {
1410        let content = "[package]\nname = \"test-project\"\nversion = \"0.1.0\"\n";
1411        assert_eq!(
1412            Scanner::parse_toml_name_field(content),
1413            Some("test-project".to_string())
1414        );
1415
1416        let no_name = "[package]\nversion = \"0.1.0\"\n";
1417        assert_eq!(Scanner::parse_toml_name_field(no_name), None);
1418
1419        let empty = "";
1420        assert_eq!(Scanner::parse_toml_name_field(empty), None);
1421    }
1422
1423    #[test]
1424    fn test_extract_name_from_cfg_content() {
1425        let content = "[metadata]\nname = my-package\nversion = 1.0\n";
1426        assert_eq!(
1427            Scanner::extract_name_from_cfg_content(content),
1428            Some("my-package".to_string())
1429        );
1430
1431        // Name in wrong section should not be found
1432        let wrong_section = "[options]\nname = not-this\n";
1433        assert_eq!(Scanner::extract_name_from_cfg_content(wrong_section), None);
1434
1435        // Multiple sections — name must be in [metadata]
1436        let multi = "[options]\nkey = val\n\n[metadata]\nname = correct\n\n[other]\nname = wrong\n";
1437        assert_eq!(
1438            Scanner::extract_name_from_cfg_content(multi),
1439            Some("correct".to_string())
1440        );
1441    }
1442
1443    #[test]
1444    fn test_extract_name_from_python_content() {
1445        let content = "from setuptools import setup\nsetup(\n    name=\"my-pkg\",\n)\n";
1446        assert_eq!(
1447            Scanner::extract_name_from_python_content(content),
1448            Some("my-pkg".to_string())
1449        );
1450
1451        let no_name = "from setuptools import setup\nsetup(version=\"1.0\")\n";
1452        assert_eq!(Scanner::extract_name_from_python_content(no_name), None);
1453    }
1454
1455    #[test]
1456    fn test_fallback_to_directory_name() {
1457        assert_eq!(
1458            Scanner::fallback_to_directory_name(Path::new("/some/project-name")),
1459            Some("project-name".to_string())
1460        );
1461        assert_eq!(
1462            Scanner::fallback_to_directory_name(Path::new("/some/my_app")),
1463            Some("my_app".to_string())
1464        );
1465    }
1466
1467    #[test]
1468    fn test_is_path_in_skip_list() {
1469        let scanner = Scanner::new(
1470            ScanOptions {
1471                verbose: false,
1472                threads: 1,
1473                skip: vec![PathBuf::from("skip-me"), PathBuf::from("also-skip")],
1474            },
1475            ProjectFilter::All,
1476        );
1477
1478        assert!(scanner.is_path_in_skip_list(Path::new("/root/skip-me/project")));
1479        assert!(scanner.is_path_in_skip_list(Path::new("/root/also-skip")));
1480        assert!(!scanner.is_path_in_skip_list(Path::new("/root/keep-me")));
1481        assert!(!scanner.is_path_in_skip_list(Path::new("/root/src")));
1482    }
1483
1484    #[test]
1485    fn test_is_path_in_empty_skip_list() {
1486        let scanner = default_scanner(ProjectFilter::All);
1487        assert!(!scanner.is_path_in_skip_list(Path::new("/any/path")));
1488    }
1489
1490    // ── Scanning with special path characters ───────────────────────────
1491
1492    #[test]
1493    fn test_scan_directory_with_spaces_in_path() {
1494        let tmp = TempDir::new().unwrap();
1495        let base = tmp.path().join("path with spaces");
1496        fs::create_dir_all(&base).unwrap();
1497
1498        let project = base.join("my project");
1499        create_file(
1500            &project.join("Cargo.toml"),
1501            "[package]\nname = \"spaced\"\nversion = \"0.1.0\"",
1502        );
1503        create_file(&project.join("target/dummy"), "content");
1504
1505        let scanner = default_scanner(ProjectFilter::Rust);
1506        let projects = scanner.scan_directory(&base);
1507        assert_eq!(projects.len(), 1);
1508        assert_eq!(projects[0].name.as_deref(), Some("spaced"));
1509    }
1510
1511    #[test]
1512    fn test_scan_directory_with_unicode_names() {
1513        let tmp = TempDir::new().unwrap();
1514        let base = tmp.path();
1515
1516        let project = base.join("プロジェクト");
1517        create_file(
1518            &project.join("package.json"),
1519            r#"{"name": "unicode-project"}"#,
1520        );
1521        create_file(&project.join("node_modules/dep.js"), "module.exports = {};");
1522
1523        let scanner = default_scanner(ProjectFilter::Node);
1524        let projects = scanner.scan_directory(base);
1525        assert_eq!(projects.len(), 1);
1526        assert_eq!(projects[0].name.as_deref(), Some("unicode-project"));
1527    }
1528
1529    #[test]
1530    fn test_scan_directory_with_special_characters_in_name() {
1531        let tmp = TempDir::new().unwrap();
1532        let base = tmp.path();
1533
1534        let project = base.join("project-with-dashes_and_underscores.v2");
1535        create_file(
1536            &project.join("Cargo.toml"),
1537            "[package]\nname = \"special-chars\"\nversion = \"0.1.0\"",
1538        );
1539        create_file(&project.join("target/dummy"), "content");
1540
1541        let scanner = default_scanner(ProjectFilter::Rust);
1542        let projects = scanner.scan_directory(base);
1543        assert_eq!(projects.len(), 1);
1544        assert_eq!(projects[0].name.as_deref(), Some("special-chars"));
1545    }
1546
1547    // ── Unix-specific scanning tests ────────────────────────────────────
1548
1549    #[test]
1550    #[cfg(unix)]
1551    fn test_hidden_directory_itself_not_detected_as_project_unix() {
1552        let tmp = TempDir::new().unwrap();
1553        let base = tmp.path();
1554
1555        // A hidden directory with Cargo.toml + target/ directly inside it
1556        // should NOT be detected because the .hidden entry is filtered by
1557        // is_hidden_directory_to_skip. However, non-hidden children inside
1558        // hidden dirs CAN still be found because WalkDir descends into them.
1559        let hidden = base.join(".hidden-project");
1560        create_file(
1561            &hidden.join("Cargo.toml"),
1562            "[package]\nname = \"hidden\"\nversion = \"0.1.0\"",
1563        );
1564        create_file(&hidden.join("target/dummy"), "content");
1565
1566        // A visible project should be found
1567        let visible = base.join("visible-project");
1568        create_file(
1569            &visible.join("Cargo.toml"),
1570            "[package]\nname = \"visible\"\nversion = \"0.1.0\"",
1571        );
1572        create_file(&visible.join("target/dummy"), "content");
1573
1574        let scanner = default_scanner(ProjectFilter::Rust);
1575        let projects = scanner.scan_directory(base);
1576
1577        // Only the visible project should be found; the hidden one is excluded
1578        // because its directory name starts with '.'
1579        assert_eq!(projects.len(), 1);
1580        assert_eq!(projects[0].name.as_deref(), Some("visible"));
1581    }
1582
1583    #[test]
1584    #[cfg(unix)]
1585    fn test_projects_inside_hidden_dirs_are_still_traversed_unix() {
1586        let tmp = TempDir::new().unwrap();
1587        let base = tmp.path();
1588
1589        // A non-hidden project nested inside a hidden directory.
1590        // WalkDir still descends into .hidden, so the child project IS found.
1591        let nested = base.join(".hidden-parent/visible-child");
1592        create_file(
1593            &nested.join("Cargo.toml"),
1594            "[package]\nname = \"nested\"\nversion = \"0.1.0\"",
1595        );
1596        create_file(&nested.join("target/dummy"), "content");
1597
1598        let scanner = default_scanner(ProjectFilter::Rust);
1599        let projects = scanner.scan_directory(base);
1600
1601        // The child project has a non-hidden name, so it IS detected
1602        assert_eq!(projects.len(), 1);
1603        assert_eq!(projects[0].name.as_deref(), Some("nested"));
1604    }
1605
1606    #[test]
1607    #[cfg(unix)]
1608    fn test_dotcargo_directory_not_skipped_unix() {
1609        // .cargo is the exception — hidden but should NOT be skipped.
1610        // Verify via the static method.
1611        assert!(!Scanner::is_hidden_directory_to_skip(Path::new(
1612            "/home/user/.cargo"
1613        )));
1614
1615        // Other dot-dirs ARE skipped
1616        assert!(Scanner::is_hidden_directory_to_skip(Path::new(
1617            "/home/user/.local"
1618        )));
1619        assert!(Scanner::is_hidden_directory_to_skip(Path::new(
1620            "/home/user/.npm"
1621        )));
1622    }
1623
1624    // ── Python project detection tests ──────────────────────────────────
1625
1626    #[test]
1627    fn test_detect_python_with_pyproject_toml() {
1628        let tmp = TempDir::new().unwrap();
1629        let base = tmp.path();
1630
1631        let project = base.join("py-project");
1632        create_file(
1633            &project.join("pyproject.toml"),
1634            "[project]\nname = \"my-py-lib\"\nversion = \"1.0.0\"\n",
1635        );
1636        let pycache = project.join("__pycache__");
1637        fs::create_dir_all(&pycache).unwrap();
1638        create_file(&pycache.join("module.pyc"), "bytecode");
1639
1640        let scanner = default_scanner(ProjectFilter::Python);
1641        let projects = scanner.scan_directory(base);
1642        assert_eq!(projects.len(), 1);
1643        assert_eq!(projects[0].kind, ProjectType::Python);
1644    }
1645
1646    #[test]
1647    fn test_detect_python_with_setup_py() {
1648        let tmp = TempDir::new().unwrap();
1649        let base = tmp.path();
1650
1651        let project = base.join("setup-project");
1652        create_file(
1653            &project.join("setup.py"),
1654            "from setuptools import setup\nsetup(name=\"setup-lib\")\n",
1655        );
1656        let pycache = project.join("__pycache__");
1657        fs::create_dir_all(&pycache).unwrap();
1658        create_file(&pycache.join("module.pyc"), "bytecode");
1659
1660        let scanner = default_scanner(ProjectFilter::Python);
1661        let projects = scanner.scan_directory(base);
1662        assert_eq!(projects.len(), 1);
1663    }
1664
1665    #[test]
1666    fn test_detect_python_with_pipfile() {
1667        let tmp = TempDir::new().unwrap();
1668        let base = tmp.path();
1669
1670        let project = base.join("pipenv-project");
1671        create_file(
1672            &project.join("Pipfile"),
1673            "[[source]]\nurl = \"https://pypi.org/simple\"",
1674        );
1675        let pycache = project.join("__pycache__");
1676        fs::create_dir_all(&pycache).unwrap();
1677        create_file(&pycache.join("module.pyc"), "bytecode");
1678
1679        let scanner = default_scanner(ProjectFilter::Python);
1680        let projects = scanner.scan_directory(base);
1681        assert_eq!(projects.len(), 1);
1682    }
1683
1684    // ── Go project detection tests ──────────────────────────────────────
1685
1686    #[test]
1687    fn test_detect_go_extracts_module_name() {
1688        let tmp = TempDir::new().unwrap();
1689        let base = tmp.path();
1690
1691        let project = base.join("go-service");
1692        create_file(
1693            &project.join("go.mod"),
1694            "module github.com/user/my-service\n\ngo 1.21\n",
1695        );
1696        let vendor = project.join("vendor");
1697        fs::create_dir_all(&vendor).unwrap();
1698        create_file(&vendor.join("modules.txt"), "vendor manifest");
1699
1700        let scanner = default_scanner(ProjectFilter::Go);
1701        let projects = scanner.scan_directory(base);
1702        assert_eq!(projects.len(), 1);
1703        // Should extract last path component as name
1704        assert_eq!(projects[0].name.as_deref(), Some("my-service"));
1705    }
1706
1707    // ── Java/Kotlin project detection tests ────────────────────────────
1708
/// A Maven layout (`pom.xml` plus a `target` directory) must be detected as
/// one Java project named after the `<artifactId>` value in the fixture.
#[test]
fn test_detect_java_maven_project() {
    let sandbox = TempDir::new().unwrap();
    let root = sandbox.path();
    let project_dir = root.join("java-maven");

    // Minimal POM carrying only an artifact id.
    let pom = project_dir.join("pom.xml");
    create_file(
        &pom,
        "<project>\n  <artifactId>my-java-app</artifactId>\n</project>",
    );

    // One compiled class under the build output directory.
    let compiled_class = project_dir.join("target/classes/Main.class");
    create_file(&compiled_class, "bytecode");

    let scanner = default_scanner(ProjectFilter::Java);
    let found = scanner.scan_directory(root);
    assert_eq!(found.len(), 1);

    let detected = &found[0];
    assert_eq!(detected.kind, ProjectType::Java);
    assert_eq!(detected.name.as_deref(), Some("my-java-app"));
}
1727
/// A Gradle layout (`build.gradle`, `settings.gradle` and a `build`
/// directory) must be detected as one Java project whose name matches the
/// `rootProject.name` value written to `settings.gradle`.
#[test]
fn test_detect_java_gradle_project() {
    let sandbox = TempDir::new().unwrap();
    let root = sandbox.path();
    let project_dir = root.join("java-gradle");

    // Groovy build script and the settings file that carries the name.
    let build_script = project_dir.join("build.gradle");
    create_file(&build_script, "apply plugin: 'java'");
    let settings = project_dir.join("settings.gradle");
    create_file(&settings, "rootProject.name = \"my-gradle-app\"");

    // One compiled class under the build output directory.
    let compiled_class = project_dir.join("build/classes/main/Main.class");
    create_file(&compiled_class, "bytecode");

    let scanner = default_scanner(ProjectFilter::Java);
    let found = scanner.scan_directory(root);
    assert_eq!(found.len(), 1);

    let detected = &found[0];
    assert_eq!(detected.kind, ProjectType::Java);
    assert_eq!(detected.name.as_deref(), Some("my-gradle-app"));
}
1747
/// The Kotlin-DSL Gradle layout (`build.gradle.kts`, `settings.gradle.kts`
/// and a `build` directory) must be detected as one Java project whose name
/// matches the `rootProject.name` value written to `settings.gradle.kts`.
#[test]
fn test_detect_java_gradle_kts_project() {
    let sandbox = TempDir::new().unwrap();
    let root = sandbox.path();
    let project_dir = root.join("kotlin-gradle");

    // Kotlin-DSL build script and the settings file that carries the name.
    let build_script = project_dir.join("build.gradle.kts");
    create_file(&build_script, "plugins { kotlin(\"jvm\") }");
    let settings = project_dir.join("settings.gradle.kts");
    create_file(&settings, "rootProject.name = \"my-kotlin-app\"");

    // One compiled class under the build output directory.
    let compiled_class = project_dir.join("build/classes/kotlin/main/MainKt.class");
    create_file(&compiled_class, "bytecode");

    let scanner = default_scanner(ProjectFilter::Java);
    let found = scanner.scan_directory(root);
    assert_eq!(found.len(), 1);

    let detected = &found[0];
    assert_eq!(detected.kind, ProjectType::Java);
    assert_eq!(detected.name.as_deref(), Some("my-kotlin-app"));
}
1773
1774    // ── C/C++ project detection tests ────────────────────────────────────
1775
/// A CMake layout (`CMakeLists.txt` plus a `build` directory) must be
/// detected as one C/C++ project named after the `project(...)` argument in
/// the fixture.
#[test]
fn test_detect_cpp_cmake_project() {
    let sandbox = TempDir::new().unwrap();
    let root = sandbox.path();
    let project_dir = root.join("cpp-cmake");

    // CMake manifest declaring the project name on its first line.
    let cmake_lists = project_dir.join("CMakeLists.txt");
    create_file(
        &cmake_lists,
        "project(my-cpp-lib)\ncmake_minimum_required(VERSION 3.10)",
    );

    // A cache file inside the build directory.
    let cmake_cache = project_dir.join("build/CMakeCache.txt");
    create_file(&cmake_cache, "cache");

    let scanner = default_scanner(ProjectFilter::Cpp);
    let found = scanner.scan_directory(root);
    assert_eq!(found.len(), 1);

    let detected = &found[0];
    assert_eq!(detected.kind, ProjectType::Cpp);
    assert_eq!(detected.name.as_deref(), Some("my-cpp-lib"));
}
1794
/// A directory with a `Makefile` and a `build` directory must be detected
/// as exactly one C/C++ project. The project name is not checked here.
#[test]
fn test_detect_cpp_makefile_project() {
    let sandbox = TempDir::new().unwrap();
    let root = sandbox.path();
    let project_dir = root.join("cpp-make");

    // Makefile with a single default target.
    let makefile = project_dir.join("Makefile");
    create_file(&makefile, "all:\n\tg++ -o main main.cpp");

    // One object file inside the build directory.
    let object_file = project_dir.join("build/main.o");
    create_file(&object_file, "object");

    let scanner = default_scanner(ProjectFilter::Cpp);
    let found = scanner.scan_directory(root);
    assert_eq!(found.len(), 1);

    let detected = &found[0];
    assert_eq!(detected.kind, ProjectType::Cpp);
}
1809
1810    // ── Swift project detection tests ────────────────────────────────────
1811
/// A Swift package (`Package.swift` plus a `.build` directory) must be
/// detected as one Swift project named after the `name:` value in the
/// manifest fixture.
#[test]
fn test_detect_swift_project() {
    let sandbox = TempDir::new().unwrap();
    let root = sandbox.path();
    let project_dir = root.join("swift-pkg");

    // Package manifest with the name on its own line.
    let manifest = project_dir.join("Package.swift");
    create_file(
        &manifest,
        "let package = Package(\n    name: \"my-swift-lib\",\n    targets: []\n)",
    );

    // One built artifact inside the hidden build directory.
    let built_binary = project_dir.join(".build/debug/my-swift-lib");
    create_file(&built_binary, "binary");

    let scanner = default_scanner(ProjectFilter::Swift);
    let found = scanner.scan_directory(root);
    assert_eq!(found.len(), 1);

    let detected = &found[0];
    assert_eq!(detected.kind, ProjectType::Swift);
    assert_eq!(detected.name.as_deref(), Some("my-swift-lib"));
}
1830
1831    // ── .NET/C# project detection tests ──────────────────────────────────
1832
/// A directory with `MyApp.csproj` and both `bin` and `obj` output trees
/// must be detected as one .NET project reported with the name `MyApp`.
#[test]
fn test_detect_dotnet_project() {
    let sandbox = TempDir::new().unwrap();
    let root = sandbox.path();
    let project_dir = root.join("dotnet-app");

    // Minimal SDK-style project file.
    let csproj = project_dir.join("MyApp.csproj");
    create_file(&csproj, "<Project Sdk=\"Microsoft.NET.Sdk\">\n</Project>");

    // One file in each of the two output trees.
    let bin_output = project_dir.join("bin/Debug/net8.0/MyApp.dll");
    create_file(&bin_output, "assembly");
    let obj_output = project_dir.join("obj/Debug/net8.0/MyApp.dll");
    create_file(&obj_output, "intermediate");

    let scanner = default_scanner(ProjectFilter::DotNet);
    let found = scanner.scan_directory(root);
    assert_eq!(found.len(), 1);

    let detected = &found[0];
    assert_eq!(detected.kind, ProjectType::DotNet);
    assert_eq!(detected.name.as_deref(), Some("MyApp"));
}
1852
/// A .NET project that has only an `obj` tree (no `bin`) must still be
/// detected as one .NET project, reported with the name `Lib`.
#[test]
fn test_detect_dotnet_project_obj_only() {
    let sandbox = TempDir::new().unwrap();
    let root = sandbox.path();
    let project_dir = root.join("dotnet-obj-only");

    // Minimal SDK-style project file.
    let csproj = project_dir.join("Lib.csproj");
    create_file(&csproj, "<Project Sdk=\"Microsoft.NET.Sdk\">\n</Project>");

    // Only the intermediate output tree exists in this fixture.
    let obj_output = project_dir.join("obj/Debug/net8.0/Lib.dll");
    create_file(&obj_output, "intermediate");

    let scanner = default_scanner(ProjectFilter::DotNet);
    let found = scanner.scan_directory(root);
    assert_eq!(found.len(), 1);

    let detected = &found[0];
    assert_eq!(detected.kind, ProjectType::DotNet);
    assert_eq!(detected.name.as_deref(), Some("Lib"));
}
1871
1872    // ── Excluded directory tests ─────────────────────────────────────────
1873
/// `Scanner::is_excluded_directory` must return `true` for `/some/obj`.
#[test]
fn test_obj_directory_is_excluded() {
    let obj_dir = Path::new("/some/obj");
    let excluded = Scanner::is_excluded_directory(obj_dir);
    assert!(excluded);
}
1878
1879    // ── Cross-platform calculate_build_dir_size ─────────────────────────
1880
/// The size computed for an existing directory with no entries must be zero.
#[test]
fn test_calculate_build_dir_size_empty() {
    let sandbox = TempDir::new().unwrap();

    // Create the directory but put nothing inside it.
    let vacant_dir = sandbox.path().join("empty");
    fs::create_dir_all(&vacant_dir).unwrap();

    let scanner = default_scanner(ProjectFilter::All);
    let size = scanner.calculate_build_dir_size(&vacant_dir);
    assert_eq!(size, 0);
}
1890
/// The size computed for the path `/nonexistent/path` must be zero.
#[test]
fn test_calculate_build_dir_size_nonexistent() {
    let missing_dir = Path::new("/nonexistent/path");

    let scanner = default_scanner(ProjectFilter::All);
    let size = scanner.calculate_build_dir_size(missing_dir);
    assert_eq!(size, 0);
}
1899
/// The computed size must be the sum of all file sizes across nested
/// subdirectories: 5 + 6 + 1 = 12 bytes for this fixture.
#[test]
fn test_calculate_build_dir_size_with_nested_files() {
    let sandbox = TempDir::new().unwrap();
    let tree_root = sandbox.path().join("nested");

    // Three files at increasing depth, with their content lengths noted.
    let top_level = tree_root.join("file1.txt");
    create_file(&top_level, "hello"); // 5 bytes
    let one_deep = tree_root.join("sub/file2.txt");
    create_file(&one_deep, "world!"); // 6 bytes
    let two_deep = tree_root.join("sub/deep/file3.txt");
    create_file(&two_deep, "!"); // 1 byte

    let scanner = default_scanner(ProjectFilter::All);
    let total_bytes = scanner.calculate_build_dir_size(&tree_root);
    assert_eq!(total_bytes, 12);
}
1913
1914    // ── Quiet mode ──────────────────────────────────────────────────────
1915
/// A scanner built with `with_quiet(true)` must still report the single
/// Rust project present in the scanned directory.
#[test]
fn test_scanner_quiet_mode() {
    let sandbox = TempDir::new().unwrap();
    let root = sandbox.path();
    let project_dir = root.join("quiet-project");

    // Cargo manifest plus one file inside the `target` directory.
    let manifest = project_dir.join("Cargo.toml");
    create_file(
        &manifest,
        "[package]\nname = \"quiet\"\nversion = \"0.1.0\"",
    );
    let artifact = project_dir.join("target/dummy");
    create_file(&artifact, "content");

    let quiet_scanner = default_scanner(ProjectFilter::Rust).with_quiet(true);
    let found = quiet_scanner.scan_directory(root);
    assert_eq!(found.len(), 1);
}
1932}