Skip to main content

clean_dev_dirs/
scanner.rs

1//! Directory scanning and project detection functionality.
2//!
3//! This module provides the core scanning logic that traverses directory trees
4//! to find development projects and their build artifacts. It supports parallel
5//! processing for improved performance and handles various error conditions
6//! gracefully.
7
8use std::{
9    fs,
10    path::Path,
11    sync::{Arc, Mutex},
12};
13
14use colored::Colorize;
15use indicatif::{ProgressBar, ProgressStyle};
16use rayon::prelude::*;
17use serde_json::{Value, from_str};
18use walkdir::{DirEntry, WalkDir};
19
20use crate::{
21    config::{ProjectFilter, ScanOptions},
22    project::{BuildArtifacts, Project, ProjectType},
23};
24
25/// Directory scanner for detecting development projects.
26///
27/// The `Scanner` struct encapsulates the logic for traversing directory trees
28/// and identifying development projects (Rust and Node.js) along with their
29/// build artifacts. It supports configurable filtering and parallel processing
30/// for efficient scanning of large directory structures.
31pub struct Scanner {
32    /// Configuration options for scanning behavior
33    scan_options: ScanOptions,
34
35    /// Filter to restrict scanning to specific project types
36    project_filter: ProjectFilter,
37
38    /// When `true`, suppresses progress spinner output (used by `--json` mode).
39    quiet: bool,
40}
41
42impl Scanner {
43    /// Create a new scanner with the specified options.
44    ///
45    /// # Arguments
46    ///
47    /// * `scan_options` - Configuration for scanning behavior (threads, verbosity, etc.)
48    /// * `project_filter` - Filter to restrict scanning to specific project types
49    ///
50    /// # Returns
51    ///
52    /// A new `Scanner` instance configured with the provided options.
53    ///
54    /// # Examples
55    ///
56    /// ```
57    /// # use crate::{Scanner, ScanOptions, ProjectFilter};
58    /// let scan_options = ScanOptions {
59    ///     verbose: true,
60    ///     threads: 4,
61    ///     skip: vec![],
62    /// };
63    ///
64    /// let scanner = Scanner::new(scan_options, ProjectFilter::All);
65    /// ```
66    #[must_use]
67    pub const fn new(scan_options: ScanOptions, project_filter: ProjectFilter) -> Self {
68        Self {
69            scan_options,
70            project_filter,
71            quiet: false,
72        }
73    }
74
75    /// Enable or disable quiet mode (suppresses progress spinner).
76    ///
77    /// When quiet mode is active the scanning spinner is hidden, which is
78    /// required for `--json` output so that only the final JSON is printed.
79    #[must_use]
80    pub const fn with_quiet(mut self, quiet: bool) -> Self {
81        self.quiet = quiet;
82        self
83    }
84
85    /// Scan a directory tree for development projects.
86    ///
87    /// This method performs a recursive scan of the specified directory to find
88    /// development projects. It operates in two phases:
89    /// 1. Directory traversal to identify potential projects
90    /// 2. Parallel size calculation for build directories
91    ///
92    /// # Arguments
93    ///
94    /// * `root` - The root directory to start scanning from
95    ///
96    /// # Returns
97    ///
98    /// A vector of `Project` instances representing all detected projects with
99    /// non-zero build directory sizes.
100    ///
101    /// # Panics
102    ///
103    /// This method may panic if the progress bar template string is invalid,
104    /// though this should not occur under normal circumstances as the template
105    /// is hardcoded and valid.
106    ///
107    /// # Examples
108    ///
109    /// ```
110    /// # use std::path::Path;
111    /// # use crate::Scanner;
112    /// let projects = scanner.scan_directory(Path::new("/path/to/projects"));
113    /// println!("Found {} projects", projects.len());
114    /// ```
115    ///
116    /// # Performance
117    ///
118    /// This method uses parallel processing for both directory traversal and
119    /// size calculation to maximize performance on systems with multiple cores
120    /// and fast storage.
121    pub fn scan_directory(&self, root: &Path) -> Vec<Project> {
122        let errors = Arc::new(Mutex::new(Vec::<String>::new()));
123
124        let progress = if self.quiet {
125            ProgressBar::hidden()
126        } else {
127            let pb = ProgressBar::new_spinner();
128            pb.set_style(
129                ProgressStyle::default_spinner()
130                    .template("{spinner:.green} {msg}")
131                    .unwrap(),
132            );
133            pb.set_message("Scanning directories...");
134            pb
135        };
136
137        // Find all potential project directories
138        let potential_projects: Vec<_> = WalkDir::new(root)
139            .into_iter()
140            .filter_map(Result::ok)
141            .filter(|entry| self.should_scan_entry(entry))
142            .collect::<Vec<_>>()
143            .into_par_iter()
144            .filter_map(|entry| self.detect_project(&entry, &errors))
145            .collect();
146
147        progress.finish_with_message("✅ Directory scan complete");
148
149        // Process projects in parallel to calculate sizes
150        let projects_with_sizes: Vec<_> = potential_projects
151            .into_par_iter()
152            .filter_map(|mut project| {
153                if project.build_arts.size == 0 {
154                    project.build_arts.size =
155                        self.calculate_build_dir_size(&project.build_arts.path);
156                }
157
158                if project.build_arts.size > 0 {
159                    Some(project)
160                } else {
161                    None
162                }
163            })
164            .collect();
165
166        // Print errors if verbose
167        if self.scan_options.verbose {
168            let errors = errors.lock().unwrap();
169            for error in errors.iter() {
170                eprintln!("{}", error.red());
171            }
172        }
173
174        projects_with_sizes
175    }
176
177    /// Calculate the total size of a build directory.
178    ///
179    /// This method recursively traverses the specified directory and sums up
180    /// the sizes of all files contained within it. It handles errors gracefully
181    /// and optionally reports them in verbose mode.
182    ///
183    /// # Arguments
184    ///
185    /// * `path` - Path to the build directory to measure
186    ///
187    /// # Returns
188    ///
189    /// The total size of all files in the directory, in bytes. Returns 0 if
190    /// the directory doesn't exist or cannot be accessed.
191    ///
192    /// # Performance
193    ///
194    /// This method can be CPU and I/O intensive for large directories with
195    /// many files. It's designed to be called in parallel for multiple
196    /// directories to maximize throughput.
197    fn calculate_build_dir_size(&self, path: &Path) -> u64 {
198        if !path.exists() {
199            return 0;
200        }
201
202        let mut total_size = 0u64;
203
204        for entry in WalkDir::new(path) {
205            match entry {
206                Ok(entry) => {
207                    if entry.file_type().is_file()
208                        && let Ok(metadata) = entry.metadata()
209                    {
210                        total_size += metadata.len();
211                    }
212                }
213                Err(e) => {
214                    if self.scan_options.verbose {
215                        eprintln!("Warning: {e}");
216                    }
217                }
218            }
219        }
220
221        total_size
222    }
223
224    /// Detect a Node.js project in the specified directory.
225    ///
226    /// This method checks for the presence of both `package.json` and `node_modules/`
227    /// directory to identify a Node.js project. If found, it attempts to extract
228    /// the project name from the `package.json` file.
229    ///
230    /// # Arguments
231    ///
232    /// * `path` - Directory path to check for Node.js project
233    /// * `errors` - Shared error collection for reporting parsing issues
234    ///
235    /// # Returns
236    ///
237    /// - `Some(Project)` if a valid Node.js project is detected
238    /// - `None` if the directory doesn't contain a Node.js project
239    ///
240    /// # Detection Criteria
241    ///
242    /// 1. `package.json` file exists in directory
243    /// 2. `node_modules/` subdirectory exists in directory
244    /// 3. The project name is extracted from `package.json` if possible
245    fn detect_node_project(
246        &self,
247        path: &Path,
248        errors: &Arc<Mutex<Vec<String>>>,
249    ) -> Option<Project> {
250        let package_json = path.join("package.json");
251        let node_modules = path.join("node_modules");
252
253        if package_json.exists() && node_modules.exists() {
254            let name = self.extract_node_project_name(&package_json, errors);
255
256            let build_arts = BuildArtifacts {
257                path: path.join("node_modules"),
258                size: 0, // Will be calculated later
259            };
260
261            return Some(Project::new(
262                ProjectType::Node,
263                path.to_path_buf(),
264                build_arts,
265                name,
266            ));
267        }
268
269        None
270    }
271
272    /// Detect if a directory entry represents a development project.
273    ///
274    /// This method examines a directory entry and determines if it contains
275    /// a development project based on the presence of characteristic files
276    /// and directories. It respects the project filter settings.
277    ///
278    /// # Arguments
279    ///
280    /// * `entry` - The directory entry to examine
281    /// * `errors` - Shared error collection for reporting issues
282    ///
283    /// # Returns
284    ///
285    /// - `Some(Project)` if a valid project is detected
286    /// - `None` if no project is found or the entry doesn't match filters
287    ///
288    /// # Project Detection Logic
289    ///
290    /// - **Rust projects**: Presence of both `Cargo.toml` and `target/` directory
291    /// - **Node.js projects**: Presence of both `package.json` and `node_modules/` directory
292    /// - **Python projects**: Presence of configuration files and cache directories
293    /// - **Go projects**: Presence of both `go.mod` and `vendor/` directory
294    /// - **Java/Kotlin projects**: Presence of `pom.xml` or `build.gradle` with `target/` or `build/`
295    /// - **C/C++ projects**: Presence of `CMakeLists.txt` or `Makefile` with `build/`
296    /// - **Swift projects**: Presence of `Package.swift` with `.build/`
297    /// - **.NET/C# projects**: Presence of `.csproj` files with `bin/` or `obj/`
298    fn detect_project(
299        &self,
300        entry: &DirEntry,
301        errors: &Arc<Mutex<Vec<String>>>,
302    ) -> Option<Project> {
303        let path = entry.path();
304
305        if !entry.file_type().is_dir() {
306            return None;
307        }
308
309        // Detectors are tried in order; the first match wins.
310        // More specific ecosystems are checked before more generic ones
311        // (e.g. Java before C/C++, since both can use `build/`).
312        self.try_detect(ProjectFilter::Rust, || {
313            self.detect_rust_project(path, errors)
314        })
315        .or_else(|| {
316            self.try_detect(ProjectFilter::Node, || {
317                self.detect_node_project(path, errors)
318            })
319        })
320        .or_else(|| {
321            self.try_detect(ProjectFilter::Java, || {
322                self.detect_java_project(path, errors)
323            })
324        })
325        .or_else(|| {
326            self.try_detect(ProjectFilter::Swift, || {
327                self.detect_swift_project(path, errors)
328            })
329        })
330        .or_else(|| self.try_detect(ProjectFilter::DotNet, || Self::detect_dotnet_project(path)))
331        .or_else(|| {
332            self.try_detect(ProjectFilter::Python, || {
333                self.detect_python_project(path, errors)
334            })
335        })
336        .or_else(|| self.try_detect(ProjectFilter::Go, || self.detect_go_project(path, errors)))
337        .or_else(|| self.try_detect(ProjectFilter::Cpp, || self.detect_cpp_project(path, errors)))
338    }
339
340    /// Run a detector only if the current project filter allows it.
341    ///
342    /// Returns `None` immediately (without calling `detect`) when the
343    /// active filter doesn't include `filter`.
344    fn try_detect(
345        &self,
346        filter: ProjectFilter,
347        detect: impl FnOnce() -> Option<Project>,
348    ) -> Option<Project> {
349        if self.project_filter == ProjectFilter::All || self.project_filter == filter {
350            detect()
351        } else {
352            None
353        }
354    }
355
356    /// Detect a Rust project in the specified directory.
357    ///
358    /// This method checks for the presence of both `Cargo.toml` and `target/`
359    /// directory to identify a Rust project. If found, it attempts to extract
360    /// the project name from the `Cargo.toml` file.
361    ///
362    /// # Arguments
363    ///
364    /// * `path` - Directory path to check for a Rust project
365    /// * `errors` - Shared error collection for reporting parsing issues
366    ///
367    /// # Returns
368    ///
369    /// - `Some(Project)` if a valid Rust project is detected
370    /// - `None` if the directory doesn't contain a Rust project
371    ///
372    /// # Detection Criteria
373    ///
374    /// 1. `Cargo.toml` file exists in directory
375    /// 2. `target/` subdirectory exists in directory
376    /// 3. The project name is extracted from `Cargo.toml` if possible
377    fn detect_rust_project(
378        &self,
379        path: &Path,
380        errors: &Arc<Mutex<Vec<String>>>,
381    ) -> Option<Project> {
382        let cargo_toml = path.join("Cargo.toml");
383        let target_dir = path.join("target");
384
385        if cargo_toml.exists() && target_dir.exists() {
386            let name = self.extract_rust_project_name(&cargo_toml, errors);
387
388            let build_arts = BuildArtifacts {
389                path: path.join("target"),
390                size: 0, // Will be calculated later
391            };
392
393            return Some(Project::new(
394                ProjectType::Rust,
395                path.to_path_buf(),
396                build_arts,
397                name,
398            ));
399        }
400
401        None
402    }
403
404    /// Extract the project name from a Cargo.toml file.
405    ///
406    /// This method performs simple TOML parsing to extract the project name
407    /// from a Rust project's `Cargo.toml` file. It uses a line-by-line approach
408    /// rather than a full TOML parser for simplicity and performance.
409    ///
410    /// # Arguments
411    ///
412    /// * `cargo_toml` - Path to the Cargo.toml file
413    /// * `errors` - Shared error collection for reporting parsing issues
414    ///
415    /// # Returns
416    ///
417    /// - `Some(String)` containing the project name if successfully extracted
418    /// - `None` if the name cannot be found or parsed
419    ///
420    /// # Parsing Strategy
421    ///
422    /// The method looks for lines matching the pattern `name = "project_name"`
423    /// and extracts the quoted string value. This trivial approach handles
424    /// most common cases without requiring a full TOML parser.
425    fn extract_rust_project_name(
426        &self,
427        cargo_toml: &Path,
428        errors: &Arc<Mutex<Vec<String>>>,
429    ) -> Option<String> {
430        let content = self.read_file_content(cargo_toml, errors)?;
431        Self::parse_toml_name_field(&content)
432    }
433
434    /// Extract a quoted string value from a line.
435    fn extract_quoted_value(line: &str) -> Option<String> {
436        let start = line.find('"')?;
437        let end = line.rfind('"')?;
438
439        if start == end {
440            return None;
441        }
442
443        Some(line[start + 1..end].to_string())
444    }
445
446    /// Extract the name from a single TOML line if it contains a name field.
447    fn extract_name_from_line(line: &str) -> Option<String> {
448        if !Self::is_name_line(line) {
449            return None;
450        }
451
452        Self::extract_quoted_value(line)
453    }
454
455    /// Extract the project name from a package.json file.
456    ///
457    /// This method parses a Node.js project's `package.json` file to extract
458    /// the project name. It uses full JSON parsing to handle the file format
459    /// correctly and safely.
460    ///
461    /// # Arguments
462    ///
463    /// * `package_json` - Path to the package.json file
464    /// * `errors` - Shared error collection for reporting parsing issues
465    ///
466    /// # Returns
467    ///
468    /// - `Some(String)` containing the project name if successfully extracted
469    /// - `None` if the name cannot be found, parsed, or the file is invalid
470    ///
471    /// # Error Handling
472    ///
473    /// This method handles both file I/O errors and JSON parsing errors gracefully.
474    /// Errors are optionally reported to the shared error collection in verbose mode.
475    fn extract_node_project_name(
476        &self,
477        package_json: &Path,
478        errors: &Arc<Mutex<Vec<String>>>,
479    ) -> Option<String> {
480        match fs::read_to_string(package_json) {
481            Ok(content) => match from_str::<Value>(&content) {
482                Ok(json) => json
483                    .get("name")
484                    .and_then(|v| v.as_str())
485                    .map(std::string::ToString::to_string),
486                Err(e) => {
487                    if self.scan_options.verbose {
488                        errors
489                            .lock()
490                            .unwrap()
491                            .push(format!("Error parsing {}: {e}", package_json.display()));
492                    }
493                    None
494                }
495            },
496            Err(e) => {
497                if self.scan_options.verbose {
498                    errors
499                        .lock()
500                        .unwrap()
501                        .push(format!("Error reading {}: {e}", package_json.display()));
502                }
503                None
504            }
505        }
506    }
507
508    /// Check if a line contains a name field assignment.
509    fn is_name_line(line: &str) -> bool {
510        line.starts_with("name") && line.contains('=')
511    }
512
513    /// Log a file reading error if verbose mode is enabled.
514    fn log_file_error(
515        &self,
516        file_path: &Path,
517        error: &std::io::Error,
518        errors: &Arc<Mutex<Vec<String>>>,
519    ) {
520        if self.scan_options.verbose {
521            errors
522                .lock()
523                .unwrap()
524                .push(format!("Error reading {}: {error}", file_path.display()));
525        }
526    }
527
528    /// Parse the name field from TOML content.
529    fn parse_toml_name_field(content: &str) -> Option<String> {
530        for line in content.lines() {
531            if let Some(name) = Self::extract_name_from_line(line.trim()) {
532                return Some(name);
533            }
534        }
535        None
536    }
537
538    /// Read the content of a file and handle errors appropriately.
539    fn read_file_content(
540        &self,
541        file_path: &Path,
542        errors: &Arc<Mutex<Vec<String>>>,
543    ) -> Option<String> {
544        match fs::read_to_string(file_path) {
545            Ok(content) => Some(content),
546            Err(e) => {
547                self.log_file_error(file_path, &e, errors);
548                None
549            }
550        }
551    }
552
553    /// Determine if a directory entry should be scanned for projects.
554    ///
555    /// This method implements the filtering logic to decide whether a directory
556    /// should be traversed during the scanning process. It applies various
557    /// exclusion rules to improve performance and avoid scanning irrelevant
558    /// directories.
559    ///
560    /// # Arguments
561    ///
562    /// * `entry` - The directory entry to evaluate
563    ///
564    /// # Returns
565    ///
566    /// - `true` if the directory should be scanned
567    /// - `false` if the directory should be skipped
568    ///
569    /// # Exclusion Rules
570    ///
571    /// The following directories are excluded from scanning:
572    /// - Directories in the user-specified skip list
573    /// - Any directory inside a `node_modules/` directory (to avoid deep nesting)
574    /// - Hidden directories (starting with `.`) except `.cargo`
575    /// - Common build/temporary directories: `target`, `build`, `dist`, `out`, etc.
576    /// - Version control directories: `.git`, `.svn`, `.hg`
577    /// - Python cache and virtual environment directories
578    /// - Temporary directories: `temp`, `tmp`
579    /// - Go vendor directory
580    /// - Python pytest cache
581    /// - Python tox environments
582    /// - Python setuptools
583    /// - Python coverage files
584    /// - Node.js modules (already handled above but added for completeness)
585    /// - .NET `obj/` directory
586    fn should_scan_entry(&self, entry: &DirEntry) -> bool {
587        let path = entry.path();
588
589        // Early return if path is in skip list
590        if self.is_path_in_skip_list(path) {
591            return false;
592        }
593
594        // Skip any directory inside a node_modules directory
595        if path
596            .ancestors()
597            .any(|ancestor| ancestor.file_name().and_then(|n| n.to_str()) == Some("node_modules"))
598        {
599            return false;
600        }
601
602        // Skip hidden directories (except .cargo for Rust)
603        if Self::is_hidden_directory_to_skip(path) {
604            return false;
605        }
606
607        // Skip common non-project directories
608        !Self::is_excluded_directory(path)
609    }
610
611    /// Check if a path is in the skip list
612    fn is_path_in_skip_list(&self, path: &Path) -> bool {
613        self.scan_options.skip.iter().any(|skip| {
614            path.components().any(|component| {
615                component
616                    .as_os_str()
617                    .to_str()
618                    .is_some_and(|name| name == skip.to_string_lossy())
619            })
620        })
621    }
622
623    /// Check if directory is hidden and should be skipped
624    fn is_hidden_directory_to_skip(path: &Path) -> bool {
625        path.file_name()
626            .and_then(|n| n.to_str())
627            .is_some_and(|name| name.starts_with('.') && name != ".cargo")
628    }
629
630    /// Check if directory is in the excluded list
631    fn is_excluded_directory(path: &Path) -> bool {
632        let excluded_dirs = [
633            "target",
634            "build",
635            "dist",
636            "out",
637            ".git",
638            ".svn",
639            ".hg",
640            "__pycache__",
641            "venv",
642            ".venv",
643            "env",
644            ".env",
645            "temp",
646            "tmp",
647            "vendor",
648            ".pytest_cache",
649            ".tox",
650            ".eggs",
651            ".coverage",
652            "node_modules",
653            "obj",
654        ];
655
656        path.file_name()
657            .and_then(|n| n.to_str())
658            .is_some_and(|name| excluded_dirs.contains(&name))
659    }
660
661    /// Detect a Python project in the specified directory.
662    ///
663    /// This method checks for Python configuration files and associated cache directories.
664    /// It looks for multiple build artifacts that can be cleaned.
665    ///
666    /// # Arguments
667    ///
668    /// * `path` - Directory path to check for a Python project
669    /// * `errors` - Shared error collection for reporting parsing issues
670    ///
671    /// # Returns
672    ///
673    /// - `Some(Project)` if a valid Python project is detected
674    /// - `None` if the directory doesn't contain a Python project
675    ///
676    /// # Detection Criteria
677    ///
678    /// A Python project is identified by having:
679    /// 1. At least one of: requirements.txt, setup.py, pyproject.toml, setup.cfg, Pipfile
680    /// 2. At least one of the cache/build directories: `__pycache__`, `.pytest_cache`, venv, .venv, build, dist, .eggs
681    fn detect_python_project(
682        &self,
683        path: &Path,
684        errors: &Arc<Mutex<Vec<String>>>,
685    ) -> Option<Project> {
686        let config_files = [
687            "requirements.txt",
688            "setup.py",
689            "pyproject.toml",
690            "setup.cfg",
691            "Pipfile",
692            "pipenv.lock",
693            "poetry.lock",
694        ];
695
696        let build_dirs = [
697            "__pycache__",
698            ".pytest_cache",
699            "venv",
700            ".venv",
701            "build",
702            "dist",
703            ".eggs",
704            ".tox",
705            ".coverage",
706        ];
707
708        // Check if any config file exists
709        let has_config = config_files.iter().any(|&file| path.join(file).exists());
710
711        if !has_config {
712            return None;
713        }
714
715        // Find the largest cache/build directory that exists
716        let mut largest_build_dir = None;
717        let mut largest_size = 0;
718
719        for &dir_name in &build_dirs {
720            let dir_path = path.join(dir_name);
721
722            if dir_path.exists()
723                && dir_path.is_dir()
724                && let Ok(size) = Self::calculate_directory_size(&dir_path)
725                && size > largest_size
726            {
727                largest_size = size;
728                largest_build_dir = Some(dir_path);
729            }
730        }
731
732        if let Some(build_path) = largest_build_dir {
733            let name = self.extract_python_project_name(path, errors);
734
735            let build_arts = BuildArtifacts {
736                path: build_path,
737                size: largest_size,
738            };
739
740            return Some(Project::new(
741                ProjectType::Python,
742                path.to_path_buf(),
743                build_arts,
744                name,
745            ));
746        }
747
748        None
749    }
750
751    /// Detect a Go project in the specified directory.
752    ///
753    /// This method checks for the presence of both `go.mod` and `vendor/`
754    /// directory to identify a Go project. If found, it attempts to extract
755    /// the project name from the `go.mod` file.
756    ///
757    /// # Arguments
758    ///
759    /// * `path` - Directory path to check for a Go project
760    /// * `errors` - Shared error collection for reporting parsing issues
761    ///
762    /// # Returns
763    ///
764    /// - `Some(Project)` if a valid Go project is detected
765    /// - `None` if the directory doesn't contain a Go project
766    ///
767    /// # Detection Criteria
768    ///
769    /// 1. `go.mod` file exists in directory
770    /// 2. `vendor/` subdirectory exists in directory
771    /// 3. The project name is extracted from `go.mod` if possible
772    fn detect_go_project(&self, path: &Path, errors: &Arc<Mutex<Vec<String>>>) -> Option<Project> {
773        let go_mod = path.join("go.mod");
774        let vendor_dir = path.join("vendor");
775
776        if go_mod.exists() && vendor_dir.exists() {
777            let name = self.extract_go_project_name(&go_mod, errors);
778
779            let build_arts = BuildArtifacts {
780                path: path.join("vendor"),
781                size: 0, // Will be calculated later
782            };
783
784            return Some(Project::new(
785                ProjectType::Go,
786                path.to_path_buf(),
787                build_arts,
788                name,
789            ));
790        }
791
792        None
793    }
794
795    /// Extract the project name from a Python project directory.
796    ///
797    /// This method attempts to extract the project name from various Python
798    /// configuration files in order of preference.
799    ///
800    /// # Arguments
801    ///
802    /// * `path` - Path to the Python project directory
803    /// * `errors` - Shared error collection for reporting parsing issues
804    ///
805    /// # Returns
806    ///
807    /// - `Some(String)` containing the project name if successfully extracted
808    /// - `None` if the name cannot be found or parsed
809    ///
810    /// # Extraction Order
811    ///
812    /// 1. pyproject.toml (from [project] name or [tool.poetry] name)
813    /// 2. setup.py (from name= parameter)
814    /// 3. setup.cfg (from [metadata] name)
815    /// 4. Use directory name as a fallback
816    fn extract_python_project_name(
817        &self,
818        path: &Path,
819        errors: &Arc<Mutex<Vec<String>>>,
820    ) -> Option<String> {
821        // Try files in order of preference
822        self.try_extract_from_pyproject_toml(path, errors)
823            .or_else(|| self.try_extract_from_setup_py(path, errors))
824            .or_else(|| self.try_extract_from_setup_cfg(path, errors))
825            .or_else(|| Self::fallback_to_directory_name(path))
826    }
827
828    /// Try to extract project name from pyproject.toml
829    fn try_extract_from_pyproject_toml(
830        &self,
831        path: &Path,
832        errors: &Arc<Mutex<Vec<String>>>,
833    ) -> Option<String> {
834        let pyproject_toml = path.join("pyproject.toml");
835        if !pyproject_toml.exists() {
836            return None;
837        }
838
839        let content = self.read_file_content(&pyproject_toml, errors)?;
840        Self::extract_name_from_toml_like_content(&content)
841    }
842
843    /// Try to extract project name from setup.py
844    fn try_extract_from_setup_py(
845        &self,
846        path: &Path,
847        errors: &Arc<Mutex<Vec<String>>>,
848    ) -> Option<String> {
849        let setup_py = path.join("setup.py");
850        if !setup_py.exists() {
851            return None;
852        }
853
854        let content = self.read_file_content(&setup_py, errors)?;
855        Self::extract_name_from_python_content(&content)
856    }
857
858    /// Try to extract project name from setup.cfg
859    fn try_extract_from_setup_cfg(
860        &self,
861        path: &Path,
862        errors: &Arc<Mutex<Vec<String>>>,
863    ) -> Option<String> {
864        let setup_cfg = path.join("setup.cfg");
865        if !setup_cfg.exists() {
866            return None;
867        }
868
869        let content = self.read_file_content(&setup_cfg, errors)?;
870        Self::extract_name_from_cfg_content(&content)
871    }
872
873    /// Extract name from TOML-like content (pyproject.toml)
874    fn extract_name_from_toml_like_content(content: &str) -> Option<String> {
875        content
876            .lines()
877            .map(str::trim)
878            .find(|line| line.starts_with("name") && line.contains('='))
879            .and_then(Self::extract_quoted_value)
880    }
881
882    /// Extract name from Python content (setup.py)
883    fn extract_name_from_python_content(content: &str) -> Option<String> {
884        content
885            .lines()
886            .map(str::trim)
887            .find(|line| line.contains("name") && line.contains('='))
888            .and_then(Self::extract_quoted_value)
889    }
890
891    /// Extract name from INI-style configuration content (setup.cfg)
892    fn extract_name_from_cfg_content(content: &str) -> Option<String> {
893        let mut in_metadata_section = false;
894
895        for line in content.lines() {
896            let line = line.trim();
897
898            if line == "[metadata]" {
899                in_metadata_section = true;
900            } else if line.starts_with('[') && line.ends_with(']') {
901                in_metadata_section = false;
902            } else if in_metadata_section && line.starts_with("name") && line.contains('=') {
903                return line.split('=').nth(1).map(|name| name.trim().to_string());
904            }
905        }
906
907        None
908    }
909
910    /// Fallback to directory name
911    fn fallback_to_directory_name(path: &Path) -> Option<String> {
912        path.file_name()
913            .and_then(|name| name.to_str())
914            .map(std::string::ToString::to_string)
915    }
916
917    /// Extract the project name from a `go.mod` file.
918    ///
919    /// This method parses a Go project's `go.mod` file to extract
920    /// the module name, which typically represents the project.
921    ///
922    /// # Arguments
923    ///
924    /// * `go_mod` - Path to the `go.mod` file
925    /// * `errors` - Shared error collection for reporting parsing issues
926    ///
927    /// # Returns
928    ///
929    /// - `Some(String)` containing the module name if successfully extracted
930    /// - `None` if the name cannot be found or parsed
931    ///
932    /// # Parsing Strategy
933    ///
934    /// The method looks for the first line starting with `module ` and extracts
935    /// the module path. For better display, it takes the last component of the path.
936    fn extract_go_project_name(
937        &self,
938        go_mod: &Path,
939        errors: &Arc<Mutex<Vec<String>>>,
940    ) -> Option<String> {
941        let content = self.read_file_content(go_mod, errors)?;
942
943        for line in content.lines() {
944            let line = line.trim();
945            if line.starts_with("module ") {
946                let module_path = line.strip_prefix("module ")?.trim();
947
948                // Take the last component of the module path for a cleaner name
949                if let Some(name) = module_path.split('/').next_back() {
950                    return Some(name.to_string());
951                }
952
953                return Some(module_path.to_string());
954            }
955        }
956
957        None
958    }
959
960    /// Detect a Java/Kotlin project in the specified directory.
961    ///
962    /// This method checks for Maven (`pom.xml`) or Gradle (`build.gradle`,
963    /// `build.gradle.kts`) configuration files and their associated build output
964    /// directories (`target/` for Maven, `build/` for Gradle).
965    ///
966    /// # Detection Criteria
967    ///
968    /// 1. `pom.xml` + `target/` directory (Maven)
969    /// 2. `build.gradle` or `build.gradle.kts` + `build/` directory (Gradle)
970    fn detect_java_project(
971        &self,
972        path: &Path,
973        errors: &Arc<Mutex<Vec<String>>>,
974    ) -> Option<Project> {
975        let pom_xml = path.join("pom.xml");
976        let target_dir = path.join("target");
977
978        // Maven project: pom.xml + target/
979        if pom_xml.exists() && target_dir.exists() {
980            let name = self.extract_java_maven_project_name(&pom_xml, errors);
981
982            let build_arts = BuildArtifacts {
983                path: target_dir,
984                size: 0,
985            };
986
987            return Some(Project::new(
988                ProjectType::Java,
989                path.to_path_buf(),
990                build_arts,
991                name,
992            ));
993        }
994
995        // Gradle project: build.gradle(.kts) + build/
996        let has_gradle =
997            path.join("build.gradle").exists() || path.join("build.gradle.kts").exists();
998        let build_dir = path.join("build");
999
1000        if has_gradle && build_dir.exists() {
1001            let name = self.extract_java_gradle_project_name(path, errors);
1002
1003            let build_arts = BuildArtifacts {
1004                path: build_dir,
1005                size: 0,
1006            };
1007
1008            return Some(Project::new(
1009                ProjectType::Java,
1010                path.to_path_buf(),
1011                build_arts,
1012                name,
1013            ));
1014        }
1015
1016        None
1017    }
1018
1019    /// Extract the project name from a Maven `pom.xml` file.
1020    ///
1021    /// Looks for `<artifactId>` tags and extracts the text content.
1022    fn extract_java_maven_project_name(
1023        &self,
1024        pom_xml: &Path,
1025        errors: &Arc<Mutex<Vec<String>>>,
1026    ) -> Option<String> {
1027        let content = self.read_file_content(pom_xml, errors)?;
1028
1029        for line in content.lines() {
1030            let trimmed = line.trim();
1031            if trimmed.starts_with("<artifactId>") && trimmed.ends_with("</artifactId>") {
1032                let name = trimmed
1033                    .strip_prefix("<artifactId>")?
1034                    .strip_suffix("</artifactId>")?;
1035                return Some(name.to_string());
1036            }
1037        }
1038
1039        None
1040    }
1041
1042    /// Extract the project name from a Gradle project.
1043    ///
1044    /// Looks for `settings.gradle` or `settings.gradle.kts` and extracts
1045    /// the `rootProject.name` value. Falls back to directory name.
1046    fn extract_java_gradle_project_name(
1047        &self,
1048        path: &Path,
1049        errors: &Arc<Mutex<Vec<String>>>,
1050    ) -> Option<String> {
1051        for settings_file in &["settings.gradle", "settings.gradle.kts"] {
1052            let settings_path = path.join(settings_file);
1053            if settings_path.exists()
1054                && let Some(content) = self.read_file_content(&settings_path, errors)
1055            {
1056                for line in content.lines() {
1057                    let trimmed = line.trim();
1058                    if trimmed.contains("rootProject.name") && trimmed.contains('=') {
1059                        return Self::extract_quoted_value(trimmed).or_else(|| {
1060                            trimmed
1061                                .split('=')
1062                                .nth(1)
1063                                .map(|s| s.trim().trim_matches('\'').to_string())
1064                        });
1065                    }
1066                }
1067            }
1068        }
1069
1070        Self::fallback_to_directory_name(path)
1071    }
1072
1073    /// Detect a C/C++ project in the specified directory.
1074    ///
1075    /// This method checks for `CMakeLists.txt` or `Makefile` alongside a `build/`
1076    /// directory to identify C/C++ projects.
1077    ///
1078    /// # Detection Criteria
1079    ///
1080    /// 1. `CMakeLists.txt` + `build/` directory (`CMake`)
1081    /// 2. `Makefile` + `build/` directory (`Make`)
1082    fn detect_cpp_project(&self, path: &Path, errors: &Arc<Mutex<Vec<String>>>) -> Option<Project> {
1083        let build_dir = path.join("build");
1084
1085        if !build_dir.exists() {
1086            return None;
1087        }
1088
1089        let cmake_file = path.join("CMakeLists.txt");
1090        let makefile = path.join("Makefile");
1091
1092        if cmake_file.exists() || makefile.exists() {
1093            let name = if cmake_file.exists() {
1094                self.extract_cpp_cmake_project_name(&cmake_file, errors)
1095            } else {
1096                Self::fallback_to_directory_name(path)
1097            };
1098
1099            let build_arts = BuildArtifacts {
1100                path: build_dir,
1101                size: 0,
1102            };
1103
1104            return Some(Project::new(
1105                ProjectType::Cpp,
1106                path.to_path_buf(),
1107                build_arts,
1108                name,
1109            ));
1110        }
1111
1112        None
1113    }
1114
1115    /// Extract the project name from a `CMakeLists.txt` file.
1116    ///
1117    /// Looks for `project(name` patterns and extracts the project name.
1118    fn extract_cpp_cmake_project_name(
1119        &self,
1120        cmake_file: &Path,
1121        errors: &Arc<Mutex<Vec<String>>>,
1122    ) -> Option<String> {
1123        let content = self.read_file_content(cmake_file, errors)?;
1124
1125        for line in content.lines() {
1126            let trimmed = line.trim();
1127            if trimmed.starts_with("project(") || trimmed.starts_with("PROJECT(") {
1128                let inner = trimmed
1129                    .trim_start_matches("project(")
1130                    .trim_start_matches("PROJECT(")
1131                    .trim_end_matches(')')
1132                    .trim();
1133
1134                // The project name is the first word/token
1135                let name = inner.split_whitespace().next()?;
1136                // Remove possible surrounding quotes
1137                let name = name.trim_matches('"').trim_matches('\'');
1138                if !name.is_empty() {
1139                    return Some(name.to_string());
1140                }
1141            }
1142        }
1143
1144        Self::fallback_to_directory_name(cmake_file.parent()?)
1145    }
1146
1147    /// Detect a Swift project in the specified directory.
1148    ///
1149    /// This method checks for a `Package.swift` manifest and the `.build/`
1150    /// directory to identify Swift Package Manager projects.
1151    ///
1152    /// # Detection Criteria
1153    ///
1154    /// 1. `Package.swift` file exists
1155    /// 2. `.build/` directory exists
1156    fn detect_swift_project(
1157        &self,
1158        path: &Path,
1159        errors: &Arc<Mutex<Vec<String>>>,
1160    ) -> Option<Project> {
1161        let package_swift = path.join("Package.swift");
1162        let build_dir = path.join(".build");
1163
1164        if package_swift.exists() && build_dir.exists() {
1165            let name = self.extract_swift_project_name(&package_swift, errors);
1166
1167            let build_arts = BuildArtifacts {
1168                path: build_dir,
1169                size: 0,
1170            };
1171
1172            return Some(Project::new(
1173                ProjectType::Swift,
1174                path.to_path_buf(),
1175                build_arts,
1176                name,
1177            ));
1178        }
1179
1180        None
1181    }
1182
1183    /// Extract the project name from a `Package.swift` file.
1184    ///
1185    /// Looks for `name:` inside the `Package(` initializer.
1186    fn extract_swift_project_name(
1187        &self,
1188        package_swift: &Path,
1189        errors: &Arc<Mutex<Vec<String>>>,
1190    ) -> Option<String> {
1191        let content = self.read_file_content(package_swift, errors)?;
1192
1193        for line in content.lines() {
1194            let trimmed = line.trim();
1195            if trimmed.contains("name:") {
1196                return Self::extract_quoted_value(trimmed);
1197            }
1198        }
1199
1200        Self::fallback_to_directory_name(package_swift.parent()?)
1201    }
1202
1203    /// Detect a .NET/C# project in the specified directory.
1204    ///
1205    /// This method checks for `.csproj` files alongside `bin/` and/or `obj/`
1206    /// directories to identify .NET projects.
1207    ///
1208    /// # Detection Criteria
1209    ///
1210    /// 1. At least one `.csproj` file exists in the directory
1211    /// 2. At least one of `bin/` or `obj/` directories exists
1212    fn detect_dotnet_project(path: &Path) -> Option<Project> {
1213        let bin_dir = path.join("bin");
1214        let obj_dir = path.join("obj");
1215
1216        let has_build_dir = bin_dir.exists() || obj_dir.exists();
1217        if !has_build_dir {
1218            return None;
1219        }
1220
1221        let csproj_file = Self::find_file_with_extension(path, "csproj")?;
1222
1223        // Pick the larger of bin/ and obj/ as the primary build artifact
1224        let (build_path, precomputed_size) = match (bin_dir.exists(), obj_dir.exists()) {
1225            (true, true) => {
1226                let bin_size = Self::calculate_directory_size(&bin_dir).unwrap_or(0);
1227                let obj_size = Self::calculate_directory_size(&obj_dir).unwrap_or(0);
1228                if obj_size >= bin_size {
1229                    (obj_dir, obj_size)
1230                } else {
1231                    (bin_dir, bin_size)
1232                }
1233            }
1234            (true, false) => (bin_dir, 0),
1235            (false, true) => (obj_dir, 0),
1236            (false, false) => return None,
1237        };
1238
1239        let name = csproj_file
1240            .file_stem()
1241            .and_then(|s| s.to_str())
1242            .map(std::string::ToString::to_string);
1243
1244        let build_arts = BuildArtifacts {
1245            path: build_path,
1246            size: precomputed_size,
1247        };
1248
1249        Some(Project::new(
1250            ProjectType::DotNet,
1251            path.to_path_buf(),
1252            build_arts,
1253            name,
1254        ))
1255    }
1256
1257    /// Find the first file with a given extension in a directory.
1258    fn find_file_with_extension(dir: &Path, extension: &str) -> Option<std::path::PathBuf> {
1259        let entries = fs::read_dir(dir).ok()?;
1260        for entry in entries.flatten() {
1261            let path = entry.path();
1262            if path.is_file() && path.extension().and_then(|e| e.to_str()) == Some(extension) {
1263                return Some(path);
1264            }
1265        }
1266        None
1267    }
1268
1269    /// Calculate the size of a directory recursively.
1270    ///
1271    /// This is a helper method used for Python and .NET projects to determine which
1272    /// cache directory is the largest and should be the primary cleanup target.
1273    fn calculate_directory_size(dir_path: &Path) -> std::io::Result<u64> {
1274        let mut total_size = 0;
1275
1276        for entry in fs::read_dir(dir_path)? {
1277            let entry = entry?;
1278            let path = entry.path();
1279            if path.is_dir() {
1280                total_size += Self::calculate_directory_size(&path).unwrap_or(0);
1281            } else {
1282                total_size += entry.metadata()?.len();
1283            }
1284        }
1285
1286        Ok(total_size)
1287    }
1288}
1289
1290#[cfg(test)]
1291mod tests {
1292    use super::*;
1293    use std::path::PathBuf;
1294    use tempfile::TempDir;
1295
1296    /// Create a scanner with default options and the given filter.
1297    fn default_scanner(filter: ProjectFilter) -> Scanner {
1298        Scanner::new(
1299            ScanOptions {
1300                verbose: false,
1301                threads: 1,
1302                skip: vec![],
1303            },
1304            filter,
1305        )
1306    }
1307
1308    /// Helper to create a file with content, ensuring parent dirs exist.
1309    fn create_file(path: &Path, content: &str) {
1310        if let Some(parent) = path.parent() {
1311            fs::create_dir_all(parent).unwrap();
1312        }
1313        fs::write(path, content).unwrap();
1314    }
1315
1316    // ── Static helper method tests ──────────────────────────────────────
1317
1318    #[test]
1319    fn test_is_hidden_directory_to_skip() {
1320        // Hidden directories should be skipped
1321        assert!(Scanner::is_hidden_directory_to_skip(Path::new(
1322            "/some/.hidden"
1323        )));
1324        assert!(Scanner::is_hidden_directory_to_skip(Path::new(
1325            "/some/.git"
1326        )));
1327        assert!(Scanner::is_hidden_directory_to_skip(Path::new(
1328            "/some/.svn"
1329        )));
1330        assert!(Scanner::is_hidden_directory_to_skip(Path::new(".env")));
1331
1332        // .cargo is the special exception — should NOT be skipped
1333        assert!(!Scanner::is_hidden_directory_to_skip(Path::new(
1334            "/home/user/.cargo"
1335        )));
1336        assert!(!Scanner::is_hidden_directory_to_skip(Path::new(".cargo")));
1337
1338        // Non-hidden directories should not be skipped
1339        assert!(!Scanner::is_hidden_directory_to_skip(Path::new(
1340            "/some/visible"
1341        )));
1342        assert!(!Scanner::is_hidden_directory_to_skip(Path::new("src")));
1343    }
1344
1345    #[test]
1346    fn test_is_excluded_directory() {
1347        // Build/artifact directories should be excluded
1348        assert!(Scanner::is_excluded_directory(Path::new("/some/target")));
1349        assert!(Scanner::is_excluded_directory(Path::new(
1350            "/some/node_modules"
1351        )));
1352        assert!(Scanner::is_excluded_directory(Path::new(
1353            "/some/__pycache__"
1354        )));
1355        assert!(Scanner::is_excluded_directory(Path::new("/some/vendor")));
1356        assert!(Scanner::is_excluded_directory(Path::new("/some/build")));
1357        assert!(Scanner::is_excluded_directory(Path::new("/some/dist")));
1358        assert!(Scanner::is_excluded_directory(Path::new("/some/out")));
1359
1360        // VCS directories should be excluded
1361        assert!(Scanner::is_excluded_directory(Path::new("/some/.git")));
1362        assert!(Scanner::is_excluded_directory(Path::new("/some/.svn")));
1363        assert!(Scanner::is_excluded_directory(Path::new("/some/.hg")));
1364
1365        // Python-specific directories
1366        assert!(Scanner::is_excluded_directory(Path::new(
1367            "/some/.pytest_cache"
1368        )));
1369        assert!(Scanner::is_excluded_directory(Path::new("/some/.tox")));
1370        assert!(Scanner::is_excluded_directory(Path::new("/some/.eggs")));
1371        assert!(Scanner::is_excluded_directory(Path::new("/some/.coverage")));
1372
1373        // Virtual environments
1374        assert!(Scanner::is_excluded_directory(Path::new("/some/venv")));
1375        assert!(Scanner::is_excluded_directory(Path::new("/some/.venv")));
1376        assert!(Scanner::is_excluded_directory(Path::new("/some/env")));
1377        assert!(Scanner::is_excluded_directory(Path::new("/some/.env")));
1378
1379        // Temp directories
1380        assert!(Scanner::is_excluded_directory(Path::new("/some/temp")));
1381        assert!(Scanner::is_excluded_directory(Path::new("/some/tmp")));
1382
1383        // Non-excluded directories
1384        assert!(!Scanner::is_excluded_directory(Path::new("/some/src")));
1385        assert!(!Scanner::is_excluded_directory(Path::new("/some/lib")));
1386        assert!(!Scanner::is_excluded_directory(Path::new("/some/app")));
1387        assert!(!Scanner::is_excluded_directory(Path::new("/some/tests")));
1388    }
1389
1390    #[test]
1391    fn test_extract_quoted_value() {
1392        assert_eq!(
1393            Scanner::extract_quoted_value(r#"name = "my-project""#),
1394            Some("my-project".to_string())
1395        );
1396        assert_eq!(
1397            Scanner::extract_quoted_value(r#"name = "with spaces""#),
1398            Some("with spaces".to_string())
1399        );
1400        assert_eq!(Scanner::extract_quoted_value("no quotes here"), None);
1401        // Single quote mark is not a pair
1402        assert_eq!(Scanner::extract_quoted_value(r#"only "one"#), None);
1403    }
1404
1405    #[test]
1406    fn test_is_name_line() {
1407        assert!(Scanner::is_name_line("name = \"test\""));
1408        assert!(Scanner::is_name_line("name=\"test\""));
1409        assert!(!Scanner::is_name_line("version = \"1.0\""));
1410        assert!(!Scanner::is_name_line("# name = \"commented\""));
1411        assert!(!Scanner::is_name_line("name: \"yaml style\""));
1412    }
1413
1414    #[test]
1415    fn test_parse_toml_name_field() {
1416        let content = "[package]\nname = \"test-project\"\nversion = \"0.1.0\"\n";
1417        assert_eq!(
1418            Scanner::parse_toml_name_field(content),
1419            Some("test-project".to_string())
1420        );
1421
1422        let no_name = "[package]\nversion = \"0.1.0\"\n";
1423        assert_eq!(Scanner::parse_toml_name_field(no_name), None);
1424
1425        let empty = "";
1426        assert_eq!(Scanner::parse_toml_name_field(empty), None);
1427    }
1428
1429    #[test]
1430    fn test_extract_name_from_cfg_content() {
1431        let content = "[metadata]\nname = my-package\nversion = 1.0\n";
1432        assert_eq!(
1433            Scanner::extract_name_from_cfg_content(content),
1434            Some("my-package".to_string())
1435        );
1436
1437        // Name in wrong section should not be found
1438        let wrong_section = "[options]\nname = not-this\n";
1439        assert_eq!(Scanner::extract_name_from_cfg_content(wrong_section), None);
1440
1441        // Multiple sections — name must be in [metadata]
1442        let multi = "[options]\nkey = val\n\n[metadata]\nname = correct\n\n[other]\nname = wrong\n";
1443        assert_eq!(
1444            Scanner::extract_name_from_cfg_content(multi),
1445            Some("correct".to_string())
1446        );
1447    }
1448
1449    #[test]
1450    fn test_extract_name_from_python_content() {
1451        let content = "from setuptools import setup\nsetup(\n    name=\"my-pkg\",\n)\n";
1452        assert_eq!(
1453            Scanner::extract_name_from_python_content(content),
1454            Some("my-pkg".to_string())
1455        );
1456
1457        let no_name = "from setuptools import setup\nsetup(version=\"1.0\")\n";
1458        assert_eq!(Scanner::extract_name_from_python_content(no_name), None);
1459    }
1460
1461    #[test]
1462    fn test_fallback_to_directory_name() {
1463        assert_eq!(
1464            Scanner::fallback_to_directory_name(Path::new("/some/project-name")),
1465            Some("project-name".to_string())
1466        );
1467        assert_eq!(
1468            Scanner::fallback_to_directory_name(Path::new("/some/my_app")),
1469            Some("my_app".to_string())
1470        );
1471    }
1472
1473    #[test]
1474    fn test_is_path_in_skip_list() {
1475        let scanner = Scanner::new(
1476            ScanOptions {
1477                verbose: false,
1478                threads: 1,
1479                skip: vec![PathBuf::from("skip-me"), PathBuf::from("also-skip")],
1480            },
1481            ProjectFilter::All,
1482        );
1483
1484        assert!(scanner.is_path_in_skip_list(Path::new("/root/skip-me/project")));
1485        assert!(scanner.is_path_in_skip_list(Path::new("/root/also-skip")));
1486        assert!(!scanner.is_path_in_skip_list(Path::new("/root/keep-me")));
1487        assert!(!scanner.is_path_in_skip_list(Path::new("/root/src")));
1488    }
1489
1490    #[test]
1491    fn test_is_path_in_empty_skip_list() {
1492        let scanner = default_scanner(ProjectFilter::All);
1493        assert!(!scanner.is_path_in_skip_list(Path::new("/any/path")));
1494    }
1495
1496    // ── Scanning with special path characters ───────────────────────────
1497
1498    #[test]
1499    fn test_scan_directory_with_spaces_in_path() {
1500        let tmp = TempDir::new().unwrap();
1501        let base = tmp.path().join("path with spaces");
1502        fs::create_dir_all(&base).unwrap();
1503
1504        let project = base.join("my project");
1505        create_file(
1506            &project.join("Cargo.toml"),
1507            "[package]\nname = \"spaced\"\nversion = \"0.1.0\"",
1508        );
1509        create_file(&project.join("target/dummy"), "content");
1510
1511        let scanner = default_scanner(ProjectFilter::Rust);
1512        let projects = scanner.scan_directory(&base);
1513        assert_eq!(projects.len(), 1);
1514        assert_eq!(projects[0].name.as_deref(), Some("spaced"));
1515    }
1516
1517    #[test]
1518    fn test_scan_directory_with_unicode_names() {
1519        let tmp = TempDir::new().unwrap();
1520        let base = tmp.path();
1521
1522        let project = base.join("プロジェクト");
1523        create_file(
1524            &project.join("package.json"),
1525            r#"{"name": "unicode-project"}"#,
1526        );
1527        create_file(&project.join("node_modules/dep.js"), "module.exports = {};");
1528
1529        let scanner = default_scanner(ProjectFilter::Node);
1530        let projects = scanner.scan_directory(base);
1531        assert_eq!(projects.len(), 1);
1532        assert_eq!(projects[0].name.as_deref(), Some("unicode-project"));
1533    }
1534
1535    #[test]
1536    fn test_scan_directory_with_special_characters_in_name() {
1537        let tmp = TempDir::new().unwrap();
1538        let base = tmp.path();
1539
1540        let project = base.join("project-with-dashes_and_underscores.v2");
1541        create_file(
1542            &project.join("Cargo.toml"),
1543            "[package]\nname = \"special-chars\"\nversion = \"0.1.0\"",
1544        );
1545        create_file(&project.join("target/dummy"), "content");
1546
1547        let scanner = default_scanner(ProjectFilter::Rust);
1548        let projects = scanner.scan_directory(base);
1549        assert_eq!(projects.len(), 1);
1550        assert_eq!(projects[0].name.as_deref(), Some("special-chars"));
1551    }
1552
1553    // ── Unix-specific scanning tests ────────────────────────────────────
1554
1555    #[test]
1556    #[cfg(unix)]
1557    fn test_hidden_directory_itself_not_detected_as_project_unix() {
1558        let tmp = TempDir::new().unwrap();
1559        let base = tmp.path();
1560
1561        // A hidden directory with Cargo.toml + target/ directly inside it
1562        // should NOT be detected because the .hidden entry is filtered by
1563        // is_hidden_directory_to_skip. However, non-hidden children inside
1564        // hidden dirs CAN still be found because WalkDir descends into them.
1565        let hidden = base.join(".hidden-project");
1566        create_file(
1567            &hidden.join("Cargo.toml"),
1568            "[package]\nname = \"hidden\"\nversion = \"0.1.0\"",
1569        );
1570        create_file(&hidden.join("target/dummy"), "content");
1571
1572        // A visible project should be found
1573        let visible = base.join("visible-project");
1574        create_file(
1575            &visible.join("Cargo.toml"),
1576            "[package]\nname = \"visible\"\nversion = \"0.1.0\"",
1577        );
1578        create_file(&visible.join("target/dummy"), "content");
1579
1580        let scanner = default_scanner(ProjectFilter::Rust);
1581        let projects = scanner.scan_directory(base);
1582
1583        // Only the visible project should be found; the hidden one is excluded
1584        // because its directory name starts with '.'
1585        assert_eq!(projects.len(), 1);
1586        assert_eq!(projects[0].name.as_deref(), Some("visible"));
1587    }
1588
1589    #[test]
1590    #[cfg(unix)]
1591    fn test_projects_inside_hidden_dirs_are_still_traversed_unix() {
1592        let tmp = TempDir::new().unwrap();
1593        let base = tmp.path();
1594
1595        // A non-hidden project nested inside a hidden directory.
1596        // WalkDir still descends into .hidden, so the child project IS found.
1597        let nested = base.join(".hidden-parent/visible-child");
1598        create_file(
1599            &nested.join("Cargo.toml"),
1600            "[package]\nname = \"nested\"\nversion = \"0.1.0\"",
1601        );
1602        create_file(&nested.join("target/dummy"), "content");
1603
1604        let scanner = default_scanner(ProjectFilter::Rust);
1605        let projects = scanner.scan_directory(base);
1606
1607        // The child project has a non-hidden name, so it IS detected
1608        assert_eq!(projects.len(), 1);
1609        assert_eq!(projects[0].name.as_deref(), Some("nested"));
1610    }
1611
1612    #[test]
1613    #[cfg(unix)]
1614    fn test_dotcargo_directory_not_skipped_unix() {
1615        // .cargo is the exception — hidden but should NOT be skipped.
1616        // Verify via the static method.
1617        assert!(!Scanner::is_hidden_directory_to_skip(Path::new(
1618            "/home/user/.cargo"
1619        )));
1620
1621        // Other dot-dirs ARE skipped
1622        assert!(Scanner::is_hidden_directory_to_skip(Path::new(
1623            "/home/user/.local"
1624        )));
1625        assert!(Scanner::is_hidden_directory_to_skip(Path::new(
1626            "/home/user/.npm"
1627        )));
1628    }
1629
1630    // ── Python project detection tests ──────────────────────────────────
1631
1632    #[test]
1633    fn test_detect_python_with_pyproject_toml() {
1634        let tmp = TempDir::new().unwrap();
1635        let base = tmp.path();
1636
1637        let project = base.join("py-project");
1638        create_file(
1639            &project.join("pyproject.toml"),
1640            "[project]\nname = \"my-py-lib\"\nversion = \"1.0.0\"\n",
1641        );
1642        let pycache = project.join("__pycache__");
1643        fs::create_dir_all(&pycache).unwrap();
1644        create_file(&pycache.join("module.pyc"), "bytecode");
1645
1646        let scanner = default_scanner(ProjectFilter::Python);
1647        let projects = scanner.scan_directory(base);
1648        assert_eq!(projects.len(), 1);
1649        assert_eq!(projects[0].kind, ProjectType::Python);
1650    }
1651
1652    #[test]
1653    fn test_detect_python_with_setup_py() {
1654        let tmp = TempDir::new().unwrap();
1655        let base = tmp.path();
1656
1657        let project = base.join("setup-project");
1658        create_file(
1659            &project.join("setup.py"),
1660            "from setuptools import setup\nsetup(name=\"setup-lib\")\n",
1661        );
1662        let pycache = project.join("__pycache__");
1663        fs::create_dir_all(&pycache).unwrap();
1664        create_file(&pycache.join("module.pyc"), "bytecode");
1665
1666        let scanner = default_scanner(ProjectFilter::Python);
1667        let projects = scanner.scan_directory(base);
1668        assert_eq!(projects.len(), 1);
1669    }
1670
1671    #[test]
1672    fn test_detect_python_with_pipfile() {
1673        let tmp = TempDir::new().unwrap();
1674        let base = tmp.path();
1675
1676        let project = base.join("pipenv-project");
1677        create_file(
1678            &project.join("Pipfile"),
1679            "[[source]]\nurl = \"https://pypi.org/simple\"",
1680        );
1681        let pycache = project.join("__pycache__");
1682        fs::create_dir_all(&pycache).unwrap();
1683        create_file(&pycache.join("module.pyc"), "bytecode");
1684
1685        let scanner = default_scanner(ProjectFilter::Python);
1686        let projects = scanner.scan_directory(base);
1687        assert_eq!(projects.len(), 1);
1688    }
1689
1690    // ── Go project detection tests ──────────────────────────────────────
1691
/// A `go.mod` declaring `module github.com/user/my-service` next to a
/// `vendor/` directory must yield one Go project named `my-service`.
#[test]
fn test_detect_go_extracts_module_name() {
    let tmp = TempDir::new().unwrap();
    let base = tmp.path();

    let project = base.join("go-service");
    create_file(
        &project.join("go.mod"),
        "module github.com/user/my-service\n\ngo 1.21\n",
    );
    // Build artifact that accompanies the manifest in this fixture.
    let vendor = project.join("vendor");
    fs::create_dir_all(&vendor).unwrap();
    create_file(&vendor.join("modules.txt"), "vendor manifest");

    let scanner = default_scanner(ProjectFilter::Go);
    let projects = scanner.scan_directory(base);
    assert_eq!(projects.len(), 1);

    let detected = &projects[0];
    // Pin the detected type as well as the name, so a project that is
    // found but misclassified fails the test.
    assert_eq!(detected.kind, ProjectType::Go);
    // Should extract last path component as name
    assert_eq!(detected.name.as_deref(), Some("my-service"));
}
1712
1713    // ── Java/Kotlin project detection tests ────────────────────────────
1714
/// Maven layout: a `pom.xml` with an `artifactId` plus a `target/` tree is
/// expected to produce a single Java project named after that artifact.
#[test]
fn test_detect_java_maven_project() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    // Manifest.
    let maven_dir = root.join("java-maven");
    let pom = maven_dir.join("pom.xml");
    create_file(
        &pom,
        "<project>\n  <artifactId>my-java-app</artifactId>\n</project>",
    );

    // Build output.
    let class_file = maven_dir.join("target/classes/Main.class");
    create_file(&class_file, "bytecode");

    let scanner = default_scanner(ProjectFilter::Java);
    let found = scanner.scan_directory(root);
    assert_eq!(found.len(), 1);

    let detected = &found[0];
    assert_eq!(detected.kind, ProjectType::Java);
    assert_eq!(detected.name.as_deref(), Some("my-java-app"));
}
1733
/// Gradle (Groovy DSL) layout: `build.gradle`, a `settings.gradle` that sets
/// `rootProject.name`, and a `build/` tree are expected to produce a single
/// Java project carrying that root project name.
#[test]
fn test_detect_java_gradle_project() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    let gradle_dir = root.join("java-gradle");

    // Build script and settings file.
    let build_script = gradle_dir.join("build.gradle");
    create_file(&build_script, "apply plugin: 'java'");
    let settings = gradle_dir.join("settings.gradle");
    create_file(&settings, "rootProject.name = \"my-gradle-app\"");

    // Build output.
    let class_file = gradle_dir.join("build/classes/main/Main.class");
    create_file(&class_file, "bytecode");

    let scanner = default_scanner(ProjectFilter::Java);
    let found = scanner.scan_directory(root);
    assert_eq!(found.len(), 1);

    let detected = &found[0];
    assert_eq!(detected.kind, ProjectType::Java);
    assert_eq!(detected.name.as_deref(), Some("my-gradle-app"));
}
1753
/// Gradle (Kotlin DSL) layout: `build.gradle.kts`, a `settings.gradle.kts`
/// that sets `rootProject.name`, and a `build/` tree are expected to produce
/// a single project of kind `Java` carrying that root project name.
#[test]
fn test_detect_java_gradle_kts_project() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    let kotlin_dir = root.join("kotlin-gradle");

    // Build script and settings file.
    let build_script = kotlin_dir.join("build.gradle.kts");
    create_file(&build_script, "plugins { kotlin(\"jvm\") }");
    let settings = kotlin_dir.join("settings.gradle.kts");
    create_file(&settings, "rootProject.name = \"my-kotlin-app\"");

    // Build output.
    let class_file = kotlin_dir.join("build/classes/kotlin/main/MainKt.class");
    create_file(&class_file, "bytecode");

    let scanner = default_scanner(ProjectFilter::Java);
    let found = scanner.scan_directory(root);
    assert_eq!(found.len(), 1);

    let detected = &found[0];
    assert_eq!(detected.kind, ProjectType::Java);
    assert_eq!(detected.name.as_deref(), Some("my-kotlin-app"));
}
1779
1780    // ── C/C++ project detection tests ────────────────────────────────────
1781
/// CMake layout: a `CMakeLists.txt` containing `project(my-cpp-lib)` plus a
/// `build/` directory is expected to produce a single C/C++ project named
/// `my-cpp-lib`.
#[test]
fn test_detect_cpp_cmake_project() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    let cmake_dir = root.join("cpp-cmake");

    // Manifest.
    let lists_file = cmake_dir.join("CMakeLists.txt");
    create_file(
        &lists_file,
        "project(my-cpp-lib)\ncmake_minimum_required(VERSION 3.10)",
    );

    // Build output.
    let cache_file = cmake_dir.join("build/CMakeCache.txt");
    create_file(&cache_file, "cache");

    let scanner = default_scanner(ProjectFilter::Cpp);
    let found = scanner.scan_directory(root);
    assert_eq!(found.len(), 1);

    let detected = &found[0];
    assert_eq!(detected.kind, ProjectType::Cpp);
    assert_eq!(detected.name.as_deref(), Some("my-cpp-lib"));
}
1800
/// Makefile layout: a `Makefile` plus a `build/` directory is expected to
/// produce a single C/C++ project. The project name is not checked here.
#[test]
fn test_detect_cpp_makefile_project() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    let make_dir = root.join("cpp-make");

    // Manifest.
    let makefile = make_dir.join("Makefile");
    create_file(&makefile, "all:\n\tg++ -o main main.cpp");

    // Build output.
    let object_file = make_dir.join("build/main.o");
    create_file(&object_file, "object");

    let scanner = default_scanner(ProjectFilter::Cpp);
    let found = scanner.scan_directory(root);
    assert_eq!(found.len(), 1);

    let detected = &found[0];
    assert_eq!(detected.kind, ProjectType::Cpp);
}
1815
1816    // ── Swift project detection tests ────────────────────────────────────
1817
/// SwiftPM layout: a `Package.swift` declaring `name: "my-swift-lib"` plus a
/// `.build/` directory is expected to produce a single Swift project with
/// that name.
#[test]
fn test_detect_swift_project() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    let package_dir = root.join("swift-pkg");

    // Manifest.
    let manifest = package_dir.join("Package.swift");
    create_file(
        &manifest,
        "let package = Package(\n    name: \"my-swift-lib\",\n    targets: []\n)",
    );

    // Build output.
    let binary = package_dir.join(".build/debug/my-swift-lib");
    create_file(&binary, "binary");

    let scanner = default_scanner(ProjectFilter::Swift);
    let found = scanner.scan_directory(root);
    assert_eq!(found.len(), 1);

    let detected = &found[0];
    assert_eq!(detected.kind, ProjectType::Swift);
    assert_eq!(detected.name.as_deref(), Some("my-swift-lib"));
}
1836
1837    // ── .NET/C# project detection tests ──────────────────────────────────
1838
/// .NET layout with both `bin/` and `obj/` present: a `MyApp.csproj` is
/// expected to produce a single .NET project named `MyApp`.
#[test]
fn test_detect_dotnet_project() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    let app_dir = root.join("dotnet-app");

    // Manifest.
    let csproj = app_dir.join("MyApp.csproj");
    create_file(&csproj, "<Project Sdk=\"Microsoft.NET.Sdk\">\n</Project>");

    // Build output in both directories.
    let bin_dll = app_dir.join("bin/Debug/net8.0/MyApp.dll");
    create_file(&bin_dll, "assembly");
    let obj_dll = app_dir.join("obj/Debug/net8.0/MyApp.dll");
    create_file(&obj_dll, "intermediate");

    let scanner = default_scanner(ProjectFilter::DotNet);
    let found = scanner.scan_directory(root);
    assert_eq!(found.len(), 1);

    let detected = &found[0];
    assert_eq!(detected.kind, ProjectType::DotNet);
    assert_eq!(detected.name.as_deref(), Some("MyApp"));
}
1858
/// .NET layout with only `obj/` present (no `bin/`): a `Lib.csproj` is still
/// expected to produce a single .NET project named `Lib`.
#[test]
fn test_detect_dotnet_project_obj_only() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    let lib_dir = root.join("dotnet-obj-only");

    // Manifest.
    let csproj = lib_dir.join("Lib.csproj");
    create_file(&csproj, "<Project Sdk=\"Microsoft.NET.Sdk\">\n</Project>");

    // Build output under `obj/` only.
    let obj_dll = lib_dir.join("obj/Debug/net8.0/Lib.dll");
    create_file(&obj_dll, "intermediate");

    let scanner = default_scanner(ProjectFilter::DotNet);
    let found = scanner.scan_directory(root);
    assert_eq!(found.len(), 1);

    let detected = &found[0];
    assert_eq!(detected.kind, ProjectType::DotNet);
    assert_eq!(detected.name.as_deref(), Some("Lib"));
}
1877
1878    // ── Excluded directory tests ─────────────────────────────────────────
1879
/// A path whose final component is `obj` must be reported as excluded.
#[test]
fn test_obj_directory_is_excluded() {
    let obj_dir = Path::new("/some/obj");
    let excluded = Scanner::is_excluded_directory(obj_dir);
    assert!(excluded);
}
1884
1885    // ── Cross-platform calculate_build_dir_size ─────────────────────────
1886
/// An existing directory with no entries must be measured as zero bytes.
#[test]
fn test_calculate_build_dir_size_empty() {
    let workspace = TempDir::new().unwrap();
    let vacant = workspace.path().join("empty");
    fs::create_dir_all(&vacant).unwrap();

    let scanner = default_scanner(ProjectFilter::All);
    let measured = scanner.calculate_build_dir_size(&vacant);
    assert_eq!(measured, 0);
}
1896
/// A path that does not exist must be measured as zero bytes rather than
/// causing an error or panic.
#[test]
fn test_calculate_build_dir_size_nonexistent() {
    // A child of a fresh temp dir that is never created is guaranteed to be
    // missing. A hard-coded absolute path depends on the host filesystem.
    let tmp = TempDir::new().unwrap();
    let missing = tmp.path().join("does-not-exist");

    let scanner = default_scanner(ProjectFilter::All);
    assert_eq!(scanner.calculate_build_dir_size(&missing), 0);
}
1905
/// Files at several nesting depths must all count towards the reported
/// size: 5 + 6 + 1 bytes of content are expected to total 12.
#[test]
fn test_calculate_build_dir_size_with_nested_files() {
    let workspace = TempDir::new().unwrap();
    let nested_root = workspace.path().join("nested");

    // (path relative to `nested_root`, file content)
    let fixtures = [
        ("file1.txt", "hello"),      // 5 bytes
        ("sub/file2.txt", "world!"), // 6 bytes
        ("sub/deep/file3.txt", "!"), // 1 byte
    ];
    for &(relative, content) in &fixtures {
        create_file(&nested_root.join(relative), content);
    }

    let scanner = default_scanner(ProjectFilter::All);
    let measured = scanner.calculate_build_dir_size(&nested_root);
    assert_eq!(measured, 12);
}
1919
1920    // ── Quiet mode ──────────────────────────────────────────────────────
1921
/// With quiet mode enabled via `with_quiet(true)`, scanning a directory
/// holding one Rust project (a `Cargo.toml` plus `target/`) must still
/// report exactly one project.
#[test]
fn test_scanner_quiet_mode() {
    let workspace = TempDir::new().unwrap();
    let root = workspace.path();

    let crate_dir = root.join("quiet-project");

    // Manifest.
    let manifest = crate_dir.join("Cargo.toml");
    create_file(
        &manifest,
        "[package]\nname = \"quiet\"\nversion = \"0.1.0\"",
    );

    // Build output.
    let artifact = crate_dir.join("target/dummy");
    create_file(&artifact, "content");

    let quiet_scanner = default_scanner(ProjectFilter::Rust).with_quiet(true);
    let found = quiet_scanner.scan_directory(root);
    assert_eq!(found.len(), 1);
}
1938}