clean_dev_dirs/
scanner.rs

1//! Directory scanning and project detection functionality.
2//!
3//! This module provides the core scanning logic that traverses directory trees
4//! to find development projects and their build artifacts. It supports parallel
5//! processing for improved performance and handles various error conditions
6//! gracefully.
7
8use std::{
9    fs,
10    path::Path,
11    sync::{Arc, Mutex},
12};
13
14use colored::Colorize;
15use indicatif::{ProgressBar, ProgressStyle};
16use rayon::prelude::*;
17use serde_json::{Value, from_str};
18use walkdir::{DirEntry, WalkDir};
19
20use crate::{
21    config::{ProjectFilter, ScanOptions},
22    project::{BuildArtifacts, Project, ProjectType},
23};
24
/// Directory scanner for detecting development projects.
///
/// `Scanner` walks a directory tree and identifies development projects
/// (Rust, Node.js, Python and Go) together with the build artifacts that
/// belong to them. Candidate detection and build-directory size calculation
/// are run in parallel.
pub struct Scanner {
    /// Configuration options for scanning behavior; this module reads its
    /// `verbose` flag and its `skip` list
    scan_options: ScanOptions,

    /// Filter to restrict detection to specific project types
    project_filter: ProjectFilter,
}
38
39impl Scanner {
40    /// Create a new scanner with the specified options.
41    ///
42    /// # Arguments
43    ///
44    /// * `scan_options` - Configuration for scanning behavior (threads, verbosity, etc.)
45    /// * `project_filter` - Filter to restrict scanning to specific project types
46    ///
47    /// # Returns
48    ///
49    /// A new `Scanner` instance configured with the provided options.
50    ///
51    /// # Examples
52    ///
53    /// ```
54    /// # use crate::{Scanner, ScanOptions, ProjectFilter};
55    /// let scan_options = ScanOptions {
56    ///     verbose: true,
57    ///     threads: 4,
58    ///     skip: vec![],
59    /// };
60    ///
61    /// let scanner = Scanner::new(scan_options, ProjectFilter::All);
62    /// ```
63    #[must_use]
64    pub fn new(scan_options: ScanOptions, project_filter: ProjectFilter) -> Self {
65        Self {
66            scan_options,
67            project_filter,
68        }
69    }
70
71    /// Scan a directory tree for development projects.
72    ///
73    /// This method performs a recursive scan of the specified directory to find
74    /// development projects. It operates in two phases:
75    /// 1. Directory traversal to identify potential projects
76    /// 2. Parallel size calculation for build directories
77    ///
78    /// # Arguments
79    ///
80    /// * `root` - The root directory to start scanning from
81    ///
82    /// # Returns
83    ///
84    /// A vector of `Project` instances representing all detected projects with
85    /// non-zero build directory sizes.
86    ///
87    /// # Panics
88    ///
89    /// This method may panic if the progress bar template string is invalid,
90    /// though this should not occur under normal circumstances as the template
91    /// is hardcoded and valid.
92    ///
93    /// # Examples
94    ///
95    /// ```
96    /// # use std::path::Path;
97    /// # use crate::Scanner;
98    /// let projects = scanner.scan_directory(Path::new("/path/to/projects"));
99    /// println!("Found {} projects", projects.len());
100    /// ```
101    ///
102    /// # Performance
103    ///
104    /// This method uses parallel processing for both directory traversal and
105    /// size calculation to maximize performance on systems with multiple cores
106    /// and fast storage.
107    pub fn scan_directory(&self, root: &Path) -> Vec<Project> {
108        let errors = Arc::new(Mutex::new(Vec::<String>::new()));
109
110        // Create a progress bar
111        let progress = ProgressBar::new_spinner();
112        progress.set_style(
113            ProgressStyle::default_spinner()
114                .template("{spinner:.green} {msg}")
115                .unwrap(),
116        );
117        progress.set_message("Scanning directories...");
118
119        // Find all potential project directories
120        let potential_projects: Vec<_> = WalkDir::new(root)
121            .into_iter()
122            .filter_map(Result::ok)
123            .filter(|entry| self.should_scan_entry(entry))
124            .collect::<Vec<_>>()
125            .into_par_iter()
126            .filter_map(|entry| self.detect_project(&entry, &errors))
127            .collect();
128
129        progress.finish_with_message("✅ Directory scan complete");
130
131        // Process projects in parallel to calculate sizes
132        let projects_with_sizes: Vec<_> = potential_projects
133            .into_par_iter()
134            .filter_map(|mut project| {
135                let size = self.calculate_build_dir_size(&project.build_arts.path);
136                project.build_arts.size = size;
137
138                if size > 0 { Some(project) } else { None }
139            })
140            .collect();
141
142        // Print errors if verbose
143        if self.scan_options.verbose {
144            let errors = errors.lock().unwrap();
145            for error in errors.iter() {
146                eprintln!("{}", error.red());
147            }
148        }
149
150        projects_with_sizes
151    }
152
153    /// Calculate the total size of a build directory.
154    ///
155    /// This method recursively traverses the specified directory and sums up
156    /// the sizes of all files contained within it. It handles errors gracefully
157    /// and optionally reports them in verbose mode.
158    ///
159    /// # Arguments
160    ///
161    /// * `path` - Path to the build directory to measure
162    ///
163    /// # Returns
164    ///
165    /// The total size of all files in the directory, in bytes. Returns 0 if
166    /// the directory doesn't exist or cannot be accessed.
167    ///
168    /// # Performance
169    ///
170    /// This method can be CPU and I/O intensive for large directories with
171    /// many files. It's designed to be called in parallel for multiple
172    /// directories to maximize throughput.
173    fn calculate_build_dir_size(&self, path: &Path) -> u64 {
174        if !path.exists() {
175            return 0;
176        }
177
178        let mut total_size = 0u64;
179
180        for entry in WalkDir::new(path) {
181            match entry {
182                Ok(entry) => {
183                    if entry.file_type().is_file()
184                        && let Ok(metadata) = entry.metadata()
185                    {
186                        total_size += metadata.len();
187                    }
188                }
189                Err(e) => {
190                    if self.scan_options.verbose {
191                        eprintln!("Warning: {e}");
192                    }
193                }
194            }
195        }
196
197        total_size
198    }
199
200    /// Detect a Node.js project in the specified directory.
201    ///
202    /// This method checks for the presence of both `package.json` and `node_modules/`
203    /// directory to identify a Node.js project. If found, it attempts to extract
204    /// the project name from the `package.json` file.
205    ///
206    /// # Arguments
207    ///
208    /// * `path` - Directory path to check for Node.js project
209    /// * `errors` - Shared error collection for reporting parsing issues
210    ///
211    /// # Returns
212    ///
213    /// - `Some(Project)` if a valid Node.js project is detected
214    /// - `None` if the directory doesn't contain a Node.js project
215    ///
216    /// # Detection Criteria
217    ///
218    /// 1. `package.json` file exists in directory
219    /// 2. `node_modules/` subdirectory exists in directory
220    /// 3. The project name is extracted from `package.json` if possible
221    fn detect_node_project(
222        &self,
223        path: &Path,
224        errors: &Arc<Mutex<Vec<String>>>,
225    ) -> Option<Project> {
226        let package_json = path.join("package.json");
227        let node_modules = path.join("node_modules");
228
229        if package_json.exists() && node_modules.exists() {
230            let name = self.extract_node_project_name(&package_json, errors);
231
232            let build_arts = BuildArtifacts {
233                path: path.join("node_modules"),
234                size: 0, // Will be calculated later
235            };
236
237            return Some(Project::new(
238                ProjectType::Node,
239                path.to_path_buf(),
240                build_arts,
241                name,
242            ));
243        }
244
245        None
246    }
247
248    /// Detect if a directory entry represents a development project.
249    ///
250    /// This method examines a directory entry and determines if it contains
251    /// a development project based on the presence of characteristic files
252    /// and directories. It respects the project filter settings.
253    ///
254    /// # Arguments
255    ///
256    /// * `entry` - The directory entry to examine
257    /// * `errors` - Shared error collection for reporting issues
258    ///
259    /// # Returns
260    ///
261    /// - `Some(Project)` if a valid project is detected
262    /// - `None` if no project is found or the entry doesn't match filters
263    ///
264    /// # Project Detection Logic
265    ///
266    /// - **Rust projects**: Presence of both `Cargo.toml` and `target/` directory
267    /// - **Node.js projects**: Presence of both `package.json` and `node_modules/` directory
268    /// - **Python projects**: Presence of configuration files and cache directories
269    /// - **Go projects**: Presence of both `go.mod` and `vendor/` directory
270    fn detect_project(
271        &self,
272        entry: &DirEntry,
273        errors: &Arc<Mutex<Vec<String>>>,
274    ) -> Option<Project> {
275        let path = entry.path();
276
277        if !entry.file_type().is_dir() {
278            return None;
279        }
280
281        // Check for a Rust project
282        if matches!(
283            self.project_filter,
284            ProjectFilter::All | ProjectFilter::Rust
285        ) && let Some(project) = self.detect_rust_project(path, errors)
286        {
287            return Some(project);
288        }
289
290        // Check for a Node.js project
291        if matches!(
292            self.project_filter,
293            ProjectFilter::All | ProjectFilter::Node
294        ) && let Some(project) = self.detect_node_project(path, errors)
295        {
296            return Some(project);
297        }
298
299        // Check for a Python project
300        if matches!(
301            self.project_filter,
302            ProjectFilter::All | ProjectFilter::Python
303        ) && let Some(project) = self.detect_python_project(path, errors)
304        {
305            return Some(project);
306        }
307
308        // Check for a Go project
309        if matches!(self.project_filter, ProjectFilter::All | ProjectFilter::Go)
310            && let Some(project) = self.detect_go_project(path, errors)
311        {
312            return Some(project);
313        }
314
315        None
316    }
317
318    /// Detect a Rust project in the specified directory.
319    ///
320    /// This method checks for the presence of both `Cargo.toml` and `target/`
321    /// directory to identify a Rust project. If found, it attempts to extract
322    /// the project name from the `Cargo.toml` file.
323    ///
324    /// # Arguments
325    ///
326    /// * `path` - Directory path to check for a Rust project
327    /// * `errors` - Shared error collection for reporting parsing issues
328    ///
329    /// # Returns
330    ///
331    /// - `Some(Project)` if a valid Rust project is detected
332    /// - `None` if the directory doesn't contain a Rust project
333    ///
334    /// # Detection Criteria
335    ///
336    /// 1. `Cargo.toml` file exists in directory
337    /// 2. `target/` subdirectory exists in directory
338    /// 3. The project name is extracted from `Cargo.toml` if possible
339    fn detect_rust_project(
340        &self,
341        path: &Path,
342        errors: &Arc<Mutex<Vec<String>>>,
343    ) -> Option<Project> {
344        let cargo_toml = path.join("Cargo.toml");
345        let target_dir = path.join("target");
346
347        if cargo_toml.exists() && target_dir.exists() {
348            let name = self.extract_rust_project_name(&cargo_toml, errors);
349
350            let build_arts = BuildArtifacts {
351                path: path.join("target"),
352                size: 0, // Will be calculated later
353            };
354
355            return Some(Project::new(
356                ProjectType::Rust,
357                path.to_path_buf(),
358                build_arts,
359                name,
360            ));
361        }
362
363        None
364    }
365
366    /// Extract the project name from a Cargo.toml file.
367    ///
368    /// This method performs simple TOML parsing to extract the project name
369    /// from a Rust project's `Cargo.toml` file. It uses a line-by-line approach
370    /// rather than a full TOML parser for simplicity and performance.
371    ///
372    /// # Arguments
373    ///
374    /// * `cargo_toml` - Path to the Cargo.toml file
375    /// * `errors` - Shared error collection for reporting parsing issues
376    ///
377    /// # Returns
378    ///
379    /// - `Some(String)` containing the project name if successfully extracted
380    /// - `None` if the name cannot be found or parsed
381    ///
382    /// # Parsing Strategy
383    ///
384    /// The method looks for lines matching the pattern `name = "project_name"`
385    /// and extracts the quoted string value. This trivial approach handles
386    /// most common cases without requiring a full TOML parser.
387    fn extract_rust_project_name(
388        &self,
389        cargo_toml: &Path,
390        errors: &Arc<Mutex<Vec<String>>>,
391    ) -> Option<String> {
392        let content = self.read_file_content(cargo_toml, errors)?;
393        Self::parse_toml_name_field(&content)
394    }
395
396    /// Extract a quoted string value from a line.
397    fn extract_quoted_value(line: &str) -> Option<String> {
398        let start = line.find('"')?;
399        let end = line.rfind('"')?;
400
401        if start == end {
402            return None;
403        }
404
405        Some(line[start + 1..end].to_string())
406    }
407
408    /// Extract the name from a single TOML line if it contains a name field.
409    fn extract_name_from_line(line: &str) -> Option<String> {
410        if !Self::is_name_line(line) {
411            return None;
412        }
413
414        Self::extract_quoted_value(line)
415    }
416
417    /// Extract the project name from a package.json file.
418    ///
419    /// This method parses a Node.js project's `package.json` file to extract
420    /// the project name. It uses full JSON parsing to handle the file format
421    /// correctly and safely.
422    ///
423    /// # Arguments
424    ///
425    /// * `package_json` - Path to the package.json file
426    /// * `errors` - Shared error collection for reporting parsing issues
427    ///
428    /// # Returns
429    ///
430    /// - `Some(String)` containing the project name if successfully extracted
431    /// - `None` if the name cannot be found, parsed, or the file is invalid
432    ///
433    /// # Error Handling
434    ///
435    /// This method handles both file I/O errors and JSON parsing errors gracefully.
436    /// Errors are optionally reported to the shared error collection in verbose mode.
437    fn extract_node_project_name(
438        &self,
439        package_json: &Path,
440        errors: &Arc<Mutex<Vec<String>>>,
441    ) -> Option<String> {
442        match fs::read_to_string(package_json) {
443            Ok(content) => match from_str::<Value>(&content) {
444                Ok(json) => json
445                    .get("name")
446                    .and_then(|v| v.as_str())
447                    .map(std::string::ToString::to_string),
448                Err(e) => {
449                    if self.scan_options.verbose {
450                        let mut errors = errors.lock().unwrap();
451                        errors.push(format!("Error parsing {}: {e}", package_json.display()));
452                    }
453                    None
454                }
455            },
456            Err(e) => {
457                if self.scan_options.verbose {
458                    let mut errors = errors.lock().unwrap();
459                    errors.push(format!("Error reading {}: {e}", package_json.display()));
460                }
461                None
462            }
463        }
464    }
465
466    /// Check if a line contains a name field assignment.
467    fn is_name_line(line: &str) -> bool {
468        line.starts_with("name") && line.contains('=')
469    }
470
471    /// Log a file reading error if verbose mode is enabled.
472    fn log_file_error(
473        &self,
474        file_path: &Path,
475        error: &std::io::Error,
476        errors: &Arc<Mutex<Vec<String>>>,
477    ) {
478        if self.scan_options.verbose {
479            let mut errors = errors.lock().unwrap();
480            errors.push(format!("Error reading {}: {error}", file_path.display()));
481        }
482    }
483
484    /// Parse the name field from TOML content.
485    fn parse_toml_name_field(content: &str) -> Option<String> {
486        for line in content.lines() {
487            if let Some(name) = Self::extract_name_from_line(line.trim()) {
488                return Some(name);
489            }
490        }
491        None
492    }
493
494    /// Read the content of a file and handle errors appropriately.
495    fn read_file_content(
496        &self,
497        file_path: &Path,
498        errors: &Arc<Mutex<Vec<String>>>,
499    ) -> Option<String> {
500        match fs::read_to_string(file_path) {
501            Ok(content) => Some(content),
502            Err(e) => {
503                self.log_file_error(file_path, &e, errors);
504                None
505            }
506        }
507    }
508
509    /// Determine if a directory entry should be scanned for projects.
510    ///
511    /// This method implements the filtering logic to decide whether a directory
512    /// should be traversed during the scanning process. It applies various
513    /// exclusion rules to improve performance and avoid scanning irrelevant
514    /// directories.
515    ///
516    /// # Arguments
517    ///
518    /// * `entry` - The directory entry to evaluate
519    ///
520    /// # Returns
521    ///
522    /// - `true` if the directory should be scanned
523    /// - `false` if the directory should be skipped
524    ///
525    /// # Exclusion Rules
526    ///
527    /// The following directories are excluded from scanning:
528    /// - Directories in the user-specified skip list
529    /// - Any directory inside a `node_modules/` directory (to avoid deep nesting)
530    /// - Hidden directories (starting with `.`) except `.cargo`
531    /// - Common build/temporary directories: `target`, `build`, `dist`, `out`, etc.
532    /// - Version control directories: `.git`, `.svn`, `.hg`
533    /// - Python cache and virtual environment directories
534    /// - Temporary directories: `temp`, `tmp`
535    /// - Go vendor directory
536    /// - Python pytest cache
537    /// - Python tox environments
538    /// - Python setuptools
539    /// - Python coverage files
540    /// - Node.js modules (already handled above but added for completeness)
541    fn should_scan_entry(&self, entry: &DirEntry) -> bool {
542        let path = entry.path();
543
544        // Early return if path is in skip list
545        if self.is_path_in_skip_list(path) {
546            return false;
547        }
548
549        // Skip any directory inside a node_modules directory
550        if path
551            .ancestors()
552            .any(|ancestor| ancestor.file_name().and_then(|n| n.to_str()) == Some("node_modules"))
553        {
554            return false;
555        }
556
557        // Skip hidden directories (except .cargo for Rust)
558        if Self::is_hidden_directory_to_skip(path) {
559            return false;
560        }
561
562        // Skip common non-project directories
563        !Self::is_excluded_directory(path)
564    }
565
566    /// Check if a path is in the skip list
567    fn is_path_in_skip_list(&self, path: &Path) -> bool {
568        self.scan_options.skip.iter().any(|skip| {
569            path.components().any(|component| {
570                component
571                    .as_os_str()
572                    .to_str()
573                    .is_some_and(|name| name == skip.to_string_lossy())
574            })
575        })
576    }
577
578    /// Check if directory is hidden and should be skipped
579    fn is_hidden_directory_to_skip(path: &Path) -> bool {
580        path.file_name()
581            .and_then(|n| n.to_str())
582            .is_some_and(|name| name.starts_with('.') && name != ".cargo")
583    }
584
585    /// Check if directory is in the excluded list
586    fn is_excluded_directory(path: &Path) -> bool {
587        let excluded_dirs = [
588            "target",
589            "build",
590            "dist",
591            "out",
592            ".git",
593            ".svn",
594            ".hg",
595            "__pycache__",
596            "venv",
597            ".venv",
598            "env",
599            ".env",
600            "temp",
601            "tmp",
602            "vendor",
603            ".pytest_cache",
604            ".tox",
605            ".eggs",
606            ".coverage",
607            "node_modules",
608        ];
609
610        path.file_name()
611            .and_then(|n| n.to_str())
612            .is_some_and(|name| excluded_dirs.contains(&name))
613    }
614
615    /// Detect a Python project in the specified directory.
616    ///
617    /// This method checks for Python configuration files and associated cache directories.
618    /// It looks for multiple build artifacts that can be cleaned.
619    ///
620    /// # Arguments
621    ///
622    /// * `path` - Directory path to check for a Python project
623    /// * `errors` - Shared error collection for reporting parsing issues
624    ///
625    /// # Returns
626    ///
627    /// - `Some(Project)` if a valid Python project is detected
628    /// - `None` if the directory doesn't contain a Python project
629    ///
630    /// # Detection Criteria
631    ///
632    /// A Python project is identified by having:
633    /// 1. At least one of: requirements.txt, setup.py, pyproject.toml, setup.cfg, Pipfile
634    /// 2. At least one of the cache/build directories: `__pycache__`, `.pytest_cache`, venv, .venv, build, dist, .eggs
635    fn detect_python_project(
636        &self,
637        path: &Path,
638        errors: &Arc<Mutex<Vec<String>>>,
639    ) -> Option<Project> {
640        let config_files = [
641            "requirements.txt",
642            "setup.py",
643            "pyproject.toml",
644            "setup.cfg",
645            "Pipfile",
646            "pipenv.lock",
647            "poetry.lock",
648        ];
649
650        let build_dirs = [
651            "__pycache__",
652            ".pytest_cache",
653            "venv",
654            ".venv",
655            "build",
656            "dist",
657            ".eggs",
658            ".tox",
659            ".coverage",
660        ];
661
662        // Check if any config file exists
663        let has_config = config_files.iter().any(|&file| path.join(file).exists());
664
665        if !has_config {
666            return None;
667        }
668
669        // Find the largest cache/build directory that exists
670        let mut largest_build_dir = None;
671        let mut largest_size = 0;
672
673        for &dir_name in &build_dirs {
674            let dir_path = path.join(dir_name);
675
676            if dir_path.exists()
677                && dir_path.is_dir()
678                && let Ok(size) = Self::calculate_directory_size(&dir_path)
679                && size > largest_size
680            {
681                largest_size = size;
682                largest_build_dir = Some(dir_path);
683            }
684        }
685
686        if let Some(build_path) = largest_build_dir {
687            let name = self.extract_python_project_name(path, errors);
688
689            let build_arts = BuildArtifacts {
690                path: build_path,
691                size: 0, // Will be calculated later
692            };
693
694            return Some(Project::new(
695                ProjectType::Python,
696                path.to_path_buf(),
697                build_arts,
698                name,
699            ));
700        }
701
702        None
703    }
704
705    /// Detect a Go project in the specified directory.
706    ///
707    /// This method checks for the presence of both `go.mod` and `vendor/`
708    /// directory to identify a Go project. If found, it attempts to extract
709    /// the project name from the `go.mod` file.
710    ///
711    /// # Arguments
712    ///
713    /// * `path` - Directory path to check for a Go project
714    /// * `errors` - Shared error collection for reporting parsing issues
715    ///
716    /// # Returns
717    ///
718    /// - `Some(Project)` if a valid Go project is detected
719    /// - `None` if the directory doesn't contain a Go project
720    ///
721    /// # Detection Criteria
722    ///
723    /// 1. `go.mod` file exists in directory
724    /// 2. `vendor/` subdirectory exists in directory
725    /// 3. The project name is extracted from `go.mod` if possible
726    fn detect_go_project(&self, path: &Path, errors: &Arc<Mutex<Vec<String>>>) -> Option<Project> {
727        let go_mod = path.join("go.mod");
728        let vendor_dir = path.join("vendor");
729
730        if go_mod.exists() && vendor_dir.exists() {
731            let name = self.extract_go_project_name(&go_mod, errors);
732
733            let build_arts = BuildArtifacts {
734                path: path.join("vendor"),
735                size: 0, // Will be calculated later
736            };
737
738            return Some(Project::new(
739                ProjectType::Go,
740                path.to_path_buf(),
741                build_arts,
742                name,
743            ));
744        }
745
746        None
747    }
748
749    /// Extract the project name from a Python project directory.
750    ///
751    /// This method attempts to extract the project name from various Python
752    /// configuration files in order of preference.
753    ///
754    /// # Arguments
755    ///
756    /// * `path` - Path to the Python project directory
757    /// * `errors` - Shared error collection for reporting parsing issues
758    ///
759    /// # Returns
760    ///
761    /// - `Some(String)` containing the project name if successfully extracted
762    /// - `None` if the name cannot be found or parsed
763    ///
764    /// # Extraction Order
765    ///
766    /// 1. pyproject.toml (from [project] name or [tool.poetry] name)
767    /// 2. setup.py (from name= parameter)
768    /// 3. setup.cfg (from [metadata] name)
769    /// 4. Use directory name as a fallback
770    fn extract_python_project_name(
771        &self,
772        path: &Path,
773        errors: &Arc<Mutex<Vec<String>>>,
774    ) -> Option<String> {
775        // Try files in order of preference
776        self.try_extract_from_pyproject_toml(path, errors)
777            .or_else(|| self.try_extract_from_setup_py(path, errors))
778            .or_else(|| self.try_extract_from_setup_cfg(path, errors))
779            .or_else(|| Self::fallback_to_directory_name(path))
780    }
781
782    /// Try to extract project name from pyproject.toml
783    fn try_extract_from_pyproject_toml(
784        &self,
785        path: &Path,
786        errors: &Arc<Mutex<Vec<String>>>,
787    ) -> Option<String> {
788        let pyproject_toml = path.join("pyproject.toml");
789        if !pyproject_toml.exists() {
790            return None;
791        }
792
793        let content = self.read_file_content(&pyproject_toml, errors)?;
794        Self::extract_name_from_toml_like_content(&content)
795    }
796
797    /// Try to extract project name from setup.py
798    fn try_extract_from_setup_py(
799        &self,
800        path: &Path,
801        errors: &Arc<Mutex<Vec<String>>>,
802    ) -> Option<String> {
803        let setup_py = path.join("setup.py");
804        if !setup_py.exists() {
805            return None;
806        }
807
808        let content = self.read_file_content(&setup_py, errors)?;
809        Self::extract_name_from_python_content(&content)
810    }
811
812    /// Try to extract project name from setup.cfg
813    fn try_extract_from_setup_cfg(
814        &self,
815        path: &Path,
816        errors: &Arc<Mutex<Vec<String>>>,
817    ) -> Option<String> {
818        let setup_cfg = path.join("setup.cfg");
819        if !setup_cfg.exists() {
820            return None;
821        }
822
823        let content = self.read_file_content(&setup_cfg, errors)?;
824        Self::extract_name_from_cfg_content(&content)
825    }
826
827    /// Extract name from TOML-like content (pyproject.toml)
828    fn extract_name_from_toml_like_content(content: &str) -> Option<String> {
829        content
830            .lines()
831            .map(str::trim)
832            .find(|line| line.starts_with("name") && line.contains('='))
833            .and_then(Self::extract_quoted_value)
834    }
835
836    /// Extract name from Python content (setup.py)
837    fn extract_name_from_python_content(content: &str) -> Option<String> {
838        content
839            .lines()
840            .map(str::trim)
841            .find(|line| line.contains("name") && line.contains('='))
842            .and_then(Self::extract_quoted_value)
843    }
844
845    /// Extract name from INI-style configuration content (setup.cfg)
846    fn extract_name_from_cfg_content(content: &str) -> Option<String> {
847        let mut in_metadata_section = false;
848
849        for line in content.lines() {
850            let line = line.trim();
851
852            if line == "[metadata]" {
853                in_metadata_section = true;
854            } else if line.starts_with('[') && line.ends_with(']') {
855                in_metadata_section = false;
856            } else if in_metadata_section && line.starts_with("name") && line.contains('=') {
857                return line.split('=').nth(1).map(|name| name.trim().to_string());
858            }
859        }
860
861        None
862    }
863
864    /// Fallback to directory name
865    fn fallback_to_directory_name(path: &Path) -> Option<String> {
866        path.file_name()
867            .and_then(|name| name.to_str())
868            .map(std::string::ToString::to_string)
869    }
870
871    /// Extract the project name from a `go.mod` file.
872    ///
873    /// This method parses a Go project's `go.mod` file to extract
874    /// the module name, which typically represents the project.
875    ///
876    /// # Arguments
877    ///
878    /// * `go_mod` - Path to the `go.mod` file
879    /// * `errors` - Shared error collection for reporting parsing issues
880    ///
881    /// # Returns
882    ///
883    /// - `Some(String)` containing the module name if successfully extracted
884    /// - `None` if the name cannot be found or parsed
885    ///
886    /// # Parsing Strategy
887    ///
888    /// The method looks for the first line starting with `module ` and extracts
889    /// the module path. For better display, it takes the last component of the path.
890    fn extract_go_project_name(
891        &self,
892        go_mod: &Path,
893        errors: &Arc<Mutex<Vec<String>>>,
894    ) -> Option<String> {
895        let content = self.read_file_content(go_mod, errors)?;
896
897        for line in content.lines() {
898            let line = line.trim();
899            if line.starts_with("module ") {
900                let module_path = line.strip_prefix("module ")?.trim();
901
902                // Take the last component of the module path for a cleaner name
903                if let Some(name) = module_path.split('/').next_back() {
904                    return Some(name.to_string());
905                }
906
907                return Some(module_path.to_string());
908            }
909        }
910
911        None
912    }
913
914    /// Calculate the size of a directory recursively.
915    ///
916    /// This is a helper method used for Python projects to determine which
917    /// cache directory is the largest and should be the primary cleanup target.
918    fn calculate_directory_size(dir_path: &Path) -> std::io::Result<u64> {
919        let mut total_size = 0;
920
921        for entry in fs::read_dir(dir_path)? {
922            let entry = entry?;
923            let path = entry.path();
924            if path.is_dir() {
925                total_size += Self::calculate_directory_size(&path).unwrap_or(0);
926            } else {
927                total_size += entry.metadata()?.len();
928            }
929        }
930
931        Ok(total_size)
932    }
933}