clean_dev_dirs/scanner.rs
1//! Directory scanning and project detection functionality.
2//!
3//! This module provides the core scanning logic that traverses directory trees
4//! to find development projects and their build artifacts. It supports parallel
5//! processing for improved performance and handles various error conditions
6//! gracefully.
7
8use std::{
9 fs,
10 path::Path,
11 sync::{Arc, Mutex},
12};
13
14use colored::Colorize;
15use indicatif::{ProgressBar, ProgressStyle};
16use rayon::prelude::*;
17use serde_json::{Value, from_str};
18use walkdir::{DirEntry, WalkDir};
19
20use crate::{
21 cli::{ProjectFilter, ScanOptions},
22 project::{BuildArtifacts, Project, ProjectType},
23};
24
/// Directory scanner for detecting development projects.
///
/// A `Scanner` walks a directory tree and recognises Rust, Node.js, Python
/// and Go projects together with the directory that holds their build
/// artifacts. Which project kinds are considered is controlled by
/// `project_filter`; verbosity and the list of path components to skip come
/// from `scan_options`.
pub struct Scanner {
    /// Configuration options for scanning behavior (this module reads the
    /// `verbose` flag and the `skip` list).
    scan_options: ScanOptions,

    /// Filter to restrict scanning to specific project types
    project_filter: ProjectFilter,
}
38
39impl Scanner {
40 /// Create a new scanner with the specified options.
41 ///
42 /// # Arguments
43 ///
44 /// * `scan_options` - Configuration for scanning behavior (threads, verbosity, etc.)
45 /// * `project_filter` - Filter to restrict scanning to specific project types
46 ///
47 /// # Returns
48 ///
49 /// A new `Scanner` instance configured with the provided options.
50 ///
51 /// # Examples
52 ///
53 /// ```
54 /// # use crate::{Scanner, ScanOptions, ProjectFilter};
55 /// let scan_options = ScanOptions {
56 /// verbose: true,
57 /// threads: 4,
58 /// skip: vec![],
59 /// };
60 ///
61 /// let scanner = Scanner::new(scan_options, ProjectFilter::All);
62 /// ```
63 #[must_use]
64 pub fn new(scan_options: ScanOptions, project_filter: ProjectFilter) -> Self {
65 Self {
66 scan_options,
67 project_filter,
68 }
69 }
70
71 /// Scan a directory tree for development projects.
72 ///
73 /// This method performs a recursive scan of the specified directory to find
74 /// development projects. It operates in two phases:
75 /// 1. Directory traversal to identify potential projects
76 /// 2. Parallel size calculation for build directories
77 ///
78 /// # Arguments
79 ///
80 /// * `root` - The root directory to start scanning from
81 ///
82 /// # Returns
83 ///
84 /// A vector of `Project` instances representing all detected projects with
85 /// non-zero build directory sizes.
86 ///
87 /// # Panics
88 ///
89 /// This method may panic if the progress bar template string is invalid,
90 /// though this should not occur under normal circumstances as the template
91 /// is hardcoded and valid.
92 ///
93 /// # Examples
94 ///
95 /// ```
96 /// # use std::path::Path;
97 /// # use crate::Scanner;
98 /// let projects = scanner.scan_directory(Path::new("/path/to/projects"));
99 /// println!("Found {} projects", projects.len());
100 /// ```
101 ///
102 /// # Performance
103 ///
104 /// This method uses parallel processing for both directory traversal and
105 /// size calculation to maximize performance on systems with multiple cores
106 /// and fast storage.
107 pub fn scan_directory(&self, root: &Path) -> Vec<Project> {
108 let errors = Arc::new(Mutex::new(Vec::<String>::new()));
109
110 // Create a progress bar
111 let progress = ProgressBar::new_spinner();
112 progress.set_style(
113 ProgressStyle::default_spinner()
114 .template("{spinner:.green} {msg}")
115 .unwrap(),
116 );
117 progress.set_message("Scanning directories...");
118
119 // Find all potential project directories
120 let potential_projects: Vec<_> = WalkDir::new(root)
121 .into_iter()
122 .filter_map(Result::ok)
123 .filter(|entry| self.should_scan_entry(entry))
124 .collect::<Vec<_>>()
125 .into_par_iter()
126 .filter_map(|entry| self.detect_project(&entry, &errors))
127 .collect();
128
129 progress.finish_with_message("✅ Directory scan complete");
130
131 // Process projects in parallel to calculate sizes
132 let projects_with_sizes: Vec<_> = potential_projects
133 .into_par_iter()
134 .filter_map(|mut project| {
135 let size = self.calculate_build_dir_size(&project.build_arts.path);
136 project.build_arts.size = size;
137
138 if size > 0 { Some(project) } else { None }
139 })
140 .collect();
141
142 // Print errors if verbose
143 if self.scan_options.verbose {
144 let errors = errors.lock().unwrap();
145 for error in errors.iter() {
146 eprintln!("{}", error.red());
147 }
148 }
149
150 projects_with_sizes
151 }
152
153 /// Calculate the total size of a build directory.
154 ///
155 /// This method recursively traverses the specified directory and sums up
156 /// the sizes of all files contained within it. It handles errors gracefully
157 /// and optionally reports them in verbose mode.
158 ///
159 /// # Arguments
160 ///
161 /// * `path` - Path to the build directory to measure
162 ///
163 /// # Returns
164 ///
165 /// The total size of all files in the directory, in bytes. Returns 0 if
166 /// the directory doesn't exist or cannot be accessed.
167 ///
168 /// # Performance
169 ///
170 /// This method can be CPU and I/O intensive for large directories with
171 /// many files. It's designed to be called in parallel for multiple
172 /// directories to maximize throughput.
173 fn calculate_build_dir_size(&self, path: &Path) -> u64 {
174 if !path.exists() {
175 return 0;
176 }
177
178 let mut total_size = 0u64;
179
180 for entry in WalkDir::new(path) {
181 match entry {
182 Ok(entry) => {
183 if entry.file_type().is_file() {
184 if let Ok(metadata) = entry.metadata() {
185 total_size += metadata.len();
186 }
187 }
188 }
189 Err(e) => {
190 if self.scan_options.verbose {
191 eprintln!("Warning: {e}");
192 }
193 }
194 }
195 }
196
197 total_size
198 }
199
200 /// Detect a Node.js project in the specified directory.
201 ///
202 /// This method checks for the presence of both `package.json` and `node_modules/`
203 /// directory to identify a Node.js project. If found, it attempts to extract
204 /// the project name from the `package.json` file.
205 ///
206 /// # Arguments
207 ///
208 /// * `path` - Directory path to check for Node.js project
209 /// * `errors` - Shared error collection for reporting parsing issues
210 ///
211 /// # Returns
212 ///
213 /// - `Some(Project)` if a valid Node.js project is detected
214 /// - `None` if the directory doesn't contain a Node.js project
215 ///
216 /// # Detection Criteria
217 ///
218 /// 1. `package.json` file exists in directory
219 /// 2. `node_modules/` subdirectory exists in directory
220 /// 3. The project name is extracted from `package.json` if possible
221 fn detect_node_project(
222 &self,
223 path: &Path,
224 errors: &Arc<Mutex<Vec<String>>>,
225 ) -> Option<Project> {
226 let package_json = path.join("package.json");
227 let node_modules = path.join("node_modules");
228
229 if package_json.exists() && node_modules.exists() {
230 let name = self.extract_node_project_name(&package_json, errors);
231
232 let build_arts = BuildArtifacts {
233 path: path.join("node_modules"),
234 size: 0, // Will be calculated later
235 };
236
237 return Some(Project::new(
238 ProjectType::Node,
239 path.to_path_buf(),
240 build_arts,
241 name,
242 ));
243 }
244
245 None
246 }
247
248 /// Detect if a directory entry represents a development project.
249 ///
250 /// This method examines a directory entry and determines if it contains
251 /// a development project based on the presence of characteristic files
252 /// and directories. It respects the project filter settings.
253 ///
254 /// # Arguments
255 ///
256 /// * `entry` - The directory entry to examine
257 /// * `errors` - Shared error collection for reporting issues
258 ///
259 /// # Returns
260 ///
261 /// - `Some(Project)` if a valid project is detected
262 /// - `None` if no project is found or the entry doesn't match filters
263 ///
264 /// # Project Detection Logic
265 ///
266 /// - **Rust projects**: Presence of both `Cargo.toml` and `target/` directory
267 /// - **Node.js projects**: Presence of both `package.json` and `node_modules/` directory
268 /// - **Python projects**: Presence of configuration files and cache directories
269 /// - **Go projects**: Presence of both `go.mod` and `vendor/` directory
270 fn detect_project(
271 &self,
272 entry: &DirEntry,
273 errors: &Arc<Mutex<Vec<String>>>,
274 ) -> Option<Project> {
275 let path = entry.path();
276
277 if !entry.file_type().is_dir() {
278 return None;
279 }
280
281 // Check for a Rust project
282 if matches!(
283 self.project_filter,
284 ProjectFilter::All | ProjectFilter::RustOnly
285 ) {
286 if let Some(project) = self.detect_rust_project(path, errors) {
287 return Some(project);
288 }
289 }
290
291 // Check for a Node.js project
292 if matches!(
293 self.project_filter,
294 ProjectFilter::All | ProjectFilter::NodeOnly
295 ) {
296 if let Some(project) = self.detect_node_project(path, errors) {
297 return Some(project);
298 }
299 }
300
301 // Check for a Python project
302 if matches!(
303 self.project_filter,
304 ProjectFilter::All | ProjectFilter::PythonOnly
305 ) {
306 if let Some(project) = self.detect_python_project(path, errors) {
307 return Some(project);
308 }
309 }
310
311 // Check for a Go project
312 if matches!(
313 self.project_filter,
314 ProjectFilter::All | ProjectFilter::GoOnly
315 ) {
316 if let Some(project) = self.detect_go_project(path, errors) {
317 return Some(project);
318 }
319 }
320
321 None
322 }
323
324 /// Detect a Rust project in the specified directory.
325 ///
326 /// This method checks for the presence of both `Cargo.toml` and `target/`
327 /// directory to identify a Rust project. If found, it attempts to extract
328 /// the project name from the `Cargo.toml` file.
329 ///
330 /// # Arguments
331 ///
332 /// * `path` - Directory path to check for a Rust project
333 /// * `errors` - Shared error collection for reporting parsing issues
334 ///
335 /// # Returns
336 ///
337 /// - `Some(Project)` if a valid Rust project is detected
338 /// - `None` if the directory doesn't contain a Rust project
339 ///
340 /// # Detection Criteria
341 ///
342 /// 1. `Cargo.toml` file exists in directory
343 /// 2. `target/` subdirectory exists in directory
344 /// 3. The project name is extracted from `Cargo.toml` if possible
345 fn detect_rust_project(
346 &self,
347 path: &Path,
348 errors: &Arc<Mutex<Vec<String>>>,
349 ) -> Option<Project> {
350 let cargo_toml = path.join("Cargo.toml");
351 let target_dir = path.join("target");
352
353 if cargo_toml.exists() && target_dir.exists() {
354 let name = self.extract_rust_project_name(&cargo_toml, errors);
355
356 let build_arts = BuildArtifacts {
357 path: path.join("target"),
358 size: 0, // Will be calculated later
359 };
360
361 return Some(Project::new(
362 ProjectType::Rust,
363 path.to_path_buf(),
364 build_arts,
365 name,
366 ));
367 }
368
369 None
370 }
371
372 /// Extract the project name from a Cargo.toml file.
373 ///
374 /// This method performs simple TOML parsing to extract the project name
375 /// from a Rust project's `Cargo.toml` file. It uses a line-by-line approach
376 /// rather than a full TOML parser for simplicity and performance.
377 ///
378 /// # Arguments
379 ///
380 /// * `cargo_toml` - Path to the Cargo.toml file
381 /// * `errors` - Shared error collection for reporting parsing issues
382 ///
383 /// # Returns
384 ///
385 /// - `Some(String)` containing the project name if successfully extracted
386 /// - `None` if the name cannot be found or parsed
387 ///
388 /// # Parsing Strategy
389 ///
390 /// The method looks for lines matching the pattern `name = "project_name"`
391 /// and extracts the quoted string value. This trivial approach handles
392 /// most common cases without requiring a full TOML parser.
393 fn extract_rust_project_name(
394 &self,
395 cargo_toml: &Path,
396 errors: &Arc<Mutex<Vec<String>>>,
397 ) -> Option<String> {
398 let content = self.read_file_content(cargo_toml, errors)?;
399 Self::parse_toml_name_field(&content)
400 }
401
402 /// Extract a quoted string value from a line.
403 fn extract_quoted_value(line: &str) -> Option<String> {
404 let start = line.find('"')?;
405 let end = line.rfind('"')?;
406
407 if start == end {
408 return None;
409 }
410
411 Some(line[start + 1..end].to_string())
412 }
413
414 /// Extract the name from a single TOML line if it contains a name field.
415 fn extract_name_from_line(line: &str) -> Option<String> {
416 if !Self::is_name_line(line) {
417 return None;
418 }
419
420 Self::extract_quoted_value(line)
421 }
422
423 /// Extract the project name from a package.json file.
424 ///
425 /// This method parses a Node.js project's `package.json` file to extract
426 /// the project name. It uses full JSON parsing to handle the file format
427 /// correctly and safely.
428 ///
429 /// # Arguments
430 ///
431 /// * `package_json` - Path to the package.json file
432 /// * `errors` - Shared error collection for reporting parsing issues
433 ///
434 /// # Returns
435 ///
436 /// - `Some(String)` containing the project name if successfully extracted
437 /// - `None` if the name cannot be found, parsed, or the file is invalid
438 ///
439 /// # Error Handling
440 ///
441 /// This method handles both file I/O errors and JSON parsing errors gracefully.
442 /// Errors are optionally reported to the shared error collection in verbose mode.
443 fn extract_node_project_name(
444 &self,
445 package_json: &Path,
446 errors: &Arc<Mutex<Vec<String>>>,
447 ) -> Option<String> {
448 match fs::read_to_string(package_json) {
449 Ok(content) => match from_str::<Value>(&content) {
450 Ok(json) => json
451 .get("name")
452 .and_then(|v| v.as_str())
453 .map(std::string::ToString::to_string),
454 Err(e) => {
455 if self.scan_options.verbose {
456 let mut errors = errors.lock().unwrap();
457 errors.push(format!("Error parsing {}: {e}", package_json.display()));
458 }
459 None
460 }
461 },
462 Err(e) => {
463 if self.scan_options.verbose {
464 let mut errors = errors.lock().unwrap();
465 errors.push(format!("Error reading {}: {e}", package_json.display()));
466 }
467 None
468 }
469 }
470 }
471
472 /// Check if a line contains a name field assignment.
473 fn is_name_line(line: &str) -> bool {
474 line.starts_with("name") && line.contains('=')
475 }
476
477 /// Log a file reading error if verbose mode is enabled.
478 fn log_file_error(
479 &self,
480 file_path: &Path,
481 error: &std::io::Error,
482 errors: &Arc<Mutex<Vec<String>>>,
483 ) {
484 if self.scan_options.verbose {
485 let mut errors = errors.lock().unwrap();
486 errors.push(format!("Error reading {}: {error}", file_path.display()));
487 }
488 }
489
490 /// Parse the name field from TOML content.
491 fn parse_toml_name_field(content: &str) -> Option<String> {
492 for line in content.lines() {
493 if let Some(name) = Self::extract_name_from_line(line.trim()) {
494 return Some(name);
495 }
496 }
497 None
498 }
499
500 /// Read the content of a file and handle errors appropriately.
501 fn read_file_content(
502 &self,
503 file_path: &Path,
504 errors: &Arc<Mutex<Vec<String>>>,
505 ) -> Option<String> {
506 match fs::read_to_string(file_path) {
507 Ok(content) => Some(content),
508 Err(e) => {
509 self.log_file_error(file_path, &e, errors);
510 None
511 }
512 }
513 }
514
515 /// Determine if a directory entry should be scanned for projects.
516 ///
517 /// This method implements the filtering logic to decide whether a directory
518 /// should be traversed during the scanning process. It applies various
519 /// exclusion rules to improve performance and avoid scanning irrelevant
520 /// directories.
521 ///
522 /// # Arguments
523 ///
524 /// * `entry` - The directory entry to evaluate
525 ///
526 /// # Returns
527 ///
528 /// - `true` if the directory should be scanned
529 /// - `false` if the directory should be skipped
530 ///
531 /// # Exclusion Rules
532 ///
533 /// The following directories are excluded from scanning:
534 /// - Directories in the user-specified skip list
535 /// - Any directory inside a `node_modules/` directory (to avoid deep nesting)
536 /// - Hidden directories (starting with `.`) except `.cargo`
537 /// - Common build/temporary directories: `target`, `build`, `dist`, `out`, etc.
538 /// - Version control directories: `.git`, `.svn`, `.hg`
539 /// - Python cache and virtual environment directories
540 /// - Temporary directories: `temp`, `tmp`
541 /// - Go vendor directory
542 /// - Python pytest cache
543 /// - Python tox environments
544 /// - Python setuptools
545 /// - Python coverage files
546 /// - Node.js modules (already handled above but added for completeness)
547 fn should_scan_entry(&self, entry: &DirEntry) -> bool {
548 let path = entry.path();
549
550 // Early return if path is in skip list
551 if self.is_path_in_skip_list(path) {
552 return false;
553 }
554
555 // Skip any directory inside a node_modules directory
556 if path
557 .ancestors()
558 .any(|ancestor| ancestor.file_name().and_then(|n| n.to_str()) == Some("node_modules"))
559 {
560 return false;
561 }
562
563 // Skip hidden directories (except .cargo for Rust)
564 if Self::is_hidden_directory_to_skip(path) {
565 return false;
566 }
567
568 // Skip common non-project directories
569 !Self::is_excluded_directory(path)
570 }
571
572 /// Check if a path is in the skip list
573 fn is_path_in_skip_list(&self, path: &Path) -> bool {
574 self.scan_options.skip.iter().any(|skip| {
575 path.components().any(|component| {
576 component
577 .as_os_str()
578 .to_str()
579 .is_some_and(|name| name == skip.to_string_lossy())
580 })
581 })
582 }
583
584 /// Check if directory is hidden and should be skipped
585 fn is_hidden_directory_to_skip(path: &Path) -> bool {
586 path.file_name()
587 .and_then(|n| n.to_str())
588 .is_some_and(|name| name.starts_with('.') && name != ".cargo")
589 }
590
591 /// Check if directory is in the excluded list
592 fn is_excluded_directory(path: &Path) -> bool {
593 let excluded_dirs = [
594 "target",
595 "build",
596 "dist",
597 "out",
598 ".git",
599 ".svn",
600 ".hg",
601 "__pycache__",
602 "venv",
603 ".venv",
604 "env",
605 ".env",
606 "temp",
607 "tmp",
608 "vendor",
609 ".pytest_cache",
610 ".tox",
611 ".eggs",
612 ".coverage",
613 "node_modules",
614 ];
615
616 path.file_name()
617 .and_then(|n| n.to_str())
618 .is_some_and(|name| excluded_dirs.contains(&name))
619 }
620
621 /// Detect a Python project in the specified directory.
622 ///
623 /// This method checks for Python configuration files and associated cache directories.
624 /// It looks for multiple build artifacts that can be cleaned.
625 ///
626 /// # Arguments
627 ///
628 /// * `path` - Directory path to check for a Python project
629 /// * `errors` - Shared error collection for reporting parsing issues
630 ///
631 /// # Returns
632 ///
633 /// - `Some(Project)` if a valid Python project is detected
634 /// - `None` if the directory doesn't contain a Python project
635 ///
636 /// # Detection Criteria
637 ///
638 /// A Python project is identified by having:
639 /// 1. At least one of: requirements.txt, setup.py, pyproject.toml, setup.cfg, Pipfile
640 /// 2. At least one of the cache/build directories: `__pycache__`, `.pytest_cache`, venv, .venv, build, dist, .eggs
641 fn detect_python_project(
642 &self,
643 path: &Path,
644 errors: &Arc<Mutex<Vec<String>>>,
645 ) -> Option<Project> {
646 let config_files = [
647 "requirements.txt",
648 "setup.py",
649 "pyproject.toml",
650 "setup.cfg",
651 "Pipfile",
652 "pipenv.lock",
653 "poetry.lock",
654 ];
655
656 let build_dirs = [
657 "__pycache__",
658 ".pytest_cache",
659 "venv",
660 ".venv",
661 "build",
662 "dist",
663 ".eggs",
664 ".tox",
665 ".coverage",
666 ];
667
668 // Check if any config file exists
669 let has_config = config_files.iter().any(|&file| path.join(file).exists());
670
671 if !has_config {
672 return None;
673 }
674
675 // Find the largest cache/build directory that exists
676 let mut largest_build_dir = None;
677 let mut largest_size = 0;
678
679 for &dir_name in &build_dirs {
680 let dir_path = path.join(dir_name);
681
682 if dir_path.exists() && dir_path.is_dir() {
683 if let Ok(size) = Self::calculate_directory_size(&dir_path) {
684 if size > largest_size {
685 largest_size = size;
686 largest_build_dir = Some(dir_path);
687 }
688 }
689 }
690 }
691
692 if let Some(build_path) = largest_build_dir {
693 let name = self.extract_python_project_name(path, errors);
694
695 let build_arts = BuildArtifacts {
696 path: build_path,
697 size: 0, // Will be calculated later
698 };
699
700 return Some(Project::new(
701 ProjectType::Python,
702 path.to_path_buf(),
703 build_arts,
704 name,
705 ));
706 }
707
708 None
709 }
710
711 /// Detect a Go project in the specified directory.
712 ///
713 /// This method checks for the presence of both `go.mod` and `vendor/`
714 /// directory to identify a Go project. If found, it attempts to extract
715 /// the project name from the `go.mod` file.
716 ///
717 /// # Arguments
718 ///
719 /// * `path` - Directory path to check for a Go project
720 /// * `errors` - Shared error collection for reporting parsing issues
721 ///
722 /// # Returns
723 ///
724 /// - `Some(Project)` if a valid Go project is detected
725 /// - `None` if the directory doesn't contain a Go project
726 ///
727 /// # Detection Criteria
728 ///
729 /// 1. `go.mod` file exists in directory
730 /// 2. `vendor/` subdirectory exists in directory
731 /// 3. The project name is extracted from `go.mod` if possible
732 fn detect_go_project(&self, path: &Path, errors: &Arc<Mutex<Vec<String>>>) -> Option<Project> {
733 let go_mod = path.join("go.mod");
734 let vendor_dir = path.join("vendor");
735
736 if go_mod.exists() && vendor_dir.exists() {
737 let name = self.extract_go_project_name(&go_mod, errors);
738
739 let build_arts = BuildArtifacts {
740 path: path.join("vendor"),
741 size: 0, // Will be calculated later
742 };
743
744 return Some(Project::new(
745 ProjectType::Go,
746 path.to_path_buf(),
747 build_arts,
748 name,
749 ));
750 }
751
752 None
753 }
754
755 /// Extract the project name from a Python project directory.
756 ///
757 /// This method attempts to extract the project name from various Python
758 /// configuration files in order of preference.
759 ///
760 /// # Arguments
761 ///
762 /// * `path` - Path to the Python project directory
763 /// * `errors` - Shared error collection for reporting parsing issues
764 ///
765 /// # Returns
766 ///
767 /// - `Some(String)` containing the project name if successfully extracted
768 /// - `None` if the name cannot be found or parsed
769 ///
770 /// # Extraction Order
771 ///
772 /// 1. pyproject.toml (from [project] name or [tool.poetry] name)
773 /// 2. setup.py (from name= parameter)
774 /// 3. setup.cfg (from [metadata] name)
775 /// 4. Use directory name as a fallback
776 fn extract_python_project_name(
777 &self,
778 path: &Path,
779 errors: &Arc<Mutex<Vec<String>>>,
780 ) -> Option<String> {
781 // Try files in order of preference
782 self.try_extract_from_pyproject_toml(path, errors)
783 .or_else(|| self.try_extract_from_setup_py(path, errors))
784 .or_else(|| self.try_extract_from_setup_cfg(path, errors))
785 .or_else(|| Self::fallback_to_directory_name(path))
786 }
787
788 /// Try to extract project name from pyproject.toml
789 fn try_extract_from_pyproject_toml(
790 &self,
791 path: &Path,
792 errors: &Arc<Mutex<Vec<String>>>,
793 ) -> Option<String> {
794 let pyproject_toml = path.join("pyproject.toml");
795 if !pyproject_toml.exists() {
796 return None;
797 }
798
799 let content = self.read_file_content(&pyproject_toml, errors)?;
800 Self::extract_name_from_toml_like_content(&content)
801 }
802
803 /// Try to extract project name from setup.py
804 fn try_extract_from_setup_py(
805 &self,
806 path: &Path,
807 errors: &Arc<Mutex<Vec<String>>>,
808 ) -> Option<String> {
809 let setup_py = path.join("setup.py");
810 if !setup_py.exists() {
811 return None;
812 }
813
814 let content = self.read_file_content(&setup_py, errors)?;
815 Self::extract_name_from_python_content(&content)
816 }
817
818 /// Try to extract project name from setup.cfg
819 fn try_extract_from_setup_cfg(
820 &self,
821 path: &Path,
822 errors: &Arc<Mutex<Vec<String>>>,
823 ) -> Option<String> {
824 let setup_cfg = path.join("setup.cfg");
825 if !setup_cfg.exists() {
826 return None;
827 }
828
829 let content = self.read_file_content(&setup_cfg, errors)?;
830 Self::extract_name_from_cfg_content(&content)
831 }
832
833 /// Extract name from TOML-like content (pyproject.toml)
834 fn extract_name_from_toml_like_content(content: &str) -> Option<String> {
835 content
836 .lines()
837 .map(str::trim)
838 .find(|line| line.starts_with("name") && line.contains('='))
839 .and_then(Self::extract_quoted_value)
840 }
841
842 /// Extract name from Python content (setup.py)
843 fn extract_name_from_python_content(content: &str) -> Option<String> {
844 content
845 .lines()
846 .map(str::trim)
847 .find(|line| line.contains("name") && line.contains('='))
848 .and_then(Self::extract_quoted_value)
849 }
850
851 /// Extract name from INI-style configuration content (setup.cfg)
852 fn extract_name_from_cfg_content(content: &str) -> Option<String> {
853 let mut in_metadata_section = false;
854
855 for line in content.lines() {
856 let line = line.trim();
857
858 if line == "[metadata]" {
859 in_metadata_section = true;
860 } else if line.starts_with('[') && line.ends_with(']') {
861 in_metadata_section = false;
862 } else if in_metadata_section && line.starts_with("name") && line.contains('=') {
863 return line.split('=').nth(1).map(|name| name.trim().to_string());
864 }
865 }
866
867 None
868 }
869
870 /// Fallback to directory name
871 fn fallback_to_directory_name(path: &Path) -> Option<String> {
872 path.file_name()
873 .and_then(|name| name.to_str())
874 .map(std::string::ToString::to_string)
875 }
876
877 /// Extract the project name from a `go.mod` file.
878 ///
879 /// This method parses a Go project's `go.mod` file to extract
880 /// the module name, which typically represents the project.
881 ///
882 /// # Arguments
883 ///
884 /// * `go_mod` - Path to the `go.mod` file
885 /// * `errors` - Shared error collection for reporting parsing issues
886 ///
887 /// # Returns
888 ///
889 /// - `Some(String)` containing the module name if successfully extracted
890 /// - `None` if the name cannot be found or parsed
891 ///
892 /// # Parsing Strategy
893 ///
894 /// The method looks for the first line starting with `module ` and extracts
895 /// the module path. For better display, it takes the last component of the path.
896 fn extract_go_project_name(
897 &self,
898 go_mod: &Path,
899 errors: &Arc<Mutex<Vec<String>>>,
900 ) -> Option<String> {
901 let content = self.read_file_content(go_mod, errors)?;
902
903 for line in content.lines() {
904 let line = line.trim();
905 if line.starts_with("module ") {
906 let module_path = line.strip_prefix("module ")?.trim();
907
908 // Take the last component of the module path for a cleaner name
909 if let Some(name) = module_path.split('/').next_back() {
910 return Some(name.to_string());
911 }
912
913 return Some(module_path.to_string());
914 }
915 }
916
917 None
918 }
919
920 /// Calculate the size of a directory recursively.
921 ///
922 /// This is a helper method used for Python projects to determine which
923 /// cache directory is the largest and should be the primary cleanup target.
924 fn calculate_directory_size(dir_path: &Path) -> std::io::Result<u64> {
925 let mut total_size = 0;
926
927 for entry in fs::read_dir(dir_path)? {
928 let entry = entry?;
929 let path = entry.path();
930 if path.is_dir() {
931 total_size += Self::calculate_directory_size(&path).unwrap_or(0);
932 } else {
933 total_size += entry.metadata()?.len();
934 }
935 }
936
937 Ok(total_size)
938 }
939}