clean_dev_dirs/scanner.rs
1//! Directory scanning and project detection functionality.
2//!
3//! This module provides the core scanning logic that traverses directory trees
4//! to find development projects and their build artifacts. It supports parallel
5//! processing for improved performance and handles various error conditions
6//! gracefully.
7
8use std::{
9 fs,
10 path::Path,
11 sync::{Arc, Mutex},
12};
13
14use colored::Colorize;
15use indicatif::{ProgressBar, ProgressStyle};
16use rayon::prelude::*;
17use serde_json::{Value, from_str};
18use walkdir::{DirEntry, WalkDir};
19
20use crate::{
21 config::{ProjectFilter, ScanOptions},
22 project::{BuildArtifacts, Project, ProjectType},
23};
24
25/// Directory scanner for detecting development projects.
26///
27/// The `Scanner` struct encapsulates the logic for traversing directory trees
28/// and identifying development projects (Rust and Node.js) along with their
29/// build artifacts. It supports configurable filtering and parallel processing
30/// for efficient scanning of large directory structures.
31pub struct Scanner {
32 /// Configuration options for scanning behavior
33 scan_options: ScanOptions,
34
35 /// Filter to restrict scanning to specific project types
36 project_filter: ProjectFilter,
37}
38
39impl Scanner {
40 /// Create a new scanner with the specified options.
41 ///
42 /// # Arguments
43 ///
44 /// * `scan_options` - Configuration for scanning behavior (threads, verbosity, etc.)
45 /// * `project_filter` - Filter to restrict scanning to specific project types
46 ///
47 /// # Returns
48 ///
49 /// A new `Scanner` instance configured with the provided options.
50 ///
51 /// # Examples
52 ///
53 /// ```
54 /// # use crate::{Scanner, ScanOptions, ProjectFilter};
55 /// let scan_options = ScanOptions {
56 /// verbose: true,
57 /// threads: 4,
58 /// skip: vec![],
59 /// };
60 ///
61 /// let scanner = Scanner::new(scan_options, ProjectFilter::All);
62 /// ```
63 #[must_use]
64 pub fn new(scan_options: ScanOptions, project_filter: ProjectFilter) -> Self {
65 Self {
66 scan_options,
67 project_filter,
68 }
69 }
70
71 /// Scan a directory tree for development projects.
72 ///
73 /// This method performs a recursive scan of the specified directory to find
74 /// development projects. It operates in two phases:
75 /// 1. Directory traversal to identify potential projects
76 /// 2. Parallel size calculation for build directories
77 ///
78 /// # Arguments
79 ///
80 /// * `root` - The root directory to start scanning from
81 ///
82 /// # Returns
83 ///
84 /// A vector of `Project` instances representing all detected projects with
85 /// non-zero build directory sizes.
86 ///
87 /// # Panics
88 ///
89 /// This method may panic if the progress bar template string is invalid,
90 /// though this should not occur under normal circumstances as the template
91 /// is hardcoded and valid.
92 ///
93 /// # Examples
94 ///
95 /// ```
96 /// # use std::path::Path;
97 /// # use crate::Scanner;
98 /// let projects = scanner.scan_directory(Path::new("/path/to/projects"));
99 /// println!("Found {} projects", projects.len());
100 /// ```
101 ///
102 /// # Performance
103 ///
104 /// This method uses parallel processing for both directory traversal and
105 /// size calculation to maximize performance on systems with multiple cores
106 /// and fast storage.
107 pub fn scan_directory(&self, root: &Path) -> Vec<Project> {
108 let errors = Arc::new(Mutex::new(Vec::<String>::new()));
109
110 // Create a progress bar
111 let progress = ProgressBar::new_spinner();
112 progress.set_style(
113 ProgressStyle::default_spinner()
114 .template("{spinner:.green} {msg}")
115 .unwrap(),
116 );
117 progress.set_message("Scanning directories...");
118
119 // Find all potential project directories
120 let potential_projects: Vec<_> = WalkDir::new(root)
121 .into_iter()
122 .filter_map(Result::ok)
123 .filter(|entry| self.should_scan_entry(entry))
124 .collect::<Vec<_>>()
125 .into_par_iter()
126 .filter_map(|entry| self.detect_project(&entry, &errors))
127 .collect();
128
129 progress.finish_with_message("✅ Directory scan complete");
130
131 // Process projects in parallel to calculate sizes
132 let projects_with_sizes: Vec<_> = potential_projects
133 .into_par_iter()
134 .filter_map(|mut project| {
135 let size = self.calculate_build_dir_size(&project.build_arts.path);
136 project.build_arts.size = size;
137
138 if size > 0 { Some(project) } else { None }
139 })
140 .collect();
141
142 // Print errors if verbose
143 if self.scan_options.verbose {
144 let errors = errors.lock().unwrap();
145 for error in errors.iter() {
146 eprintln!("{}", error.red());
147 }
148 }
149
150 projects_with_sizes
151 }
152
153 /// Calculate the total size of a build directory.
154 ///
155 /// This method recursively traverses the specified directory and sums up
156 /// the sizes of all files contained within it. It handles errors gracefully
157 /// and optionally reports them in verbose mode.
158 ///
159 /// # Arguments
160 ///
161 /// * `path` - Path to the build directory to measure
162 ///
163 /// # Returns
164 ///
165 /// The total size of all files in the directory, in bytes. Returns 0 if
166 /// the directory doesn't exist or cannot be accessed.
167 ///
168 /// # Performance
169 ///
170 /// This method can be CPU and I/O intensive for large directories with
171 /// many files. It's designed to be called in parallel for multiple
172 /// directories to maximize throughput.
173 fn calculate_build_dir_size(&self, path: &Path) -> u64 {
174 if !path.exists() {
175 return 0;
176 }
177
178 let mut total_size = 0u64;
179
180 for entry in WalkDir::new(path) {
181 match entry {
182 Ok(entry) => {
183 if entry.file_type().is_file()
184 && let Ok(metadata) = entry.metadata()
185 {
186 total_size += metadata.len();
187 }
188 }
189 Err(e) => {
190 if self.scan_options.verbose {
191 eprintln!("Warning: {e}");
192 }
193 }
194 }
195 }
196
197 total_size
198 }
199
200 /// Detect a Node.js project in the specified directory.
201 ///
202 /// This method checks for the presence of both `package.json` and `node_modules/`
203 /// directory to identify a Node.js project. If found, it attempts to extract
204 /// the project name from the `package.json` file.
205 ///
206 /// # Arguments
207 ///
208 /// * `path` - Directory path to check for Node.js project
209 /// * `errors` - Shared error collection for reporting parsing issues
210 ///
211 /// # Returns
212 ///
213 /// - `Some(Project)` if a valid Node.js project is detected
214 /// - `None` if the directory doesn't contain a Node.js project
215 ///
216 /// # Detection Criteria
217 ///
218 /// 1. `package.json` file exists in directory
219 /// 2. `node_modules/` subdirectory exists in directory
220 /// 3. The project name is extracted from `package.json` if possible
221 fn detect_node_project(
222 &self,
223 path: &Path,
224 errors: &Arc<Mutex<Vec<String>>>,
225 ) -> Option<Project> {
226 let package_json = path.join("package.json");
227 let node_modules = path.join("node_modules");
228
229 if package_json.exists() && node_modules.exists() {
230 let name = self.extract_node_project_name(&package_json, errors);
231
232 let build_arts = BuildArtifacts {
233 path: path.join("node_modules"),
234 size: 0, // Will be calculated later
235 };
236
237 return Some(Project::new(
238 ProjectType::Node,
239 path.to_path_buf(),
240 build_arts,
241 name,
242 ));
243 }
244
245 None
246 }
247
248 /// Detect if a directory entry represents a development project.
249 ///
250 /// This method examines a directory entry and determines if it contains
251 /// a development project based on the presence of characteristic files
252 /// and directories. It respects the project filter settings.
253 ///
254 /// # Arguments
255 ///
256 /// * `entry` - The directory entry to examine
257 /// * `errors` - Shared error collection for reporting issues
258 ///
259 /// # Returns
260 ///
261 /// - `Some(Project)` if a valid project is detected
262 /// - `None` if no project is found or the entry doesn't match filters
263 ///
264 /// # Project Detection Logic
265 ///
266 /// - **Rust projects**: Presence of both `Cargo.toml` and `target/` directory
267 /// - **Node.js projects**: Presence of both `package.json` and `node_modules/` directory
268 /// - **Python projects**: Presence of configuration files and cache directories
269 /// - **Go projects**: Presence of both `go.mod` and `vendor/` directory
270 fn detect_project(
271 &self,
272 entry: &DirEntry,
273 errors: &Arc<Mutex<Vec<String>>>,
274 ) -> Option<Project> {
275 let path = entry.path();
276
277 if !entry.file_type().is_dir() {
278 return None;
279 }
280
281 // Check for a Rust project
282 if matches!(
283 self.project_filter,
284 ProjectFilter::All | ProjectFilter::Rust
285 ) && let Some(project) = self.detect_rust_project(path, errors)
286 {
287 return Some(project);
288 }
289
290 // Check for a Node.js project
291 if matches!(
292 self.project_filter,
293 ProjectFilter::All | ProjectFilter::Node
294 ) && let Some(project) = self.detect_node_project(path, errors)
295 {
296 return Some(project);
297 }
298
299 // Check for a Python project
300 if matches!(
301 self.project_filter,
302 ProjectFilter::All | ProjectFilter::Python
303 ) && let Some(project) = self.detect_python_project(path, errors)
304 {
305 return Some(project);
306 }
307
308 // Check for a Go project
309 if matches!(self.project_filter, ProjectFilter::All | ProjectFilter::Go)
310 && let Some(project) = self.detect_go_project(path, errors)
311 {
312 return Some(project);
313 }
314
315 None
316 }
317
318 /// Detect a Rust project in the specified directory.
319 ///
320 /// This method checks for the presence of both `Cargo.toml` and `target/`
321 /// directory to identify a Rust project. If found, it attempts to extract
322 /// the project name from the `Cargo.toml` file.
323 ///
324 /// # Arguments
325 ///
326 /// * `path` - Directory path to check for a Rust project
327 /// * `errors` - Shared error collection for reporting parsing issues
328 ///
329 /// # Returns
330 ///
331 /// - `Some(Project)` if a valid Rust project is detected
332 /// - `None` if the directory doesn't contain a Rust project
333 ///
334 /// # Detection Criteria
335 ///
336 /// 1. `Cargo.toml` file exists in directory
337 /// 2. `target/` subdirectory exists in directory
338 /// 3. The project name is extracted from `Cargo.toml` if possible
339 fn detect_rust_project(
340 &self,
341 path: &Path,
342 errors: &Arc<Mutex<Vec<String>>>,
343 ) -> Option<Project> {
344 let cargo_toml = path.join("Cargo.toml");
345 let target_dir = path.join("target");
346
347 if cargo_toml.exists() && target_dir.exists() {
348 let name = self.extract_rust_project_name(&cargo_toml, errors);
349
350 let build_arts = BuildArtifacts {
351 path: path.join("target"),
352 size: 0, // Will be calculated later
353 };
354
355 return Some(Project::new(
356 ProjectType::Rust,
357 path.to_path_buf(),
358 build_arts,
359 name,
360 ));
361 }
362
363 None
364 }
365
366 /// Extract the project name from a Cargo.toml file.
367 ///
368 /// This method performs simple TOML parsing to extract the project name
369 /// from a Rust project's `Cargo.toml` file. It uses a line-by-line approach
370 /// rather than a full TOML parser for simplicity and performance.
371 ///
372 /// # Arguments
373 ///
374 /// * `cargo_toml` - Path to the Cargo.toml file
375 /// * `errors` - Shared error collection for reporting parsing issues
376 ///
377 /// # Returns
378 ///
379 /// - `Some(String)` containing the project name if successfully extracted
380 /// - `None` if the name cannot be found or parsed
381 ///
382 /// # Parsing Strategy
383 ///
384 /// The method looks for lines matching the pattern `name = "project_name"`
385 /// and extracts the quoted string value. This trivial approach handles
386 /// most common cases without requiring a full TOML parser.
387 fn extract_rust_project_name(
388 &self,
389 cargo_toml: &Path,
390 errors: &Arc<Mutex<Vec<String>>>,
391 ) -> Option<String> {
392 let content = self.read_file_content(cargo_toml, errors)?;
393 Self::parse_toml_name_field(&content)
394 }
395
/// Extract the first quoted string literal from a line.
///
/// The literal starts at the first `"` or `'` on the line and ends at the
/// next occurrence of that same quote character. Stopping at the nearest
/// closing quote keeps lines with several literals (such as
/// `name="pkg", version="1.0"`) or a quoted trailing comment from leaking
/// into the result. Escape sequences are not interpreted.
///
/// # Returns
///
/// - `Some(String)` with the text between the quotes (possibly empty)
/// - `None` when the line has no quote or the literal is never closed
fn extract_quoted_value(line: &str) -> Option<String> {
    let (open, quote) = line
        .char_indices()
        .find(|&(_, c)| c == '"' || c == '\'')?;

    // Both quote characters are single-byte ASCII, so this stays on a char
    // boundary.
    let literal = &line[open + quote.len_utf8()..];
    let close = literal.find(quote)?;

    Some(literal[..close].to_string())
}
407
408 /// Extract the name from a single TOML line if it contains a name field.
409 fn extract_name_from_line(line: &str) -> Option<String> {
410 if !Self::is_name_line(line) {
411 return None;
412 }
413
414 Self::extract_quoted_value(line)
415 }
416
417 /// Extract the project name from a package.json file.
418 ///
419 /// This method parses a Node.js project's `package.json` file to extract
420 /// the project name. It uses full JSON parsing to handle the file format
421 /// correctly and safely.
422 ///
423 /// # Arguments
424 ///
425 /// * `package_json` - Path to the package.json file
426 /// * `errors` - Shared error collection for reporting parsing issues
427 ///
428 /// # Returns
429 ///
430 /// - `Some(String)` containing the project name if successfully extracted
431 /// - `None` if the name cannot be found, parsed, or the file is invalid
432 ///
433 /// # Error Handling
434 ///
435 /// This method handles both file I/O errors and JSON parsing errors gracefully.
436 /// Errors are optionally reported to the shared error collection in verbose mode.
437 fn extract_node_project_name(
438 &self,
439 package_json: &Path,
440 errors: &Arc<Mutex<Vec<String>>>,
441 ) -> Option<String> {
442 match fs::read_to_string(package_json) {
443 Ok(content) => match from_str::<Value>(&content) {
444 Ok(json) => json
445 .get("name")
446 .and_then(|v| v.as_str())
447 .map(std::string::ToString::to_string),
448 Err(e) => {
449 if self.scan_options.verbose {
450 let mut errors = errors.lock().unwrap();
451 errors.push(format!("Error parsing {}: {e}", package_json.display()));
452 }
453 None
454 }
455 },
456 Err(e) => {
457 if self.scan_options.verbose {
458 let mut errors = errors.lock().unwrap();
459 errors.push(format!("Error reading {}: {e}", package_json.display()));
460 }
461 None
462 }
463 }
464 }
465
/// Check whether a line assigns a value to the key `name`.
///
/// The text before the first `=` must be exactly `name` (trailing whitespace
/// ignored). Keys that merely begin with those letters, such as `namespace`
/// or `name-of-dependency`, are rejected. The line is expected to be trimmed
/// already.
fn is_name_line(line: &str) -> bool {
    line.split_once('=')
        .is_some_and(|(key, _)| key.trim_end() == "name")
}
470
471 /// Log a file reading error if verbose mode is enabled.
472 fn log_file_error(
473 &self,
474 file_path: &Path,
475 error: &std::io::Error,
476 errors: &Arc<Mutex<Vec<String>>>,
477 ) {
478 if self.scan_options.verbose {
479 let mut errors = errors.lock().unwrap();
480 errors.push(format!("Error reading {}: {error}", file_path.display()));
481 }
482 }
483
484 /// Parse the name field from TOML content.
485 fn parse_toml_name_field(content: &str) -> Option<String> {
486 for line in content.lines() {
487 if let Some(name) = Self::extract_name_from_line(line.trim()) {
488 return Some(name);
489 }
490 }
491 None
492 }
493
494 /// Read the content of a file and handle errors appropriately.
495 fn read_file_content(
496 &self,
497 file_path: &Path,
498 errors: &Arc<Mutex<Vec<String>>>,
499 ) -> Option<String> {
500 match fs::read_to_string(file_path) {
501 Ok(content) => Some(content),
502 Err(e) => {
503 self.log_file_error(file_path, &e, errors);
504 None
505 }
506 }
507 }
508
509 /// Determine if a directory entry should be scanned for projects.
510 ///
511 /// This method implements the filtering logic to decide whether a directory
512 /// should be traversed during the scanning process. It applies various
513 /// exclusion rules to improve performance and avoid scanning irrelevant
514 /// directories.
515 ///
516 /// # Arguments
517 ///
518 /// * `entry` - The directory entry to evaluate
519 ///
520 /// # Returns
521 ///
522 /// - `true` if the directory should be scanned
523 /// - `false` if the directory should be skipped
524 ///
525 /// # Exclusion Rules
526 ///
527 /// The following directories are excluded from scanning:
528 /// - Directories in the user-specified skip list
529 /// - Any directory inside a `node_modules/` directory (to avoid deep nesting)
530 /// - Hidden directories (starting with `.`) except `.cargo`
531 /// - Common build/temporary directories: `target`, `build`, `dist`, `out`, etc.
532 /// - Version control directories: `.git`, `.svn`, `.hg`
533 /// - Python cache and virtual environment directories
534 /// - Temporary directories: `temp`, `tmp`
535 /// - Go vendor directory
536 /// - Python pytest cache
537 /// - Python tox environments
538 /// - Python setuptools
539 /// - Python coverage files
540 /// - Node.js modules (already handled above but added for completeness)
541 fn should_scan_entry(&self, entry: &DirEntry) -> bool {
542 let path = entry.path();
543
544 // Early return if path is in skip list
545 if self.is_path_in_skip_list(path) {
546 return false;
547 }
548
549 // Skip any directory inside a node_modules directory
550 if path
551 .ancestors()
552 .any(|ancestor| ancestor.file_name().and_then(|n| n.to_str()) == Some("node_modules"))
553 {
554 return false;
555 }
556
557 // Skip hidden directories (except .cargo for Rust)
558 if Self::is_hidden_directory_to_skip(path) {
559 return false;
560 }
561
562 // Skip common non-project directories
563 !Self::is_excluded_directory(path)
564 }
565
566 /// Check if a path is in the skip list
567 fn is_path_in_skip_list(&self, path: &Path) -> bool {
568 self.scan_options.skip.iter().any(|skip| {
569 path.components().any(|component| {
570 component
571 .as_os_str()
572 .to_str()
573 .is_some_and(|name| name == skip.to_string_lossy())
574 })
575 })
576 }
577
/// Report whether the final path component is a hidden name to skip.
///
/// A name is hidden when it starts with `.`; `.cargo` is the one exception
/// and is never skipped. Paths without a final component, or whose final
/// component is not valid UTF-8, are not considered hidden.
fn is_hidden_directory_to_skip(path: &Path) -> bool {
    match path.file_name().and_then(|raw| raw.to_str()) {
        None | Some(".cargo") => false,
        Some(name) => name.starts_with('.'),
    }
}
584
/// Report whether the final path component is a well-known non-project name.
///
/// The name must match exactly (case-sensitive). The list covers build
/// output, version-control metadata, Python caches and virtual environments,
/// temporary directories, Go's `vendor` and `node_modules`. Paths without a
/// final component, or whose final component is not valid UTF-8, are never
/// excluded.
fn is_excluded_directory(path: &Path) -> bool {
    let Some(name) = path.file_name().and_then(|raw| raw.to_str()) else {
        return false;
    };

    matches!(
        name,
        "target"
            | "build"
            | "dist"
            | "out"
            | ".git"
            | ".svn"
            | ".hg"
            | "__pycache__"
            | "venv"
            | ".venv"
            | "env"
            | ".env"
            | "temp"
            | "tmp"
            | "vendor"
            | ".pytest_cache"
            | ".tox"
            | ".eggs"
            | ".coverage"
            | "node_modules"
    )
}
614
615 /// Detect a Python project in the specified directory.
616 ///
617 /// This method checks for Python configuration files and associated cache directories.
618 /// It looks for multiple build artifacts that can be cleaned.
619 ///
620 /// # Arguments
621 ///
622 /// * `path` - Directory path to check for a Python project
623 /// * `errors` - Shared error collection for reporting parsing issues
624 ///
625 /// # Returns
626 ///
627 /// - `Some(Project)` if a valid Python project is detected
628 /// - `None` if the directory doesn't contain a Python project
629 ///
630 /// # Detection Criteria
631 ///
632 /// A Python project is identified by having:
633 /// 1. At least one of: requirements.txt, setup.py, pyproject.toml, setup.cfg, Pipfile
634 /// 2. At least one of the cache/build directories: `__pycache__`, `.pytest_cache`, venv, .venv, build, dist, .eggs
635 fn detect_python_project(
636 &self,
637 path: &Path,
638 errors: &Arc<Mutex<Vec<String>>>,
639 ) -> Option<Project> {
640 let config_files = [
641 "requirements.txt",
642 "setup.py",
643 "pyproject.toml",
644 "setup.cfg",
645 "Pipfile",
646 "pipenv.lock",
647 "poetry.lock",
648 ];
649
650 let build_dirs = [
651 "__pycache__",
652 ".pytest_cache",
653 "venv",
654 ".venv",
655 "build",
656 "dist",
657 ".eggs",
658 ".tox",
659 ".coverage",
660 ];
661
662 // Check if any config file exists
663 let has_config = config_files.iter().any(|&file| path.join(file).exists());
664
665 if !has_config {
666 return None;
667 }
668
669 // Find the largest cache/build directory that exists
670 let mut largest_build_dir = None;
671 let mut largest_size = 0;
672
673 for &dir_name in &build_dirs {
674 let dir_path = path.join(dir_name);
675
676 if dir_path.exists()
677 && dir_path.is_dir()
678 && let Ok(size) = Self::calculate_directory_size(&dir_path)
679 && size > largest_size
680 {
681 largest_size = size;
682 largest_build_dir = Some(dir_path);
683 }
684 }
685
686 if let Some(build_path) = largest_build_dir {
687 let name = self.extract_python_project_name(path, errors);
688
689 let build_arts = BuildArtifacts {
690 path: build_path,
691 size: 0, // Will be calculated later
692 };
693
694 return Some(Project::new(
695 ProjectType::Python,
696 path.to_path_buf(),
697 build_arts,
698 name,
699 ));
700 }
701
702 None
703 }
704
705 /// Detect a Go project in the specified directory.
706 ///
707 /// This method checks for the presence of both `go.mod` and `vendor/`
708 /// directory to identify a Go project. If found, it attempts to extract
709 /// the project name from the `go.mod` file.
710 ///
711 /// # Arguments
712 ///
713 /// * `path` - Directory path to check for a Go project
714 /// * `errors` - Shared error collection for reporting parsing issues
715 ///
716 /// # Returns
717 ///
718 /// - `Some(Project)` if a valid Go project is detected
719 /// - `None` if the directory doesn't contain a Go project
720 ///
721 /// # Detection Criteria
722 ///
723 /// 1. `go.mod` file exists in directory
724 /// 2. `vendor/` subdirectory exists in directory
725 /// 3. The project name is extracted from `go.mod` if possible
726 fn detect_go_project(&self, path: &Path, errors: &Arc<Mutex<Vec<String>>>) -> Option<Project> {
727 let go_mod = path.join("go.mod");
728 let vendor_dir = path.join("vendor");
729
730 if go_mod.exists() && vendor_dir.exists() {
731 let name = self.extract_go_project_name(&go_mod, errors);
732
733 let build_arts = BuildArtifacts {
734 path: path.join("vendor"),
735 size: 0, // Will be calculated later
736 };
737
738 return Some(Project::new(
739 ProjectType::Go,
740 path.to_path_buf(),
741 build_arts,
742 name,
743 ));
744 }
745
746 None
747 }
748
749 /// Extract the project name from a Python project directory.
750 ///
751 /// This method attempts to extract the project name from various Python
752 /// configuration files in order of preference.
753 ///
754 /// # Arguments
755 ///
756 /// * `path` - Path to the Python project directory
757 /// * `errors` - Shared error collection for reporting parsing issues
758 ///
759 /// # Returns
760 ///
761 /// - `Some(String)` containing the project name if successfully extracted
762 /// - `None` if the name cannot be found or parsed
763 ///
764 /// # Extraction Order
765 ///
766 /// 1. pyproject.toml (from [project] name or [tool.poetry] name)
767 /// 2. setup.py (from name= parameter)
768 /// 3. setup.cfg (from [metadata] name)
769 /// 4. Use directory name as a fallback
770 fn extract_python_project_name(
771 &self,
772 path: &Path,
773 errors: &Arc<Mutex<Vec<String>>>,
774 ) -> Option<String> {
775 // Try files in order of preference
776 self.try_extract_from_pyproject_toml(path, errors)
777 .or_else(|| self.try_extract_from_setup_py(path, errors))
778 .or_else(|| self.try_extract_from_setup_cfg(path, errors))
779 .or_else(|| Self::fallback_to_directory_name(path))
780 }
781
782 /// Try to extract project name from pyproject.toml
783 fn try_extract_from_pyproject_toml(
784 &self,
785 path: &Path,
786 errors: &Arc<Mutex<Vec<String>>>,
787 ) -> Option<String> {
788 let pyproject_toml = path.join("pyproject.toml");
789 if !pyproject_toml.exists() {
790 return None;
791 }
792
793 let content = self.read_file_content(&pyproject_toml, errors)?;
794 Self::extract_name_from_toml_like_content(&content)
795 }
796
797 /// Try to extract project name from setup.py
798 fn try_extract_from_setup_py(
799 &self,
800 path: &Path,
801 errors: &Arc<Mutex<Vec<String>>>,
802 ) -> Option<String> {
803 let setup_py = path.join("setup.py");
804 if !setup_py.exists() {
805 return None;
806 }
807
808 let content = self.read_file_content(&setup_py, errors)?;
809 Self::extract_name_from_python_content(&content)
810 }
811
812 /// Try to extract project name from setup.cfg
813 fn try_extract_from_setup_cfg(
814 &self,
815 path: &Path,
816 errors: &Arc<Mutex<Vec<String>>>,
817 ) -> Option<String> {
818 let setup_cfg = path.join("setup.cfg");
819 if !setup_cfg.exists() {
820 return None;
821 }
822
823 let content = self.read_file_content(&setup_cfg, errors)?;
824 Self::extract_name_from_cfg_content(&content)
825 }
826
827 /// Extract name from TOML-like content (pyproject.toml)
828 fn extract_name_from_toml_like_content(content: &str) -> Option<String> {
829 content
830 .lines()
831 .map(str::trim)
832 .find(|line| line.starts_with("name") && line.contains('='))
833 .and_then(Self::extract_quoted_value)
834 }
835
/// Extract the project name from Python source such as `setup.py`.
///
/// Each line that is not a `#` comment is searched for a standalone `name`
/// identifier followed by a single `=` and a string literal. Both
/// `name="pkg"` and `name = 'pkg'` are recognised. Identifiers that merely
/// contain the letters (`dirname`, `filename`, `__name__`, `name_suffix`)
/// and comparisons (`name == ...`) are ignored, so unrelated lines earlier
/// in the file cannot hijack or abort the lookup.
///
/// This is a textual heuristic, not a Python parser: names computed at
/// runtime or passed through variables are not resolved.
///
/// # Returns
///
/// - `Some(String)` with the first literal assigned to `name`
/// - `None` when no such assignment exists
fn extract_name_from_python_content(content: &str) -> Option<String> {
    /// Characters that may be part of a Python identifier.
    fn is_ident_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    /// Return the string literal assigned to a standalone `name` on this line.
    fn name_assignment(line: &str) -> Option<String> {
        for (idx, keyword) in line.match_indices("name") {
            // Reject `dirname`, `__name__`, ... where `name` is only a suffix.
            if line[..idx].chars().next_back().is_some_and(is_ident_char) {
                continue;
            }

            // Requiring `=` right after the keyword (whitespace aside) also
            // rejects longer identifiers such as `name_suffix`.
            let Some(rhs) = line[idx + keyword.len()..].trim_start().strip_prefix('=') else {
                continue;
            };
            if rhs.starts_with('=') {
                continue; // `name == ...` is a comparison, not an assignment
            }

            let rhs = rhs.trim_start();
            let Some(quote) = rhs.chars().next().filter(|&c| c == '"' || c == '\'') else {
                continue;
            };

            let literal = &rhs[quote.len_utf8()..];
            if let Some(end) = literal.find(quote) {
                return Some(literal[..end].to_string());
            }
        }

        None
    }

    content
        .lines()
        .map(str::trim)
        .filter(|line| !line.starts_with('#'))
        .find_map(name_assignment)
}
844
/// Extract the project name from INI-style text such as `setup.cfg`.
///
/// Only the `[metadata]` section is considered. Within it, the key must be
/// exactly `name`, so `namespace_packages` and similar keys are ignored. Key
/// and value may be separated by `=` or `:`, and everything after the first
/// separator is the value, so a value containing `=` is kept whole. A `name`
/// key with an empty value is skipped, which lets the caller fall back to
/// another source.
///
/// # Returns
///
/// - `Some(String)` with the trimmed, non-empty name value
/// - `None` when `[metadata]` is missing or defines no such name
fn extract_name_from_cfg_content(content: &str) -> Option<String> {
    let mut in_metadata_section = false;

    for line in content.lines().map(str::trim) {
        // Any section header switches the state; only `[metadata]` turns it on.
        if line.starts_with('[') && line.ends_with(']') {
            in_metadata_section = line == "[metadata]";
            continue;
        }
        if !in_metadata_section {
            continue;
        }

        let Some((key, value)) = line.split_once(['=', ':']) else {
            continue;
        };
        if key.trim_end() != "name" {
            continue;
        }

        let value = value.trim();
        if !value.is_empty() {
            return Some(value.to_string());
        }
    }

    None
}
863
/// Use the final component of `path` as the project name.
///
/// Returns `None` when the path has no final component (for example `/` or
/// a path ending in `..`) or when that component is not valid UTF-8.
fn fallback_to_directory_name(path: &Path) -> Option<String> {
    let last = path.file_name()?;
    last.to_str().map(String::from)
}
870
871 /// Extract the project name from a `go.mod` file.
872 ///
873 /// This method parses a Go project's `go.mod` file to extract
874 /// the module name, which typically represents the project.
875 ///
876 /// # Arguments
877 ///
878 /// * `go_mod` - Path to the `go.mod` file
879 /// * `errors` - Shared error collection for reporting parsing issues
880 ///
881 /// # Returns
882 ///
883 /// - `Some(String)` containing the module name if successfully extracted
884 /// - `None` if the name cannot be found or parsed
885 ///
886 /// # Parsing Strategy
887 ///
888 /// The method looks for the first line starting with `module ` and extracts
889 /// the module path. For better display, it takes the last component of the path.
890 fn extract_go_project_name(
891 &self,
892 go_mod: &Path,
893 errors: &Arc<Mutex<Vec<String>>>,
894 ) -> Option<String> {
895 let content = self.read_file_content(go_mod, errors)?;
896
897 for line in content.lines() {
898 let line = line.trim();
899 if line.starts_with("module ") {
900 let module_path = line.strip_prefix("module ")?.trim();
901
902 // Take the last component of the module path for a cleaner name
903 if let Some(name) = module_path.split('/').next_back() {
904 return Some(name.to_string());
905 }
906
907 return Some(module_path.to_string());
908 }
909 }
910
911 None
912 }
913
/// Calculate the size of a directory tree in bytes.
///
/// Used for Python projects to decide which cache/build directory is the
/// largest.
///
/// Directories are recognised from the directory entry's own file type,
/// which does not follow symbolic links. A symlink to a directory (as found
/// in virtual environments) is therefore counted as a link, not traversed:
/// its target is not counted twice and link cycles cannot cause runaway
/// recursion. For every non-directory entry the size reported by the
/// entry's own metadata is added.
///
/// # Errors
///
/// Returns an error when `dir_path` itself cannot be listed, or when one of
/// its direct entries cannot be inspected. Failures inside a subdirectory
/// are tolerated: that subdirectory contributes 0 bytes.
fn calculate_directory_size(dir_path: &Path) -> std::io::Result<u64> {
    fn measure(dir: &Path) -> std::io::Result<u64> {
        let mut total_size = 0u64;

        for entry in fs::read_dir(dir)? {
            let entry = entry?;

            if entry.file_type()?.is_dir() {
                total_size += measure(&entry.path()).unwrap_or(0);
            } else {
                total_size += entry.metadata()?.len();
            }
        }

        Ok(total_size)
    }

    measure(dir_path)
}
933}