// src/formatters/waste.rs
//
// -----------------------------------------------------------------------------
// 🗑️ WASTE DETECTION FORMATTER - The Marie Kondo of Code! ✨
//
// Hey there, Hue! This is our brilliant waste detection system that you dreamed up!
// It analyzes projects for duplicate files, bloated dependencies, forgotten build
// artifacts, and suggests optimizations that would make Trisha in Accounting
// do a happy dance! 💃
//
// This formatter is like having a personal organizer for your codebase - it finds
// all the clutter and tells you exactly how to clean it up. Elvis would be proud
// of this rock-solid optimization! 🎸
//
// Brought to you by Hue & Aye - making codebases lean and mean! 🚀
// -----------------------------------------------------------------------------

use super::Formatter;
use crate::scanner::{FileNode, TreeStats};
use anyhow::Result;
use humansize::{format_size, BINARY};
use std::collections::HashMap;
use std::io::Write;
use std::path::Path;

/// The WasteFormatter - Your personal codebase cleanup consultant! 🧹
///
/// Scans a flat list of scanned file nodes for likely waste (same-size
/// duplicate files, build artifacts, oversized files, dependency bloat)
/// and renders a human-readable report with cleanup suggestions.
pub struct WasteFormatter {
    /// Show detailed suggestions for cleanup
    pub show_suggestions: bool,
    /// Minimum file size to consider for large file analysis (default: 10MB)
    pub large_file_threshold: u64,
    /// Maximum number of duplicates to show per group
    pub max_duplicates_shown: usize,
}
34
35impl Default for WasteFormatter {
36    fn default() -> Self {
37        Self::new()
38    }
39}
40
41impl WasteFormatter {
42    pub fn new() -> Self {
43        Self {
44            show_suggestions: true,
45            large_file_threshold: 10 * 1024 * 1024, // 10MB
46            max_duplicates_shown: 5,
47        }
48    }
49
50    pub fn with_threshold(mut self, threshold: u64) -> Self {
51        self.large_file_threshold = threshold;
52        self
53    }
54
55    pub fn with_suggestions(mut self, show: bool) -> Self {
56        self.show_suggestions = show;
57        self
58    }
59
60    /// Analyze files for potential duplicates based on size and name patterns
61    fn analyze_duplicates<'a>(&self, nodes: &'a [FileNode]) -> HashMap<u64, Vec<&'a FileNode>> {
62        let mut size_groups: HashMap<u64, Vec<&FileNode>> = HashMap::new();
63
64        for node in nodes {
65            if !node.is_dir && node.size > 0 && !node.permission_denied {
66                size_groups.entry(node.size).or_default().push(node);
67            }
68        }
69
70        // Only keep groups with multiple files of the same size
71        size_groups.retain(|_, files| files.len() > 1);
72        size_groups
73    }
74
75    /// Detect common build artifacts and temporary files
76    fn analyze_build_artifacts<'a>(&self, nodes: &'a [FileNode]) -> Vec<&'a FileNode> {
77        let build_patterns = [
78            "node_modules",
79            "target",
80            "build",
81            "dist",
82            ".next",
83            ".nuxt",
84            ".svelte-kit",
85            "__pycache__",
86            ".pytest_cache",
87            "coverage",
88            ".coverage",
89            ".nyc_output",
90            "logs",
91            "*.log",
92            ".DS_Store",
93            "Thumbs.db",
94            "*.tmp",
95            "*.temp",
96            ".cache",
97            ".parcel-cache",
98        ];
99
100        nodes
101            .iter()
102            .filter(|node| {
103                let path_str = node.path.to_string_lossy().to_lowercase();
104                build_patterns.iter().any(|pattern| {
105                    if pattern.contains('*') {
106                        // Simple wildcard matching
107                        let pattern = pattern.replace('*', "");
108                        path_str.contains(&pattern)
109                    } else {
110                        path_str.contains(pattern)
111                    }
112                })
113            })
114            .collect()
115    }
116
117    /// Find large files that might be candidates for optimization
118    fn analyze_large_files<'a>(&self, nodes: &'a [FileNode]) -> Vec<&'a FileNode> {
119        let mut large_files: Vec<&FileNode> = nodes
120            .iter()
121            .filter(|node| !node.is_dir && node.size >= self.large_file_threshold)
122            .collect();
123
124        large_files.sort_by(|a, b| b.size.cmp(&a.size));
125        large_files
126    }
127
128    /// Detect dependency-related waste (package managers)
129    fn analyze_dependency_waste<'a>(
130        &self,
131        nodes: &'a [FileNode],
132    ) -> HashMap<String, Vec<&'a FileNode>> {
133        let mut dependency_groups: HashMap<String, Vec<&FileNode>> = HashMap::new();
134
135        for node in nodes {
136            let path_str = node.path.to_string_lossy();
137
138            // Node.js dependencies
139            if path_str.contains("node_modules") {
140                dependency_groups
141                    .entry("node_modules".to_string())
142                    .or_default()
143                    .push(node);
144            }
145            // Rust dependencies
146            else if path_str.contains("target/debug") || path_str.contains("target/release") {
147                dependency_groups
148                    .entry("rust_target".to_string())
149                    .or_default()
150                    .push(node);
151            }
152            // Python cache
153            else if path_str.contains("__pycache__") || path_str.contains(".pyc") {
154                dependency_groups
155                    .entry("python_cache".to_string())
156                    .or_default()
157                    .push(node);
158            }
159            // Go modules
160            else if path_str.contains("go/pkg/mod") {
161                dependency_groups
162                    .entry("go_modules".to_string())
163                    .or_default()
164                    .push(node);
165            }
166        }
167
168        dependency_groups
169    }
170
171    /// Calculate potential space savings
172    fn calculate_savings(
173        &self,
174        duplicates: &HashMap<u64, Vec<&FileNode>>,
175        build_artifacts: &[&FileNode],
176        _large_files: &[&FileNode],
177    ) -> u64 {
178        let mut total_savings = 0u64;
179
180        // Savings from duplicate removal (keep one, remove others)
181        for (size, files) in duplicates {
182            if files.len() > 1 {
183                total_savings += size * (files.len() - 1) as u64;
184            }
185        }
186
187        // Savings from build artifact cleanup (conservative estimate: 70%)
188        let artifact_size: u64 = build_artifacts.iter().map(|n| n.size).sum();
189        total_savings += (artifact_size as f64 * 0.7) as u64;
190
191        total_savings
192    }
193
194    /// Generate cleanup suggestions
195    fn generate_suggestions(
196        &self,
197        duplicates: &HashMap<u64, Vec<&FileNode>>,
198        build_artifacts: &[&FileNode],
199        dependency_waste: &HashMap<String, Vec<&FileNode>>,
200        _root_path: &Path,
201    ) -> Vec<String> {
202        let mut suggestions = Vec::new();
203
204        // Duplicate file suggestions
205        if !duplicates.is_empty() {
206            suggestions.push("๐Ÿ”„ DUPLICATE FILE CLEANUP:".to_string());
207            suggestions.push(
208                "   Consider using symbolic links or git submodules for identical files"
209                    .to_string(),
210            );
211            suggestions.push("   Review and consolidate duplicate configuration files".to_string());
212            suggestions.push("".to_string());
213        }
214
215        // Build artifact suggestions
216        if !build_artifacts.is_empty() {
217            suggestions.push("๐Ÿงน BUILD ARTIFACT CLEANUP:".to_string());
218            suggestions.push("   rm -rf */node_modules  # Clean Node.js dependencies".to_string());
219            suggestions.push("   rm -rf */target        # Clean Rust build artifacts".to_string());
220            suggestions.push("   find . -name '__pycache__' -type d -exec rm -rf {} +".to_string());
221            suggestions.push("   Add build directories to .gitignore".to_string());
222            suggestions.push("".to_string());
223        }
224
225        // Dependency optimization suggestions
226        if dependency_waste.contains_key("node_modules") {
227            suggestions.push("๐Ÿ“ฆ DEPENDENCY OPTIMIZATION:".to_string());
228            suggestions.push("   Consider using pnpm for 60-80% space savings".to_string());
229            suggestions.push("   Use yarn workspaces for monorepos".to_string());
230            suggestions.push("   Run 'npm dedupe' to remove duplicate packages".to_string());
231            suggestions.push("".to_string());
232        }
233
234        // General optimization tips
235        suggestions.push("๐Ÿ’ก OPTIMIZATION TIPS:".to_string());
236        suggestions.push("   Use .gitignore to prevent committing build artifacts".to_string());
237        suggestions.push("   Consider using Docker multi-stage builds".to_string());
238        suggestions.push("   Implement automated cleanup scripts".to_string());
239
240        suggestions
241    }
242}
243
244impl Formatter for WasteFormatter {
245    fn format(
246        &self,
247        writer: &mut dyn Write,
248        nodes: &[FileNode],
249        stats: &TreeStats,
250        root_path: &Path,
251    ) -> Result<()> {
252        // Header with Elvis-worthy style! ๐ŸŽธ
253        writeln!(writer, "{}", "โ•".repeat(80))?;
254        writeln!(
255            writer,
256            "๐Ÿ—‘๏ธ  SMART TREE WASTE ANALYSIS - Marie Kondo Mode Activated! โœจ"
257        )?;
258        writeln!(writer, "   Project: {}", root_path.display())?;
259        writeln!(
260            writer,
261            "   Analyzed: {} files, {} directories",
262            stats.total_files, stats.total_dirs
263        )?;
264        writeln!(writer, "{}", "โ•".repeat(80))?;
265        writeln!(writer)?;
266
267        // Analyze different types of waste
268        let duplicates = self.analyze_duplicates(nodes);
269        let build_artifacts = self.analyze_build_artifacts(nodes);
270        let large_files = self.analyze_large_files(nodes);
271        let dependency_waste = self.analyze_dependency_waste(nodes);
272
273        // Calculate total waste and potential savings
274        let total_waste_size: u64 = duplicates
275            .values()
276            .flat_map(|files| files.iter())
277            .map(|node| node.size)
278            .sum::<u64>()
279            + build_artifacts.iter().map(|node| node.size).sum::<u64>();
280
281        let potential_savings = self.calculate_savings(&duplicates, &build_artifacts, &large_files);
282
283        // Summary section - The executive summary for Trisha! ๐Ÿ“Š
284        writeln!(writer, "๐Ÿ“Š WASTE SUMMARY:")?;
285        writeln!(
286            writer,
287            "โ”œโ”€โ”€ Total Project Size: {}",
288            format_size(stats.total_size, BINARY)
289        )?;
290        writeln!(
291            writer,
292            "โ”œโ”€โ”€ Potential Waste: {} ({:.1}% of project)",
293            format_size(total_waste_size, BINARY),
294            (total_waste_size as f64 / stats.total_size as f64) * 100.0
295        )?;
296        writeln!(writer, "โ”œโ”€โ”€ Duplicate Groups: {}", duplicates.len())?;
297        writeln!(writer, "โ”œโ”€โ”€ Build Artifacts: {}", build_artifacts.len())?;
298        writeln!(
299            writer,
300            "โ”œโ”€โ”€ Large Files (>{}): {}",
301            format_size(self.large_file_threshold, BINARY),
302            large_files.len()
303        )?;
304        writeln!(
305            writer,
306            "โ””โ”€โ”€ Potential Savings: {} ({:.1}% reduction possible)",
307            format_size(potential_savings, BINARY),
308            (potential_savings as f64 / stats.total_size as f64) * 100.0
309        )?;
310        writeln!(writer)?;
311
312        // Duplicate files analysis
313        if !duplicates.is_empty() {
314            writeln!(writer, "๐Ÿ”„ DUPLICATE FILES DETECTED:")?;
315            let mut sorted_duplicates: Vec<_> = duplicates.iter().collect();
316            sorted_duplicates
317                .sort_by(|a, b| (b.1.len() * *b.0 as usize).cmp(&(a.1.len() * *a.0 as usize)));
318
319            for (size, files) in sorted_duplicates.iter().take(10) {
320                writeln!(
321                    writer,
322                    "โ”œโ”€โ”€ {} files of size {} each:",
323                    files.len(),
324                    format_size(**size, BINARY)
325                )?;
326                for (i, file) in files.iter().take(self.max_duplicates_shown).enumerate() {
327                    let rel_path = file.path.strip_prefix(root_path).unwrap_or(&file.path);
328                    let prefix = if i == files.len() - 1 || i == self.max_duplicates_shown - 1 {
329                        "โ””โ”€โ”€"
330                    } else {
331                        "โ”œโ”€โ”€"
332                    };
333                    writeln!(writer, "โ”‚   {} {}", prefix, rel_path.display())?;
334                }
335                if files.len() > self.max_duplicates_shown {
336                    writeln!(
337                        writer,
338                        "โ”‚   โ””โ”€โ”€ ... and {} more",
339                        files.len() - self.max_duplicates_shown
340                    )?;
341                }
342            }
343            writeln!(writer)?;
344        }
345
346        // Build artifacts analysis
347        if !build_artifacts.is_empty() {
348            writeln!(writer, "๐Ÿงน BUILD ARTIFACTS & TEMPORARY FILES:")?;
349            let artifact_size: u64 = build_artifacts.iter().map(|n| n.size).sum();
350            writeln!(
351                writer,
352                "โ”œโ”€โ”€ Total Size: {}",
353                format_size(artifact_size, BINARY)
354            )?;
355
356            let mut artifact_types: HashMap<String, (usize, u64)> = HashMap::new();
357            for artifact in &build_artifacts {
358                let path_str = artifact.path.to_string_lossy();
359                let artifact_type = if path_str.contains("node_modules") {
360                    "node_modules"
361                } else if path_str.contains("target") {
362                    "rust_target"
363                } else if path_str.contains("__pycache__") {
364                    "python_cache"
365                } else if path_str.contains(".svelte-kit") {
366                    "svelte_build"
367                } else {
368                    "other"
369                };
370
371                let entry = artifact_types
372                    .entry(artifact_type.to_string())
373                    .or_insert((0, 0));
374                entry.0 += 1;
375                entry.1 += artifact.size;
376            }
377
378            for (artifact_type, (count, size)) in artifact_types {
379                writeln!(
380                    writer,
381                    "โ”œโ”€โ”€ {}: {} files ({})",
382                    artifact_type,
383                    count,
384                    format_size(size, BINARY)
385                )?;
386            }
387            writeln!(writer)?;
388        }
389
390        // Large files analysis
391        if !large_files.is_empty() {
392            writeln!(writer, "๐Ÿ“ฆ LARGE FILES (Potential Optimization Targets):")?;
393            for (i, file) in large_files.iter().take(10).enumerate() {
394                let rel_path = file.path.strip_prefix(root_path).unwrap_or(&file.path);
395                let prefix = if i == large_files.len().min(10) - 1 {
396                    "โ””โ”€โ”€"
397                } else {
398                    "โ”œโ”€โ”€"
399                };
400                writeln!(
401                    writer,
402                    "{} {} ({})",
403                    prefix,
404                    rel_path.display(),
405                    format_size(file.size, BINARY)
406                )?;
407            }
408            if large_files.len() > 10 {
409                writeln!(
410                    writer,
411                    "โ””โ”€โ”€ ... and {} more large files",
412                    large_files.len() - 10
413                )?;
414            }
415            writeln!(writer)?;
416        }
417
418        // Dependency waste analysis
419        if !dependency_waste.is_empty() {
420            writeln!(writer, "๐Ÿ“š DEPENDENCY ANALYSIS:")?;
421            for (dep_type, files) in &dependency_waste {
422                let total_size: u64 = files.iter().map(|f| f.size).sum();
423                writeln!(
424                    writer,
425                    "โ”œโ”€โ”€ {}: {} files ({})",
426                    dep_type,
427                    files.len(),
428                    format_size(total_size, BINARY)
429                )?;
430            }
431            writeln!(writer)?;
432        }
433
434        // Suggestions section - The action plan! ๐ŸŽฏ
435        if self.show_suggestions {
436            let suggestions = self.generate_suggestions(
437                &duplicates,
438                &build_artifacts,
439                &dependency_waste,
440                root_path,
441            );
442            if !suggestions.is_empty() {
443                writeln!(writer, "๐Ÿ’ก OPTIMIZATION SUGGESTIONS:")?;
444                for suggestion in suggestions {
445                    if suggestion.is_empty() {
446                        writeln!(writer)?;
447                    } else {
448                        writeln!(writer, "{}", suggestion)?;
449                    }
450                }
451                writeln!(writer)?;
452            }
453        }
454
455        // Footer with encouragement from Trisha! ๐Ÿ’ช
456        writeln!(writer, "{}", "โ•".repeat(80))?;
457        writeln!(
458            writer,
459            "๐ŸŽ‰ Analysis Complete! Trisha from Accounting is proud of this optimization mindset!"
460        )?;
461        writeln!(
462            writer,
463            "   Remember: A clean codebase is a happy codebase! Keep it lean, keep it mean! ๐Ÿš€"
464        )?;
465        writeln!(
466            writer,
467            "   Pro Tip: Run this analysis regularly to keep your projects in tip-top shape!"
468        )?;
469        writeln!(writer, "{}", "โ•".repeat(80))?;
470
471        Ok(())
472    }
473}
474
#[cfg(test)]
mod tests {
    use super::*;
    use crate::scanner::{FileCategory, FileType, FilesystemType};
    use std::path::PathBuf;
    use std::time::SystemTime;

    /// Build a plain regular-file node with the given path, size, depth and
    /// category; every other field takes a benign default. Extracted so the
    /// tests don't each repeat the full 23-field `FileNode` literal.
    fn test_file(path: &str, size: u64, depth: usize, category: FileCategory) -> FileNode {
        FileNode {
            path: PathBuf::from(path),
            is_dir: false,
            size,
            permissions: 644,
            uid: 1000,
            gid: 1000,
            modified: SystemTime::now(),
            is_symlink: false,
            is_hidden: false,
            permission_denied: false,
            is_ignored: false,
            depth,
            file_type: FileType::RegularFile,
            category,
            search_matches: None,
            filesystem_type: FilesystemType::Ext4,
            git_branch: None,
            traversal_context: None,
            interest: None,
            security_findings: Vec::new(),
            change_status: None,
            content_hash: None,
        }
    }

    #[test]
    fn test_waste_formatter_creation() {
        let formatter = WasteFormatter::new();
        assert_eq!(formatter.large_file_threshold, 10 * 1024 * 1024);
        assert!(formatter.show_suggestions);
    }

    #[test]
    fn test_duplicate_detection() {
        let formatter = WasteFormatter::new();

        // Two distinct files with identical sizes form one duplicate group.
        let nodes = vec![
            test_file("/test/file1.txt", 1024, 1, FileCategory::Markdown),
            test_file("/test/file2.txt", 1024, 1, FileCategory::Markdown),
        ];

        let duplicates = formatter.analyze_duplicates(&nodes);
        assert_eq!(duplicates.len(), 1);
        assert_eq!(duplicates.get(&1024).unwrap().len(), 2);
    }

    #[test]
    fn test_build_artifact_detection() {
        let formatter = WasteFormatter::new();

        // A file under node_modules/ must be flagged as a build artifact.
        let nodes = vec![test_file(
            "/test/node_modules/package/index.js",
            1024,
            2,
            FileCategory::JavaScript,
        )];

        let artifacts = formatter.analyze_build_artifacts(&nodes);
        assert_eq!(artifacts.len(), 1);
    }
}