//! Waste analysis formatter: finds duplicate files, build artifacts,
//! and oversized files, then suggests cleanups (Marie Kondo mode).

use super::Formatter;
use crate::scanner::{FileNode, TreeStats};
use anyhow::Result;
use humansize::{format_size, BINARY};
use std::collections::HashMap;
use std::io::Write;
use std::path::Path;

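/// Analyzes a scanned tree for wasted space: size-based duplicate
/// candidates, build artifacts, and unusually large files.
///
/// A minimal usage sketch (the scanner output `nodes`/`stats` and a
/// `root_path` are assumed to be in scope, so this is not compiled as
/// a doctest):
///
/// ```ignore
/// let formatter = WasteFormatter::new()
///     .with_threshold(50 * 1024 * 1024) // flag files >= 50 MiB
///     .with_suggestions(true);
/// formatter.format(&mut std::io::stdout(), &nodes, &stats, root_path)?;
/// ```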
pub struct WasteFormatter {
    /// Whether to append cleanup suggestions to the report.
    pub show_suggestions: bool,
    /// Files at or above this size (in bytes) are reported as large.
    pub large_file_threshold: u64,
    /// Maximum number of paths listed per duplicate group.
    pub max_duplicates_shown: usize,
}

impl Default for WasteFormatter {
    fn default() -> Self {
        Self::new()
    }
}

impl WasteFormatter {
    pub fn new() -> Self {
        Self {
            show_suggestions: true,
            large_file_threshold: 10 * 1024 * 1024, // 10 MiB
            max_duplicates_shown: 5,
        }
    }

    pub fn with_threshold(mut self, threshold: u64) -> Self {
        self.large_file_threshold = threshold;
        self
    }

    pub fn with_suggestions(mut self, show: bool) -> Self {
        self.show_suggestions = show;
        self
    }

    fn analyze_duplicates<'a>(&self, nodes: &'a [FileNode]) -> HashMap<u64, Vec<&'a FileNode>> {
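        // Group files by size; equal sizes only mark *candidate* duplicates,
        // since no content hashing is performed here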
        let mut size_groups: HashMap<u64, Vec<&FileNode>> = HashMap::new();

        for node in nodes {
            if !node.is_dir && node.size > 0 && !node.permission_denied {
                size_groups.entry(node.size).or_default().push(node);
            }
        }

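        // Keep only sizes shared by more than one file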
        size_groups.retain(|_, files| files.len() > 1);
        size_groups
    }

    fn analyze_build_artifacts<'a>(&self, nodes: &'a [FileNode]) -> Vec<&'a FileNode> {
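        // Directory names and glob-ish patterns commonly produced by build
        // tools, caches, and editors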
        let build_patterns = [
            "node_modules",
            "target",
            "build",
            "dist",
            ".next",
            ".nuxt",
            ".svelte-kit",
            "__pycache__",
            ".pytest_cache",
            "coverage",
            ".coverage",
            ".nyc_output",
            "logs",
            "*.log",
            ".DS_Store",
            "Thumbs.db",
            "*.tmp",
            "*.temp",
            ".cache",
            ".parcel-cache",
        ];

        nodes
            .iter()
            .filter(|node| {
                let path_str = node.path.to_string_lossy().to_lowercase();
                build_patterns.iter().any(|pattern| {
                    if pattern.contains('*') {
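                        // Crude wildcard handling: drop the '*' and
                        // substring-match the rest (e.g. "*.log" -> ".log")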
                        let pattern = pattern.replace('*', "");
                        path_str.contains(&pattern)
                    } else {
                        path_str.contains(pattern)
                    }
                })
            })
            .collect()
    }

    fn analyze_large_files<'a>(&self, nodes: &'a [FileNode]) -> Vec<&'a FileNode> {
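        // Collect every file at or above the configured size threshold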
        let mut large_files: Vec<&FileNode> = nodes
            .iter()
            .filter(|node| !node.is_dir && node.size >= self.large_file_threshold)
            .collect();

        large_files.sort_by(|a, b| b.size.cmp(&a.size)); // largest first
        large_files
    }

    fn analyze_dependency_waste<'a>(
        &self,
        nodes: &'a [FileNode],
    ) -> HashMap<String, Vec<&'a FileNode>> {
        let mut dependency_groups: HashMap<String, Vec<&FileNode>> = HashMap::new();

        for node in nodes {
            let path_str = node.path.to_string_lossy();

            if path_str.contains("node_modules") {
                // Node.js dependencies
                dependency_groups
                    .entry("node_modules".to_string())
                    .or_default()
                    .push(node);
            } else if path_str.contains("target/debug") || path_str.contains("target/release") {
                // Rust build output
                dependency_groups
                    .entry("rust_target".to_string())
                    .or_default()
                    .push(node);
            } else if path_str.contains("__pycache__") || path_str.contains(".pyc") {
                // Python bytecode caches
                dependency_groups
                    .entry("python_cache".to_string())
                    .or_default()
                    .push(node);
            } else if path_str.contains("go/pkg/mod") {
                // Go module cache
                dependency_groups
                    .entry("go_modules".to_string())
                    .or_default()
                    .push(node);
            }
        }

        dependency_groups
    }

    fn calculate_savings(
        &self,
        duplicates: &HashMap<u64, Vec<&FileNode>>,
        build_artifacts: &[&FileNode],
        _large_files: &[&FileNode],
    ) -> u64 {
        let mut total_savings = 0u64;

        for (size, files) in duplicates {
            // One copy of each duplicate group is kept; the rest is reclaimable
            if files.len() > 1 {
                total_savings += size * (files.len() - 1) as u64;
            }
        }

        let artifact_size: u64 = build_artifacts.iter().map(|n| n.size).sum();
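        // Heuristic: assume roughly 70% of build-artifact bytes can be
        // reclaimed safely (some artifacts speed up incremental builds)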
        total_savings += (artifact_size as f64 * 0.7) as u64;

        total_savings
    }

    fn generate_suggestions(
        &self,
        duplicates: &HashMap<u64, Vec<&FileNode>>,
        build_artifacts: &[&FileNode],
        dependency_waste: &HashMap<String, Vec<&FileNode>>,
        _root_path: &Path,
    ) -> Vec<String> {
        let mut suggestions = Vec::new();

        if !duplicates.is_empty() {
            suggestions.push("🔄 DUPLICATE FILE CLEANUP:".to_string());
            suggestions.push(
                "   Consider using symbolic links or git submodules for identical files"
                    .to_string(),
            );
            suggestions.push("   Review and consolidate duplicate configuration files".to_string());
            suggestions.push("".to_string());
        }

        if !build_artifacts.is_empty() {
            suggestions.push("🧹 BUILD ARTIFACT CLEANUP:".to_string());
            suggestions.push("   rm -rf */node_modules    # Clean Node.js dependencies".to_string());
            suggestions.push("   rm -rf */target          # Clean Rust build artifacts".to_string());
            suggestions.push("   find . -name '__pycache__' -type d -exec rm -rf {} +".to_string());
            suggestions.push("   Add build directories to .gitignore".to_string());
            suggestions.push("".to_string());
        }

        if dependency_waste.contains_key("node_modules") {
            suggestions.push("📦 DEPENDENCY OPTIMIZATION:".to_string());
            suggestions.push("   Consider using pnpm for 60-80% space savings".to_string());
            suggestions.push("   Use yarn workspaces for monorepos".to_string());
            suggestions.push("   Run 'npm dedupe' to remove duplicate packages".to_string());
            suggestions.push("".to_string());
        }

        suggestions.push("💡 OPTIMIZATION TIPS:".to_string());
        suggestions.push("   Use .gitignore to prevent committing build artifacts".to_string());
        suggestions.push("   Consider using Docker multi-stage builds".to_string());
        suggestions.push("   Implement automated cleanup scripts".to_string());

        suggestions
    }
}

impl Formatter for WasteFormatter {
    fn format(
        &self,
        writer: &mut dyn Write,
        nodes: &[FileNode],
        stats: &TreeStats,
        root_path: &Path,
    ) -> Result<()> {
        writeln!(writer, "{}", "═".repeat(80))?;
        writeln!(
            writer,
            "🗑️ SMART TREE WASTE ANALYSIS - Marie Kondo Mode Activated! ✨"
        )?;
        writeln!(writer, "   Project: {}", root_path.display())?;
        writeln!(
            writer,
            "   Analyzed: {} files, {} directories",
            stats.total_files, stats.total_dirs
        )?;
        writeln!(writer, "{}", "═".repeat(80))?;
        writeln!(writer)?;

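        // Run each analysis pass over the scanned nodes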
        let duplicates = self.analyze_duplicates(nodes);
        let build_artifacts = self.analyze_build_artifacts(nodes);
        let large_files = self.analyze_large_files(nodes);
        let dependency_waste = self.analyze_dependency_waste(nodes);

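        // Raw bytes flagged as waste; duplicates are counted in full here,
        // while the savings estimate below keeps one copy per group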
        let total_waste_size: u64 = duplicates
            .values()
            .flat_map(|files| files.iter())
            .map(|node| node.size)
            .sum::<u64>()
            + build_artifacts.iter().map(|node| node.size).sum::<u64>();

        let potential_savings = self.calculate_savings(&duplicates, &build_artifacts, &large_files);

        writeln!(writer, "📊 WASTE SUMMARY:")?;
        writeln!(
            writer,
            "├── Total Project Size: {}",
            format_size(stats.total_size, BINARY)
        )?;
        writeln!(
            writer,
            "├── Potential Waste: {} ({:.1}% of project)",
            format_size(total_waste_size, BINARY),
            (total_waste_size as f64 / stats.total_size as f64) * 100.0
        )?;
        writeln!(writer, "├── Duplicate Groups: {}", duplicates.len())?;
        writeln!(writer, "├── Build Artifacts: {}", build_artifacts.len())?;
        writeln!(
            writer,
            "├── Large Files (>{}): {}",
            format_size(self.large_file_threshold, BINARY),
            large_files.len()
        )?;
        writeln!(
            writer,
            "└── Potential Savings: {} ({:.1}% reduction possible)",
            format_size(potential_savings, BINARY),
            (potential_savings as f64 / stats.total_size as f64) * 100.0
        )?;
        writeln!(writer)?;

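        // Duplicate groups, biggest total waste first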
        if !duplicates.is_empty() {
            writeln!(writer, "🔄 DUPLICATE FILES DETECTED:")?;
            let mut sorted_duplicates: Vec<_> = duplicates.iter().collect();
            sorted_duplicates
                .sort_by(|a, b| (b.1.len() * *b.0 as usize).cmp(&(a.1.len() * *a.0 as usize)));

            for (size, files) in sorted_duplicates.iter().take(10) {
                writeln!(
                    writer,
                    "├── {} files of size {} each:",
                    files.len(),
                    format_size(**size, BINARY)
                )?;
                for (i, file) in files.iter().take(self.max_duplicates_shown).enumerate() {
                    let rel_path = file.path.strip_prefix(root_path).unwrap_or(&file.path);
                    let prefix = if i == files.len() - 1 || i == self.max_duplicates_shown - 1 {
                        "└──"
                    } else {
                        "├──"
                    };
                    writeln!(writer, "│   {} {}", prefix, rel_path.display())?;
                }
                if files.len() > self.max_duplicates_shown {
                    writeln!(
                        writer,
                        "│   └── ... and {} more",
                        files.len() - self.max_duplicates_shown
                    )?;
                }
            }
            writeln!(writer)?;
        }

        if !build_artifacts.is_empty() {
            writeln!(writer, "🧹 BUILD ARTIFACTS & TEMPORARY FILES:")?;
            let artifact_size: u64 = build_artifacts.iter().map(|n| n.size).sum();
            writeln!(
                writer,
                "├── Total Size: {}",
                format_size(artifact_size, BINARY)
            )?;

            // Break the artifacts down by ecosystem for the report
            let mut artifact_types: HashMap<String, (usize, u64)> = HashMap::new();
            for artifact in &build_artifacts {
                let path_str = artifact.path.to_string_lossy();
                let artifact_type = if path_str.contains("node_modules") {
                    "node_modules"
                } else if path_str.contains("target") {
                    "rust_target"
                } else if path_str.contains("__pycache__") {
                    "python_cache"
                } else if path_str.contains(".svelte-kit") {
                    "svelte_build"
                } else {
                    "other"
                };

                let entry = artifact_types
                    .entry(artifact_type.to_string())
                    .or_insert((0, 0));
                entry.0 += 1;
                entry.1 += artifact.size;
            }

            for (artifact_type, (count, size)) in artifact_types {
                writeln!(
                    writer,
                    "├── {}: {} files ({})",
                    artifact_type,
                    count,
                    format_size(size, BINARY)
                )?;
            }
            writeln!(writer)?;
        }

        if !large_files.is_empty() {
            writeln!(writer, "📦 LARGE FILES (Potential Optimization Targets):")?;
            for (i, file) in large_files.iter().take(10).enumerate() {
                let rel_path = file.path.strip_prefix(root_path).unwrap_or(&file.path);
                let prefix = if i == large_files.len().min(10) - 1 {
                    "└──"
                } else {
                    "├──"
                };
                writeln!(
                    writer,
                    "{} {} ({})",
                    prefix,
                    rel_path.display(),
                    format_size(file.size, BINARY)
                )?;
            }
            if large_files.len() > 10 {
                writeln!(
                    writer,
                    "└── ... and {} more large files",
                    large_files.len() - 10
                )?;
            }
            writeln!(writer)?;
        }

        if !dependency_waste.is_empty() {
            writeln!(writer, "🔗 DEPENDENCY ANALYSIS:")?;
            for (dep_type, files) in &dependency_waste {
                let total_size: u64 = files.iter().map(|f| f.size).sum();
                writeln!(
                    writer,
                    "├── {}: {} files ({})",
                    dep_type,
                    files.len(),
                    format_size(total_size, BINARY)
                )?;
            }
            writeln!(writer)?;
        }

        if self.show_suggestions {
            let suggestions = self.generate_suggestions(
                &duplicates,
                &build_artifacts,
                &dependency_waste,
                root_path,
            );
            if !suggestions.is_empty() {
                writeln!(writer, "💡 OPTIMIZATION SUGGESTIONS:")?;
                for suggestion in suggestions {
                    if suggestion.is_empty() {
                        writeln!(writer)?;
                    } else {
                        writeln!(writer, "{}", suggestion)?;
                    }
                }
                writeln!(writer)?;
            }
        }

        writeln!(writer, "{}", "═".repeat(80))?;
        writeln!(
            writer,
            "🎉 Analysis Complete! Trisha from Accounting is proud of this optimization mindset!"
        )?;
        writeln!(
            writer,
            "   Remember: A clean codebase is a happy codebase! Keep it lean, keep it mean! 🚀"
        )?;
        writeln!(
            writer,
            "   Pro Tip: Run this analysis regularly to keep your projects in tip-top shape!"
        )?;
        writeln!(writer, "{}", "═".repeat(80))?;

        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::scanner::{FileCategory, FileType, FilesystemType};
    use std::path::PathBuf;
    use std::time::SystemTime;

    #[test]
    fn test_waste_formatter_creation() {
        let formatter = WasteFormatter::new();
        assert_eq!(formatter.large_file_threshold, 10 * 1024 * 1024);
        assert!(formatter.show_suggestions);
    }

    #[test]
    fn test_duplicate_detection() {
        let formatter = WasteFormatter::new();

        let nodes = vec![
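            // Two files with identical sizes should form one duplicate group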
            FileNode {
                path: PathBuf::from("/test/file1.txt"),
                is_dir: false,
                size: 1024,
                permissions: 0o644,
                uid: 1000,
                gid: 1000,
                modified: SystemTime::now(),
                is_symlink: false,
                is_hidden: false,
                permission_denied: false,
                is_ignored: false,
                depth: 1,
                file_type: FileType::RegularFile,
                category: FileCategory::Markdown,
                search_matches: None,
                filesystem_type: FilesystemType::Ext4,
                git_branch: None,
                traversal_context: None,
                interest: None,
                security_findings: Vec::new(),
                change_status: None,
                content_hash: None,
            },
            FileNode {
                path: PathBuf::from("/test/file2.txt"),
                is_dir: false,
                size: 1024, // same size as file1 -> duplicate candidate
                permissions: 0o644,
                uid: 1000,
                gid: 1000,
                modified: SystemTime::now(),
                is_symlink: false,
                is_hidden: false,
                permission_denied: false,
                is_ignored: false,
                depth: 1,
                file_type: FileType::RegularFile,
                category: FileCategory::Markdown,
                search_matches: None,
                filesystem_type: FilesystemType::Ext4,
                git_branch: None,
                traversal_context: None,
                interest: None,
                security_findings: Vec::new(),
                change_status: None,
                content_hash: None,
            },
        ];

        let duplicates = formatter.analyze_duplicates(&nodes);
        assert_eq!(duplicates.len(), 1);
        assert_eq!(duplicates.get(&1024).unwrap().len(), 2);
    }

    #[test]
    fn test_build_artifact_detection() {
        let formatter = WasteFormatter::new();

        let nodes = vec![FileNode {
            path: PathBuf::from("/test/node_modules/package/index.js"),
            is_dir: false,
            size: 1024,
            permissions: 0o644,
            uid: 1000,
            gid: 1000,
            modified: SystemTime::now(),
            is_symlink: false,
            is_hidden: false,
            permission_denied: false,
            is_ignored: false,
            depth: 2,
            file_type: FileType::RegularFile,
            category: FileCategory::JavaScript,
            search_matches: None,
            filesystem_type: FilesystemType::Ext4,
            git_branch: None,
            traversal_context: None,
            interest: None,
            security_findings: Vec::new(),
            change_status: None,
            content_hash: None,
        }];

        let artifacts = formatter.analyze_build_artifacts(&nodes);
        assert_eq!(artifacts.len(), 1);
    }
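
    // Extra coverage sketch: a small helper mirroring the FileNode shape used
    // in the tests above, plus a test for the threshold builder and the
    // large-file analysis. (The helper is local to this module, not part of
    // the scanner API.)
    fn make_node(path: &str, size: u64) -> FileNode {
        FileNode {
            path: PathBuf::from(path),
            is_dir: false,
            size,
            permissions: 0o644,
            uid: 1000,
            gid: 1000,
            modified: SystemTime::now(),
            is_symlink: false,
            is_hidden: false,
            permission_denied: false,
            is_ignored: false,
            depth: 1,
            file_type: FileType::RegularFile,
            category: FileCategory::Markdown,
            search_matches: None,
            filesystem_type: FilesystemType::Ext4,
            git_branch: None,
            traversal_context: None,
            interest: None,
            security_findings: Vec::new(),
            change_status: None,
            content_hash: None,
        }
    }

    #[test]
    fn test_large_file_detection() {
        // Lower the threshold so a 2 KiB file counts as large
        let formatter = WasteFormatter::new().with_threshold(1024);
        let nodes = vec![
            make_node("/test/small.txt", 512),
            make_node("/test/big.txt", 2048),
        ];

        let large = formatter.analyze_large_files(&nodes);
        assert_eq!(large.len(), 1);
        assert_eq!(large[0].size, 2048);
    }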
}