Skip to main content

diskard_core/
scanner.rs

1use std::time::{Duration, Instant, SystemTime};
2
3use crate::config::Config;
4use crate::error::Result;
5use crate::finding::{Category, Finding, RiskLevel};
6use crate::recognizer::Recognizer;
7
/// Results from a scan operation, as assembled by [`scan`].
pub struct ScanResult {
    /// Findings that passed every filter in `scan`, in the requested sort order.
    pub findings: Vec<Finding>,
    /// Sum of `size_bytes` over `findings` (computed after filtering).
    pub total_reclaimable: u64,
    /// Time elapsed from the start of `scan` until this result was built.
    pub scan_duration: Duration,
    /// String form of every error returned by a recognizer's `scan()`;
    /// such failures are collected here rather than aborting the scan.
    pub errors: Vec<String>,
}
15
/// How to sort findings.
///
/// Derives `PartialEq`/`Eq` so callers can compare the selected order
/// directly (e.g. `opts.sort == SortOrder::Size`) instead of matching.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub enum SortOrder {
    /// Largest `size_bytes` first. This is the default.
    #[default]
    Size,
    /// Highest `risk` first.
    Risk,
    /// Ascending by the category's string representation.
    Category,
}
24
/// Options for controlling scan behavior.
pub struct ScanOptions {
    /// Findings whose `risk` is greater than this are dropped.
    pub max_risk: RiskLevel,
    /// Findings whose `size_bytes` is below this are dropped.
    pub min_size: u64,
    /// When `Some`, only recognizers whose category equals it are run.
    pub category: Option<Category>,
    /// When `Some`, findings with a known `last_modified` that is less than
    /// this long ago are dropped; findings without a `last_modified` are kept.
    pub older_than: Option<Duration>,
    /// Order applied to the findings that remain after filtering.
    pub sort: SortOrder,
}
33
34impl Default for ScanOptions {
35    fn default() -> Self {
36        Self {
37            max_risk: RiskLevel::Risky,
38            min_size: 0,
39            category: None,
40            older_than: None,
41            sort: SortOrder::Size,
42        }
43    }
44}
45
46/// Run all enabled recognizers in parallel and collect findings.
47pub fn scan(
48    recognizers: &[Box<dyn Recognizer>],
49    config: &Config,
50    options: &ScanOptions,
51) -> ScanResult {
52    let start = Instant::now();
53
54    // Filter to enabled recognizers, optionally by category
55    let enabled: Vec<&Box<dyn Recognizer>> = recognizers
56        .iter()
57        .filter(|r| config.is_recognizer_enabled(r.id()))
58        .filter(|r| options.category.is_none() || Some(r.category()) == options.category)
59        .collect();
60
61    // Run recognizers sequentially to avoid file descriptor exhaustion.
62    // Each recognizer uses jwalk (rayon-based) internally for dir_size,
63    // and running them all in parallel can exceed the OS open-file limit.
64    let results: Vec<Result<Vec<Finding>>> = enabled
65        .iter()
66        .map(|recognizer| {
67            log::debug!("Running recognizer: {}", recognizer.name());
68            recognizer.scan()
69        })
70        .collect();
71
72    let mut findings = Vec::new();
73    let mut errors = Vec::new();
74
75    for result in results {
76        match result {
77            Ok(mut f) => findings.append(&mut f),
78            Err(e) => errors.push(e.to_string()),
79        }
80    }
81
82    // Filter by config
83    let now = SystemTime::now();
84    findings.retain(|f| {
85        if f.risk > options.max_risk || f.size_bytes < options.min_size {
86            return false;
87        }
88        if config.is_path_ignored(&f.path) {
89            return false;
90        }
91        if let Some(max_age) = options.older_than {
92            if let Some(modified) = f.last_modified {
93                if let Ok(age) = now.duration_since(modified) {
94                    if age < max_age {
95                        return false;
96                    }
97                }
98            }
99            // If no last_modified, include it (we can't determine age)
100        }
101        true
102    });
103
104    // Sort
105    match options.sort {
106        SortOrder::Size => findings.sort_by(|a, b| b.size_bytes.cmp(&a.size_bytes)),
107        SortOrder::Risk => findings.sort_by(|a, b| b.risk.cmp(&a.risk)),
108        SortOrder::Category => {
109            findings.sort_by(|a, b| a.category.to_string().cmp(&b.category.to_string()))
110        }
111    }
112
113    let total_reclaimable = findings.iter().map(|f| f.size_bytes).sum();
114
115    ScanResult {
116        findings,
117        total_reclaimable,
118        scan_duration: start.elapsed(),
119        errors,
120    }
121}