Skip to main content

diskard_core/
scanner.rs

1use rayon::prelude::*;
2use std::time::{Duration, Instant, SystemTime};
3
4use crate::config::Config;
5use crate::error::Result;
6use crate::finding::{Category, Finding, RiskLevel};
7use crate::recognizer::Recognizer;
8
9/// Results from a scan operation.
10pub struct ScanResult {
11    pub findings: Vec<Finding>,
12    pub total_reclaimable: u64,
13    pub scan_duration: Duration,
14    pub errors: Vec<String>,
15}
16
/// Ordering applied to the findings returned by a scan.
#[derive(Debug, Clone, Copy, Default)]
pub enum SortOrder {
    /// Order by `size_bytes`, largest first. This is the default variant.
    #[default]
    Size,
    /// Order by risk level, highest first.
    Risk,
    /// Order by the category's string form, ascending.
    Category,
}
25
26/// Options for controlling scan behavior.
27pub struct ScanOptions {
28    pub max_risk: RiskLevel,
29    pub min_size: u64,
30    pub category: Option<Category>,
31    pub older_than: Option<Duration>,
32    pub sort: SortOrder,
33}
34
35impl Default for ScanOptions {
36    fn default() -> Self {
37        Self {
38            max_risk: RiskLevel::Risky,
39            min_size: 0,
40            category: None,
41            older_than: None,
42            sort: SortOrder::Size,
43        }
44    }
45}
46
47/// Run all enabled recognizers in parallel and collect findings.
48pub fn scan(
49    recognizers: &[Box<dyn Recognizer>],
50    config: &Config,
51    options: &ScanOptions,
52) -> ScanResult {
53    let start = Instant::now();
54
55    // Filter to enabled recognizers, optionally by category
56    let enabled: Vec<&Box<dyn Recognizer>> = recognizers
57        .iter()
58        .filter(|r| config.is_recognizer_enabled(r.id()))
59        .filter(|r| options.category.is_none() || Some(r.category()) == options.category)
60        .collect();
61
62    // Run recognizers in parallel
63    let results: Vec<Result<Vec<Finding>>> = enabled
64        .par_iter()
65        .map(|recognizer| {
66            log::debug!("Running recognizer: {}", recognizer.name());
67            recognizer.scan()
68        })
69        .collect();
70
71    let mut findings = Vec::new();
72    let mut errors = Vec::new();
73
74    for result in results {
75        match result {
76            Ok(mut f) => findings.append(&mut f),
77            Err(e) => errors.push(e.to_string()),
78        }
79    }
80
81    // Filter by config
82    let now = SystemTime::now();
83    findings.retain(|f| {
84        if f.risk > options.max_risk || f.size_bytes < options.min_size {
85            return false;
86        }
87        if config.is_path_ignored(&f.path) {
88            return false;
89        }
90        if let Some(max_age) = options.older_than {
91            if let Some(modified) = f.last_modified {
92                if let Ok(age) = now.duration_since(modified) {
93                    if age < max_age {
94                        return false;
95                    }
96                }
97            }
98            // If no last_modified, include it (we can't determine age)
99        }
100        true
101    });
102
103    // Sort
104    match options.sort {
105        SortOrder::Size => findings.sort_by(|a, b| b.size_bytes.cmp(&a.size_bytes)),
106        SortOrder::Risk => findings.sort_by(|a, b| b.risk.cmp(&a.risk)),
107        SortOrder::Category => {
108            findings.sort_by(|a, b| a.category.to_string().cmp(&b.category.to_string()))
109        }
110    }
111
112    let total_reclaimable = findings.iter().map(|f| f.size_bytes).sum();
113
114    ScanResult {
115        findings,
116        total_reclaimable,
117        scan_duration: start.elapsed(),
118        errors,
119    }
120}