Skip to main content

dumap_core/scan/
walker.rs

1use crate::error::ScanError;
2use crate::path_util::clean_path;
3use crate::tree::{DirNode, split_path};
4use ignore::WalkBuilder;
5use parking_lot::RwLock;
6use std::path::PathBuf;
7use std::sync::Arc;
8use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
9use tracing::debug;
10
/// Configuration for a filesystem scan.
///
/// Start from [`Default::default`] and override individual fields with
/// struct-update syntax, e.g.
/// `ScanConfig { max_depth: Some(2), ..Default::default() }`.
#[derive(Debug, Clone)]
pub struct ScanConfig {
    /// Root path to scan.
    pub root: PathBuf,
    /// Whether to follow symbolic links (default: `false`).
    pub follow_links: bool,
    /// Whether to include hidden files (default: `true`).
    pub include_hidden: bool,
    /// Maximum directory depth to descend (`None` = unlimited, the default).
    pub max_depth: Option<usize>,
    /// Whether to report apparent (logical) size instead of disk usage
    /// (default: `false`).
    pub apparent_size: bool,
}
25
26impl Default for ScanConfig {
27    fn default() -> Self {
28        Self {
29            root: PathBuf::from("."),
30            follow_links: false,
31            include_hidden: true,
32            max_depth: None,
33            apparent_size: false,
34        }
35    }
36}
37
38/// Progress counters shared between the scan thread and the UI.
39pub struct ScanProgress {
40    pub files_found: Arc<AtomicU64>,
41    pub dirs_found: Arc<AtomicU64>,
42    pub bytes_found: Arc<AtomicU64>,
43    pub current_path: Arc<RwLock<String>>,
44    pub cancelled: Arc<AtomicBool>,
45}
46
47impl ScanProgress {
48    pub fn new() -> Self {
49        Self {
50            files_found: Arc::new(AtomicU64::new(0)),
51            dirs_found: Arc::new(AtomicU64::new(0)),
52            bytes_found: Arc::new(AtomicU64::new(0)),
53            current_path: Arc::new(RwLock::new(String::new())),
54            cancelled: Arc::new(AtomicBool::new(false)),
55        }
56    }
57}
58
59impl Default for ScanProgress {
60    fn default() -> Self {
61        Self::new()
62    }
63}
64
65/// Get the size of a file from metadata.
66///
67/// When `apparent` is true, returns the logical file size.
68/// When false, returns the actual disk usage (block-aligned on Unix).
69fn file_size(metadata: &std::fs::Metadata, apparent: bool) -> u64 {
70    if apparent {
71        metadata.len()
72    } else {
73        disk_usage(metadata)
74    }
75}
76
/// Disk usage of a file on Unix: the number of allocated blocks converted
/// to bytes.
#[cfg(unix)]
fn disk_usage(metadata: &std::fs::Metadata) -> u64 {
    use std::os::unix::fs::MetadataExt;

    // `blocks()` is expressed in 512-byte units on Unix, independent of the
    // filesystem's own block size.
    const BLOCK_UNIT_BYTES: u64 = 512;

    let allocated_blocks = metadata.blocks();
    allocated_blocks * BLOCK_UNIT_BYTES
}
83
/// Fallback disk-usage estimate for non-Unix targets.
///
/// Returns the file's logical length: on Windows, apparent size is the best
/// available figure without resorting to Win32 API calls.
#[cfg(not(unix))]
fn disk_usage(metadata: &std::fs::Metadata) -> u64 {
    std::fs::Metadata::len(metadata)
}
89
90/// Scan a directory tree and produce a DirNode tree.
91///
92/// Uses `ignore::WalkBuilder` for efficient traversal with symlink safety.
93/// Progress is reported via the `ScanProgress` atomic counters.
94pub fn scan_directory(config: &ScanConfig, progress: &ScanProgress) -> Result<DirNode, ScanError> {
95    let root = &config.root;
96
97    if !root.exists() {
98        return Err(ScanError::PathNotFound(root.clone()));
99    }
100    if !root.is_dir() {
101        return Err(ScanError::NotADirectory(root.clone()));
102    }
103
104    let root_canonical = clean_path(root.canonicalize().map_err(|e| ScanError::Io {
105        path: root.clone(),
106        source: e,
107    })?);
108
109    let mut builder = WalkBuilder::new(&root_canonical);
110    builder
111        .hidden(!config.include_hidden)
112        .follow_links(config.follow_links)
113        .parents(false);
114
115    if let Some(depth) = config.max_depth {
116        builder.max_depth(Some(depth));
117    }
118
119    let mut tree = DirNode::new();
120    let root_prefix = root_canonical.as_path();
121
122    for entry_result in builder.build() {
123        if progress.cancelled.load(Ordering::Relaxed) {
124            return Err(ScanError::Cancelled);
125        }
126
127        let entry = match entry_result {
128            Ok(entry) => entry,
129            Err(err) => {
130                debug!("Walk error: {err:?}");
131                continue;
132            }
133        };
134
135        let file_type = match entry.file_type() {
136            Some(ft) => ft,
137            None => continue,
138        };
139
140        if file_type.is_dir() {
141            progress.dirs_found.fetch_add(1, Ordering::Relaxed);
142            continue;
143        }
144
145        if !file_type.is_file() {
146            continue;
147        }
148
149        let path = entry.path();
150
151        // Get metadata (don't follow symlinks)
152        let metadata = match std::fs::symlink_metadata(path) {
153            Ok(m) => m,
154            Err(err) => {
155                debug!("Unable to read metadata for {path:?}: {err:?}");
156                continue;
157            }
158        };
159
160        let size = file_size(&metadata, config.apparent_size);
161
162        // Get relative path from scan root
163        let rel_path = match path.strip_prefix(root_prefix) {
164            Ok(rel) => rel,
165            Err(_) => path,
166        };
167
168        let path_str = rel_path.to_string_lossy();
169        let components = split_path(&path_str);
170        if !components.is_empty() {
171            tree.insert(&components, size);
172        }
173
174        progress.files_found.fetch_add(1, Ordering::Relaxed);
175        progress.bytes_found.fetch_add(size, Ordering::Relaxed);
176
177        // Update current path periodically (every file for now; could throttle)
178        *progress.current_path.write() = path.to_string_lossy().into_owned();
179    }
180
181    Ok(tree)
182}
183
/// Format a byte count as a human-readable string.
///
/// Uses binary multiples (1 KB = 1024 B) with one decimal place, e.g.
/// `"1.5 KB"` or `"2.0 GB"`. Counts below 1024 are printed exactly as
/// `"<n> B"`. TB is the largest unit, so very large values stay in TB.
///
/// A value that would round up to `1024.0` of its unit is shown in the next
/// unit instead, so 1 048 575 bytes is `"1.0 MB"` rather than `"1024.0 KB"`.
pub fn format_size(bytes: u64) -> String {
    const STEP: f64 = 1024.0;
    const UNITS: [&str; 4] = ["KB", "MB", "GB", "TB"];
    // Smallest scaled value that `{:.1}` renders as "1024.0".
    const ROLLOVER: f64 = 1023.95;

    if bytes < 1024 {
        return format!("{bytes} B");
    }

    // Dividing by a power of two is exact, so repeated division gives the
    // same value as a single division by the unit size.
    let mut value = bytes as f64 / STEP;
    let mut unit = 0;
    while value >= ROLLOVER && unit + 1 < UNITS.len() {
        value /= STEP;
        unit += 1;
    }

    format!("{value:.1} {}", UNITS[unit])
}