1use crate::{Throttle, WalkOptions, crossdev, get_size_or_panic, inodefilter::InodeFilter};
2
3use crossbeam::channel::Receiver;
4use filesize::PathExt;
5use petgraph::{Directed, Direction, graph::NodeIndex, stable_graph::StableGraph};
6use std::time::Instant;
7use std::{
8 fmt,
9 fs::Metadata,
10 io,
11 path::{Path, PathBuf},
12 sync::Arc,
13 time::{Duration, SystemTime, UNIX_EPOCH},
14};
15
16pub type TreeIndex = NodeIndex;
17pub type Tree = StableGraph<EntryData, (), Directed>;
18
19#[derive(Eq, PartialEq, Clone)]
20pub struct EntryData {
21 pub name: PathBuf,
22 pub size: u128,
25 pub mtime: SystemTime,
26 pub entry_count: Option<u64>,
27 pub metadata_io_error: bool,
29 pub is_dir: bool,
30}
31
32impl Default for EntryData {
33 fn default() -> EntryData {
34 EntryData {
35 name: PathBuf::default(),
36 size: u128::default(),
37 mtime: UNIX_EPOCH,
38 entry_count: None,
39 metadata_io_error: bool::default(),
40 is_dir: false,
41 }
42 }
43}
44
45impl fmt::Debug for EntryData {
46 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
47 f.debug_struct("EntryData")
48 .field("name", &self.name)
49 .field("size", &self.size)
50 .field("entry_count", &self.entry_count)
51 .field("metadata_io_error", &self.metadata_io_error)
53 .finish()
54 }
55}
56
57#[derive(Debug)]
59pub struct Traversal {
60 pub tree: Tree,
62 pub root_index: TreeIndex,
64 pub start_time: Instant,
66 pub cost: Option<Duration>,
68}
69
70impl Default for Traversal {
71 fn default() -> Self {
72 Self::new()
73 }
74}
75
76impl Traversal {
77 pub fn new() -> Self {
78 let mut tree = Tree::new();
79 let root_index = tree.add_node(EntryData::default());
80 Self {
81 tree,
82 root_index,
83 start_time: Instant::now(),
84 cost: None,
85 }
86 }
87
88 pub fn recompute_node_size(&self, node_index: TreeIndex) -> u128 {
89 self.tree
90 .neighbors_directed(node_index, Direction::Outgoing)
91 .map(|idx| get_size_or_panic(&self.tree, idx))
92 .sum()
93 }
94
95 pub fn is_costly(&self) -> bool {
96 self.cost.is_none_or(|d| d.as_secs_f32() > 10.0)
97 }
98}
99
100#[derive(Clone, Copy)]
101pub struct TraversalStats {
102 pub entries_traversed: u64,
104 pub start: std::time::Instant,
106 pub elapsed: Option<std::time::Duration>,
108 pub io_errors: u64,
110 pub total_bytes: Option<u128>,
112}
113
114impl Default for TraversalStats {
115 fn default() -> Self {
116 Self {
117 entries_traversed: 0,
118 start: std::time::Instant::now(),
119 elapsed: None,
120 io_errors: 0,
121 total_bytes: None,
122 }
123 }
124}
125
126#[derive(Default, Copy, Clone)]
127pub struct EntryInfo {
128 pub size: u128,
129 pub entries_count: Option<u64>,
130}
131
132impl EntryInfo {
133 pub fn add_count(&mut self, other: &Self) {
134 self.entries_count = match (self.entries_count, other.entries_count) {
135 (Some(a), Some(b)) => Some(a + b),
136 (None, Some(b)) => Some(b),
137 (Some(a), None) => Some(a),
138 (None, None) => None,
139 };
140 }
141}
142
143pub fn set_entry_info_or_panic(
144 tree: &mut Tree,
145 node_idx: TreeIndex,
146 node_own_size: u128,
147 EntryInfo {
148 size,
149 entries_count,
150 }: EntryInfo,
151) {
152 let node = tree
153 .node_weight_mut(node_idx)
154 .expect("node for parent index we just retrieved");
155 node.size = size + node_own_size;
156 node.entry_count = entries_count.map(|n| n + 1);
157}
158
159pub fn parent_or_panic(tree: &mut Tree, parent_node_idx: TreeIndex) -> TreeIndex {
160 tree.neighbors_directed(parent_node_idx, Direction::Incoming)
161 .next()
162 .expect("every node in the iteration has a parent")
163}
164
165pub fn pop_or_panic<T>(v: &mut Vec<T>) -> T {
166 v.pop().expect("sizes per level to be in sync with graph")
167}
168
169pub type TraversalEntry =
170 Result<jwalk::DirEntry<((), Option<Result<std::fs::Metadata, jwalk::Error>>)>, jwalk::Error>;
171
172#[allow(clippy::large_enum_variant)]
173pub enum TraversalEvent {
174 Entry(TraversalEntry, Arc<PathBuf>, u64),
175 Finished(u64),
176}
177
178pub struct BackgroundTraversal {
180 walk_options: WalkOptions,
181 pub root_idx: TreeIndex,
182 pub stats: TraversalStats,
183 previous_node_idx: TreeIndex,
184 parent_node_idx: TreeIndex,
185 directory_info_per_depth_level: Vec<EntryInfo>,
186 current_directory_at_depth: EntryInfo,
187 parent_node_size: u128,
188 parent_node_size_per_depth_level: Vec<u128>,
189 previous_node_size: u128,
190 previous_depth: usize,
191 inodes: InodeFilter,
192 throttle: Option<Throttle>,
193 skip_root: bool,
194 use_root_path: bool,
195 pub event_rx: Receiver<TraversalEvent>,
196}
197
198impl BackgroundTraversal {
199 pub fn start(
202 root_idx: TreeIndex,
203 walk_options: &WalkOptions,
204 input: Vec<PathBuf>,
205 skip_root: bool,
206 use_root_path: bool,
207 ) -> anyhow::Result<BackgroundTraversal> {
208 let (entry_tx, entry_rx) = crossbeam::channel::bounded(100);
209 std::thread::Builder::new()
210 .name("dua-fs-walk-dispatcher".to_string())
211 .spawn({
212 let walk_options = walk_options.clone();
213 let mut io_errors: u64 = 0;
214 move || {
215 for root_path in input.into_iter() {
216 log::info!("Walking {root_path:?}");
217 let device_id = match crossdev::init(root_path.as_ref()) {
218 Ok(id) => id,
219 Err(_) => {
220 io_errors += 1;
221 continue;
222 }
223 };
224
225 let root_path = Arc::new(root_path);
226 for entry in walk_options
227 .iter_from_path(root_path.as_ref(), device_id, skip_root)
228 .into_iter()
229 {
230 if entry_tx
231 .send(TraversalEvent::Entry(
232 entry,
233 Arc::clone(&root_path),
234 device_id,
235 ))
236 .is_err()
237 {
238 return;
241 }
242 }
243 }
244 if entry_tx.send(TraversalEvent::Finished(io_errors)).is_err() {
245 log::error!("Failed to send TraversalEvents::Finished event");
246 }
247 }
248 })?;
249
250 Ok(Self {
251 walk_options: walk_options.clone(),
252 root_idx,
253 stats: TraversalStats::default(),
254 previous_node_idx: root_idx,
255 parent_node_idx: root_idx,
256 previous_node_size: 0,
257 parent_node_size: 0,
258 parent_node_size_per_depth_level: Vec::new(),
259 directory_info_per_depth_level: Vec::new(),
260 current_directory_at_depth: EntryInfo::default(),
261 previous_depth: 0,
262 inodes: InodeFilter::default(),
263 throttle: Some(Throttle::new(Duration::from_millis(250), None)),
264 skip_root,
265 use_root_path,
266 event_rx: entry_rx,
267 })
268 }
269
270 pub fn integrate_traversal_event(
278 &mut self,
279 traversal: &mut Traversal,
280 event: TraversalEvent,
281 ) -> Option<bool> {
282 match event {
283 TraversalEvent::Entry(entry, root_path, device_id) => {
284 self.stats.entries_traversed += 1;
285 let mut data = EntryData::default();
286 match entry {
287 Ok(mut entry) => {
288 if self.skip_root {
289 entry.depth -= 1;
290 data.name = entry.file_name.into()
291 } else {
292 data.name = if entry.depth < 1 && self.use_root_path {
293 (*root_path).clone()
294 } else {
295 entry.file_name.into()
296 }
297 }
298
299 let mut file_size = 0u128;
300 let mut mtime: SystemTime = UNIX_EPOCH;
301 let mut file_count = 0u64;
302 match &entry.client_state {
303 Some(Ok(m)) => {
304 if self.walk_options.count_hard_links
305 || self.inodes.add(m)
306 && (self.walk_options.cross_filesystems
307 || crossdev::is_same_device(device_id, m))
308 {
309 file_count = 1;
310 if self.walk_options.apparent_size {
311 file_size = m.len() as u128;
312 } else {
313 file_size = size_on_disk(&entry.parent_path, &data.name, m)
314 .unwrap_or_else(|_| {
315 self.stats.io_errors += 1;
316 data.metadata_io_error = true;
317 0
318 })
319 as u128;
320 }
321 } else {
322 data.entry_count = Some(0);
323 data.is_dir = true;
324 }
325
326 match m.modified() {
327 Ok(modified) => {
328 mtime = modified;
329 }
330 Err(_) => {
331 self.stats.io_errors += 1;
332 data.metadata_io_error = true;
333 }
334 }
335 }
336 Some(Err(_)) => {
337 self.stats.io_errors += 1;
338 data.metadata_io_error = true;
339 }
340 None => {}
341 }
342
343 match (entry.depth, self.previous_depth) {
344 (n, p) if n > p => {
345 self.directory_info_per_depth_level
346 .push(self.current_directory_at_depth);
347 self.current_directory_at_depth = EntryInfo {
348 size: file_size,
349 entries_count: Some(file_count),
350 };
351
352 self.parent_node_size_per_depth_level
353 .push(self.previous_node_size);
354
355 self.parent_node_idx = self.previous_node_idx;
356 self.parent_node_size = self.previous_node_size;
357 }
358 (n, p) if n < p => {
359 for _ in n..p {
360 set_entry_info_or_panic(
361 &mut traversal.tree,
362 self.parent_node_idx,
363 self.parent_node_size,
364 self.current_directory_at_depth,
365 );
366 let dir_info =
367 pop_or_panic(&mut self.directory_info_per_depth_level);
368
369 self.current_directory_at_depth.size += dir_info.size;
370 self.current_directory_at_depth.add_count(&dir_info);
371
372 self.parent_node_idx =
373 parent_or_panic(&mut traversal.tree, self.parent_node_idx);
374 self.parent_node_size =
375 pop_or_panic(&mut self.parent_node_size_per_depth_level);
376 }
377 self.current_directory_at_depth.size += file_size;
378 *self
379 .current_directory_at_depth
380 .entries_count
381 .get_or_insert(0) += file_count;
382 set_entry_info_or_panic(
383 &mut traversal.tree,
384 self.parent_node_idx,
385 self.parent_node_size,
386 self.current_directory_at_depth,
387 );
388 }
389 _ => {
390 self.current_directory_at_depth.size += file_size;
391 *self
392 .current_directory_at_depth
393 .entries_count
394 .get_or_insert(0) += file_count;
395 }
396 };
397
398 data.mtime = mtime;
399 data.size = file_size;
400 let entry_index = traversal.tree.add_node(data);
401
402 traversal
403 .tree
404 .add_edge(self.parent_node_idx, entry_index, ());
405 self.previous_node_idx = entry_index;
406 self.previous_node_size = file_size;
407 self.previous_depth = entry.depth;
408 }
409 Err(_) => {
410 if self.previous_depth == 0 {
411 data.name.clone_from(&(*root_path));
412 let entry_index = traversal.tree.add_node(data);
413 traversal
414 .tree
415 .add_edge(self.parent_node_idx, entry_index, ());
416 }
417
418 self.stats.io_errors += 1
419 }
420 }
421
422 if self.throttle.as_ref().is_some_and(|t| t.can_update()) {
423 return Some(false);
424 }
425 }
426 TraversalEvent::Finished(io_errors) => {
427 self.stats.io_errors += io_errors;
428
429 self.throttle = None;
430 self.directory_info_per_depth_level
431 .push(self.current_directory_at_depth);
432 self.current_directory_at_depth = EntryInfo::default();
433 for _ in 0..self.previous_depth {
434 let dir_info = pop_or_panic(&mut self.directory_info_per_depth_level);
435 self.current_directory_at_depth.size += dir_info.size;
436 self.current_directory_at_depth.add_count(&dir_info);
437
438 set_entry_info_or_panic(
439 &mut traversal.tree,
440 self.parent_node_idx,
441 self.parent_node_size,
442 self.current_directory_at_depth,
443 );
444 self.parent_node_idx =
445 parent_or_panic(&mut traversal.tree, self.parent_node_idx);
446 }
447 let root_size = traversal.recompute_node_size(self.root_idx);
448 set_entry_info_or_panic(
449 &mut traversal.tree,
450 self.root_idx,
451 root_size,
452 EntryInfo {
453 size: root_size,
454 entries_count: (self.stats.entries_traversed > 0)
455 .then_some(self.stats.entries_traversed),
456 },
457 );
458 self.stats.total_bytes = Some(root_size);
459 self.stats.elapsed = Some(self.stats.start.elapsed());
460
461 return Some(true);
462 }
463 }
464 None
465 }
466}
467
468#[cfg(not(windows))]
469pub fn size_on_disk(_parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> {
470 name.size_on_disk_fast(meta)
471}
472
473#[cfg(windows)]
474pub fn size_on_disk(parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> {
475 parent.join(name).size_on_disk_fast(meta)
476}
477
478#[cfg(test)]
479mod tests {
480 use super::*;
481
482 #[test]
483 fn size_of_entry_data() {
484 assert!(
485 std::mem::size_of::<EntryData>() <= 80,
486 "the size of this ({}) should not exceed 80 as it affects overall memory consumption",
487 std::mem::size_of::<EntryData>()
488 );
489 }
490}