1use crate::{Throttle, WalkOptions, crossdev, get_size_or_panic, inodefilter::InodeFilter};
2
3use crossbeam::channel::Receiver;
4use filesize::PathExt;
5use petgraph::{Directed, Direction, graph::NodeIndex, stable_graph::StableGraph};
6use std::time::Instant;
7use std::{
8 fmt,
9 fs::Metadata,
10 io,
11 path::{Path, PathBuf},
12 sync::Arc,
13 time::{Duration, SystemTime, UNIX_EPOCH},
14};
15
16pub type TreeIndex = NodeIndex;
18pub type Tree = StableGraph<EntryData, (), Directed>;
20
21#[derive(Eq, PartialEq, Clone)]
23pub struct EntryData {
24 pub name: PathBuf,
26 pub size: u128,
29 pub mtime: SystemTime,
31 pub entry_count: Option<u64>,
33 pub metadata_io_error: bool,
35 pub is_dir: bool,
37}
38
39impl Default for EntryData {
40 fn default() -> EntryData {
41 EntryData {
42 name: PathBuf::default(),
43 size: u128::default(),
44 mtime: UNIX_EPOCH,
45 entry_count: None,
46 metadata_io_error: bool::default(),
47 is_dir: false,
48 }
49 }
50}
51
52impl fmt::Debug for EntryData {
53 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
54 f.debug_struct("EntryData")
55 .field("name", &self.name)
56 .field("size", &self.size)
57 .field("entry_count", &self.entry_count)
58 .field("metadata_io_error", &self.metadata_io_error)
60 .finish()
61 }
62}
63
64#[derive(Debug)]
66pub struct Traversal {
67 pub tree: Tree,
69 pub root_index: TreeIndex,
71 pub start_time: Instant,
73 pub cost: Option<Duration>,
75}
76
77impl Default for Traversal {
78 fn default() -> Self {
79 Self::new()
80 }
81}
82
83impl Traversal {
84 pub fn new() -> Self {
86 let mut tree = Tree::new();
87 let root_index = tree.add_node(EntryData::default());
88 Self {
89 tree,
90 root_index,
91 start_time: Instant::now(),
92 cost: None,
93 }
94 }
95
96 pub(crate) fn recompute_node_size(&self, node_index: TreeIndex) -> u128 {
98 self.tree
99 .neighbors_directed(node_index, Direction::Outgoing)
100 .map(|idx| get_size_or_panic(&self.tree, idx))
101 .sum()
102 }
103
104 pub fn is_costly(&self) -> bool {
106 self.cost.is_none_or(|d| d.as_secs_f32() > 10.0)
107 }
108}
109
110#[derive(Clone, Copy)]
112pub struct TraversalStats {
113 pub entries_traversed: u64,
115 pub start: std::time::Instant,
117 pub elapsed: Option<std::time::Duration>,
119 pub io_errors: u64,
121 pub total_bytes: Option<u128>,
123}
124
125impl Default for TraversalStats {
126 fn default() -> Self {
127 Self {
128 entries_traversed: 0,
129 start: std::time::Instant::now(),
130 elapsed: None,
131 io_errors: 0,
132 total_bytes: None,
133 }
134 }
135}
136
137#[derive(Default, Copy, Clone)]
139struct EntryInfo {
140 size: u128,
142 entries_count: Option<u64>,
144}
145
146impl EntryInfo {
147 fn add_count(&mut self, other: &Self) {
149 self.entries_count = match (self.entries_count, other.entries_count) {
150 (Some(a), Some(b)) => Some(a + b),
151 (None, Some(b)) => Some(b),
152 (Some(a), None) => Some(a),
153 (None, None) => None,
154 };
155 }
156}
157
158fn set_entry_info_or_panic(
162 tree: &mut Tree,
163 node_idx: TreeIndex,
164 node_own_size: u128,
165 EntryInfo {
166 size,
167 entries_count,
168 }: EntryInfo,
169) {
170 let node = tree
171 .node_weight_mut(node_idx)
172 .expect("node for parent index we just retrieved");
173 node.size = size + node_own_size;
174 node.entry_count = entries_count.map(|n| n + 1);
175}
176
177fn parent_or_panic(tree: &mut Tree, parent_node_idx: TreeIndex) -> TreeIndex {
179 tree.neighbors_directed(parent_node_idx, Direction::Incoming)
180 .next()
181 .expect("every node in the iteration has a parent")
182}
183
184fn pop_or_panic<T>(v: &mut Vec<T>) -> T {
186 v.pop().expect("sizes per level to be in sync with graph")
187}
188
189type TraversalEntry =
191 Result<jwalk::DirEntry<((), Option<Result<std::fs::Metadata, jwalk::Error>>)>, jwalk::Error>;
192
193#[allow(clippy::large_enum_variant)]
194pub enum TraversalEvent {
196 Entry(TraversalEntry, Arc<PathBuf>, u64),
198 Finished(u64),
200}
201
202pub struct BackgroundTraversal {
204 walk_options: WalkOptions,
205 pub root_idx: TreeIndex,
207 pub stats: TraversalStats,
209 previous_node_idx: TreeIndex,
210 parent_node_idx: TreeIndex,
211 directory_info_per_depth_level: Vec<EntryInfo>,
212 current_directory_at_depth: EntryInfo,
213 parent_node_size: u128,
214 parent_node_size_per_depth_level: Vec<u128>,
215 previous_node_size: u128,
216 previous_depth: usize,
217 inodes: InodeFilter,
218 throttle: Option<Throttle>,
219 skip_root: bool,
220 use_root_path: bool,
221 pub event_rx: Receiver<TraversalEvent>,
223}
224
225impl BackgroundTraversal {
226 pub fn start(
229 root_idx: TreeIndex,
230 walk_options: &WalkOptions,
231 input: Vec<PathBuf>,
232 skip_root: bool,
233 use_root_path: bool,
234 ) -> anyhow::Result<BackgroundTraversal> {
235 let (entry_tx, entry_rx) = crossbeam::channel::bounded(100);
236 std::thread::Builder::new()
237 .name("dua-fs-walk-dispatcher".to_string())
238 .spawn({
239 let walk_options = walk_options.clone();
240 let mut io_errors: u64 = 0;
241 move || {
242 for root_path in input.into_iter() {
243 log::info!("Walking {root_path:?}");
244 let device_id = match crossdev::init(root_path.as_ref()) {
245 Ok(id) => id,
246 Err(_) => {
247 io_errors += 1;
248 continue;
249 }
250 };
251
252 let root_path = Arc::new(root_path);
253 for entry in walk_options
254 .iter_from_path(root_path.as_ref(), device_id, skip_root)
255 .into_iter()
256 {
257 if entry_tx
258 .send(TraversalEvent::Entry(
259 entry,
260 Arc::clone(&root_path),
261 device_id,
262 ))
263 .is_err()
264 {
265 return;
268 }
269 }
270 }
271 if entry_tx.send(TraversalEvent::Finished(io_errors)).is_err() {
272 log::error!("Failed to send TraversalEvents::Finished event");
273 }
274 }
275 })?;
276
277 Ok(Self {
278 walk_options: walk_options.clone(),
279 root_idx,
280 stats: TraversalStats::default(),
281 previous_node_idx: root_idx,
282 parent_node_idx: root_idx,
283 previous_node_size: 0,
284 parent_node_size: 0,
285 parent_node_size_per_depth_level: Vec::new(),
286 directory_info_per_depth_level: Vec::new(),
287 current_directory_at_depth: EntryInfo::default(),
288 previous_depth: 0,
289 inodes: InodeFilter::default(),
290 throttle: Some(Throttle::new(Duration::from_millis(250), None)),
291 skip_root,
292 use_root_path,
293 event_rx: entry_rx,
294 })
295 }
296
297 pub fn integrate_traversal_event(
305 &mut self,
306 traversal: &mut Traversal,
307 event: TraversalEvent,
308 ) -> Option<bool> {
309 match event {
310 TraversalEvent::Entry(entry, root_path, device_id) => {
311 self.stats.entries_traversed += 1;
312 let mut data = EntryData::default();
313 match entry {
314 Ok(mut entry) => {
315 if self.skip_root {
316 entry.depth -= 1;
317 data.name = entry.file_name.into()
318 } else {
319 data.name = if entry.depth < 1 && self.use_root_path {
320 (*root_path).clone()
321 } else {
322 entry.file_name.into()
323 }
324 }
325
326 let mut file_size = 0u128;
327 let mut mtime: SystemTime = UNIX_EPOCH;
328 let mut file_count = 0u64;
329 match &entry.client_state {
330 Some(Ok(m)) => {
331 if self.walk_options.count_hard_links
332 || self.inodes.add(m)
333 && (self.walk_options.cross_filesystems
334 || crossdev::is_same_device(device_id, m))
335 {
336 file_count = 1;
337 if self.walk_options.apparent_size {
338 file_size = m.len() as u128;
339 } else {
340 file_size = size_on_disk(&entry.parent_path, &data.name, m)
341 .unwrap_or_else(|_| {
342 self.stats.io_errors += 1;
343 data.metadata_io_error = true;
344 0
345 })
346 as u128;
347 }
348 } else {
349 data.entry_count = Some(0);
350 data.is_dir = true;
351 }
352
353 match m.modified() {
354 Ok(modified) => {
355 mtime = modified;
356 }
357 Err(_) => {
358 self.stats.io_errors += 1;
359 data.metadata_io_error = true;
360 }
361 }
362 }
363 Some(Err(_)) => {
364 self.stats.io_errors += 1;
365 data.metadata_io_error = true;
366 }
367 None => {}
368 }
369
370 match (entry.depth, self.previous_depth) {
371 (n, p) if n > p => {
372 self.directory_info_per_depth_level
373 .push(self.current_directory_at_depth);
374 self.current_directory_at_depth = EntryInfo {
375 size: file_size,
376 entries_count: Some(file_count),
377 };
378
379 self.parent_node_size_per_depth_level
380 .push(self.previous_node_size);
381
382 self.parent_node_idx = self.previous_node_idx;
383 self.parent_node_size = self.previous_node_size;
384 }
385 (n, p) if n < p => {
386 for _ in n..p {
387 set_entry_info_or_panic(
388 &mut traversal.tree,
389 self.parent_node_idx,
390 self.parent_node_size,
391 self.current_directory_at_depth,
392 );
393 let dir_info =
394 pop_or_panic(&mut self.directory_info_per_depth_level);
395
396 self.current_directory_at_depth.size += dir_info.size;
397 self.current_directory_at_depth.add_count(&dir_info);
398
399 self.parent_node_idx =
400 parent_or_panic(&mut traversal.tree, self.parent_node_idx);
401 self.parent_node_size =
402 pop_or_panic(&mut self.parent_node_size_per_depth_level);
403 }
404 self.current_directory_at_depth.size += file_size;
405 *self
406 .current_directory_at_depth
407 .entries_count
408 .get_or_insert(0) += file_count;
409 set_entry_info_or_panic(
410 &mut traversal.tree,
411 self.parent_node_idx,
412 self.parent_node_size,
413 self.current_directory_at_depth,
414 );
415 }
416 _ => {
417 self.current_directory_at_depth.size += file_size;
418 *self
419 .current_directory_at_depth
420 .entries_count
421 .get_or_insert(0) += file_count;
422 }
423 };
424
425 data.mtime = mtime;
426 data.size = file_size;
427 let entry_index = traversal.tree.add_node(data);
428
429 traversal
430 .tree
431 .add_edge(self.parent_node_idx, entry_index, ());
432 self.previous_node_idx = entry_index;
433 self.previous_node_size = file_size;
434 self.previous_depth = entry.depth;
435 }
436 Err(_) => {
437 if self.previous_depth == 0 {
438 data.name.clone_from(&(*root_path));
439 let entry_index = traversal.tree.add_node(data);
440 traversal
441 .tree
442 .add_edge(self.parent_node_idx, entry_index, ());
443 }
444
445 self.stats.io_errors += 1
446 }
447 }
448
449 if self.throttle.as_ref().is_some_and(|t| t.can_update()) {
450 return Some(false);
451 }
452 }
453 TraversalEvent::Finished(io_errors) => {
454 self.stats.io_errors += io_errors;
455
456 self.throttle = None;
457 self.directory_info_per_depth_level
458 .push(self.current_directory_at_depth);
459 self.current_directory_at_depth = EntryInfo::default();
460 for _ in 0..self.previous_depth {
461 let dir_info = pop_or_panic(&mut self.directory_info_per_depth_level);
462 self.current_directory_at_depth.size += dir_info.size;
463 self.current_directory_at_depth.add_count(&dir_info);
464
465 set_entry_info_or_panic(
466 &mut traversal.tree,
467 self.parent_node_idx,
468 self.parent_node_size,
469 self.current_directory_at_depth,
470 );
471 self.parent_node_idx =
472 parent_or_panic(&mut traversal.tree, self.parent_node_idx);
473 }
474 let root_size = traversal.recompute_node_size(self.root_idx);
475 set_entry_info_or_panic(
476 &mut traversal.tree,
477 self.root_idx,
478 root_size,
479 EntryInfo {
480 size: root_size,
481 entries_count: (self.stats.entries_traversed > 0)
482 .then_some(self.stats.entries_traversed),
483 },
484 );
485 self.stats.total_bytes = Some(root_size);
486 self.stats.elapsed = Some(self.stats.start.elapsed());
487
488 return Some(true);
489 }
490 }
491 None
492 }
493}
494
495#[cfg(not(windows))]
496fn size_on_disk(_parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> {
498 name.size_on_disk_fast(meta)
499}
500
501#[cfg(windows)]
502fn size_on_disk(parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> {
504 parent.join(name).size_on_disk_fast(meta)
505}
506
507#[cfg(test)]
508mod tests {
509 use super::*;
510
511 #[test]
512 fn size_of_entry_data() {
513 assert!(
514 std::mem::size_of::<EntryData>() <= 80,
515 "the size of this ({}) should not exceed 80 as it affects overall memory consumption",
516 std::mem::size_of::<EntryData>()
517 );
518 }
519}