1use crate::{crossdev, get_size_or_panic, inodefilter::InodeFilter, Throttle, WalkOptions};
2
3use crossbeam::channel::Receiver;
4use filesize::PathExt;
5use petgraph::{graph::NodeIndex, stable_graph::StableGraph, Directed, Direction};
6use std::{
7 fmt,
8 fs::Metadata,
9 io,
10 path::{Path, PathBuf},
11 sync::Arc,
12 time::{Duration, SystemTime, UNIX_EPOCH},
13};
14
15pub type TreeIndex = NodeIndex;
16pub type Tree = StableGraph<EntryData, (), Directed>;
17
18#[derive(Eq, PartialEq, Clone)]
19pub struct EntryData {
20 pub name: PathBuf,
21 pub size: u128,
24 pub mtime: SystemTime,
25 pub entry_count: Option<u64>,
26 pub metadata_io_error: bool,
28 pub is_dir: bool,
29}
30
31impl Default for EntryData {
32 fn default() -> EntryData {
33 EntryData {
34 name: PathBuf::default(),
35 size: u128::default(),
36 mtime: UNIX_EPOCH,
37 entry_count: None,
38 metadata_io_error: bool::default(),
39 is_dir: false,
40 }
41 }
42}
43
44impl fmt::Debug for EntryData {
45 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
46 f.debug_struct("EntryData")
47 .field("name", &self.name)
48 .field("size", &self.size)
49 .field("entry_count", &self.entry_count)
50 .field("metadata_io_error", &self.metadata_io_error)
52 .finish()
53 }
54}
55
56#[derive(Debug)]
58pub struct Traversal {
59 pub tree: Tree,
61 pub root_index: TreeIndex,
63}
64
65impl Default for Traversal {
66 fn default() -> Self {
67 Self::new()
68 }
69}
70
71impl Traversal {
72 pub fn new() -> Self {
73 let mut tree = Tree::new();
74 let root_index = tree.add_node(EntryData::default());
75 Self { tree, root_index }
76 }
77
78 pub fn recompute_node_size(&self, node_index: TreeIndex) -> u128 {
79 self.tree
80 .neighbors_directed(node_index, Direction::Outgoing)
81 .map(|idx| get_size_or_panic(&self.tree, idx))
82 .sum()
83 }
84}
85
86#[derive(Clone, Copy)]
87pub struct TraversalStats {
88 pub entries_traversed: u64,
90 pub start: std::time::Instant,
92 pub elapsed: Option<std::time::Duration>,
94 pub io_errors: u64,
96 pub total_bytes: Option<u128>,
98}
99
100impl Default for TraversalStats {
101 fn default() -> Self {
102 Self {
103 entries_traversed: 0,
104 start: std::time::Instant::now(),
105 elapsed: None,
106 io_errors: 0,
107 total_bytes: None,
108 }
109 }
110}
111
112#[derive(Default, Copy, Clone)]
113pub struct EntryInfo {
114 pub size: u128,
115 pub entries_count: Option<u64>,
116}
117
118impl EntryInfo {
119 pub fn add_count(&mut self, other: &Self) {
120 self.entries_count = match (self.entries_count, other.entries_count) {
121 (Some(a), Some(b)) => Some(a + b),
122 (None, Some(b)) => Some(b),
123 (Some(a), None) => Some(a),
124 (None, None) => None,
125 };
126 }
127}
128
129pub fn set_entry_info_or_panic(
130 tree: &mut Tree,
131 node_idx: TreeIndex,
132 node_own_size: u128,
133 EntryInfo {
134 size,
135 entries_count,
136 }: EntryInfo,
137) {
138 let node = tree
139 .node_weight_mut(node_idx)
140 .expect("node for parent index we just retrieved");
141 node.size = size + node_own_size;
142 node.entry_count = entries_count.map(|n| n + 1);
143}
144
145pub fn parent_or_panic(tree: &mut Tree, parent_node_idx: TreeIndex) -> TreeIndex {
146 tree.neighbors_directed(parent_node_idx, Direction::Incoming)
147 .next()
148 .expect("every node in the iteration has a parent")
149}
150
151pub fn pop_or_panic<T>(v: &mut Vec<T>) -> T {
152 v.pop().expect("sizes per level to be in sync with graph")
153}
154
155pub type TraversalEntry =
156 Result<jwalk::DirEntry<((), Option<Result<std::fs::Metadata, jwalk::Error>>)>, jwalk::Error>;
157
158#[allow(clippy::large_enum_variant)]
159pub enum TraversalEvent {
160 Entry(TraversalEntry, Arc<PathBuf>, u64),
161 Finished(u64),
162}
163
164pub struct BackgroundTraversal {
166 walk_options: WalkOptions,
167 pub root_idx: TreeIndex,
168 pub stats: TraversalStats,
169 previous_node_idx: TreeIndex,
170 parent_node_idx: TreeIndex,
171 directory_info_per_depth_level: Vec<EntryInfo>,
172 current_directory_at_depth: EntryInfo,
173 parent_node_size: u128,
174 parent_node_size_per_depth_level: Vec<u128>,
175 previous_node_size: u128,
176 previous_depth: usize,
177 inodes: InodeFilter,
178 throttle: Option<Throttle>,
179 skip_root: bool,
180 use_root_path: bool,
181 pub event_rx: Receiver<TraversalEvent>,
182}
183
184impl BackgroundTraversal {
185 pub fn start(
188 root_idx: TreeIndex,
189 walk_options: &WalkOptions,
190 input: Vec<PathBuf>,
191 skip_root: bool,
192 use_root_path: bool,
193 ) -> anyhow::Result<BackgroundTraversal> {
194 let (entry_tx, entry_rx) = crossbeam::channel::bounded(100);
195 std::thread::Builder::new()
196 .name("dua-fs-walk-dispatcher".to_string())
197 .spawn({
198 let walk_options = walk_options.clone();
199 let mut io_errors: u64 = 0;
200 move || {
201 for root_path in input.into_iter() {
202 log::info!("Walking {root_path:?}");
203 let device_id = match crossdev::init(root_path.as_ref()) {
204 Ok(id) => id,
205 Err(_) => {
206 io_errors += 1;
207 continue;
208 }
209 };
210
211 let root_path = Arc::new(root_path);
212 for entry in walk_options
213 .iter_from_path(root_path.as_ref(), device_id, skip_root)
214 .into_iter()
215 {
216 if entry_tx
217 .send(TraversalEvent::Entry(
218 entry,
219 Arc::clone(&root_path),
220 device_id,
221 ))
222 .is_err()
223 {
224 return;
227 }
228 }
229 }
230 if entry_tx.send(TraversalEvent::Finished(io_errors)).is_err() {
231 log::error!("Failed to send TraversalEvents::Finished event");
232 }
233 }
234 })?;
235
236 Ok(Self {
237 walk_options: walk_options.clone(),
238 root_idx,
239 stats: TraversalStats::default(),
240 previous_node_idx: root_idx,
241 parent_node_idx: root_idx,
242 previous_node_size: 0,
243 parent_node_size: 0,
244 parent_node_size_per_depth_level: Vec::new(),
245 directory_info_per_depth_level: Vec::new(),
246 current_directory_at_depth: EntryInfo::default(),
247 previous_depth: 0,
248 inodes: InodeFilter::default(),
249 throttle: Some(Throttle::new(Duration::from_millis(250), None)),
250 skip_root,
251 use_root_path,
252 event_rx: entry_rx,
253 })
254 }
255
256 pub fn integrate_traversal_event(
264 &mut self,
265 traversal: &mut Traversal,
266 event: TraversalEvent,
267 ) -> Option<bool> {
268 match event {
269 TraversalEvent::Entry(entry, root_path, device_id) => {
270 self.stats.entries_traversed += 1;
271 let mut data = EntryData::default();
272 match entry {
273 Ok(mut entry) => {
274 if self.skip_root {
275 entry.depth -= 1;
276 data.name = entry.file_name.into()
277 } else {
278 data.name = if entry.depth < 1 && self.use_root_path {
279 (*root_path).clone()
280 } else {
281 entry.file_name.into()
282 }
283 }
284
285 let mut file_size = 0u128;
286 let mut mtime: SystemTime = UNIX_EPOCH;
287 let mut file_count = 0u64;
288 match &entry.client_state {
289 Some(Ok(ref m)) => {
290 if self.walk_options.count_hard_links
291 || self.inodes.add(m)
292 && (self.walk_options.cross_filesystems
293 || crossdev::is_same_device(device_id, m))
294 {
295 file_count = 1;
296 if self.walk_options.apparent_size {
297 file_size = m.len() as u128;
298 } else {
299 file_size = size_on_disk(&entry.parent_path, &data.name, m)
300 .unwrap_or_else(|_| {
301 self.stats.io_errors += 1;
302 data.metadata_io_error = true;
303 0
304 })
305 as u128;
306 }
307 } else {
308 data.entry_count = Some(0);
309 data.is_dir = true;
310 }
311
312 match m.modified() {
313 Ok(modified) => {
314 mtime = modified;
315 }
316 Err(_) => {
317 self.stats.io_errors += 1;
318 data.metadata_io_error = true;
319 }
320 }
321 }
322 Some(Err(_)) => {
323 self.stats.io_errors += 1;
324 data.metadata_io_error = true;
325 }
326 None => {}
327 }
328
329 match (entry.depth, self.previous_depth) {
330 (n, p) if n > p => {
331 self.directory_info_per_depth_level
332 .push(self.current_directory_at_depth);
333 self.current_directory_at_depth = EntryInfo {
334 size: file_size,
335 entries_count: Some(file_count),
336 };
337
338 self.parent_node_size_per_depth_level
339 .push(self.previous_node_size);
340
341 self.parent_node_idx = self.previous_node_idx;
342 self.parent_node_size = self.previous_node_size;
343 }
344 (n, p) if n < p => {
345 for _ in n..p {
346 set_entry_info_or_panic(
347 &mut traversal.tree,
348 self.parent_node_idx,
349 self.parent_node_size,
350 self.current_directory_at_depth,
351 );
352 let dir_info =
353 pop_or_panic(&mut self.directory_info_per_depth_level);
354
355 self.current_directory_at_depth.size += dir_info.size;
356 self.current_directory_at_depth.add_count(&dir_info);
357
358 self.parent_node_idx =
359 parent_or_panic(&mut traversal.tree, self.parent_node_idx);
360 self.parent_node_size =
361 pop_or_panic(&mut self.parent_node_size_per_depth_level);
362 }
363 self.current_directory_at_depth.size += file_size;
364 *self
365 .current_directory_at_depth
366 .entries_count
367 .get_or_insert(0) += file_count;
368 set_entry_info_or_panic(
369 &mut traversal.tree,
370 self.parent_node_idx,
371 self.parent_node_size,
372 self.current_directory_at_depth,
373 );
374 }
375 _ => {
376 self.current_directory_at_depth.size += file_size;
377 *self
378 .current_directory_at_depth
379 .entries_count
380 .get_or_insert(0) += file_count;
381 }
382 };
383
384 data.mtime = mtime;
385 data.size = file_size;
386 let entry_index = traversal.tree.add_node(data);
387
388 traversal
389 .tree
390 .add_edge(self.parent_node_idx, entry_index, ());
391 self.previous_node_idx = entry_index;
392 self.previous_node_size = file_size;
393 self.previous_depth = entry.depth;
394 }
395 Err(_) => {
396 if self.previous_depth == 0 {
397 data.name.clone_from(&(*root_path));
398 let entry_index = traversal.tree.add_node(data);
399 traversal
400 .tree
401 .add_edge(self.parent_node_idx, entry_index, ());
402 }
403
404 self.stats.io_errors += 1
405 }
406 }
407
408 if self.throttle.as_ref().is_some_and(|t| t.can_update()) {
409 return Some(false);
410 }
411 }
412 TraversalEvent::Finished(io_errors) => {
413 self.stats.io_errors += io_errors;
414
415 self.throttle = None;
416 self.directory_info_per_depth_level
417 .push(self.current_directory_at_depth);
418 self.current_directory_at_depth = EntryInfo::default();
419 for _ in 0..self.previous_depth {
420 let dir_info = pop_or_panic(&mut self.directory_info_per_depth_level);
421 self.current_directory_at_depth.size += dir_info.size;
422 self.current_directory_at_depth.add_count(&dir_info);
423
424 set_entry_info_or_panic(
425 &mut traversal.tree,
426 self.parent_node_idx,
427 self.parent_node_size,
428 self.current_directory_at_depth,
429 );
430 self.parent_node_idx =
431 parent_or_panic(&mut traversal.tree, self.parent_node_idx);
432 }
433 let root_size = traversal.recompute_node_size(self.root_idx);
434 set_entry_info_or_panic(
435 &mut traversal.tree,
436 self.root_idx,
437 root_size,
438 EntryInfo {
439 size: root_size,
440 entries_count: (self.stats.entries_traversed > 0)
441 .then_some(self.stats.entries_traversed),
442 },
443 );
444 self.stats.total_bytes = Some(root_size);
445 self.stats.elapsed = Some(self.stats.start.elapsed());
446
447 return Some(true);
448 }
449 }
450 None
451 }
452}
453
454#[cfg(not(windows))]
455pub fn size_on_disk(_parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> {
456 name.size_on_disk_fast(meta)
457}
458
459#[cfg(windows)]
460pub fn size_on_disk(parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> {
461 parent.join(name).size_on_disk_fast(meta)
462}
463
464#[cfg(test)]
465mod tests {
466 use super::*;
467
468 #[test]
469 fn size_of_entry_data() {
470 assert!(
471 std::mem::size_of::<EntryData>() <= 80,
472 "the size of this ({}) should not exceed 80 as it affects overall memory consumption",
473 std::mem::size_of::<EntryData>()
474 );
475 }
476}