1use crate::{crossdev, get_size_or_panic, inodefilter::InodeFilter, Throttle, WalkOptions};
2
3use crossbeam::channel::Receiver;
4use filesize::PathExt;
5use petgraph::{graph::NodeIndex, stable_graph::StableGraph, Directed, Direction};
6use std::{
7 fmt,
8 fs::Metadata,
9 io,
10 path::{Path, PathBuf},
11 sync::Arc,
12 time::{Duration, SystemTime, UNIX_EPOCH},
13};
14
15pub type TreeIndex = NodeIndex;
16pub type Tree = StableGraph<EntryData, (), Directed>;
17
18#[derive(Eq, PartialEq, Clone)]
19pub struct EntryData {
20 pub name: PathBuf,
21 pub size: u128,
23 pub mtime: SystemTime,
24 pub entry_count: Option<u64>,
25 pub metadata_io_error: bool,
27 pub is_dir: bool,
28}
29
30impl Default for EntryData {
31 fn default() -> EntryData {
32 EntryData {
33 name: PathBuf::default(),
34 size: u128::default(),
35 mtime: UNIX_EPOCH,
36 entry_count: None,
37 metadata_io_error: bool::default(),
38 is_dir: false,
39 }
40 }
41}
42
43impl fmt::Debug for EntryData {
44 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45 f.debug_struct("EntryData")
46 .field("name", &self.name)
47 .field("size", &self.size)
48 .field("entry_count", &self.entry_count)
49 .field("metadata_io_error", &self.metadata_io_error)
51 .finish()
52 }
53}
54
55#[derive(Debug)]
57pub struct Traversal {
58 pub tree: Tree,
60 pub root_index: TreeIndex,
62}
63
64impl Default for Traversal {
65 fn default() -> Self {
66 Self::new()
67 }
68}
69
70impl Traversal {
71 pub fn new() -> Self {
72 let mut tree = Tree::new();
73 let root_index = tree.add_node(EntryData::default());
74 Self { tree, root_index }
75 }
76
77 pub fn recompute_node_size(&self, node_index: TreeIndex) -> u128 {
78 self.tree
79 .neighbors_directed(node_index, Direction::Outgoing)
80 .map(|idx| get_size_or_panic(&self.tree, idx))
81 .sum()
82 }
83}
84
85#[derive(Clone, Copy)]
86pub struct TraversalStats {
87 pub entries_traversed: u64,
89 pub start: std::time::Instant,
91 pub elapsed: Option<std::time::Duration>,
93 pub io_errors: u64,
95 pub total_bytes: Option<u128>,
97}
98
99impl Default for TraversalStats {
100 fn default() -> Self {
101 Self {
102 entries_traversed: 0,
103 start: std::time::Instant::now(),
104 elapsed: None,
105 io_errors: 0,
106 total_bytes: None,
107 }
108 }
109}
110
111#[derive(Default, Copy, Clone)]
112pub struct EntryInfo {
113 pub size: u128,
114 pub entries_count: Option<u64>,
115}
116
117impl EntryInfo {
118 pub fn add_count(&mut self, other: &Self) {
119 self.entries_count = match (self.entries_count, other.entries_count) {
120 (Some(a), Some(b)) => Some(a + b),
121 (None, Some(b)) => Some(b),
122 (Some(a), None) => Some(a),
123 (None, None) => None,
124 };
125 }
126}
127
128pub fn set_entry_info_or_panic(
129 tree: &mut Tree,
130 node_idx: TreeIndex,
131 EntryInfo {
132 size,
133 entries_count,
134 }: EntryInfo,
135) {
136 let node = tree
137 .node_weight_mut(node_idx)
138 .expect("node for parent index we just retrieved");
139 node.size = size;
140 node.entry_count = entries_count;
141}
142
143pub fn parent_or_panic(tree: &mut Tree, parent_node_idx: TreeIndex) -> TreeIndex {
144 tree.neighbors_directed(parent_node_idx, Direction::Incoming)
145 .next()
146 .expect("every node in the iteration has a parent")
147}
148
149pub fn pop_or_panic(v: &mut Vec<EntryInfo>) -> EntryInfo {
150 v.pop().expect("sizes per level to be in sync with graph")
151}
152
153pub type TraversalEntry =
154 Result<jwalk::DirEntry<((), Option<Result<std::fs::Metadata, jwalk::Error>>)>, jwalk::Error>;
155
156#[allow(clippy::large_enum_variant)]
157pub enum TraversalEvent {
158 Entry(TraversalEntry, Arc<PathBuf>, u64),
159 Finished(u64),
160}
161
162pub struct BackgroundTraversal {
164 walk_options: WalkOptions,
165 pub root_idx: TreeIndex,
166 pub stats: TraversalStats,
167 previous_node_idx: TreeIndex,
168 parent_node_idx: TreeIndex,
169 directory_info_per_depth_level: Vec<EntryInfo>,
170 current_directory_at_depth: EntryInfo,
171 previous_depth: usize,
172 inodes: InodeFilter,
173 throttle: Option<Throttle>,
174 skip_root: bool,
175 use_root_path: bool,
176 pub event_rx: Receiver<TraversalEvent>,
177}
178
179impl BackgroundTraversal {
180 pub fn start(
183 root_idx: TreeIndex,
184 walk_options: &WalkOptions,
185 input: Vec<PathBuf>,
186 skip_root: bool,
187 use_root_path: bool,
188 ) -> anyhow::Result<BackgroundTraversal> {
189 let (entry_tx, entry_rx) = crossbeam::channel::bounded(100);
190 std::thread::Builder::new()
191 .name("dua-fs-walk-dispatcher".to_string())
192 .spawn({
193 let walk_options = walk_options.clone();
194 let mut io_errors: u64 = 0;
195 move || {
196 for root_path in input.into_iter() {
197 log::info!("Walking {root_path:?}");
198 let device_id = match crossdev::init(root_path.as_ref()) {
199 Ok(id) => id,
200 Err(_) => {
201 io_errors += 1;
202 continue;
203 }
204 };
205
206 let root_path = Arc::new(root_path);
207 for entry in walk_options
208 .iter_from_path(root_path.as_ref(), device_id, skip_root)
209 .into_iter()
210 {
211 if entry_tx
212 .send(TraversalEvent::Entry(
213 entry,
214 Arc::clone(&root_path),
215 device_id,
216 ))
217 .is_err()
218 {
219 return;
222 }
223 }
224 }
225 if entry_tx.send(TraversalEvent::Finished(io_errors)).is_err() {
226 log::error!("Failed to send TraversalEvents::Finished event");
227 }
228 }
229 })?;
230
231 Ok(Self {
232 walk_options: walk_options.clone(),
233 root_idx,
234 stats: TraversalStats::default(),
235 previous_node_idx: root_idx,
236 parent_node_idx: root_idx,
237 directory_info_per_depth_level: Vec::new(),
238 current_directory_at_depth: EntryInfo::default(),
239 previous_depth: 0,
240 inodes: InodeFilter::default(),
241 throttle: Some(Throttle::new(Duration::from_millis(250), None)),
242 skip_root,
243 use_root_path,
244 event_rx: entry_rx,
245 })
246 }
247
248 pub fn integrate_traversal_event(
256 &mut self,
257 traversal: &mut Traversal,
258 event: TraversalEvent,
259 ) -> Option<bool> {
260 match event {
261 TraversalEvent::Entry(entry, root_path, device_id) => {
262 self.stats.entries_traversed += 1;
263 let mut data = EntryData::default();
264 match entry {
265 Ok(mut entry) => {
266 if self.skip_root {
267 entry.depth -= 1;
268 data.name = entry.file_name.into()
269 } else {
270 data.name = if entry.depth < 1 && self.use_root_path {
271 (*root_path).clone()
272 } else {
273 entry.file_name.into()
274 }
275 }
276
277 let mut file_size = 0u128;
278 let mut mtime: SystemTime = UNIX_EPOCH;
279 let mut file_count = 0u64;
280 match &entry.client_state {
281 Some(Ok(ref m)) => {
282 if !m.is_dir()
283 && (self.walk_options.count_hard_links || self.inodes.add(m))
284 && (self.walk_options.cross_filesystems
285 || crossdev::is_same_device(device_id, m))
286 {
287 file_count = 1;
288 if self.walk_options.apparent_size {
289 file_size = m.len() as u128;
290 } else {
291 file_size = size_on_disk(&entry.parent_path, &data.name, m)
292 .unwrap_or_else(|_| {
293 self.stats.io_errors += 1;
294 data.metadata_io_error = true;
295 0
296 })
297 as u128;
298 }
299 } else {
300 data.entry_count = Some(0);
301 data.is_dir = true;
302 }
303
304 match m.modified() {
305 Ok(modified) => {
306 mtime = modified;
307 }
308 Err(_) => {
309 self.stats.io_errors += 1;
310 data.metadata_io_error = true;
311 }
312 }
313 }
314 Some(Err(_)) => {
315 self.stats.io_errors += 1;
316 data.metadata_io_error = true;
317 }
318 None => {}
319 }
320
321 match (entry.depth, self.previous_depth) {
322 (n, p) if n > p => {
323 self.directory_info_per_depth_level
324 .push(self.current_directory_at_depth);
325 self.current_directory_at_depth = EntryInfo {
326 size: file_size,
327 entries_count: Some(file_count),
328 };
329 self.parent_node_idx = self.previous_node_idx;
330 }
331 (n, p) if n < p => {
332 for _ in n..p {
333 set_entry_info_or_panic(
334 &mut traversal.tree,
335 self.parent_node_idx,
336 self.current_directory_at_depth,
337 );
338 let dir_info =
339 pop_or_panic(&mut self.directory_info_per_depth_level);
340
341 self.current_directory_at_depth.size += dir_info.size;
342 self.current_directory_at_depth.add_count(&dir_info);
343
344 self.parent_node_idx =
345 parent_or_panic(&mut traversal.tree, self.parent_node_idx);
346 }
347 self.current_directory_at_depth.size += file_size;
348 *self
349 .current_directory_at_depth
350 .entries_count
351 .get_or_insert(0) += file_count;
352 set_entry_info_or_panic(
353 &mut traversal.tree,
354 self.parent_node_idx,
355 self.current_directory_at_depth,
356 );
357 }
358 _ => {
359 self.current_directory_at_depth.size += file_size;
360 *self
361 .current_directory_at_depth
362 .entries_count
363 .get_or_insert(0) += file_count;
364 }
365 };
366
367 data.mtime = mtime;
368 data.size = file_size;
369 let entry_index = traversal.tree.add_node(data);
370
371 traversal
372 .tree
373 .add_edge(self.parent_node_idx, entry_index, ());
374 self.previous_node_idx = entry_index;
375 self.previous_depth = entry.depth;
376 }
377 Err(_) => {
378 if self.previous_depth == 0 {
379 data.name.clone_from(&(*root_path));
380 let entry_index = traversal.tree.add_node(data);
381 traversal
382 .tree
383 .add_edge(self.parent_node_idx, entry_index, ());
384 }
385
386 self.stats.io_errors += 1
387 }
388 }
389
390 if self.throttle.as_ref().is_some_and(|t| t.can_update()) {
391 return Some(false);
392 }
393 }
394 TraversalEvent::Finished(io_errors) => {
395 self.stats.io_errors += io_errors;
396
397 self.throttle = None;
398 self.directory_info_per_depth_level
399 .push(self.current_directory_at_depth);
400 self.current_directory_at_depth = EntryInfo::default();
401 for _ in 0..self.previous_depth {
402 let dir_info = pop_or_panic(&mut self.directory_info_per_depth_level);
403 self.current_directory_at_depth.size += dir_info.size;
404 self.current_directory_at_depth.add_count(&dir_info);
405
406 set_entry_info_or_panic(
407 &mut traversal.tree,
408 self.parent_node_idx,
409 self.current_directory_at_depth,
410 );
411 self.parent_node_idx =
412 parent_or_panic(&mut traversal.tree, self.parent_node_idx);
413 }
414 let root_size = traversal.recompute_node_size(self.root_idx);
415 set_entry_info_or_panic(
416 &mut traversal.tree,
417 self.root_idx,
418 EntryInfo {
419 size: root_size,
420 entries_count: (self.stats.entries_traversed > 0)
421 .then_some(self.stats.entries_traversed),
422 },
423 );
424 self.stats.total_bytes = Some(root_size);
425 self.stats.elapsed = Some(self.stats.start.elapsed());
426
427 return Some(true);
428 }
429 }
430 None
431 }
432}
433
434#[cfg(not(windows))]
435pub fn size_on_disk(_parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> {
436 name.size_on_disk_fast(meta)
437}
438
439#[cfg(windows)]
440pub fn size_on_disk(parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> {
441 parent.join(name).size_on_disk_fast(meta)
442}
443
444#[cfg(test)]
445mod tests {
446 use super::*;
447
448 #[test]
449 fn size_of_entry_data() {
450 assert!(
451 std::mem::size_of::<EntryData>() <= 80,
452 "the size of this ({}) should not exceed 80 as it affects overall memory consumption",
453 std::mem::size_of::<EntryData>()
454 );
455 }
456}