dua/
traverse.rs

1use crate::{crossdev, get_size_or_panic, inodefilter::InodeFilter, Throttle, WalkOptions};
2
3use crossbeam::channel::Receiver;
4use filesize::PathExt;
5use petgraph::{graph::NodeIndex, stable_graph::StableGraph, Directed, Direction};
6use std::{
7    fmt,
8    fs::Metadata,
9    io,
10    path::{Path, PathBuf},
11    sync::Arc,
12    time::{Duration, SystemTime, UNIX_EPOCH},
13};
14
15pub type TreeIndex = NodeIndex;
16pub type Tree = StableGraph<EntryData, (), Directed>;
17
18#[derive(Eq, PartialEq, Clone)]
19pub struct EntryData {
20    pub name: PathBuf,
21    /// The entry's size in bytes. If it's a directory, the size is the aggregated file size of all children
22    /// plus the  size of the directory entry itself
23    pub size: u128,
24    pub mtime: SystemTime,
25    pub entry_count: Option<u64>,
26    /// If set, the item meta-data could not be obtained
27    pub metadata_io_error: bool,
28    pub is_dir: bool,
29}
30
31impl Default for EntryData {
32    fn default() -> EntryData {
33        EntryData {
34            name: PathBuf::default(),
35            size: u128::default(),
36            mtime: UNIX_EPOCH,
37            entry_count: None,
38            metadata_io_error: bool::default(),
39            is_dir: false,
40        }
41    }
42}
43
44impl fmt::Debug for EntryData {
45    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
46        f.debug_struct("EntryData")
47            .field("name", &self.name)
48            .field("size", &self.size)
49            .field("entry_count", &self.entry_count)
50            // Skip mtime
51            .field("metadata_io_error", &self.metadata_io_error)
52            .finish()
53    }
54}
55
56/// The result of the previous filesystem traversal
57#[derive(Debug)]
58pub struct Traversal {
59    /// A tree representing the entire filestem traversal
60    pub tree: Tree,
61    /// The top-level node of the tree.
62    pub root_index: TreeIndex,
63}
64
65impl Default for Traversal {
66    fn default() -> Self {
67        Self::new()
68    }
69}
70
71impl Traversal {
72    pub fn new() -> Self {
73        let mut tree = Tree::new();
74        let root_index = tree.add_node(EntryData::default());
75        Self { tree, root_index }
76    }
77
78    pub fn recompute_node_size(&self, node_index: TreeIndex) -> u128 {
79        self.tree
80            .neighbors_directed(node_index, Direction::Outgoing)
81            .map(|idx| get_size_or_panic(&self.tree, idx))
82            .sum()
83    }
84}
85
86#[derive(Clone, Copy)]
87pub struct TraversalStats {
88    /// Amount of files or directories we have seen during the filesystem traversal
89    pub entries_traversed: u64,
90    /// The time at which the traversal started.
91    pub start: std::time::Instant,
92    /// The amount of time it took to finish the traversal. Set only once done.
93    pub elapsed: Option<std::time::Duration>,
94    /// Total amount of IO errors encountered when traversing the filesystem
95    pub io_errors: u64,
96    /// Total amount of bytes seen during the traversal
97    pub total_bytes: Option<u128>,
98}
99
100impl Default for TraversalStats {
101    fn default() -> Self {
102        Self {
103            entries_traversed: 0,
104            start: std::time::Instant::now(),
105            elapsed: None,
106            io_errors: 0,
107            total_bytes: None,
108        }
109    }
110}
111
112#[derive(Default, Copy, Clone)]
113pub struct EntryInfo {
114    pub size: u128,
115    pub entries_count: Option<u64>,
116}
117
118impl EntryInfo {
119    pub fn add_count(&mut self, other: &Self) {
120        self.entries_count = match (self.entries_count, other.entries_count) {
121            (Some(a), Some(b)) => Some(a + b),
122            (None, Some(b)) => Some(b),
123            (Some(a), None) => Some(a),
124            (None, None) => None,
125        };
126    }
127}
128
129pub fn set_entry_info_or_panic(
130    tree: &mut Tree,
131    node_idx: TreeIndex,
132    node_own_size: u128,
133    EntryInfo {
134        size,
135        entries_count,
136    }: EntryInfo,
137) {
138    let node = tree
139        .node_weight_mut(node_idx)
140        .expect("node for parent index we just retrieved");
141    node.size = size + node_own_size;
142    node.entry_count = entries_count.map(|n| n + 1);
143}
144
145pub fn parent_or_panic(tree: &mut Tree, parent_node_idx: TreeIndex) -> TreeIndex {
146    tree.neighbors_directed(parent_node_idx, Direction::Incoming)
147        .next()
148        .expect("every node in the iteration has a parent")
149}
150
151pub fn pop_or_panic<T>(v: &mut Vec<T>) -> T {
152    v.pop().expect("sizes per level to be in sync with graph")
153}
154
155pub type TraversalEntry =
156    Result<jwalk::DirEntry<((), Option<Result<std::fs::Metadata, jwalk::Error>>)>, jwalk::Error>;
157
158#[allow(clippy::large_enum_variant)]
159pub enum TraversalEvent {
160    Entry(TraversalEntry, Arc<PathBuf>, u64),
161    Finished(u64),
162}
163
164/// An in-progress traversal which exposes newly obtained entries
165pub struct BackgroundTraversal {
166    walk_options: WalkOptions,
167    pub root_idx: TreeIndex,
168    pub stats: TraversalStats,
169    previous_node_idx: TreeIndex,
170    parent_node_idx: TreeIndex,
171    directory_info_per_depth_level: Vec<EntryInfo>,
172    current_directory_at_depth: EntryInfo,
173    parent_node_size: u128,
174    parent_node_size_per_depth_level: Vec<u128>,
175    previous_node_size: u128,
176    previous_depth: usize,
177    inodes: InodeFilter,
178    throttle: Option<Throttle>,
179    skip_root: bool,
180    use_root_path: bool,
181    pub event_rx: Receiver<TraversalEvent>,
182}
183
184impl BackgroundTraversal {
185    /// Start a background thread to perform the actual tree walk, and dispatch the results
186    /// as events to be received on [BackgroundTraversal::event_rx].
187    pub fn start(
188        root_idx: TreeIndex,
189        walk_options: &WalkOptions,
190        input: Vec<PathBuf>,
191        skip_root: bool,
192        use_root_path: bool,
193    ) -> anyhow::Result<BackgroundTraversal> {
194        let (entry_tx, entry_rx) = crossbeam::channel::bounded(100);
195        std::thread::Builder::new()
196            .name("dua-fs-walk-dispatcher".to_string())
197            .spawn({
198                let walk_options = walk_options.clone();
199                let mut io_errors: u64 = 0;
200                move || {
201                    for root_path in input.into_iter() {
202                        log::info!("Walking {root_path:?}");
203                        let device_id = match crossdev::init(root_path.as_ref()) {
204                            Ok(id) => id,
205                            Err(_) => {
206                                io_errors += 1;
207                                continue;
208                            }
209                        };
210
211                        let root_path = Arc::new(root_path);
212                        for entry in walk_options
213                            .iter_from_path(root_path.as_ref(), device_id, skip_root)
214                            .into_iter()
215                        {
216                            if entry_tx
217                                .send(TraversalEvent::Entry(
218                                    entry,
219                                    Arc::clone(&root_path),
220                                    device_id,
221                                ))
222                                .is_err()
223                            {
224                                // The channel is closed, this means the user has
225                                // requested to quit the app. Abort the walking.
226                                return;
227                            }
228                        }
229                    }
230                    if entry_tx.send(TraversalEvent::Finished(io_errors)).is_err() {
231                        log::error!("Failed to send TraversalEvents::Finished event");
232                    }
233                }
234            })?;
235
236        Ok(Self {
237            walk_options: walk_options.clone(),
238            root_idx,
239            stats: TraversalStats::default(),
240            previous_node_idx: root_idx,
241            parent_node_idx: root_idx,
242            previous_node_size: 0,
243            parent_node_size: 0,
244            parent_node_size_per_depth_level: Vec::new(),
245            directory_info_per_depth_level: Vec::new(),
246            current_directory_at_depth: EntryInfo::default(),
247            previous_depth: 0,
248            inodes: InodeFilter::default(),
249            throttle: Some(Throttle::new(Duration::from_millis(250), None)),
250            skip_root,
251            use_root_path,
252            event_rx: entry_rx,
253        })
254    }
255
256    /// Integrate `event` into traversal `t` so its information is represented by it.
257    /// This builds the traversal tree from a directory-walk.
258    ///
259    /// Returns
260    /// * `Some(true)` if the traversal is finished
261    /// * `Some(false)` if the caller may update its state after throttling kicked in
262    /// * `None` - the event was written into the traversal, but there is nothing else to do
263    pub fn integrate_traversal_event(
264        &mut self,
265        traversal: &mut Traversal,
266        event: TraversalEvent,
267    ) -> Option<bool> {
268        match event {
269            TraversalEvent::Entry(entry, root_path, device_id) => {
270                self.stats.entries_traversed += 1;
271                let mut data = EntryData::default();
272                match entry {
273                    Ok(mut entry) => {
274                        if self.skip_root {
275                            entry.depth -= 1;
276                            data.name = entry.file_name.into()
277                        } else {
278                            data.name = if entry.depth < 1 && self.use_root_path {
279                                (*root_path).clone()
280                            } else {
281                                entry.file_name.into()
282                            }
283                        }
284
285                        let mut file_size = 0u128;
286                        let mut mtime: SystemTime = UNIX_EPOCH;
287                        let mut file_count = 0u64;
288                        match &entry.client_state {
289                            Some(Ok(ref m)) => {
290                                if self.walk_options.count_hard_links
291                                    || self.inodes.add(m)
292                                        && (self.walk_options.cross_filesystems
293                                            || crossdev::is_same_device(device_id, m))
294                                {
295                                    file_count = 1;
296                                    if self.walk_options.apparent_size {
297                                        file_size = m.len() as u128;
298                                    } else {
299                                        file_size = size_on_disk(&entry.parent_path, &data.name, m)
300                                            .unwrap_or_else(|_| {
301                                                self.stats.io_errors += 1;
302                                                data.metadata_io_error = true;
303                                                0
304                                            })
305                                            as u128;
306                                    }
307                                } else {
308                                    data.entry_count = Some(0);
309                                    data.is_dir = true;
310                                }
311
312                                match m.modified() {
313                                    Ok(modified) => {
314                                        mtime = modified;
315                                    }
316                                    Err(_) => {
317                                        self.stats.io_errors += 1;
318                                        data.metadata_io_error = true;
319                                    }
320                                }
321                            }
322                            Some(Err(_)) => {
323                                self.stats.io_errors += 1;
324                                data.metadata_io_error = true;
325                            }
326                            None => {}
327                        }
328
329                        match (entry.depth, self.previous_depth) {
330                            (n, p) if n > p => {
331                                self.directory_info_per_depth_level
332                                    .push(self.current_directory_at_depth);
333                                self.current_directory_at_depth = EntryInfo {
334                                    size: file_size,
335                                    entries_count: Some(file_count),
336                                };
337
338                                self.parent_node_size_per_depth_level
339                                    .push(self.previous_node_size);
340
341                                self.parent_node_idx = self.previous_node_idx;
342                                self.parent_node_size = self.previous_node_size;
343                            }
344                            (n, p) if n < p => {
345                                for _ in n..p {
346                                    set_entry_info_or_panic(
347                                        &mut traversal.tree,
348                                        self.parent_node_idx,
349                                        self.parent_node_size,
350                                        self.current_directory_at_depth,
351                                    );
352                                    let dir_info =
353                                        pop_or_panic(&mut self.directory_info_per_depth_level);
354
355                                    self.current_directory_at_depth.size += dir_info.size;
356                                    self.current_directory_at_depth.add_count(&dir_info);
357
358                                    self.parent_node_idx =
359                                        parent_or_panic(&mut traversal.tree, self.parent_node_idx);
360                                    self.parent_node_size =
361                                        pop_or_panic(&mut self.parent_node_size_per_depth_level);
362                                }
363                                self.current_directory_at_depth.size += file_size;
364                                *self
365                                    .current_directory_at_depth
366                                    .entries_count
367                                    .get_or_insert(0) += file_count;
368                                set_entry_info_or_panic(
369                                    &mut traversal.tree,
370                                    self.parent_node_idx,
371                                    self.parent_node_size,
372                                    self.current_directory_at_depth,
373                                );
374                            }
375                            _ => {
376                                self.current_directory_at_depth.size += file_size;
377                                *self
378                                    .current_directory_at_depth
379                                    .entries_count
380                                    .get_or_insert(0) += file_count;
381                            }
382                        };
383
384                        data.mtime = mtime;
385                        data.size = file_size;
386                        let entry_index = traversal.tree.add_node(data);
387
388                        traversal
389                            .tree
390                            .add_edge(self.parent_node_idx, entry_index, ());
391                        self.previous_node_idx = entry_index;
392                        self.previous_node_size = file_size;
393                        self.previous_depth = entry.depth;
394                    }
395                    Err(_) => {
396                        if self.previous_depth == 0 {
397                            data.name.clone_from(&(*root_path));
398                            let entry_index = traversal.tree.add_node(data);
399                            traversal
400                                .tree
401                                .add_edge(self.parent_node_idx, entry_index, ());
402                        }
403
404                        self.stats.io_errors += 1
405                    }
406                }
407
408                if self.throttle.as_ref().is_some_and(|t| t.can_update()) {
409                    return Some(false);
410                }
411            }
412            TraversalEvent::Finished(io_errors) => {
413                self.stats.io_errors += io_errors;
414
415                self.throttle = None;
416                self.directory_info_per_depth_level
417                    .push(self.current_directory_at_depth);
418                self.current_directory_at_depth = EntryInfo::default();
419                for _ in 0..self.previous_depth {
420                    let dir_info = pop_or_panic(&mut self.directory_info_per_depth_level);
421                    self.current_directory_at_depth.size += dir_info.size;
422                    self.current_directory_at_depth.add_count(&dir_info);
423
424                    set_entry_info_or_panic(
425                        &mut traversal.tree,
426                        self.parent_node_idx,
427                        self.parent_node_size,
428                        self.current_directory_at_depth,
429                    );
430                    self.parent_node_idx =
431                        parent_or_panic(&mut traversal.tree, self.parent_node_idx);
432                }
433                let root_size = traversal.recompute_node_size(self.root_idx);
434                set_entry_info_or_panic(
435                    &mut traversal.tree,
436                    self.root_idx,
437                    root_size,
438                    EntryInfo {
439                        size: root_size,
440                        entries_count: (self.stats.entries_traversed > 0)
441                            .then_some(self.stats.entries_traversed),
442                    },
443                );
444                self.stats.total_bytes = Some(root_size);
445                self.stats.elapsed = Some(self.stats.start.elapsed());
446
447                return Some(true);
448            }
449        }
450        None
451    }
452}
453
454#[cfg(not(windows))]
455pub fn size_on_disk(_parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> {
456    name.size_on_disk_fast(meta)
457}
458
459#[cfg(windows)]
460pub fn size_on_disk(parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> {
461    parent.join(name).size_on_disk_fast(meta)
462}
463
464#[cfg(test)]
465mod tests {
466    use super::*;
467
468    #[test]
469    fn size_of_entry_data() {
470        assert!(
471            std::mem::size_of::<EntryData>() <= 80,
472            "the size of this ({}) should not exceed 80 as it affects overall memory consumption",
473            std::mem::size_of::<EntryData>()
474        );
475    }
476}