dua/
traverse.rs

1use crate::{crossdev, get_size_or_panic, inodefilter::InodeFilter, Throttle, WalkOptions};
2
3use crossbeam::channel::Receiver;
4use filesize::PathExt;
5use petgraph::{graph::NodeIndex, stable_graph::StableGraph, Directed, Direction};
6use std::{
7    fmt,
8    fs::Metadata,
9    io,
10    path::{Path, PathBuf},
11    sync::Arc,
12    time::{Duration, SystemTime, UNIX_EPOCH},
13};
14
15pub type TreeIndex = NodeIndex;
16pub type Tree = StableGraph<EntryData, (), Directed>;
17
18#[derive(Eq, PartialEq, Clone)]
19pub struct EntryData {
20    pub name: PathBuf,
21    /// The entry's size in bytes. If it's a directory, the size is the aggregated file size of all children
22    pub size: u128,
23    pub mtime: SystemTime,
24    pub entry_count: Option<u64>,
25    /// If set, the item meta-data could not be obtained
26    pub metadata_io_error: bool,
27    pub is_dir: bool,
28}
29
30impl Default for EntryData {
31    fn default() -> EntryData {
32        EntryData {
33            name: PathBuf::default(),
34            size: u128::default(),
35            mtime: UNIX_EPOCH,
36            entry_count: None,
37            metadata_io_error: bool::default(),
38            is_dir: false,
39        }
40    }
41}
42
43impl fmt::Debug for EntryData {
44    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
45        f.debug_struct("EntryData")
46            .field("name", &self.name)
47            .field("size", &self.size)
48            .field("entry_count", &self.entry_count)
49            // Skip mtime
50            .field("metadata_io_error", &self.metadata_io_error)
51            .finish()
52    }
53}
54
55/// The result of the previous filesystem traversal
56#[derive(Debug)]
57pub struct Traversal {
58    /// A tree representing the entire filestem traversal
59    pub tree: Tree,
60    /// The top-level node of the tree.
61    pub root_index: TreeIndex,
62}
63
64impl Default for Traversal {
65    fn default() -> Self {
66        Self::new()
67    }
68}
69
70impl Traversal {
71    pub fn new() -> Self {
72        let mut tree = Tree::new();
73        let root_index = tree.add_node(EntryData::default());
74        Self { tree, root_index }
75    }
76
77    pub fn recompute_node_size(&self, node_index: TreeIndex) -> u128 {
78        self.tree
79            .neighbors_directed(node_index, Direction::Outgoing)
80            .map(|idx| get_size_or_panic(&self.tree, idx))
81            .sum()
82    }
83}
84
85#[derive(Clone, Copy)]
86pub struct TraversalStats {
87    /// Amount of files or directories we have seen during the filesystem traversal
88    pub entries_traversed: u64,
89    /// The time at which the traversal started.
90    pub start: std::time::Instant,
91    /// The amount of time it took to finish the traversal. Set only once done.
92    pub elapsed: Option<std::time::Duration>,
93    /// Total amount of IO errors encountered when traversing the filesystem
94    pub io_errors: u64,
95    /// Total amount of bytes seen during the traversal
96    pub total_bytes: Option<u128>,
97}
98
99impl Default for TraversalStats {
100    fn default() -> Self {
101        Self {
102            entries_traversed: 0,
103            start: std::time::Instant::now(),
104            elapsed: None,
105            io_errors: 0,
106            total_bytes: None,
107        }
108    }
109}
110
111#[derive(Default, Copy, Clone)]
112pub struct EntryInfo {
113    pub size: u128,
114    pub entries_count: Option<u64>,
115}
116
117impl EntryInfo {
118    pub fn add_count(&mut self, other: &Self) {
119        self.entries_count = match (self.entries_count, other.entries_count) {
120            (Some(a), Some(b)) => Some(a + b),
121            (None, Some(b)) => Some(b),
122            (Some(a), None) => Some(a),
123            (None, None) => None,
124        };
125    }
126}
127
128pub fn set_entry_info_or_panic(
129    tree: &mut Tree,
130    node_idx: TreeIndex,
131    EntryInfo {
132        size,
133        entries_count,
134    }: EntryInfo,
135) {
136    let node = tree
137        .node_weight_mut(node_idx)
138        .expect("node for parent index we just retrieved");
139    node.size = size;
140    node.entry_count = entries_count;
141}
142
143pub fn parent_or_panic(tree: &mut Tree, parent_node_idx: TreeIndex) -> TreeIndex {
144    tree.neighbors_directed(parent_node_idx, Direction::Incoming)
145        .next()
146        .expect("every node in the iteration has a parent")
147}
148
149pub fn pop_or_panic(v: &mut Vec<EntryInfo>) -> EntryInfo {
150    v.pop().expect("sizes per level to be in sync with graph")
151}
152
153pub type TraversalEntry =
154    Result<jwalk::DirEntry<((), Option<Result<std::fs::Metadata, jwalk::Error>>)>, jwalk::Error>;
155
156#[allow(clippy::large_enum_variant)]
157pub enum TraversalEvent {
158    Entry(TraversalEntry, Arc<PathBuf>, u64),
159    Finished(u64),
160}
161
162/// An in-progress traversal which exposes newly obtained entries
163pub struct BackgroundTraversal {
164    walk_options: WalkOptions,
165    pub root_idx: TreeIndex,
166    pub stats: TraversalStats,
167    previous_node_idx: TreeIndex,
168    parent_node_idx: TreeIndex,
169    directory_info_per_depth_level: Vec<EntryInfo>,
170    current_directory_at_depth: EntryInfo,
171    previous_depth: usize,
172    inodes: InodeFilter,
173    throttle: Option<Throttle>,
174    skip_root: bool,
175    use_root_path: bool,
176    pub event_rx: Receiver<TraversalEvent>,
177}
178
179impl BackgroundTraversal {
180    /// Start a background thread to perform the actual tree walk, and dispatch the results
181    /// as events to be received on [BackgroundTraversal::event_rx].
182    pub fn start(
183        root_idx: TreeIndex,
184        walk_options: &WalkOptions,
185        input: Vec<PathBuf>,
186        skip_root: bool,
187        use_root_path: bool,
188    ) -> anyhow::Result<BackgroundTraversal> {
189        let (entry_tx, entry_rx) = crossbeam::channel::bounded(100);
190        std::thread::Builder::new()
191            .name("dua-fs-walk-dispatcher".to_string())
192            .spawn({
193                let walk_options = walk_options.clone();
194                let mut io_errors: u64 = 0;
195                move || {
196                    for root_path in input.into_iter() {
197                        log::info!("Walking {root_path:?}");
198                        let device_id = match crossdev::init(root_path.as_ref()) {
199                            Ok(id) => id,
200                            Err(_) => {
201                                io_errors += 1;
202                                continue;
203                            }
204                        };
205
206                        let root_path = Arc::new(root_path);
207                        for entry in walk_options
208                            .iter_from_path(root_path.as_ref(), device_id, skip_root)
209                            .into_iter()
210                        {
211                            if entry_tx
212                                .send(TraversalEvent::Entry(
213                                    entry,
214                                    Arc::clone(&root_path),
215                                    device_id,
216                                ))
217                                .is_err()
218                            {
219                                // The channel is closed, this means the user has
220                                // requested to quit the app. Abort the walking.
221                                return;
222                            }
223                        }
224                    }
225                    if entry_tx.send(TraversalEvent::Finished(io_errors)).is_err() {
226                        log::error!("Failed to send TraversalEvents::Finished event");
227                    }
228                }
229            })?;
230
231        Ok(Self {
232            walk_options: walk_options.clone(),
233            root_idx,
234            stats: TraversalStats::default(),
235            previous_node_idx: root_idx,
236            parent_node_idx: root_idx,
237            directory_info_per_depth_level: Vec::new(),
238            current_directory_at_depth: EntryInfo::default(),
239            previous_depth: 0,
240            inodes: InodeFilter::default(),
241            throttle: Some(Throttle::new(Duration::from_millis(250), None)),
242            skip_root,
243            use_root_path,
244            event_rx: entry_rx,
245        })
246    }
247
248    /// Integrate `event` into traversal `t` so its information is represented by it.
249    /// This builds the traversal tree from a directory-walk.
250    ///
251    /// Returns
252    /// * `Some(true)` if the traversal is finished
253    /// * `Some(false)` if the caller may update its state after throttling kicked in
254    /// * `None` - the event was written into the traversal, but there is nothing else to do
255    pub fn integrate_traversal_event(
256        &mut self,
257        traversal: &mut Traversal,
258        event: TraversalEvent,
259    ) -> Option<bool> {
260        match event {
261            TraversalEvent::Entry(entry, root_path, device_id) => {
262                self.stats.entries_traversed += 1;
263                let mut data = EntryData::default();
264                match entry {
265                    Ok(mut entry) => {
266                        if self.skip_root {
267                            entry.depth -= 1;
268                            data.name = entry.file_name.into()
269                        } else {
270                            data.name = if entry.depth < 1 && self.use_root_path {
271                                (*root_path).clone()
272                            } else {
273                                entry.file_name.into()
274                            }
275                        }
276
277                        let mut file_size = 0u128;
278                        let mut mtime: SystemTime = UNIX_EPOCH;
279                        let mut file_count = 0u64;
280                        match &entry.client_state {
281                            Some(Ok(ref m)) => {
282                                if !m.is_dir()
283                                    && (self.walk_options.count_hard_links || self.inodes.add(m))
284                                    && (self.walk_options.cross_filesystems
285                                        || crossdev::is_same_device(device_id, m))
286                                {
287                                    file_count = 1;
288                                    if self.walk_options.apparent_size {
289                                        file_size = m.len() as u128;
290                                    } else {
291                                        file_size = size_on_disk(&entry.parent_path, &data.name, m)
292                                            .unwrap_or_else(|_| {
293                                                self.stats.io_errors += 1;
294                                                data.metadata_io_error = true;
295                                                0
296                                            })
297                                            as u128;
298                                    }
299                                } else {
300                                    data.entry_count = Some(0);
301                                    data.is_dir = true;
302                                }
303
304                                match m.modified() {
305                                    Ok(modified) => {
306                                        mtime = modified;
307                                    }
308                                    Err(_) => {
309                                        self.stats.io_errors += 1;
310                                        data.metadata_io_error = true;
311                                    }
312                                }
313                            }
314                            Some(Err(_)) => {
315                                self.stats.io_errors += 1;
316                                data.metadata_io_error = true;
317                            }
318                            None => {}
319                        }
320
321                        match (entry.depth, self.previous_depth) {
322                            (n, p) if n > p => {
323                                self.directory_info_per_depth_level
324                                    .push(self.current_directory_at_depth);
325                                self.current_directory_at_depth = EntryInfo {
326                                    size: file_size,
327                                    entries_count: Some(file_count),
328                                };
329                                self.parent_node_idx = self.previous_node_idx;
330                            }
331                            (n, p) if n < p => {
332                                for _ in n..p {
333                                    set_entry_info_or_panic(
334                                        &mut traversal.tree,
335                                        self.parent_node_idx,
336                                        self.current_directory_at_depth,
337                                    );
338                                    let dir_info =
339                                        pop_or_panic(&mut self.directory_info_per_depth_level);
340
341                                    self.current_directory_at_depth.size += dir_info.size;
342                                    self.current_directory_at_depth.add_count(&dir_info);
343
344                                    self.parent_node_idx =
345                                        parent_or_panic(&mut traversal.tree, self.parent_node_idx);
346                                }
347                                self.current_directory_at_depth.size += file_size;
348                                *self
349                                    .current_directory_at_depth
350                                    .entries_count
351                                    .get_or_insert(0) += file_count;
352                                set_entry_info_or_panic(
353                                    &mut traversal.tree,
354                                    self.parent_node_idx,
355                                    self.current_directory_at_depth,
356                                );
357                            }
358                            _ => {
359                                self.current_directory_at_depth.size += file_size;
360                                *self
361                                    .current_directory_at_depth
362                                    .entries_count
363                                    .get_or_insert(0) += file_count;
364                            }
365                        };
366
367                        data.mtime = mtime;
368                        data.size = file_size;
369                        let entry_index = traversal.tree.add_node(data);
370
371                        traversal
372                            .tree
373                            .add_edge(self.parent_node_idx, entry_index, ());
374                        self.previous_node_idx = entry_index;
375                        self.previous_depth = entry.depth;
376                    }
377                    Err(_) => {
378                        if self.previous_depth == 0 {
379                            data.name.clone_from(&(*root_path));
380                            let entry_index = traversal.tree.add_node(data);
381                            traversal
382                                .tree
383                                .add_edge(self.parent_node_idx, entry_index, ());
384                        }
385
386                        self.stats.io_errors += 1
387                    }
388                }
389
390                if self.throttle.as_ref().is_some_and(|t| t.can_update()) {
391                    return Some(false);
392                }
393            }
394            TraversalEvent::Finished(io_errors) => {
395                self.stats.io_errors += io_errors;
396
397                self.throttle = None;
398                self.directory_info_per_depth_level
399                    .push(self.current_directory_at_depth);
400                self.current_directory_at_depth = EntryInfo::default();
401                for _ in 0..self.previous_depth {
402                    let dir_info = pop_or_panic(&mut self.directory_info_per_depth_level);
403                    self.current_directory_at_depth.size += dir_info.size;
404                    self.current_directory_at_depth.add_count(&dir_info);
405
406                    set_entry_info_or_panic(
407                        &mut traversal.tree,
408                        self.parent_node_idx,
409                        self.current_directory_at_depth,
410                    );
411                    self.parent_node_idx =
412                        parent_or_panic(&mut traversal.tree, self.parent_node_idx);
413                }
414                let root_size = traversal.recompute_node_size(self.root_idx);
415                set_entry_info_or_panic(
416                    &mut traversal.tree,
417                    self.root_idx,
418                    EntryInfo {
419                        size: root_size,
420                        entries_count: (self.stats.entries_traversed > 0)
421                            .then_some(self.stats.entries_traversed),
422                    },
423                );
424                self.stats.total_bytes = Some(root_size);
425                self.stats.elapsed = Some(self.stats.start.elapsed());
426
427                return Some(true);
428            }
429        }
430        None
431    }
432}
433
434#[cfg(not(windows))]
435pub fn size_on_disk(_parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> {
436    name.size_on_disk_fast(meta)
437}
438
439#[cfg(windows)]
440pub fn size_on_disk(parent: &Path, name: &Path, meta: &Metadata) -> io::Result<u64> {
441    parent.join(name).size_on_disk_fast(meta)
442}
443
444#[cfg(test)]
445mod tests {
446    use super::*;
447
448    #[test]
449    fn size_of_entry_data() {
450        assert!(
451            std::mem::size_of::<EntryData>() <= 80,
452            "the size of this ({}) should not exceed 80 as it affects overall memory consumption",
453            std::mem::size_of::<EntryData>()
454        );
455    }
456}