Skip to main content

parallel_disk_usage/
fs_tree_builder.rs

1use super::{
2    data_tree::DataTree,
3    device::DeviceBoundary,
4    get_size::GetSize,
5    hardlink::{RecordHardlinks, RecordHardlinksArgument},
6    os_string_display::OsStringDisplay,
7    reporter::{error_report::Operation::*, ErrorReport, Event, Reporter},
8    size,
9    tree_builder::{Info, TreeBuilder},
10};
11use device_id::get_device_id;
12use pipe_trait::Pipe;
13use std::{
14    fs::{read_dir, symlink_metadata},
15    path::PathBuf,
16};
17
/// Build a [`DataTree`] from a directory tree using [`From`] or [`Into`].
///
/// The struct itself only carries configuration. The filesystem is inspected
/// when the builder is converted into a [`DataTree`].
///
/// **Example:**
///
/// ```no_run
/// # use parallel_disk_usage::fs_tree_builder::FsTreeBuilder;
/// use parallel_disk_usage::{
///     data_tree::DataTree,
///     device::DeviceBoundary,
///     get_size::GetApparentSize,
///     os_string_display::OsStringDisplay,
///     reporter::{ErrorOnlyReporter, ErrorReport},
///     size::Bytes,
///     hardlink::HardlinkIgnorant,
/// };
/// let builder = FsTreeBuilder {
///     root: std::env::current_dir().unwrap(),
///     hardlinks_recorder: &HardlinkIgnorant,
///     size_getter: GetApparentSize,
///     reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT),
///     device_boundary: DeviceBoundary::Cross,
///     max_depth: 10,
/// };
/// let data_tree: DataTree<OsStringDisplay, Bytes> = builder.into();
/// ```
#[derive(Debug)]
pub struct FsTreeBuilder<'a, Size, SizeGetter, HardlinksRecorder, Report>
where
    Report: Reporter<Size> + Sync + ?Sized,
    Size: size::Size + Send + Sync,
    SizeGetter: GetSize<Size = Size> + Sync,
    HardlinksRecorder: RecordHardlinks<Size, Report> + Sync + ?Sized,
{
    /// Root of the directory tree.
    ///
    /// The same path is also used as the name of the root node of the
    /// resulting [`DataTree`].
    pub root: PathBuf,
    /// Returns size of an item.
    ///
    /// It is given the metadata obtained from `symlink_metadata` for each
    /// inspected path.
    pub size_getter: SizeGetter,
    /// Handle to detect and record hardlinks.
    ///
    /// It is invoked for every path whose metadata could be read. Errors it
    /// returns are currently ignored by the conversion.
    pub hardlinks_recorder: &'a HardlinksRecorder,
    /// Reports progress to external system.
    ///
    /// It receives an [`Event::ReceiveData`] for every item whose metadata
    /// could be read and an [`Event::EncounterError`] for every failed
    /// filesystem operation.
    pub reporter: &'a Report,
    /// Whether to cross device boundary into a different filesystem.
    ///
    /// With [`DeviceBoundary::Stay`], directories whose device differs from
    /// that of `root` are not read. With [`DeviceBoundary::Cross`], no device
    /// comparison is made.
    pub device_boundary: DeviceBoundary,
    /// Deepest level of descendant display in the graph. The sizes beyond the max depth still count toward total.
    pub max_depth: u64,
}
64
65impl<'a, Size, SizeGetter, HardlinksRecorder, Report>
66    From<FsTreeBuilder<'a, Size, SizeGetter, HardlinksRecorder, Report>>
67    for DataTree<OsStringDisplay, Size>
68where
69    Report: Reporter<Size> + Sync + ?Sized,
70    Size: size::Size + Send + Sync,
71    SizeGetter: GetSize<Size = Size> + Sync,
72    HardlinksRecorder: RecordHardlinks<Size, Report> + Sync + ?Sized,
73{
74    /// Create a [`DataTree`] from an [`FsTreeBuilder`].
75    fn from(builder: FsTreeBuilder<Size, SizeGetter, HardlinksRecorder, Report>) -> Self {
76        let FsTreeBuilder {
77            root,
78            size_getter,
79            hardlinks_recorder,
80            reporter,
81            device_boundary,
82            max_depth,
83        } = builder;
84
85        // `root` would be inspected multiple times, but its impact on performance is insignificant
86        // before the (usually) massive fs tree `root` contains.
87        let root_dev = match device_boundary {
88            DeviceBoundary::Cross => None,
89            DeviceBoundary::Stay => match symlink_metadata(&root) {
90                Err(error) => {
91                    reporter.report(Event::EncounterError(ErrorReport {
92                        operation: SymlinkMetadata,
93                        path: &root,
94                        error,
95                    }));
96                    return DataTree::file(OsStringDisplay::os_string_from(&root), Size::default());
97                }
98                Ok(stats) => Some(get_device_id(&stats)),
99            },
100        };
101
102        TreeBuilder::<PathBuf, OsStringDisplay, Size, _, _> {
103            name: OsStringDisplay::os_string_from(&root),
104
105            path: root,
106
107            get_info: |path| {
108                let (is_dir, size, same_device) = match symlink_metadata(path) {
109                    Err(error) => {
110                        reporter.report(Event::EncounterError(ErrorReport {
111                            operation: SymlinkMetadata,
112                            path,
113                            error,
114                        }));
115                        return Info {
116                            size: Size::default(),
117                            children: Vec::new(),
118                        };
119                    }
120                    Ok(stats) => {
121                        // `stats` should be dropped ASAP to avoid piling up kernel memory usage
122                        let is_dir = stats.is_dir();
123                        let same_device =
124                            root_dev.is_none_or(|root_dev| get_device_id(&stats) == root_dev);
125                        let size = size_getter.get_size(&stats);
126                        reporter.report(Event::ReceiveData(size));
127                        hardlinks_recorder
128                            .record_hardlinks(RecordHardlinksArgument::new(
129                                path, &stats, size, reporter,
130                            ))
131                            .ok(); // ignore the error for now
132                        (is_dir, size, same_device)
133                    }
134                };
135
136                let children: Vec<_> = if is_dir && same_device {
137                    match read_dir(path) {
138                        Err(error) => {
139                            reporter.report(Event::EncounterError(ErrorReport {
140                                operation: ReadDirectory,
141                                path,
142                                error,
143                            }));
144                            return Info {
145                                size,
146                                children: Vec::new(),
147                            };
148                        }
149                        Ok(entries) => entries,
150                    }
151                    .filter_map(|entry| match entry {
152                        Err(error) => {
153                            reporter.report(Event::EncounterError(ErrorReport {
154                                operation: AccessEntry,
155                                path,
156                                error,
157                            }));
158                            None
159                        }
160                        Ok(entry) => entry.file_name().pipe(OsStringDisplay::from).pipe(Some),
161                    })
162                    .collect()
163                } else {
164                    Vec::new()
165                };
166
167                Info { size, children }
168            },
169
170            join_path: |prefix, name| prefix.join(&name.0),
171
172            max_depth,
173        }
174        .into()
175    }
176}
177
// Private submodule that provides `get_device_id`, used above to compare the
// device of each inspected path against the device of the root.
mod device_id;