parallel_disk_usage/
fs_tree_builder.rs

1use super::{
2    data_tree::DataTree,
3    get_size::GetSize,
4    hardlink::{RecordHardlinks, RecordHardlinksArgument},
5    os_string_display::OsStringDisplay,
6    reporter::{error_report::Operation::*, ErrorReport, Event, Reporter},
7    size,
8    tree_builder::{Info, TreeBuilder},
9};
10use pipe_trait::Pipe;
11use std::{
12    fs::{read_dir, symlink_metadata},
13    path::PathBuf,
14};
15
16/// Build a [`DataTree`] from a directory tree using [`From`] or [`Into`].
17///
18/// **Example:**
19///
20/// ```no_run
21/// # use parallel_disk_usage::fs_tree_builder::FsTreeBuilder;
22/// use parallel_disk_usage::{
23///     data_tree::DataTree,
24///     get_size::GetApparentSize,
25///     os_string_display::OsStringDisplay,
26///     reporter::{ErrorOnlyReporter, ErrorReport},
27///     size::Bytes,
28///     hardlink::HardlinkIgnorant,
29/// };
30/// let builder = FsTreeBuilder {
31///     root: std::env::current_dir().unwrap(),
32///     hardlinks_recorder: &HardlinkIgnorant,
33///     size_getter: GetApparentSize,
34///     reporter: &ErrorOnlyReporter::new(ErrorReport::SILENT),
35///     max_depth: 10,
36/// };
37/// let data_tree: DataTree<OsStringDisplay, Bytes> = builder.into();
38/// ```
39#[derive(Debug)]
40pub struct FsTreeBuilder<'a, Size, SizeGetter, HardlinksRecorder, Report>
41where
42    Report: Reporter<Size> + Sync + ?Sized,
43    Size: size::Size + Send + Sync,
44    SizeGetter: GetSize<Size = Size> + Sync,
45    HardlinksRecorder: RecordHardlinks<Size, Report> + Sync + ?Sized,
46{
47    /// Root of the directory tree.
48    pub root: PathBuf,
49    /// Returns size of an item.
50    pub size_getter: SizeGetter,
51    /// Handle to detect and record hardlinks.
52    pub hardlinks_recorder: &'a HardlinksRecorder,
53    /// Reports progress to external system.
54    pub reporter: &'a Report,
55    /// Deepest level of descendent display in the graph. The sizes beyond the max depth still count toward total.
56    pub max_depth: u64,
57}
58
59impl<'a, Size, SizeGetter, HardlinksRecorder, Report>
60    From<FsTreeBuilder<'a, Size, SizeGetter, HardlinksRecorder, Report>>
61    for DataTree<OsStringDisplay, Size>
62where
63    Report: Reporter<Size> + Sync + ?Sized,
64    Size: size::Size + Send + Sync,
65    SizeGetter: GetSize<Size = Size> + Sync,
66    HardlinksRecorder: RecordHardlinks<Size, Report> + Sync + ?Sized,
67{
68    /// Create a [`DataTree`] from an [`FsTreeBuilder`].
69    fn from(builder: FsTreeBuilder<Size, SizeGetter, HardlinksRecorder, Report>) -> Self {
70        let FsTreeBuilder {
71            root,
72            size_getter,
73            hardlinks_recorder,
74            reporter,
75            max_depth,
76        } = builder;
77
78        TreeBuilder::<PathBuf, OsStringDisplay, Size, _, _> {
79            name: OsStringDisplay::os_string_from(&root),
80
81            path: root,
82
83            get_info: |path| {
84                let (is_dir, size) = match symlink_metadata(path) {
85                    Err(error) => {
86                        reporter.report(Event::EncounterError(ErrorReport {
87                            operation: SymlinkMetadata,
88                            path,
89                            error,
90                        }));
91                        return Info {
92                            size: Size::default(),
93                            children: Vec::new(),
94                        };
95                    }
96                    Ok(stats) => {
97                        // `stats` should be dropped ASAP to avoid piling up kernel memory usage
98                        let is_dir = stats.is_dir();
99                        let size = size_getter.get_size(&stats);
100                        reporter.report(Event::ReceiveData(size));
101                        hardlinks_recorder
102                            .record_hardlinks(RecordHardlinksArgument::new(
103                                path, &stats, size, reporter,
104                            ))
105                            .ok(); // ignore the error for now
106                        (is_dir, size)
107                    }
108                };
109
110                let children: Vec<_> = if is_dir {
111                    match read_dir(path) {
112                        Err(error) => {
113                            reporter.report(Event::EncounterError(ErrorReport {
114                                operation: ReadDirectory,
115                                path,
116                                error,
117                            }));
118                            return Info::default();
119                        }
120                        Ok(entries) => entries,
121                    }
122                    .filter_map(|entry| match entry {
123                        Err(error) => {
124                            reporter.report(Event::EncounterError(ErrorReport {
125                                operation: AccessEntry,
126                                path,
127                                error,
128                            }));
129                            None
130                        }
131                        Ok(entry) => entry.file_name().pipe(OsStringDisplay::from).pipe(Some),
132                    })
133                    .collect()
134                } else {
135                    Vec::new()
136                };
137
138                Info { size, children }
139            },
140
141            join_path: |prefix, name| prefix.join(&name.0),
142
143            max_depth,
144        }
145        .into()
146    }
147}