parallel_disk_usage/app/
sub.rs

1use crate::{
2    args::{Depth, Fraction},
3    data_tree::DataTree,
4    fs_tree_builder::FsTreeBuilder,
5    get_size::GetSize,
6    hardlink::{DeduplicateSharedSize, HardlinkIgnorant, RecordHardlinks},
7    json_data::{BinaryVersion, JsonData, JsonDataBody, JsonShared, JsonTree, SchemaVersion},
8    os_string_display::OsStringDisplay,
9    reporter::ParallelReporter,
10    runtime_error::RuntimeError,
11    size,
12    status_board::GLOBAL_STATUS_BOARD,
13    visualizer::{BarAlignment, ColumnWidthDistribution, Direction, Visualizer},
14};
15use pipe_trait::Pipe;
16use serde::Serialize;
17use std::{io::stdout, iter::once, path::PathBuf};
18
19/// The sub program of the main application.
20pub struct Sub<Size, SizeGetter, HardlinksHandler, Report>
21where
22    Report: ParallelReporter<Size> + Sync,
23    Size: size::Size + Into<u64> + Serialize + Send + Sync,
24    SizeGetter: GetSize<Size = Size> + Copy + Sync,
25    HardlinksHandler: RecordHardlinks<Size, Report> + HardlinkSubroutines<Size> + Sync,
26    JsonTree<Size>: Into<JsonDataBody>,
27{
28    /// List of files and/or directories.
29    pub files: Vec<PathBuf>,
30    /// Print JSON data instead of an ASCII chart.
31    pub json_output: Option<JsonOutputParam>,
32    /// Format to be used to [`display`](size::Size::display) the sizes returned by [`size_getter`](Self::size_getter).
33    pub bytes_format: Size::DisplayFormat,
34    /// The direction of the visualization.
35    pub direction: Direction,
36    /// The alignment of the bars.
37    pub bar_alignment: BarAlignment,
38    /// Distribution and number of characters/blocks can be placed in a line.
39    pub column_width_distribution: ColumnWidthDistribution,
40    /// Maximum number of levels that should be visualized.
41    pub max_depth: Depth,
42    /// [Get the size](GetSize) of files/directories.
43    pub size_getter: SizeGetter,
44    /// Handle to detect, record, and deduplicate hardlinks.
45    pub hardlinks_handler: HardlinksHandler,
46    /// Reports measurement progress.
47    pub reporter: Report,
48    /// Minimal size proportion required to appear.
49    pub min_ratio: Fraction,
50    /// Preserve order of entries.
51    pub no_sort: bool,
52}
53
54impl<Size, SizeGetter, HardlinksHandler, Report> Sub<Size, SizeGetter, HardlinksHandler, Report>
55where
56    Size: size::Size + Into<u64> + Serialize + Send + Sync,
57    Report: ParallelReporter<Size> + Sync,
58    SizeGetter: GetSize<Size = Size> + Copy + Sync,
59    HardlinksHandler: RecordHardlinks<Size, Report> + HardlinkSubroutines<Size> + Sync,
60    JsonTree<Size>: Into<JsonDataBody>,
61{
62    /// Run the sub program.
63    pub fn run(self) -> Result<(), RuntimeError> {
64        let Sub {
65            files,
66            json_output,
67            bytes_format,
68            direction,
69            bar_alignment,
70            column_width_distribution,
71            max_depth,
72            size_getter,
73            hardlinks_handler,
74            reporter,
75            min_ratio,
76            no_sort,
77        } = self;
78
79        let max_depth = max_depth.get();
80
81        let mut iter = files
82            .into_iter()
83            .map(|root| -> DataTree<OsStringDisplay, Size> {
84                FsTreeBuilder {
85                    reporter: &reporter,
86                    root,
87                    size_getter,
88                    hardlinks_recorder: &hardlinks_handler,
89                    max_depth,
90                }
91                .into()
92            });
93
94        let data_tree = if let Some(data_tree) = iter.next() {
95            data_tree
96        } else {
97            return Sub {
98                files: vec![".".into()],
99                hardlinks_handler,
100                reporter,
101                ..self
102            }
103            .run();
104        };
105
106        let only_one_arg = iter.len() == 0; // ExactSizeIterator::is_empty is unstable
107        let data_tree = if only_one_arg {
108            data_tree
109        } else {
110            let children: Vec<_> = once(data_tree).chain(iter).collect();
111
112            // This name is for hardlinks deduplication to work correctly as empty string is considered to be the start of any path.
113            // It would be changed into "(total)" later.
114            let fake_root_name = OsStringDisplay::os_string_from("");
115
116            DataTree::dir(fake_root_name, Size::default(), children)
117                .into_par_retained(|_, depth| depth + 1 < max_depth)
118        };
119
120        if reporter.destroy().is_err() {
121            eprintln!("[warning] Failed to destroy the thread that reports progress");
122        }
123
124        let min_ratio: f32 = min_ratio.into();
125        let (data_tree, deduplication_record) = {
126            let mut data_tree = data_tree;
127            if min_ratio > 0.0 {
128                data_tree.par_cull_insignificant_data(min_ratio);
129            }
130            if !no_sort {
131                data_tree.par_sort_by(|left, right| left.size().cmp(&right.size()).reverse());
132            }
133            let deduplication_record = hardlinks_handler.deduplicate(&mut data_tree);
134            if !only_one_arg {
135                assert_eq!(data_tree.name().as_os_str().to_str(), Some(""));
136                *data_tree.name_mut() = OsStringDisplay::os_string_from("(total)");
137            }
138            (data_tree, deduplication_record)
139        };
140
141        GLOBAL_STATUS_BOARD.clear_line(0);
142
143        if let Some(json_output) = json_output {
144            let JsonOutputParam {
145                shared_details,
146                shared_summary,
147            } = json_output;
148            let tree = data_tree
149                .into_reflection() // I really want to use std::mem::transmute here but can't.
150                .par_convert_names_to_utf8() // TODO: allow non-UTF8 somehow.
151                .expect("convert all names from raw string to UTF-8");
152
153            let deduplication_result = if !shared_details && !shared_summary {
154                Ok(JsonShared::default())
155            } else {
156                // `try` expression would be extremely useful right now but it sadly requires nightly
157                || -> Result<_, RuntimeError> {
158                    let mut shared = deduplication_record
159                        .map_err(HardlinksHandler::convert_error)?
160                        .pipe(HardlinksHandler::json_report)?
161                        .unwrap_or_default();
162                    if !shared_details {
163                        shared.details = None;
164                    }
165                    if !shared_summary {
166                        shared.summary = None;
167                    }
168                    Ok(shared)
169                }()
170            };
171
172            // errors caused by failing deduplication shouldn't prevent the JSON data from being printed
173            let (shared, deduplication_result) = match deduplication_result {
174                Ok(shared) => (shared, Ok(())),
175                Err(error) => (JsonShared::default(), Err(error)),
176            };
177
178            let json_tree = JsonTree { tree, shared };
179            let json_data = JsonData {
180                schema_version: SchemaVersion,
181                binary_version: Some(BinaryVersion::current()),
182                body: json_tree.into(),
183            };
184
185            return serde_json::to_writer(stdout(), &json_data)
186                .map_err(RuntimeError::SerializationFailure)
187                .or(deduplication_result);
188        }
189
190        let visualizer = Visualizer {
191            data_tree: &data_tree,
192            bytes_format,
193            direction,
194            bar_alignment,
195            column_width_distribution,
196        };
197
198        print!("{visualizer}"); // visualizer already ends with "\n", println! isn't needed here.
199
200        let deduplication_record = deduplication_record.map_err(HardlinksHandler::convert_error)?;
201        HardlinksHandler::print_report(deduplication_record, bytes_format)?;
202
203        Ok(())
204    }
205}
206
/// Parameter of the JSON output mode.
///
/// Pass it to [`Sub::json_output`] to choose which parts of the `.shared`
/// object are kept in the output JSON object.
#[derive(Clone, Copy, Debug)]
pub struct JsonOutputParam {
    /// Keep `.shared.details` in the JSON output when `true`.
    pub shared_details: bool,
    /// Keep `.shared.summary` in the JSON output when `true`.
    pub shared_summary: bool,
}
216
217impl JsonOutputParam {
218    /// Infer from the CLI flags.
219    pub(super) fn from_cli_flags(
220        output_json: bool,
221        omit_shared_details: bool,
222        omit_shared_summary: bool,
223    ) -> Option<Self> {
224        output_json.then_some(JsonOutputParam {
225            shared_details: !omit_shared_details,
226            shared_summary: !omit_shared_summary,
227        })
228    }
229}
230
231/// Subroutines used by [`Sub`] to deduplicate sizes of detected hardlinks and report about it.
232pub trait HardlinkSubroutines<Size: size::Size>: DeduplicateSharedSize<Size> {
233    /// Convert the error to runtime error.
234    fn convert_error(error: Self::Error) -> RuntimeError;
235    /// Handle the report.
236    fn print_report(
237        report: Self::Report,
238        bytes_format: Size::DisplayFormat,
239    ) -> Result<(), RuntimeError>;
240    /// Create a JSON serializable object from the report.
241    fn json_report(report: Self::Report) -> Result<Option<JsonShared<Size>>, RuntimeError>;
242}
243
244impl<Size> HardlinkSubroutines<Size> for HardlinkIgnorant
245where
246    DataTree<OsStringDisplay, Size>: Send,
247    Size: size::Size + Sync,
248{
249    #[inline]
250    fn convert_error(error: Self::Error) -> RuntimeError {
251        match error {}
252    }
253
254    #[inline]
255    fn print_report((): Self::Report, _: Size::DisplayFormat) -> Result<(), RuntimeError> {
256        Ok(())
257    }
258
259    #[inline]
260    fn json_report((): Self::Report) -> Result<Option<JsonShared<Size>>, RuntimeError> {
261        Ok(None)
262    }
263}
264
265#[cfg(unix)]
266mod unix_ext;