Skip to main content

parallel_disk_usage/app/
sub.rs

1use crate::{
2    args::{Depth, Fraction},
3    data_tree::DataTree,
4    device::DeviceBoundary,
5    fs_tree_builder::FsTreeBuilder,
6    get_size::GetSize,
7    hardlink::{DeduplicateSharedSize, HardlinkIgnorant, RecordHardlinks},
8    json_data::{BinaryVersion, JsonData, JsonDataBody, JsonShared, JsonTree, SchemaVersion},
9    os_string_display::OsStringDisplay,
10    reporter::ParallelReporter,
11    runtime_error::RuntimeError,
12    size,
13    status_board::GLOBAL_STATUS_BOARD,
14    visualizer::{BarAlignment, ColumnWidthDistribution, Direction, Visualizer},
15};
16use pipe_trait::Pipe;
17use serde::Serialize;
18use std::{io::stdout, iter::once, path::PathBuf};
19
20/// The sub program of the main application.
21pub struct Sub<Size, SizeGetter, HardlinksHandler, Report>
22where
23    Report: ParallelReporter<Size> + Sync,
24    Size: size::Size + Into<u64> + Serialize + Send + Sync,
25    SizeGetter: GetSize<Size = Size> + Copy + Sync,
26    HardlinksHandler: RecordHardlinks<Size, Report> + HardlinkSubroutines<Size> + Sync,
27    JsonTree<Size>: Into<JsonDataBody>,
28{
29    /// List of files and/or directories.
30    pub files: Vec<PathBuf>,
31    /// Print JSON data instead of an ASCII chart.
32    pub json_output: Option<JsonOutputParam>,
33    /// Format to be used to [`display`](size::Size::display) the sizes returned by [`size_getter`](Self::size_getter).
34    pub bytes_format: Size::DisplayFormat,
35    /// The direction of the visualization.
36    pub direction: Direction,
37    /// The alignment of the bars.
38    pub bar_alignment: BarAlignment,
39    /// Distribution and number of characters/blocks can be placed in a line.
40    pub column_width_distribution: ColumnWidthDistribution,
41    /// Maximum number of levels that should be visualized.
42    pub max_depth: Depth,
43    /// [Get the size](GetSize) of files/directories.
44    pub size_getter: SizeGetter,
45    /// Handle to detect, record, and deduplicate hardlinks.
46    pub hardlinks_handler: HardlinksHandler,
47    /// Whether to cross device boundary into a different filesystem.
48    pub device_boundary: DeviceBoundary,
49    /// Reports measurement progress.
50    pub reporter: Report,
51    /// Minimal size proportion required to appear.
52    pub min_ratio: Fraction,
53    /// Preserve order of entries.
54    pub no_sort: bool,
55}
56
57impl<Size, SizeGetter, HardlinksHandler, Report> Sub<Size, SizeGetter, HardlinksHandler, Report>
58where
59    Size: size::Size + Into<u64> + Serialize + Send + Sync,
60    Report: ParallelReporter<Size> + Sync,
61    SizeGetter: GetSize<Size = Size> + Copy + Sync,
62    HardlinksHandler: RecordHardlinks<Size, Report> + HardlinkSubroutines<Size> + Sync,
63    JsonTree<Size>: Into<JsonDataBody>,
64{
65    /// Run the sub program.
66    pub fn run(self) -> Result<(), RuntimeError> {
67        let Sub {
68            files,
69            json_output,
70            bytes_format,
71            direction,
72            bar_alignment,
73            column_width_distribution,
74            max_depth,
75            size_getter,
76            hardlinks_handler,
77            device_boundary,
78            reporter,
79            min_ratio,
80            no_sort,
81        } = self;
82
83        let max_depth = max_depth.get();
84
85        let mut iter = files
86            .into_iter()
87            .map(|root| -> DataTree<OsStringDisplay, Size> {
88                FsTreeBuilder {
89                    reporter: &reporter,
90                    root,
91                    size_getter,
92                    hardlinks_recorder: &hardlinks_handler,
93                    device_boundary,
94                    max_depth,
95                }
96                .into()
97            });
98
99        let data_tree = if let Some(data_tree) = iter.next() {
100            data_tree
101        } else {
102            return Sub {
103                files: vec![".".into()],
104                hardlinks_handler,
105                reporter,
106                ..self
107            }
108            .run();
109        };
110
111        let only_one_arg = iter.len() == 0; // ExactSizeIterator::is_empty is unstable
112        let data_tree = if only_one_arg {
113            data_tree
114        } else {
115            let children: Vec<_> = once(data_tree).chain(iter).collect();
116
117            // This name is for hardlinks deduplication to work correctly as empty string is considered to be the start of any path.
118            // It would be changed into "(total)" later.
119            let fake_root_name = OsStringDisplay::os_string_from("");
120
121            DataTree::dir(fake_root_name, Size::default(), children)
122                .into_par_retained(|_, depth| depth + 1 < max_depth)
123        };
124
125        if reporter.destroy().is_err() {
126            eprintln!("[warning] Failed to destroy the thread that reports progress");
127        }
128
129        let min_ratio: f32 = min_ratio.into();
130        let (data_tree, deduplication_record) = {
131            let mut data_tree = data_tree;
132            if min_ratio > 0.0 {
133                data_tree.par_cull_insignificant_data(min_ratio);
134            }
135            if !no_sort {
136                data_tree.par_sort_by(|left, right| left.size().cmp(&right.size()).reverse());
137            }
138            let deduplication_record = hardlinks_handler.deduplicate(&mut data_tree);
139            if !only_one_arg {
140                assert_eq!(data_tree.name().as_os_str().to_str(), Some(""));
141                *data_tree.name_mut() = OsStringDisplay::os_string_from("(total)");
142            }
143            (data_tree, deduplication_record)
144        };
145
146        GLOBAL_STATUS_BOARD.clear_line(0);
147
148        if let Some(json_output) = json_output {
149            let JsonOutputParam {
150                shared_details,
151                shared_summary,
152            } = json_output;
153            let tree = data_tree
154                .into_reflection() // I really want to use std::mem::transmute here but can't.
155                .par_convert_names_to_utf8() // TODO: allow non-UTF8 somehow.
156                .expect("convert all names from raw string to UTF-8");
157
158            let deduplication_result = if !shared_details && !shared_summary {
159                Ok(JsonShared::default())
160            } else {
161                // `try` expression would be extremely useful right now but it sadly requires nightly
162                || -> Result<_, RuntimeError> {
163                    let mut shared = deduplication_record
164                        .map_err(HardlinksHandler::convert_error)?
165                        .pipe(HardlinksHandler::json_report)?
166                        .unwrap_or_default();
167                    if !shared_details {
168                        shared.details = None;
169                    }
170                    if !shared_summary {
171                        shared.summary = None;
172                    }
173                    Ok(shared)
174                }()
175            };
176
177            // errors caused by failing deduplication shouldn't prevent the JSON data from being printed
178            let (shared, deduplication_result) = match deduplication_result {
179                Ok(shared) => (shared, Ok(())),
180                Err(error) => (JsonShared::default(), Err(error)),
181            };
182
183            let json_tree = JsonTree { tree, shared };
184            let json_data = JsonData {
185                schema_version: SchemaVersion,
186                binary_version: Some(BinaryVersion::current()),
187                body: json_tree.into(),
188            };
189
190            return serde_json::to_writer(stdout(), &json_data)
191                .map_err(RuntimeError::SerializationFailure)
192                .or(deduplication_result);
193        }
194
195        let visualizer = Visualizer {
196            data_tree: &data_tree,
197            bytes_format,
198            direction,
199            bar_alignment,
200            column_width_distribution,
201        };
202
203        print!("{visualizer}"); // visualizer already ends with "\n", println! isn't needed here.
204
205        let deduplication_record = deduplication_record.map_err(HardlinksHandler::convert_error)?;
206        HardlinksHandler::print_report(deduplication_record, bytes_format)?;
207
208        Ok(())
209    }
210}
211
/// Parameter for [`Sub::json_output`]: selects how much detail about shared
/// (hardlinked) data goes into the output JSON object.
#[derive(Clone, Copy, Debug)]
pub struct JsonOutputParam {
    /// Include `.shared.details` in the JSON output when `true`.
    pub shared_details: bool,
    /// Include `.shared.summary` in the JSON output when `true`.
    pub shared_summary: bool,
}
221
222impl JsonOutputParam {
223    /// Infer from the CLI flags.
224    pub(super) fn from_cli_flags(
225        output_json: bool,
226        omit_shared_details: bool,
227        omit_shared_summary: bool,
228    ) -> Option<Self> {
229        output_json.then_some(JsonOutputParam {
230            shared_details: !omit_shared_details,
231            shared_summary: !omit_shared_summary,
232        })
233    }
234}
235
236/// Subroutines used by [`Sub`] to deduplicate sizes of detected hardlinks and report about it.
237pub trait HardlinkSubroutines<Size: size::Size>: DeduplicateSharedSize<Size> {
238    /// Convert the error to runtime error.
239    fn convert_error(error: Self::Error) -> RuntimeError;
240    /// Handle the report.
241    fn print_report(
242        report: Self::Report,
243        bytes_format: Size::DisplayFormat,
244    ) -> Result<(), RuntimeError>;
245    /// Create a JSON serializable object from the report.
246    fn json_report(report: Self::Report) -> Result<Option<JsonShared<Size>>, RuntimeError>;
247}
248
249impl<Size> HardlinkSubroutines<Size> for HardlinkIgnorant
250where
251    DataTree<OsStringDisplay, Size>: Send,
252    Size: size::Size + Sync,
253{
254    #[inline]
255    fn convert_error(error: Self::Error) -> RuntimeError {
256        match error {}
257    }
258
259    #[inline]
260    fn print_report((): Self::Report, _: Size::DisplayFormat) -> Result<(), RuntimeError> {
261        Ok(())
262    }
263
264    #[inline]
265    fn json_report((): Self::Report) -> Result<Option<JsonShared<Size>>, RuntimeError> {
266        Ok(None)
267    }
268}
269
270#[cfg(unix)]
271mod unix_ext;