parallel_disk_usage/data_tree/hardlink.rs

use super::DataTree;
use crate::size;
use assert_cmp::debug_assert_op;
use rayon::prelude::*;
use std::{ffi::OsStr, path::Path};

impl<Name, Size> DataTree<Name, Size>
where
    Self: Send,
    Name: AsRef<OsStr>,
    Size: size::Size + Sync,
{
    /// Reduce the sizes of directories that contain hardlinks, so that each
    /// hardlinked inode is counted only once per directory total.
    ///
    /// Each entry of `hardlink_info` pairs the size of one inode with the paths of
    /// all links to it (an illustrative way to build such groups is sketched at the
    /// bottom of this file).
    #[cfg_attr(not(unix), expect(unused))]
    pub(crate) fn par_deduplicate_hardlinks(&mut self, hardlink_info: &[(Size, Vec<&Path>)]) {
        if hardlink_info.is_empty() {
            return;
        }

        // Re-anchor each hardlink path relative to this node and keep only the
        // groups that still have more than one link inside this subtree.
        let prefix = self.name().as_ref();
        let sub_hardlink_info: Vec<(Size, Vec<&Path>)> = hardlink_info
            .iter()
            .filter(|(_, link_paths)| link_paths.len() > 1)
            .map(|(size, link_paths)| {
                // Links that lie outside this subtree fail `strip_prefix` and are dropped.
                let link_suffices: Vec<&Path> = link_paths
                    .iter()
                    .map(|link_path| link_path.strip_prefix(prefix))
                    .filter_map(Result::ok)
                    .collect();
                (*size, link_suffices)
            })
            .filter(|(_, link_paths)| link_paths.len() > 1)
            .collect();

        // Each group of `n` links to the same inode was counted `n` times in this
        // node's total, so subtract the `n - 1` duplicated copies.
        for (size, link_suffices) in &sub_hardlink_info {
            let number_of_links = link_suffices.len();
            debug_assert_op!(number_of_links > 1);
            self.size -= *size * (number_of_links - 1);
        }

        // Recurse into the children in parallel, passing down only the narrowed
        // group list so they can repeat the same deduplication for their subtrees.
        self.children
            .par_iter_mut()
            .for_each(|child| child.par_deduplicate_hardlinks(&sub_hardlink_info))
    }
}
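
// The sketch below is illustrative only and not part of the crate: it shows one
// way a unix caller could assemble the `(inode size, link paths)` groups that
// `par_deduplicate_hardlinks` expects, using only `std`. The function name
// `collect_hardlink_groups`, the plain `u64` size, and the grouping by
// `(dev, ino)` are assumptions for illustration, not the crate's actual
// hardlink-detection logic.
#[cfg(unix)]
#[allow(dead_code)]
fn collect_hardlink_groups(
    paths: &[std::path::PathBuf],
) -> Vec<(u64, Vec<&std::path::Path>)> {
    use std::collections::HashMap;
    use std::os::unix::fs::MetadataExt;

    // Group paths by (device, inode): a file with `nlink > 1` shows up under
    // several paths, and each group stores the shared inode size once.
    let mut groups: HashMap<(u64, u64), (u64, Vec<&std::path::Path>)> = HashMap::new();
    for path in paths {
        let Ok(metadata) = std::fs::symlink_metadata(path) else {
            continue;
        };
        if !metadata.is_file() || metadata.nlink() < 2 {
            continue;
        }
        groups
            .entry((metadata.dev(), metadata.ino()))
            .or_insert_with(|| (metadata.len(), Vec::new()))
            .1
            .push(path.as_path());
    }
    groups.into_values().collect()
}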