parallel_disk_usage/hardlink/
aware.rs

1use super::{
2    hardlink_list, DeduplicateSharedSize, HardlinkList, LinkPathList, RecordHardlinks,
3    RecordHardlinksArgument,
4};
5use crate::{
6    data_tree::DataTree,
7    inode::InodeNumber,
8    os_string_display::OsStringDisplay,
9    reporter::{event::HardlinkDetection, Event, Reporter},
10    size,
11};
12use derive_more::{AsMut, AsRef, Display, Error, From, Into};
13use pipe_trait::Pipe;
14use smart_default::SmartDefault;
15use std::{convert::Infallible, fmt::Debug, os::unix::fs::MetadataExt, path::Path};
16
17/// Be aware of hardlinks. Treat them as links that share space.
18/// Detect files with more than 1 links and record them.
19/// Deduplicate them (remove duplicated size) from total size to
20/// accurately reflect the real size of their containers.
21#[derive(Debug, SmartDefault, Clone, AsRef, AsMut, From, Into)]
22pub struct Aware<Size> {
23    /// Map an inode number to its size and detected paths.
24    record: HardlinkList<Size>,
25}
26
27pub use Aware as HardlinkAware;
28
29impl<Size> Aware<Size> {
30    /// Create new hardlinks handler.
31    pub fn new() -> Self {
32        HardlinkList::default().pipe(Aware::from)
33    }
34
35    /// Create a detector/recorder of hardlinks.
36    pub fn from_record(record: HardlinkList<Size>) -> Self {
37        Aware::from(record)
38    }
39}
40
41/// Error that occurs when [`Aware::record_hardlinks`] fails.
42#[derive(Debug, Display, Error)]
43#[non_exhaustive]
44pub enum ReportHardlinksError<Size> {
45    /// Fail to add an entry to the record.
46    #[display("Fail to add an entry to record: {_0}")]
47    AddToRecord(hardlink_list::AddError<Size>),
48}
49
50impl<Size, Report> RecordHardlinks<Size, Report> for Aware<Size>
51where
52    Size: size::Size + Eq + Debug,
53    Report: Reporter<Size> + ?Sized,
54{
55    type Error = ReportHardlinksError<Size>;
56
57    fn record_hardlinks(
58        &self,
59        argument: RecordHardlinksArgument<Size, Report>,
60    ) -> Result<(), Self::Error> {
61        let RecordHardlinksArgument {
62            path,
63            stats,
64            size,
65            reporter,
66        } = argument;
67
68        if stats.is_dir() {
69            return Ok(());
70        }
71
72        let links = stats.nlink();
73        if links <= 1 {
74            return Ok(());
75        }
76
77        reporter.report(Event::DetectHardlink(HardlinkDetection {
78            path,
79            stats,
80            size,
81            links,
82        }));
83
84        let ino = InodeNumber::get(stats);
85        self.record
86            .add(ino, size, links, path)
87            .map_err(ReportHardlinksError::AddToRecord)
88    }
89}
90
91impl<Size> DeduplicateSharedSize<Size> for Aware<Size>
92where
93    DataTree<OsStringDisplay, Size>: Send,
94    Size: size::Size + Sync,
95{
96    type Report = HardlinkList<Size>;
97    type Error = Infallible;
98    fn deduplicate(
99        self,
100        data_tree: &mut DataTree<OsStringDisplay, Size>,
101    ) -> Result<Self::Report, Self::Error> {
102        let record: Self::Report = self.into();
103        let hardlink_info: Box<[(Size, LinkPathList)]> = record
104            .iter()
105            .map(|values| (*values.size(), values.paths().clone()))
106            .collect();
107        let hardlink_info: Box<[(Size, Vec<&Path>)]> = hardlink_info
108            .iter()
109            .map(|(size, paths)| (*size, paths.iter().map(AsRef::as_ref).collect()))
110            .collect();
111        data_tree.par_deduplicate_hardlinks(&hardlink_info);
112        Ok(record)
113    }
114}