Skip to main content

parallel_disk_usage/hardlink/
aware.rs

1use super::{
2    hardlink_list, DeduplicateSharedSize, HardlinkList, LinkPathList, RecordHardlinks,
3    RecordHardlinksArgument,
4};
5use crate::{
6    data_tree::DataTree,
7    device::DeviceNumber,
8    inode::InodeNumber,
9    os_string_display::OsStringDisplay,
10    reporter::{event::HardlinkDetection, Event, Reporter},
11    size,
12};
13use derive_more::{AsMut, AsRef, Display, Error, From, Into};
14use pipe_trait::Pipe;
15use smart_default::SmartDefault;
16use std::{convert::Infallible, fmt::Debug, os::unix::fs::MetadataExt, path::Path};
17
18/// Be aware of hardlinks. Treat them as links that share space.
19/// Detect files with more than 1 links and record them.
20/// Deduplicate them (remove duplicated size) from total size to
21/// accurately reflect the real size of their containers.
22#[derive(Debug, SmartDefault, Clone, AsRef, AsMut, From, Into)]
23pub struct Aware<Size> {
24    /// Map each file (identified by inode number and device number) to its size and detected paths.
25    record: HardlinkList<Size>,
26}
27
28pub use Aware as HardlinkAware;
29
30impl<Size> Aware<Size> {
31    /// Create new hardlinks handler.
32    pub fn new() -> Self {
33        HardlinkList::default().pipe(Aware::from)
34    }
35
36    /// Create a detector/recorder of hardlinks.
37    pub fn from_record(record: HardlinkList<Size>) -> Self {
38        Aware::from(record)
39    }
40}
41
42/// Error that occurs when [`Aware::record_hardlinks`] fails.
43#[derive(Debug, Display, Error)]
44#[non_exhaustive]
45pub enum ReportHardlinksError<Size> {
46    /// Fail to add an entry to the record.
47    #[display("Fail to add an entry to record: {_0}")]
48    AddToRecord(hardlink_list::AddError<Size>),
49}
50
51impl<Size, Report> RecordHardlinks<Size, Report> for Aware<Size>
52where
53    Size: size::Size + Eq + Debug,
54    Report: Reporter<Size> + ?Sized,
55{
56    type Error = ReportHardlinksError<Size>;
57
58    fn record_hardlinks(
59        &self,
60        argument: RecordHardlinksArgument<Size, Report>,
61    ) -> Result<(), Self::Error> {
62        let RecordHardlinksArgument {
63            path,
64            stats,
65            size,
66            reporter,
67        } = argument;
68
69        if stats.is_dir() {
70            return Ok(());
71        }
72
73        let links = stats.nlink();
74        if links <= 1 {
75            return Ok(());
76        }
77
78        reporter.report(Event::DetectHardlink(HardlinkDetection {
79            path,
80            stats,
81            size,
82            links,
83        }));
84
85        let ino = InodeNumber::get(stats);
86        let dev = DeviceNumber::get(stats);
87        self.record
88            .add(ino, dev, size, links, path)
89            .map_err(ReportHardlinksError::AddToRecord)
90    }
91}
92
93impl<Size> DeduplicateSharedSize<Size> for Aware<Size>
94where
95    DataTree<OsStringDisplay, Size>: Send,
96    Size: size::Size + Sync,
97{
98    type Report = HardlinkList<Size>;
99    type Error = Infallible;
100    fn deduplicate(
101        self,
102        data_tree: &mut DataTree<OsStringDisplay, Size>,
103    ) -> Result<Self::Report, Self::Error> {
104        let record: Self::Report = self.into();
105        let hardlink_info: Box<[(Size, LinkPathList)]> = record
106            .iter()
107            .map(|values| (*values.size(), values.paths().clone()))
108            .collect();
109        let hardlink_info: Box<[(Size, Vec<&Path>)]> = hardlink_info
110            .iter()
111            .map(|(size, paths)| (*size, paths.iter().map(AsRef::as_ref).collect()))
112            .collect();
113        data_tree.par_deduplicate_hardlinks(&hardlink_info);
114        Ok(record)
115    }
116}