dupe_krill/
file.rs

1use crate::hasher::Hasher;
2use crate::metadata::Metadata;
3use smallvec::SmallVec;
4use std::cell::RefCell;
5use std::cmp::{max, Ordering};
6use std::io;
7use std::path::Path;
8
9#[derive(Debug, Clone)]
10pub struct FileSet {
11    /// Tracks number of hardlinks from stat to also count unseen links outside scanned dirs
12    pub max_hardlinks: u64,
13    pub paths: SmallVec<[Box<Path>; 1]>,
14}
15
16impl FileSet {
17    pub fn new(path: Box<Path>, max_hardlinks: u64) -> Self {
18        let mut paths = SmallVec::new();
19        paths.push(path);
20        Self { max_hardlinks, paths }
21    }
22
23    pub fn push(&mut self, path: Box<Path>) {
24        self.paths.push(path);
25    }
26
27    /// Number of known hardlinks to this file content
28    pub fn links(&self) -> u64 {
29        max(self.max_hardlinks, self.paths.len() as u64)
30    }
31}
32
33#[derive(Debug)]
34/// File content is efficiently compared using this struct's `PartialOrd` implementation
35pub struct FileContent {
36    path: Box<Path>,
37    metadata: Metadata,
38    /// Hashes of content, calculated incrementally
39    hashes: RefCell<Hasher>,
40}
41
42impl FileContent {
43    pub fn from_path(path: Box<Path>) -> Result<Self, io::Error> {
44        let m = Metadata::from_path(&path)?;
45        Ok(Self::new(path, m))
46    }
47
48    #[must_use]
49    pub fn new(path: Box<Path>, metadata: Metadata) -> Self {
50        Self {
51            path,
52            metadata,
53            hashes: RefCell::new(Hasher::new()),
54        }
55    }
56}
57
58impl Eq for FileContent {}
59
60impl PartialEq for FileContent {
61    fn eq(&self, other: &Self) -> bool {
62        self.partial_cmp(other) == Some(Ordering::Equal)
63    }
64}
65
66impl Ord for FileContent {
67    fn cmp(&self, other: &Self) -> Ordering {
68        self.compare(other).unwrap_or(Ordering::Greater)
69    }
70}
71
72/// That does the bulk of hasing and comparisons
73impl PartialOrd for FileContent {
74    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
75        self.compare(other).ok()
76    }
77}
78
79impl FileContent {
80    fn compare(&self, other: &Self) -> io::Result<Ordering> {
81        // Fast pointer comparison
82        if std::ptr::eq(self, other) {
83            return Ok(Ordering::Equal);
84        }
85
86        // Different file sizes mean they're obviously different.
87        // Also different devices mean they're not the same as far as we're concerned
88        // (since search is intended for hardlinking and hardlinking only works within the same device).
89        let cmp = self.metadata.cmp(&other.metadata);
90        if cmp != Ordering::Equal {
91            return Ok(cmp);
92        }
93
94        let mut hashes1 = self.hashes.borrow_mut();
95        let mut hashes2 = other.hashes.borrow_mut();
96
97        hashes1.compare(&mut hashes2, self.metadata.size, &self.path, &other.path)
98    }
99}