1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
use crate::hasher::Hasher;
use crate::metadata::Metadata;
use smallvec::SmallVec;
use std::cell::RefCell;
use std::cmp::max;
use std::cmp::Ordering;
use std::io;
use std::path::Path;

/// A collection of paths grouped together as links to one file, plus the
/// highest hardlink count reported for it.
#[derive(Debug, Clone)]
pub struct FileSet {
    /// Tracks number of hardlinks from stat to also count unseen links outside scanned dirs
    pub max_hardlinks: u64,
    /// Paths recorded for this set; `FileSet::new` always seeds it with one path.
    pub paths: SmallVec<[Box<Path>; 1]>,
}

impl FileSet {
    /// Builds a set that initially contains only `path`, recording
    /// `max_hardlinks` as the link count supplied by the caller.
    pub fn new(path: Box<Path>, max_hardlinks: u64) -> Self {
        Self {
            max_hardlinks,
            // A single-element buffer exactly fills the inline storage.
            paths: SmallVec::from_buf([path]),
        }
    }

    /// Appends another path to this set.
    pub fn push(&mut self, path: Box<Path>) {
        self.paths.push(path);
    }

    /// Number of known hardlinks to this file content.
    ///
    /// This is whichever is larger: the stored `max_hardlinks` value or the
    /// number of paths collected in this set.
    pub fn links(&self) -> u64 {
        let collected = self.paths.len() as u64;
        max(self.max_hardlinks, collected)
    }
}

#[derive(Debug)]
/// A file's path and metadata, together with the hash state used to compare its content.
///
/// File content is efficiently compared using this struct's `PartialOrd` implementation
pub struct FileContent {
    /// Path of the file; handed to the hasher when two files have to be compared by content
    path: Box<Path>,
    /// Metadata of the file; compared first, before any hashes are consulted
    metadata: Metadata,
    /// Hashes of content, calculated incrementally.
    /// Wrapped in a `RefCell` because comparison takes `&self` yet needs mutable access.
    hashes: RefCell<Hasher>,
}

impl FileContent {
    /// Reads the metadata for `path` and wraps both in a new `FileContent`.
    ///
    /// # Errors
    ///
    /// Propagates the error returned by `Metadata::from_path`.
    pub fn from_path(path: Box<Path>) -> Result<Self, io::Error> {
        let metadata = Metadata::from_path(&path)?;
        Ok(Self::new(path, metadata))
    }

    /// Creates a `FileContent` from an already known `metadata`, starting
    /// with a fresh `Hasher`.
    pub fn new(path: Box<Path>, metadata: Metadata) -> Self {
        let hashes = RefCell::new(Hasher::new());
        Self {
            path,
            metadata,
            hashes,
        }
    }
}

/// Marker impl; the actual equality test lives in the `PartialEq` implementation.
impl Eq for FileContent {}

impl PartialEq for FileContent {
    /// Two values are equal only when `partial_cmp` succeeds and reports
    /// `Ordering::Equal`; a failed comparison counts as "not equal".
    fn eq(&self, other: &Self) -> bool {
        matches!(self.partial_cmp(other), Some(Ordering::Equal))
    }
}

impl Ord for FileContent {
    /// Total ordering built on `compare`.
    ///
    /// When the comparison fails with an I/O error, `self` is reported as
    /// `Ordering::Greater` than `other`.
    fn cmp(&self, other: &Self) -> Ordering {
        match self.compare(other) {
            Ok(ordering) => ordering,
            Err(_) => Ordering::Greater,
        }
    }
}

/// Entry point for the bulk of hashing and comparisons, delegated to `compare`
impl PartialOrd for FileContent {
    /// Returns the ordering from `compare`, or `None` when the comparison
    /// failed with an I/O error.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        match self.compare(other) {
            Ok(ordering) => Some(ordering),
            Err(_) => None,
        }
    }
}

impl FileContent {
    /// Orders two files, consulting content hashes only when cheaper checks
    /// cannot tell them apart.
    ///
    /// # Errors
    ///
    /// Returns whatever error the hasher's `compare` reports.
    fn compare(&self, other: &Self) -> io::Result<Ordering> {
        // Identity shortcut: an object always equals itself. It also keeps the
        // two `borrow_mut` calls below from hitting the same `RefCell`.
        if std::ptr::eq(self, other) {
            return Ok(Ordering::Equal);
        }

        // Metadata is compared before any hashing. Per the original note, files
        // of different sizes are obviously different, and files on different
        // devices are treated as different too, since the search is intended
        // for hardlinking and hardlinks only work within one device.
        match self.metadata.cmp(&other.metadata) {
            Ordering::Equal => {}
            unequal => return Ok(unequal),
        }

        let mut own_hashes = self.hashes.borrow_mut();
        let mut their_hashes = other.hashes.borrow_mut();

        own_hashes.compare(
            &mut their_hashes,
            self.metadata.size,
            &self.path,
            &other.path,
        )
    }
}