Skip to main content

compare_dir/
file_comparer.rs

1use crate::{FileHasher, FileItem};
2use indicatif::FormattedDuration;
3use std::cmp::Ordering;
4use std::fs;
5use std::io::Read;
6use std::path::PathBuf;
7
/// Where a file was found when two directory trees are compared.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Classification {
    /// Present in the first directory and absent from the second.
    OnlyInDir1,
    /// Present in the second directory and absent from the first.
    OnlyInDir2,
    /// Present in both directories.
    InBoth,
}
18
19/// Compares the content of two files.
20pub struct FileComparer<'a> {
21    file1: &'a FileItem,
22    file2: &'a FileItem,
23    pub buffer_size: usize,
24    pub hashers: Option<(&'a FileHasher, &'a FileHasher)>,
25}
26
27impl<'a> FileComparer<'a> {
28    pub const DEFAULT_BUFFER_SIZE_KB: usize = 2 * 1024;
29    pub const DEFAULT_BUFFER_SIZE: usize = Self::DEFAULT_BUFFER_SIZE_KB * 1024;
30
31    pub fn new(file1: &'a FileItem, file2: &'a FileItem) -> Self {
32        Self {
33            file1,
34            file2,
35            buffer_size: Self::DEFAULT_BUFFER_SIZE,
36            hashers: None,
37        }
38    }
39
40    pub fn sizes(&self) -> (u64, u64) {
41        (self.file1.size(), self.file2.size())
42    }
43
44    pub fn modified(&self) -> (std::time::SystemTime, std::time::SystemTime) {
45        (self.file1.modified(), self.file2.modified())
46    }
47
48    pub(crate) fn compare_contents(&self) -> anyhow::Result<bool> {
49        let len1 = self.file1.size();
50        let len2 = self.file2.size();
51        if len1 != len2 {
52            return Ok(false);
53        }
54        if len1 == 0 {
55            return Ok(true);
56        }
57
58        if let Some((hasher1, hasher2)) = self.hashers {
59            let (hash1, hash2) = rayon::join(
60                || hasher1.get_hash(self.file1),
61                || hasher2.get_hash(self.file2),
62            );
63            return Ok(hash1? == hash2?);
64        }
65
66        let start_time = std::time::Instant::now();
67        let mut f1 = fs::File::open(self.file1.path())?;
68        let mut f2 = fs::File::open(self.file2.path())?;
69        if self.buffer_size == 0 {
70            let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
71            let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
72            let result = mmap1[..] == mmap2[..];
73            log::debug!(
74                "Compared in {}: '{}'",
75                FormattedDuration(start_time.elapsed()),
76                self.file1
77            );
78            return Ok(result);
79        }
80
81        let mut buf1 = vec![0u8; self.buffer_size];
82        let mut buf2 = vec![0u8; self.buffer_size];
83        loop {
84            // Safety from Deadlocks: rayon::join is specifically designed for nested parallelism.
85            // It uses work-stealing, meaning if all threads in the pool are busy, the thread
86            // calling join will just execute both tasks itself.
87            let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
88            let n1 = n1?;
89            let n2 = n2?;
90            if n1 != n2 || buf1[..n1] != buf2[..n2] {
91                log::debug!(
92                    "Compared in {}: '{}'",
93                    FormattedDuration(start_time.elapsed()),
94                    self.file1
95                );
96                return Ok(false);
97            }
98            if n1 == 0 {
99                log::debug!(
100                    "Compared in {}: '{}'",
101                    FormattedDuration(start_time.elapsed()),
102                    self.file1
103                );
104                return Ok(true);
105            }
106        }
107    }
108}
109
110/// Detailed result of comparing a single file.
111#[derive(Debug, Clone)]
112pub struct FileComparisonResult {
113    /// The path relative to the root of the directories.
114    pub relative_path: PathBuf,
115    /// Whether the file exists in one or both directories.
116    pub classification: Classification,
117    /// Comparison of the last modified time, if applicable.
118    pub modified_time_comparison: Option<Ordering>,
119    /// Comparison of the file size, if applicable.
120    pub size_comparison: Option<Ordering>,
121    /// Whether the content is byte-for-byte identical, if applicable.
122    pub is_content_same: Option<bool>,
123}
124
125impl FileComparisonResult {
126    pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
127        Self {
128            relative_path,
129            classification,
130            modified_time_comparison: None,
131            size_comparison: None,
132            is_content_same: None,
133        }
134    }
135
136    pub fn update(
137        &mut self,
138        comparer: &FileComparer,
139        should_compare_content: bool,
140    ) -> anyhow::Result<()> {
141        let (t1, t2) = comparer.modified();
142        self.modified_time_comparison = Some(t1.cmp(&t2));
143
144        let (s1, s2) = comparer.sizes();
145        self.size_comparison = Some(s1.cmp(&s2));
146
147        if should_compare_content && s1 == s2 {
148            self.is_content_same = Some(comparer.compare_contents()?);
149        }
150        Ok(())
151    }
152
153    /// True if the two files are identical; i.e., modified times and sizes are
154    /// the same. Contents are the same too, or content comparison was skipped.
155    pub fn is_identical(&self) -> bool {
156        self.classification == Classification::InBoth
157            && self.modified_time_comparison == Some(Ordering::Equal)
158            && self.size_comparison == Some(Ordering::Equal)
159            && self.is_content_same != Some(false)
160    }
161
162    pub fn to_symbol_string(&self) -> String {
163        String::from_iter([
164            match self.classification {
165                Classification::OnlyInDir1 => '>',
166                Classification::OnlyInDir2 => '<',
167                Classification::InBoth => '=',
168            },
169            match self.modified_time_comparison {
170                None => ' ',
171                Some(Ordering::Greater) => '>',
172                Some(Ordering::Less) => '<',
173                Some(Ordering::Equal) => '=',
174            },
175            match self.size_comparison {
176                None => ' ',
177                Some(Ordering::Greater) => '>',
178                Some(Ordering::Less) => '<',
179                Some(Ordering::Equal) => {
180                    if self.is_content_same == Some(false) {
181                        '!'
182                    } else {
183                        '='
184                    }
185                }
186            },
187        ])
188    }
189
190    pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
191        let mut parts = Vec::new();
192        match self.classification {
193            Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
194            Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
195            Classification::InBoth => {}
196        }
197        let mut has_equals = false;
198        match self.modified_time_comparison {
199            Some(Ordering::Greater) => parts.push(format!("{} is newer", dir1_name)),
200            Some(Ordering::Less) => parts.push(format!("{} is newer", dir2_name)),
201            Some(Ordering::Equal) => has_equals = true,
202            None => {}
203        }
204        match self.size_comparison {
205            Some(Ordering::Greater) => parts.push(format!("Size of {} is larger", dir1_name)),
206            Some(Ordering::Less) => parts.push(format!("Size of {} is larger", dir2_name)),
207            Some(Ordering::Equal) => has_equals = true,
208            None => {}
209        }
210        match self.is_content_same {
211            Some(false) => parts.push("Contents differ".to_string()),
212            Some(true) => has_equals = true,
213            None => {}
214        }
215
216        if parts.is_empty() {
217            if !has_equals {
218                return "Unknown".to_string();
219            }
220            return "Identical".to_string();
221        }
222        parts.join(", ")
223    }
224}
225
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes `content1` and `content2` to files in two fresh temporary
    /// directories and asserts that every comparison strategy (chunked reads
    /// with a large and a tiny buffer, mmap, and hashers) returns `expected`.
    fn check_compare(content1: &[u8], content2: &[u8], expected: bool) -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;
        let file1_path = dir1.path().join("file");
        let file2_path = dir2.path().join("file");
        fs::write(&file1_path, content1)?;
        fs::write(&file2_path, content2)?;
        let file1 = FileItem::try_from(file1_path.as_path())?;
        let file2 = FileItem::try_from(file2_path.as_path())?;

        // Without hashers
        let mut comparer = FileComparer::new(&file1, &file2);
        comparer.buffer_size = 8192;
        assert_eq!(comparer.compare_contents()?, expected);

        // Without hashers, with a buffer smaller than the content so that the
        // read loop runs for several chunks.
        comparer.buffer_size = 4;
        assert_eq!(comparer.compare_contents()?, expected);

        // Use mmap without hashers
        comparer.buffer_size = 0;
        assert_eq!(comparer.compare_contents()?, expected);

        // With hashers
        let hasher1 = FileHasher::new_with_cache(&[dir1.path()])?;
        let hasher2 = FileHasher::new_with_cache(&[dir2.path()])?;
        comparer.hashers = Some((&hasher1, &hasher2));
        assert_eq!(comparer.compare_contents()?, expected);

        Ok(())
    }

    #[test]
    fn compare_contents_identical() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello world", true)
    }

    #[test]
    fn compare_contents_different() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello rust", false)
    }

    #[test]
    fn compare_contents_different_size() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello", false)
    }

    #[test]
    fn compare_contents_same_size_differs_at_end() -> anyhow::Result<()> {
        // Same length, so the size shortcut does not apply; the difference is
        // in the last byte, i.e. in the final chunk of the tiny-buffer pass.
        check_compare(b"hello world", b"hello worle", false)
    }

    #[test]
    fn compare_contents_empty_files() -> anyhow::Result<()> {
        check_compare(b"", b"", true)
    }

    #[test]
    fn comparison_result_empty() {
        let result = FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        assert!(!result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Unknown");
        assert_eq!(result.to_symbol_string(), "=  ");
    }

    #[test]
    fn comparison_result_contents_skipped() {
        let mut result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        result.modified_time_comparison = Some(Ordering::Equal);
        result.size_comparison = Some(Ordering::Equal);
        assert!(result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Identical");
        assert_eq!(result.to_symbol_string(), "===");
    }

    #[test]
    fn comparison_result_contents_differ() {
        let mut result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        result.modified_time_comparison = Some(Ordering::Equal);
        result.size_comparison = Some(Ordering::Equal);
        result.is_content_same = Some(false);
        assert!(!result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Contents differ");
        assert_eq!(result.to_symbol_string(), "==!");
    }

    #[test]
    fn comparison_result_only_in_dir1() {
        let result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::OnlyInDir1);
        assert!(!result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Only in dir1");
        assert_eq!(result.to_symbol_string(), ">  ");
    }

    #[test]
    fn comparison_result_newer_and_smaller() {
        let mut result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        result.modified_time_comparison = Some(Ordering::Greater);
        result.size_comparison = Some(Ordering::Less);
        assert!(!result.is_identical());
        assert_eq!(
            result.to_string("dir1", "dir2"),
            "dir1 is newer, Size of dir2 is larger"
        );
        assert_eq!(result.to_symbol_string(), "=><");
    }
}
296}