Skip to main content

compare_dir/
file_comparer.rs

1use crate::{FileHasher, FileItem, SystemTimeExt};
2use indicatif::FormattedDuration;
3use std::cmp::Ordering;
4use std::fs;
5use std::io::Read;
6use std::path::PathBuf;
7use std::time::SystemTime;
8
/// Where a file was found when two directories are compared.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Classification {
    /// Present in the first directory only.
    OnlyInDir1,
    /// Present in the second directory only.
    OnlyInDir2,
    /// Present in both directories.
    InBoth,
}
19
20/// Compares the content of two files.
21pub struct FileComparer<'a> {
22    file1: &'a FileItem,
23    file2: &'a FileItem,
24    pub buffer_size: usize,
25    pub hashers: Option<(&'a FileHasher, &'a FileHasher)>,
26}
27
28impl<'a> FileComparer<'a> {
29    pub const DEFAULT_BUFFER_SIZE_KB: usize = 2 * 1024;
30    pub const DEFAULT_BUFFER_SIZE: usize = Self::DEFAULT_BUFFER_SIZE_KB * 1024;
31
32    pub fn new(file1: &'a FileItem, file2: &'a FileItem) -> Self {
33        Self {
34            file1,
35            file2,
36            buffer_size: Self::DEFAULT_BUFFER_SIZE,
37            hashers: None,
38        }
39    }
40
41    pub fn sizes(&self) -> (u64, u64) {
42        (self.file1.size(), self.file2.size())
43    }
44
45    pub fn modified(&self) -> (std::time::SystemTime, std::time::SystemTime) {
46        (self.file1.modified(), self.file2.modified())
47    }
48
49    pub(crate) fn compare_contents(&self) -> anyhow::Result<bool> {
50        let len1 = self.file1.size();
51        let len2 = self.file2.size();
52        if len1 != len2 {
53            return Ok(false);
54        }
55        if len1 == 0 {
56            return Ok(true);
57        }
58
59        if let Some((hasher1, hasher2)) = self.hashers {
60            let (hash1, hash2) = rayon::join(
61                || hasher1.get_hash(self.file1),
62                || hasher2.get_hash(self.file2),
63            );
64            return Ok(hash1? == hash2?);
65        }
66
67        let start_time = std::time::Instant::now();
68        let mut f1 = fs::File::open(self.file1.path())?;
69        let mut f2 = fs::File::open(self.file2.path())?;
70        if self.buffer_size == 0 {
71            let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
72            let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
73            let result = mmap1[..] == mmap2[..];
74            log::debug!(
75                "Compared in {}: '{}'",
76                FormattedDuration(start_time.elapsed()),
77                self.file1
78            );
79            return Ok(result);
80        }
81
82        let mut buf1 = vec![0u8; self.buffer_size];
83        let mut buf2 = vec![0u8; self.buffer_size];
84        loop {
85            // Safety from Deadlocks: rayon::join is specifically designed for nested parallelism.
86            // It uses work-stealing, meaning if all threads in the pool are busy, the thread
87            // calling join will just execute both tasks itself.
88            let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
89            let n1 = n1?;
90            let n2 = n2?;
91            if n1 != n2 || buf1[..n1] != buf2[..n2] {
92                log::debug!(
93                    "Compared in {}: '{}'",
94                    FormattedDuration(start_time.elapsed()),
95                    self.file1
96                );
97                return Ok(false);
98            }
99            if n1 == 0 {
100                log::debug!(
101                    "Compared in {}: '{}'",
102                    FormattedDuration(start_time.elapsed()),
103                    self.file1
104                );
105                return Ok(true);
106            }
107        }
108    }
109}
110
111/// Detailed result of comparing a single file.
112#[derive(Debug, Clone)]
113pub struct FileComparisonResult {
114    /// The path relative to the root of the directories.
115    pub relative_path: PathBuf,
116    /// Whether the file exists in one or both directories.
117    pub classification: Classification,
118    /// Comparison of the last modified time, if applicable.
119    pub modified_time_comparison: Option<Ordering>,
120    /// Comparison of the file size, if applicable.
121    pub size_comparison: Option<Ordering>,
122    /// Whether the content is byte-for-byte identical, if applicable.
123    pub is_content_same: Option<bool>,
124}
125
126impl FileComparisonResult {
127    pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
128        Self {
129            relative_path,
130            classification,
131            modified_time_comparison: None,
132            size_comparison: None,
133            is_content_same: None,
134        }
135    }
136
137    pub fn update(
138        &mut self,
139        comparer: &FileComparer,
140        should_compare_content: bool,
141    ) -> anyhow::Result<()> {
142        let (t1, t2) = comparer.modified();
143        self.modified_time_comparison = Some(t1.cmp(&t2));
144
145        let (s1, s2) = comparer.sizes();
146        self.size_comparison = Some(s1.cmp(&s2));
147
148        if should_compare_content && s1 == s2 {
149            self.is_content_same = Some(comparer.compare_contents()?);
150        }
151        Ok(())
152    }
153
154    pub(crate) fn update_moodified(&mut self, t1: SystemTime, t2: SystemTime) {
155        self.modified_time_comparison = Some(if t1.eq_nearly(t2) {
156            Ordering::Equal
157        } else {
158            t1.cmp(&t2)
159        })
160    }
161
162    pub(crate) fn update_size(&mut self, s1: u64, s2: u64) {
163        self.size_comparison = Some(s1.cmp(&s2));
164    }
165
166    /// True if the two files are identical; i.e., modified times and sizes are
167    /// the same. Contents are the same too, or content comparison was skipped.
168    pub fn is_identical(&self) -> bool {
169        self.classification == Classification::InBoth
170            && self.modified_time_comparison == Some(Ordering::Equal)
171            && self.size_comparison == Some(Ordering::Equal)
172            && self.is_content_same != Some(false)
173    }
174
175    pub(crate) fn is_identical_content(&self) -> Option<bool> {
176        match self.size_comparison {
177            None | Some(Ordering::Equal) => self.is_content_same,
178            _ => Some(false),
179        }
180    }
181
182    pub fn to_symbol_string(&self) -> String {
183        String::from_iter([
184            match self.classification {
185                Classification::OnlyInDir1 => '>',
186                Classification::OnlyInDir2 => '<',
187                Classification::InBoth => '=',
188            },
189            match self.modified_time_comparison {
190                None => ' ',
191                Some(Ordering::Greater) => '>',
192                Some(Ordering::Less) => '<',
193                Some(Ordering::Equal) => '=',
194            },
195            match self.size_comparison {
196                None => ' ',
197                Some(Ordering::Greater) => '>',
198                Some(Ordering::Less) => '<',
199                Some(Ordering::Equal) => {
200                    if self.is_content_same == Some(false) {
201                        '!'
202                    } else {
203                        '='
204                    }
205                }
206            },
207        ])
208    }
209
210    pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
211        let mut parts = Vec::new();
212        match self.classification {
213            Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
214            Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
215            Classification::InBoth => {}
216        }
217        let mut has_equals = false;
218        match self.modified_time_comparison {
219            Some(Ordering::Greater) => parts.push(format!("{} is newer", dir1_name)),
220            Some(Ordering::Less) => parts.push(format!("{} is newer", dir2_name)),
221            Some(Ordering::Equal) => has_equals = true,
222            None => {}
223        }
224        match self.size_comparison {
225            Some(Ordering::Greater) => parts.push(format!("Size of {} is larger", dir1_name)),
226            Some(Ordering::Less) => parts.push(format!("Size of {} is larger", dir2_name)),
227            Some(Ordering::Equal) => has_equals = true,
228            None => {}
229        }
230        match self.is_content_same {
231            Some(false) => parts.push("Contents differ".to_string()),
232            Some(true) => has_equals = true,
233            None => {}
234        }
235
236        if parts.is_empty() {
237            if !has_equals {
238                return "Unknown".to_string();
239            }
240            return "Identical".to_string();
241        }
242        parts.join(", ")
243    }
244}
245
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes `content1` and `content2` to two temporary files and asserts
    /// that `compare_contents` returns `expected` for the buffered, the
    /// memory-mapped, and the hasher-based strategies.
    fn check_compare(content1: &[u8], content2: &[u8], expected: bool) -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;
        let file1_path = dir1.path().join("file");
        let file2_path = dir2.path().join("file");
        fs::write(&file1_path, content1)?;
        fs::write(&file2_path, content2)?;
        let file1 = FileItem::try_from(file1_path.as_path())?;
        let file2 = FileItem::try_from(file2_path.as_path())?;

        // Without hashers
        let mut comparer = FileComparer::new(&file1, &file2);
        comparer.buffer_size = 8192;
        assert_eq!(comparer.compare_contents()?, expected);

        // Use mmap without hashers
        comparer.buffer_size = 0;
        assert_eq!(comparer.compare_contents()?, expected);

        // With hashers
        let hasher1 = FileHasher::new_with_cache(&[dir1.path()])?;
        let hasher2 = FileHasher::new_with_cache(&[dir2.path()])?;
        comparer.hashers = Some((&hasher1, &hasher2));
        assert_eq!(comparer.compare_contents()?, expected);

        Ok(())
    }

    /// Builds deterministic content that spans more than two 8192-byte
    /// buffers, so the buffered comparison has to loop.
    fn multi_buffer_content() -> Vec<u8> {
        (0..(2 * 8192 + 100) as u32).map(|i| (i % 251) as u8).collect()
    }

    #[test]
    fn compare_contents_identical() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello world", true)
    }

    #[test]
    fn compare_contents_different() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello rust", false)
    }

    #[test]
    fn compare_contents_different_size() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello", false)
    }

    #[test]
    fn compare_contents_empty_files() -> anyhow::Result<()> {
        check_compare(b"", b"", true)
    }

    #[test]
    fn compare_contents_identical_multiple_buffers() -> anyhow::Result<()> {
        let content = multi_buffer_content();
        check_compare(&content, &content, true)
    }

    #[test]
    fn compare_contents_different_in_last_buffer() -> anyhow::Result<()> {
        let content1 = multi_buffer_content();
        let mut content2 = content1.clone();
        // Same size; only the very last byte differs.
        let last = content2.len() - 1;
        content2[last] ^= 0xff;
        check_compare(&content1, &content2, false)
    }

    #[test]
    fn comparison_result_empty() {
        let result = FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        assert!(!result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Unknown");
        assert_eq!(result.to_symbol_string(), "=  ");
    }

    #[test]
    fn comparison_result_contents_skipped() {
        let mut result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        result.modified_time_comparison = Some(Ordering::Equal);
        result.size_comparison = Some(Ordering::Equal);
        assert!(result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Identical");
        assert_eq!(result.to_symbol_string(), "===");
    }

    #[test]
    fn comparison_result_contents_differ() {
        let mut result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        result.modified_time_comparison = Some(Ordering::Equal);
        result.size_comparison = Some(Ordering::Equal);
        result.is_content_same = Some(false);
        assert!(!result.is_identical());
        assert_eq!(result.is_identical_content(), Some(false));
        assert_eq!(result.to_string("dir1", "dir2"), "Contents differ");
        assert_eq!(result.to_symbol_string(), "==!");
    }

    #[test]
    fn comparison_result_only_in_dir1() {
        let result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::OnlyInDir1);
        assert!(!result.is_identical());
        assert_eq!(result.is_identical_content(), None);
        assert_eq!(result.to_string("dir1", "dir2"), "Only in dir1");
        assert_eq!(result.to_symbol_string(), ">  ");
    }

    #[test]
    fn comparison_result_time_and_size_differ() {
        let mut result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        result.modified_time_comparison = Some(Ordering::Less);
        result.size_comparison = Some(Ordering::Greater);
        assert!(!result.is_identical());
        assert_eq!(result.is_identical_content(), Some(false));
        assert_eq!(
            result.to_string("dir1", "dir2"),
            "dir2 is newer, Size of dir1 is larger"
        );
        assert_eq!(result.to_symbol_string(), "=<>");
    }
}
316}