Skip to main content

compare_dir/
file_comparer.rs

1use crate::file_hasher::FileHasher;
2use std::cmp::Ordering;
3use std::fs;
4use std::io::{self, Read};
5use std::path::{Path, PathBuf};
6
/// Where a file was found when the two directory trees were walked.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Classification {
    /// Present in the first directory, absent from the second.
    OnlyInDir1,
    /// Present in the second directory, absent from the first.
    OnlyInDir2,
    /// Present in both directories.
    InBoth,
}
17
18/// Compares the content of two files.
19pub struct FileComparer<'a> {
20    path1: &'a Path,
21    path2: &'a Path,
22    pub buffer_size: usize,
23    pub hashers: Option<(&'a FileHasher, &'a FileHasher)>,
24}
25
26impl<'a> FileComparer<'a> {
27    pub const DEFAULT_BUFFER_SIZE_KB: usize = 64;
28    pub const DEFAULT_BUFFER_SIZE: usize = Self::DEFAULT_BUFFER_SIZE_KB * 1024;
29
30    pub fn new(path1: &'a Path, path2: &'a Path) -> Self {
31        Self {
32            path1,
33            path2,
34            buffer_size: Self::DEFAULT_BUFFER_SIZE,
35            hashers: None,
36        }
37    }
38
39    pub fn metadata(&self) -> io::Result<(fs::Metadata, fs::Metadata)> {
40        let m1 = fs::metadata(self.path1)?;
41        let m2 = fs::metadata(self.path2)?;
42        Ok((m1, m2))
43    }
44
45    pub(crate) fn compare_contents(&self) -> io::Result<bool> {
46        if let Some((hasher1, hasher2)) = self.hashers {
47            let (hash1, hash2) = rayon::join(
48                || hasher1.get_hash(self.path1),
49                || hasher2.get_hash(self.path2),
50            );
51            return Ok(hash1? == hash2?);
52        }
53
54        let start_time = std::time::Instant::now();
55        let mut f1 = fs::File::open(self.path1)?;
56        let mut f2 = fs::File::open(self.path2)?;
57        if self.buffer_size == 0 {
58            let len1 = f1.metadata()?.len();
59            let len2 = f2.metadata()?.len();
60            if len1 != len2 {
61                return Ok(false);
62            }
63            if len1 == 0 {
64                return Ok(true);
65            }
66            let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
67            let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
68            let result = mmap1[..] == mmap2[..];
69            log::debug!("Compared in {:?}: {:?}", start_time.elapsed(), self.path1);
70            return Ok(result);
71        }
72
73        let mut buf1 = vec![0u8; self.buffer_size];
74        let mut buf2 = vec![0u8; self.buffer_size];
75        loop {
76            // Safety from Deadlocks: rayon::join is specifically designed for nested parallelism.
77            // It uses work-stealing, meaning if all threads in the pool are busy, the thread
78            // calling join will just execute both tasks itself.
79            let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
80            let n1 = n1?;
81            let n2 = n2?;
82            if n1 != n2 || buf1[..n1] != buf2[..n2] {
83                log::debug!("Compared in {:?}: {:?}", start_time.elapsed(), self.path1);
84                return Ok(false);
85            }
86            if n1 == 0 {
87                log::debug!("Compared in {:?}: {:?}", start_time.elapsed(), self.path1);
88                return Ok(true);
89            }
90        }
91    }
92}
93
94/// Detailed result of comparing a single file.
95#[derive(Debug, Clone)]
96pub struct FileComparisonResult {
97    /// The path relative to the root of the directories.
98    pub relative_path: PathBuf,
99    /// Whether the file exists in one or both directories.
100    pub classification: Classification,
101    /// Comparison of the last modified time, if applicable.
102    pub modified_time_comparison: Option<Ordering>,
103    /// Comparison of the file size, if applicable.
104    pub size_comparison: Option<Ordering>,
105    /// Whether the content is byte-for-byte identical, if applicable.
106    pub is_content_same: Option<bool>,
107}
108
109impl FileComparisonResult {
110    pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
111        Self {
112            relative_path,
113            classification,
114            modified_time_comparison: None,
115            size_comparison: None,
116            is_content_same: None,
117        }
118    }
119
120    pub fn update(
121        &mut self,
122        comparer: &FileComparer,
123        should_compare_content: bool,
124    ) -> anyhow::Result<()> {
125        let (m1, m2) = comparer.metadata()?;
126        let t1 = m1.modified()?;
127        let t2 = m2.modified()?;
128        self.modified_time_comparison = Some(t1.cmp(&t2));
129
130        let s1 = m1.len();
131        let s2 = m2.len();
132        self.size_comparison = Some(s1.cmp(&s2));
133
134        if should_compare_content && s1 == s2 {
135            self.is_content_same = Some(comparer.compare_contents()?);
136        }
137        Ok(())
138    }
139
140    /// True if the two files are identical; i.e., modified times and sizes are
141    /// the same. Contents are the same too, or content comparison was skipped.
142    pub fn is_identical(&self) -> bool {
143        self.classification == Classification::InBoth
144            && self.modified_time_comparison == Some(Ordering::Equal)
145            && self.size_comparison == Some(Ordering::Equal)
146            && self.is_content_same != Some(false)
147    }
148
149    pub fn to_symbol_string(&self) -> String {
150        String::from_iter([
151            match self.classification {
152                Classification::OnlyInDir1 => '>',
153                Classification::OnlyInDir2 => '<',
154                Classification::InBoth => '=',
155            },
156            match self.modified_time_comparison {
157                None => ' ',
158                Some(Ordering::Greater) => '>',
159                Some(Ordering::Less) => '<',
160                Some(Ordering::Equal) => '=',
161            },
162            match self.size_comparison {
163                None => ' ',
164                Some(Ordering::Greater) => '>',
165                Some(Ordering::Less) => '<',
166                Some(Ordering::Equal) => {
167                    if self.is_content_same == Some(false) {
168                        '!'
169                    } else {
170                        '='
171                    }
172                }
173            },
174        ])
175    }
176
177    pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
178        let mut parts = Vec::new();
179        match self.classification {
180            Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
181            Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
182            Classification::InBoth => {}
183        }
184        let mut has_equals = false;
185        match self.modified_time_comparison {
186            Some(Ordering::Greater) => parts.push(format!("{} is newer", dir1_name)),
187            Some(Ordering::Less) => parts.push(format!("{} is newer", dir2_name)),
188            Some(Ordering::Equal) => has_equals = true,
189            None => {}
190        }
191        match self.size_comparison {
192            Some(Ordering::Greater) => parts.push(format!("Size of {} is larger", dir1_name)),
193            Some(Ordering::Less) => parts.push(format!("Size of {} is larger", dir2_name)),
194            Some(Ordering::Equal) => has_equals = true,
195            None => {}
196        }
197        match self.is_content_same {
198            Some(false) => parts.push("Contents differ".to_string()),
199            Some(true) => has_equals = true,
200            None => {}
201        }
202
203        if parts.is_empty() {
204            if !has_equals {
205                return "Unknown".to_string();
206            }
207            return "Identical".to_string();
208        }
209        parts.join(", ")
210    }
211}
212
#[cfg(test)]
mod tests {
    use super::*;

    /// Buffer size used for the buffered comparison path in `check_compare`.
    const TEST_BUFFER_SIZE: usize = 8192;

    /// Writes `content1` and `content2` to two temporary files and asserts that
    /// every comparison strategy (buffered, mmap, hashers) returns `expected`.
    fn check_compare(content1: &[u8], content2: &[u8], expected: bool) -> io::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;
        let f1_path = dir1.path().join("file");
        let f2_path = dir2.path().join("file");
        fs::write(&f1_path, content1)?;
        fs::write(&f2_path, content2)?;

        // Without hashers
        let mut comparer = FileComparer::new(&f1_path, &f2_path);
        comparer.buffer_size = TEST_BUFFER_SIZE;
        assert_eq!(comparer.compare_contents()?, expected);

        // Use mmap without hashers
        comparer.buffer_size = 0;
        assert_eq!(comparer.compare_contents()?, expected);

        // With hashers
        let hasher1 = FileHasher::new(dir1.path().to_path_buf());
        let hasher2 = FileHasher::new(dir2.path().to_path_buf());
        comparer.hashers = Some((&hasher1, &hasher2));
        assert_eq!(comparer.compare_contents()?, expected);

        Ok(())
    }

    #[test]
    fn compare_contents_identical() -> io::Result<()> {
        check_compare(b"hello world", b"hello world", true)
    }

    #[test]
    fn compare_contents_different() -> io::Result<()> {
        check_compare(b"hello world", b"hello rust", false)
    }

    #[test]
    fn compare_contents_different_size() -> io::Result<()> {
        check_compare(b"hello world", b"hello", false)
    }

    #[test]
    fn compare_contents_empty_files() -> io::Result<()> {
        check_compare(b"", b"", true)
    }

    /// Content longer than the buffer forces the buffered loop to iterate
    /// several times; a difference in the final partial chunk must be seen.
    #[test]
    fn compare_contents_spanning_multiple_buffers() -> io::Result<()> {
        let content = vec![0x5A_u8; TEST_BUFFER_SIZE * 2 + 17];
        check_compare(&content, &content, true)?;

        let mut altered = content.clone();
        *altered.last_mut().expect("content is non-empty") ^= 0xFF;
        check_compare(&content, &altered, false)
    }

    /// Content whose length is an exact multiple of the buffer size ends on a
    /// chunk boundary, so EOF is only observed by a read that returns nothing.
    #[test]
    fn compare_contents_exact_buffer_multiple() -> io::Result<()> {
        let content = vec![0x5A_u8; TEST_BUFFER_SIZE * 2];
        check_compare(&content, &content, true)?;

        let mut altered = content.clone();
        // First byte of the second chunk.
        altered[TEST_BUFFER_SIZE] ^= 0xFF;
        check_compare(&content, &altered, false)
    }

    #[test]
    fn comparison_result_empty() {
        let result = FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        assert!(!result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Unknown");
        assert_eq!(result.to_symbol_string(), "=  ");
    }

    #[test]
    fn comparison_result_contents_skipped() {
        let mut result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        result.modified_time_comparison = Some(Ordering::Equal);
        result.size_comparison = Some(Ordering::Equal);
        assert!(result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Identical");
        assert_eq!(result.to_symbol_string(), "===");
    }

    #[test]
    fn comparison_result_contents_differ() {
        let mut result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        result.modified_time_comparison = Some(Ordering::Equal);
        result.size_comparison = Some(Ordering::Equal);
        result.is_content_same = Some(false);
        assert!(!result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Contents differ");
        assert_eq!(result.to_symbol_string(), "==!");
    }

    #[test]
    fn comparison_result_time_and_size_differ() {
        let mut result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        result.modified_time_comparison = Some(Ordering::Greater);
        result.size_comparison = Some(Ordering::Less);
        assert!(!result.is_identical());
        assert_eq!(
            result.to_string("dir1", "dir2"),
            "dir1 is newer, Size of dir2 is larger"
        );
        assert_eq!(result.to_symbol_string(), "=><");
    }

    #[test]
    fn comparison_result_only_in_dir1() {
        let result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::OnlyInDir1);
        assert!(!result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Only in dir1");
        assert_eq!(result.to_symbol_string(), ">  ");
    }
}