Skip to main content

compare_dir/
file_comparer.rs

1use crate::file_hasher::FileHasher;
2use std::cmp::Ordering;
3use std::fs;
4use std::io::{self, Read};
5use std::path::{Path, PathBuf};
6
/// How a file is classified during comparison, i.e. in which of the two
/// directories it was found.
///
/// The type is a cheap, field-less value: it is `Copy`, comparable with `==`,
/// and hashable so it can be used as a key in maps and sets (for example to
/// count results per classification).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Classification {
    /// File exists only in the first directory.
    OnlyInDir1,
    /// File exists only in the second directory.
    OnlyInDir2,
    /// File exists in both directories.
    InBoth,
}
17
/// Compares the content of two files.
///
/// The comparison strategy is selected by the public fields: hashes are
/// compared when `hashers` is set; otherwise the files are read directly,
/// either through memory maps (`buffer_size == 0`) or in chunks of
/// `buffer_size` bytes.
pub struct FileComparer<'a> {
    /// Path of the first file.
    path1: &'a Path,
    /// Path of the second file.
    path2: &'a Path,
    /// Size in bytes of each of the two read buffers used by the chunked
    /// comparison. A value of `0` makes `compare_contents` compare
    /// memory-mapped views of the files instead of reading them in chunks.
    pub buffer_size: usize,
    /// Optional hashers for the first and the second file, in that order.
    /// When set, `compare_contents` compares the hashes they return and does
    /// not open the files itself.
    pub hashers: Option<(&'a FileHasher, &'a FileHasher)>,
}
25
26impl<'a> FileComparer<'a> {
27    pub const DEFAULT_BUFFER_SIZE_KB: usize = 64;
28    pub const DEFAULT_BUFFER_SIZE: usize = Self::DEFAULT_BUFFER_SIZE_KB * 1024;
29
30    pub fn new(path1: &'a Path, path2: &'a Path) -> Self {
31        Self {
32            path1,
33            path2,
34            buffer_size: Self::DEFAULT_BUFFER_SIZE,
35            hashers: None,
36        }
37    }
38
39    pub fn metadata(&self) -> io::Result<(fs::Metadata, fs::Metadata)> {
40        let m1 = fs::metadata(self.path1)?;
41        let m2 = fs::metadata(self.path2)?;
42        Ok((m1, m2))
43    }
44
45    pub(crate) fn compare_contents(&self) -> io::Result<bool> {
46        if let Some((hasher1, hasher2)) = self.hashers {
47            let hash1 = hasher1.get_hash(self.path1)?;
48            let hash2 = hasher2.get_hash(self.path2)?;
49            return Ok(hash1 == hash2);
50        }
51
52        let mut f1 = fs::File::open(self.path1)?;
53        let mut f2 = fs::File::open(self.path2)?;
54
55        if self.buffer_size == 0 {
56            let len1 = f1.metadata()?.len();
57            let len2 = f2.metadata()?.len();
58            if len1 != len2 {
59                return Ok(false);
60            }
61            if len1 == 0 {
62                return Ok(true);
63            }
64
65            let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
66            let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
67            return Ok(mmap1[..] == mmap2[..]);
68        }
69
70        let mut buf1 = vec![0u8; self.buffer_size];
71        let mut buf2 = vec![0u8; self.buffer_size];
72
73        loop {
74            // Safety from Deadlocks: rayon::join is specifically designed for nested parallelism.
75            // It uses work-stealing, meaning if all threads in the pool are busy, the thread
76            // calling join will just execute both tasks itself.
77            let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
78            let n1 = n1?;
79            let n2 = n2?;
80
81            if n1 != n2 || buf1[..n1] != buf2[..n2] {
82                return Ok(false);
83            }
84
85            if n1 == 0 {
86                return Ok(true);
87            }
88        }
89    }
90}
91
92/// Detailed result of comparing a single file.
93#[derive(Debug, Clone)]
94pub struct FileComparisonResult {
95    /// The path relative to the root of the directories.
96    pub relative_path: PathBuf,
97    /// Whether the file exists in one or both directories.
98    pub classification: Classification,
99    /// Comparison of the last modified time, if applicable.
100    pub modified_time_comparison: Option<Ordering>,
101    /// Comparison of the file size, if applicable.
102    pub size_comparison: Option<Ordering>,
103    /// Whether the content is byte-for-byte identical, if applicable.
104    pub is_content_same: Option<bool>,
105}
106
107impl FileComparisonResult {
108    pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
109        Self {
110            relative_path,
111            classification,
112            modified_time_comparison: None,
113            size_comparison: None,
114            is_content_same: None,
115        }
116    }
117
118    pub fn update(
119        &mut self,
120        comparer: &FileComparer,
121        should_compare_content: bool,
122    ) -> anyhow::Result<()> {
123        let (m1, m2) = comparer.metadata()?;
124        let t1 = m1.modified()?;
125        let t2 = m2.modified()?;
126        self.modified_time_comparison = Some(t1.cmp(&t2));
127
128        let s1 = m1.len();
129        let s2 = m2.len();
130        self.size_comparison = Some(s1.cmp(&s2));
131
132        if should_compare_content && s1 == s2 {
133            log::trace!("Comparing content: {:?}", self.relative_path);
134            self.is_content_same = Some(comparer.compare_contents()?);
135        }
136        Ok(())
137    }
138
139    /// True if the two files are identical; i.e., modified times and sizes are
140    /// the same. Contents are the same too, or content comparison was skipped.
141    pub fn is_identical(&self) -> bool {
142        self.classification == Classification::InBoth
143            && self.modified_time_comparison == Some(Ordering::Equal)
144            && self.size_comparison == Some(Ordering::Equal)
145            && self.is_content_same != Some(false)
146    }
147
148    pub fn to_symbol_string(&self) -> String {
149        String::from_iter([
150            match self.classification {
151                Classification::OnlyInDir1 => '>',
152                Classification::OnlyInDir2 => '<',
153                Classification::InBoth => '=',
154            },
155            match self.modified_time_comparison {
156                None => ' ',
157                Some(Ordering::Greater) => '>',
158                Some(Ordering::Less) => '<',
159                Some(Ordering::Equal) => '=',
160            },
161            match self.size_comparison {
162                None => ' ',
163                Some(Ordering::Greater) => '>',
164                Some(Ordering::Less) => '<',
165                Some(Ordering::Equal) => {
166                    if self.is_content_same == Some(false) {
167                        '!'
168                    } else {
169                        '='
170                    }
171                }
172            },
173        ])
174    }
175
176    pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
177        let mut parts = Vec::new();
178        match self.classification {
179            Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
180            Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
181            Classification::InBoth => {}
182        }
183        match self.modified_time_comparison {
184            Some(Ordering::Greater) => parts.push(format!("{} is newer", dir1_name)),
185            Some(Ordering::Less) => parts.push(format!("{} is newer", dir2_name)),
186            Some(Ordering::Equal) | None => {}
187        }
188        match self.size_comparison {
189            Some(Ordering::Greater) => parts.push(format!("Size of {} is larger", dir1_name)),
190            Some(Ordering::Less) => parts.push(format!("Size of {} is larger", dir2_name)),
191            Some(Ordering::Equal) | None => {}
192        }
193        if self.is_content_same == Some(false) {
194            parts.push("Contents differ".to_string());
195        }
196
197        if parts.is_empty() {
198            "Identical".to_string()
199        } else {
200            parts.join(", ")
201        }
202    }
203}
204
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes the two contents to temporary files and asserts that every
    /// comparison strategy of `FileComparer::compare_contents` reports
    /// `expected`.
    fn check_compare(content1: &[u8], content2: &[u8], expected: bool) -> io::Result<()> {
        let mut f1 = NamedTempFile::new()?;
        let mut f2 = NamedTempFile::new()?;
        f1.write_all(content1)?;
        f2.write_all(content2)?;
        f1.as_file().sync_all()?;
        f2.as_file().sync_all()?;

        // Without hashers:
        // - 8192 holds the whole test content in a single chunk,
        // - 4 forces the chunked loop to run over several chunks,
        // - 0 selects the memory-mapped comparison.
        let mut comparer = FileComparer::new(f1.path(), f2.path());
        for buffer_size in [8192, 4, 0] {
            comparer.buffer_size = buffer_size;
            assert_eq!(
                comparer.compare_contents()?,
                expected,
                "buffer_size = {}",
                buffer_size
            );
        }

        // With hashers
        let dir1 = f1.path().parent().unwrap();
        let dir2 = f2.path().parent().unwrap();

        let hasher1 = FileHasher::new(dir1.to_path_buf());
        let hasher2 = FileHasher::new(dir2.to_path_buf());

        let mut comparer_hash = FileComparer::new(f1.path(), f2.path());
        comparer_hash.hashers = Some((&hasher1, &hasher2));

        assert_eq!(comparer_hash.compare_contents()?, expected);

        Ok(())
    }

    #[test]
    fn test_compare_contents_identical() -> io::Result<()> {
        check_compare(b"hello world", b"hello world", true)
    }

    #[test]
    fn test_compare_contents_different() -> io::Result<()> {
        check_compare(b"hello world", b"hello rust", false)
    }

    #[test]
    fn test_compare_contents_different_size() -> io::Result<()> {
        check_compare(b"hello world", b"hello", false)
    }

    #[test]
    fn test_compare_contents_empty_files() -> io::Result<()> {
        check_compare(b"", b"", true)
    }

    /// Same length, and the only difference sits in the last chunk when the
    /// files are read with a small buffer.
    #[test]
    fn test_compare_contents_same_size_different_tail() -> io::Result<()> {
        check_compare(b"hello world", b"hello worlD", false)
    }

    /// One file is a prefix of the other and ends exactly on a chunk
    /// boundary of the small buffer.
    #[test]
    fn test_compare_contents_prefix_at_chunk_boundary() -> io::Result<()> {
        check_compare(b"12345678", b"1234", false)
    }
}