Skip to main content

compare_dir/
file_comparer.rs

1use crate::file_hasher::FileHasher;
2use std::cmp::Ordering;
3use std::fs;
4use std::io::{self, Read};
5use std::path::{Path, PathBuf};
6
/// Where a file was found when the two directories were compared.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Classification {
    /// Present in the first directory only.
    OnlyInDir1,
    /// Present in the second directory only.
    OnlyInDir2,
    /// Present in both directories.
    InBoth,
}
17
18/// Compares the content of two files.
19pub struct FileComparer<'a> {
20    path1: &'a Path,
21    path2: &'a Path,
22    pub buffer_size: usize,
23    pub hashers: Option<(&'a FileHasher, &'a FileHasher)>,
24}
25
26impl<'a> FileComparer<'a> {
27    pub const DEFAULT_BUFFER_SIZE_KB: usize = 64;
28    pub const DEFAULT_BUFFER_SIZE: usize = Self::DEFAULT_BUFFER_SIZE_KB * 1024;
29
30    pub fn new(path1: &'a Path, path2: &'a Path) -> Self {
31        Self {
32            path1,
33            path2,
34            buffer_size: Self::DEFAULT_BUFFER_SIZE,
35            hashers: None,
36        }
37    }
38
39    pub fn metadata(&self) -> io::Result<(fs::Metadata, fs::Metadata)> {
40        let m1 = fs::metadata(self.path1)?;
41        let m2 = fs::metadata(self.path2)?;
42        Ok((m1, m2))
43    }
44
45    pub(crate) fn compare_contents(&self) -> io::Result<bool> {
46        if let Some((hasher1, hasher2)) = self.hashers {
47            let (hash1, hash2) = rayon::join(
48                || hasher1.get_hash(self.path1),
49                || hasher2.get_hash(self.path2),
50            );
51            return Ok(hash1? == hash2?);
52        }
53
54        let mut f1 = fs::File::open(self.path1)?;
55        let mut f2 = fs::File::open(self.path2)?;
56        if self.buffer_size == 0 {
57            let len1 = f1.metadata()?.len();
58            let len2 = f2.metadata()?.len();
59            if len1 != len2 {
60                return Ok(false);
61            }
62            if len1 == 0 {
63                return Ok(true);
64            }
65            let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
66            let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
67            return Ok(mmap1[..] == mmap2[..]);
68        }
69
70        let mut buf1 = vec![0u8; self.buffer_size];
71        let mut buf2 = vec![0u8; self.buffer_size];
72        loop {
73            // Safety from Deadlocks: rayon::join is specifically designed for nested parallelism.
74            // It uses work-stealing, meaning if all threads in the pool are busy, the thread
75            // calling join will just execute both tasks itself.
76            let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
77            let n1 = n1?;
78            let n2 = n2?;
79            if n1 != n2 || buf1[..n1] != buf2[..n2] {
80                return Ok(false);
81            }
82            if n1 == 0 {
83                return Ok(true);
84            }
85        }
86    }
87}
88
89/// Detailed result of comparing a single file.
90#[derive(Debug, Clone)]
91pub struct FileComparisonResult {
92    /// The path relative to the root of the directories.
93    pub relative_path: PathBuf,
94    /// Whether the file exists in one or both directories.
95    pub classification: Classification,
96    /// Comparison of the last modified time, if applicable.
97    pub modified_time_comparison: Option<Ordering>,
98    /// Comparison of the file size, if applicable.
99    pub size_comparison: Option<Ordering>,
100    /// Whether the content is byte-for-byte identical, if applicable.
101    pub is_content_same: Option<bool>,
102}
103
104impl FileComparisonResult {
105    pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
106        Self {
107            relative_path,
108            classification,
109            modified_time_comparison: None,
110            size_comparison: None,
111            is_content_same: None,
112        }
113    }
114
115    pub fn update(
116        &mut self,
117        comparer: &FileComparer,
118        should_compare_content: bool,
119    ) -> anyhow::Result<()> {
120        let (m1, m2) = comparer.metadata()?;
121        let t1 = m1.modified()?;
122        let t2 = m2.modified()?;
123        self.modified_time_comparison = Some(t1.cmp(&t2));
124
125        let s1 = m1.len();
126        let s2 = m2.len();
127        self.size_comparison = Some(s1.cmp(&s2));
128
129        if should_compare_content && s1 == s2 {
130            log::trace!("Comparing content: {:?}", self.relative_path);
131            self.is_content_same = Some(comparer.compare_contents()?);
132        }
133        Ok(())
134    }
135
136    /// True if the two files are identical; i.e., modified times and sizes are
137    /// the same. Contents are the same too, or content comparison was skipped.
138    pub fn is_identical(&self) -> bool {
139        self.classification == Classification::InBoth
140            && self.modified_time_comparison == Some(Ordering::Equal)
141            && self.size_comparison == Some(Ordering::Equal)
142            && self.is_content_same != Some(false)
143    }
144
145    pub fn to_symbol_string(&self) -> String {
146        String::from_iter([
147            match self.classification {
148                Classification::OnlyInDir1 => '>',
149                Classification::OnlyInDir2 => '<',
150                Classification::InBoth => '=',
151            },
152            match self.modified_time_comparison {
153                None => '?',
154                Some(Ordering::Greater) => '>',
155                Some(Ordering::Less) => '<',
156                Some(Ordering::Equal) => '=',
157            },
158            match self.size_comparison {
159                None => '?',
160                Some(Ordering::Greater) => '>',
161                Some(Ordering::Less) => '<',
162                Some(Ordering::Equal) => {
163                    if self.is_content_same == Some(false) {
164                        '!'
165                    } else {
166                        '='
167                    }
168                }
169            },
170        ])
171    }
172
173    pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
174        let mut parts = Vec::new();
175        match self.classification {
176            Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
177            Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
178            Classification::InBoth => {}
179        }
180        let mut has_equals = false;
181        match self.modified_time_comparison {
182            Some(Ordering::Greater) => parts.push(format!("{} is newer", dir1_name)),
183            Some(Ordering::Less) => parts.push(format!("{} is newer", dir2_name)),
184            Some(Ordering::Equal) => has_equals = true,
185            None => {}
186        }
187        match self.size_comparison {
188            Some(Ordering::Greater) => parts.push(format!("Size of {} is larger", dir1_name)),
189            Some(Ordering::Less) => parts.push(format!("Size of {} is larger", dir2_name)),
190            Some(Ordering::Equal) => has_equals = true,
191            None => {}
192        }
193        match self.is_content_same {
194            Some(false) => parts.push("Contents differ".to_string()),
195            Some(true) => has_equals = true,
196            None => {}
197        }
198
199        if parts.is_empty() {
200            if !has_equals {
201                return "Unknown".to_string();
202            }
203            return "Identical".to_string();
204        }
205        parts.join(", ")
206    }
207}
208
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temporary file holding exactly `content`, synced to disk.
    fn temp_file_with(content: &[u8]) -> io::Result<NamedTempFile> {
        let mut file = NamedTempFile::new()?;
        file.write_all(content)?;
        file.as_file().sync_all()?;
        Ok(file)
    }

    /// Asserts that every comparison strategy (buffered reads, memory-mapped,
    /// and hash-based) reports `expected` for files with the given contents.
    fn check_compare(content1: &[u8], content2: &[u8], expected: bool) -> io::Result<()> {
        let file1 = temp_file_with(content1)?;
        let file2 = temp_file_with(content2)?;
        let (path1, path2) = (file1.path(), file2.path());

        // Without hashers: 8 KiB buffered reads first, then `0` for the
        // memory-mapped path.
        let mut direct = FileComparer::new(path1, path2);
        for buffer_size in [8192, 0] {
            direct.buffer_size = buffer_size;
            assert_eq!(direct.compare_contents()?, expected);
        }

        // With hashers, each rooted at the directory containing its file.
        let hasher1 = FileHasher::new(path1.parent().unwrap().to_path_buf());
        let hasher2 = FileHasher::new(path2.parent().unwrap().to_path_buf());
        let mut hashed = FileComparer::new(path1, path2);
        hashed.hashers = Some((&hasher1, &hasher2));
        assert_eq!(hashed.compare_contents()?, expected);

        Ok(())
    }

    #[test]
    fn test_compare_contents_identical() -> io::Result<()> {
        check_compare(b"hello world", b"hello world", true)
    }

    #[test]
    fn test_compare_contents_different() -> io::Result<()> {
        check_compare(b"hello world", b"hello rust", false)
    }

    #[test]
    fn test_compare_contents_different_size() -> io::Result<()> {
        check_compare(b"hello world", b"hello", false)
    }

    #[test]
    fn test_compare_contents_empty_files() -> io::Result<()> {
        check_compare(b"", b"", true)
    }

    #[test]
    fn test_comparison_result_empty() {
        // Nothing compared yet: not identical, and rendered as unknown.
        let fresh = FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        assert!(!fresh.is_identical());
        assert_eq!(fresh.to_string("dir1", "dir2"), "Unknown");
        assert_eq!(fresh.to_symbol_string(), "=??");
    }

    #[test]
    fn test_comparison_result_contents_skipped() {
        // Equal time and size with contents never compared counts as identical.
        let skipped = FileComparisonResult {
            modified_time_comparison: Some(Ordering::Equal),
            size_comparison: Some(Ordering::Equal),
            ..FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth)
        };
        assert!(skipped.is_identical());
        assert_eq!(skipped.to_string("dir1", "dir2"), "Identical");
        assert_eq!(skipped.to_symbol_string(), "===");
    }
}