Skip to main content

compare_dir/
file_comparer.rs

1use crate::file_hasher::FileHasher;
2use std::cmp::Ordering;
3use std::fs;
4use std::io::{self, Read};
5use std::path::{Path, PathBuf};
6
/// Where a file was found when the two directories were walked.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Classification {
    /// Present in the first directory and absent from the second.
    OnlyInDir1,
    /// Present in the second directory and absent from the first.
    OnlyInDir2,
    /// Present in both directories.
    InBoth,
}
17
18/// Compares the content of two files.
19pub struct FileComparer<'a> {
20    path1: &'a Path,
21    path2: &'a Path,
22    pub buffer_size: usize,
23    pub hashers: Option<(&'a FileHasher, &'a FileHasher)>,
24}
25
26impl<'a> FileComparer<'a> {
27    pub const DEFAULT_BUFFER_SIZE_KB: usize = 64;
28    pub const DEFAULT_BUFFER_SIZE: usize = Self::DEFAULT_BUFFER_SIZE_KB * 1024;
29
30    pub fn new(path1: &'a Path, path2: &'a Path) -> Self {
31        Self {
32            path1,
33            path2,
34            buffer_size: Self::DEFAULT_BUFFER_SIZE,
35            hashers: None,
36        }
37    }
38
39    pub fn metadata(&self) -> io::Result<(fs::Metadata, fs::Metadata)> {
40        let m1 = fs::metadata(self.path1)?;
41        let m2 = fs::metadata(self.path2)?;
42        Ok((m1, m2))
43    }
44
45    pub(crate) fn compare_contents(&self) -> io::Result<bool> {
46        if let Some((hasher1, hasher2)) = self.hashers {
47            let (hash1, hash2) = rayon::join(
48                || hasher1.get_hash(self.path1),
49                || hasher2.get_hash(self.path2),
50            );
51            return Ok(hash1? == hash2?);
52        }
53
54        let mut f1 = fs::File::open(self.path1)?;
55        let mut f2 = fs::File::open(self.path2)?;
56        if self.buffer_size == 0 {
57            let len1 = f1.metadata()?.len();
58            let len2 = f2.metadata()?.len();
59            if len1 != len2 {
60                return Ok(false);
61            }
62            if len1 == 0 {
63                return Ok(true);
64            }
65            let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
66            let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
67            return Ok(mmap1[..] == mmap2[..]);
68        }
69
70        let mut buf1 = vec![0u8; self.buffer_size];
71        let mut buf2 = vec![0u8; self.buffer_size];
72        loop {
73            // Safety from Deadlocks: rayon::join is specifically designed for nested parallelism.
74            // It uses work-stealing, meaning if all threads in the pool are busy, the thread
75            // calling join will just execute both tasks itself.
76            let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
77            let n1 = n1?;
78            let n2 = n2?;
79            if n1 != n2 || buf1[..n1] != buf2[..n2] {
80                return Ok(false);
81            }
82            if n1 == 0 {
83                return Ok(true);
84            }
85        }
86    }
87}
88
89/// Detailed result of comparing a single file.
90#[derive(Debug, Clone)]
91pub struct FileComparisonResult {
92    /// The path relative to the root of the directories.
93    pub relative_path: PathBuf,
94    /// Whether the file exists in one or both directories.
95    pub classification: Classification,
96    /// Comparison of the last modified time, if applicable.
97    pub modified_time_comparison: Option<Ordering>,
98    /// Comparison of the file size, if applicable.
99    pub size_comparison: Option<Ordering>,
100    /// Whether the content is byte-for-byte identical, if applicable.
101    pub is_content_same: Option<bool>,
102}
103
104impl FileComparisonResult {
105    pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
106        Self {
107            relative_path,
108            classification,
109            modified_time_comparison: None,
110            size_comparison: None,
111            is_content_same: None,
112        }
113    }
114
115    pub fn update(
116        &mut self,
117        comparer: &FileComparer,
118        should_compare_content: bool,
119    ) -> anyhow::Result<()> {
120        let (m1, m2) = comparer.metadata()?;
121        let t1 = m1.modified()?;
122        let t2 = m2.modified()?;
123        self.modified_time_comparison = Some(t1.cmp(&t2));
124
125        let s1 = m1.len();
126        let s2 = m2.len();
127        self.size_comparison = Some(s1.cmp(&s2));
128
129        if should_compare_content && s1 == s2 {
130            log::trace!("Comparing content: {:?}", self.relative_path);
131            self.is_content_same = Some(comparer.compare_contents()?);
132        }
133        Ok(())
134    }
135
136    /// True if the two files are identical; i.e., modified times and sizes are
137    /// the same. Contents are the same too, or content comparison was skipped.
138    pub fn is_identical(&self) -> bool {
139        self.classification == Classification::InBoth
140            && self.modified_time_comparison == Some(Ordering::Equal)
141            && self.size_comparison == Some(Ordering::Equal)
142            && self.is_content_same != Some(false)
143    }
144
145    pub fn to_symbol_string(&self) -> String {
146        String::from_iter([
147            match self.classification {
148                Classification::OnlyInDir1 => '>',
149                Classification::OnlyInDir2 => '<',
150                Classification::InBoth => '=',
151            },
152            match self.modified_time_comparison {
153                None => '?',
154                Some(Ordering::Greater) => '>',
155                Some(Ordering::Less) => '<',
156                Some(Ordering::Equal) => '=',
157            },
158            match self.size_comparison {
159                None => '?',
160                Some(Ordering::Greater) => '>',
161                Some(Ordering::Less) => '<',
162                Some(Ordering::Equal) => {
163                    if self.is_content_same == Some(false) {
164                        '!'
165                    } else {
166                        '='
167                    }
168                }
169            },
170        ])
171    }
172
173    pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
174        let mut parts = Vec::new();
175        match self.classification {
176            Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
177            Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
178            Classification::InBoth => {}
179        }
180        let mut has_equals = false;
181        match self.modified_time_comparison {
182            Some(Ordering::Greater) => parts.push(format!("{} is newer", dir1_name)),
183            Some(Ordering::Less) => parts.push(format!("{} is newer", dir2_name)),
184            Some(Ordering::Equal) => has_equals = true,
185            None => {}
186        }
187        match self.size_comparison {
188            Some(Ordering::Greater) => parts.push(format!("Size of {} is larger", dir1_name)),
189            Some(Ordering::Less) => parts.push(format!("Size of {} is larger", dir2_name)),
190            Some(Ordering::Equal) => has_equals = true,
191            None => {}
192        }
193        match self.is_content_same {
194            Some(false) => parts.push("Contents differ".to_string()),
195            Some(true) => has_equals = true,
196            None => {}
197        }
198
199        if parts.is_empty() {
200            if !has_equals {
201                return "Unknown".to_string();
202            }
203            return "Identical".to_string();
204        }
205        parts.join(", ")
206    }
207}
208
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes the two contents to a file in each of two temporary directories
    /// and asserts that every comparison strategy (buffered, memory-mapped,
    /// hashed) reports `expected`.
    fn check_compare(content1: &[u8], content2: &[u8], expected: bool) -> io::Result<()> {
        let left_dir = tempfile::tempdir()?;
        let right_dir = tempfile::tempdir()?;
        let left_file = left_dir.path().join("file");
        let right_file = right_dir.path().join("file");
        fs::write(&left_file, content1)?;
        fs::write(&right_file, content2)?;

        let mut comparer = FileComparer::new(&left_file, &right_file);

        // Without hashers: buffered reads first, then mmap (buffer size 0).
        for buffer_size in [8192, 0] {
            comparer.buffer_size = buffer_size;
            assert_eq!(comparer.compare_contents()?, expected);
        }

        // With hashers
        let left_hasher = FileHasher::new(left_dir.path().to_path_buf());
        let right_hasher = FileHasher::new(right_dir.path().to_path_buf());
        comparer.hashers = Some((&left_hasher, &right_hasher));
        assert_eq!(comparer.compare_contents()?, expected);

        Ok(())
    }

    #[test]
    fn test_compare_contents_identical() -> io::Result<()> {
        check_compare(b"hello world", b"hello world", true)
    }

    #[test]
    fn test_compare_contents_different() -> io::Result<()> {
        check_compare(b"hello world", b"hello rust", false)
    }

    #[test]
    fn test_compare_contents_different_size() -> io::Result<()> {
        check_compare(b"hello world", b"hello", false)
    }

    #[test]
    fn test_compare_contents_empty_files() -> io::Result<()> {
        check_compare(b"", b"", true)
    }

    /// A freshly created result has no comparisons, so it is neither identical
    /// nor describable.
    #[test]
    fn test_comparison_result_empty() {
        let fresh = FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        assert!(!fresh.is_identical());
        assert_eq!(fresh.to_string("dir1", "dir2"), "Unknown");
        assert_eq!(fresh.to_symbol_string(), "=??");
    }

    /// Equal time and size with no content comparison still counts as identical.
    #[test]
    fn test_comparison_result_contents_skipped() {
        let skipped = FileComparisonResult {
            modified_time_comparison: Some(Ordering::Equal),
            size_comparison: Some(Ordering::Equal),
            ..FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth)
        };
        assert!(skipped.is_identical());
        assert_eq!(skipped.to_string("dir1", "dir2"), "Identical");
        assert_eq!(skipped.to_symbol_string(), "===");
    }
}