// compare_dir/file_comparer.rs

1use std::cmp::Ordering;
2use std::fs;
3use std::io::{self, Read};
4use std::path::{Path, PathBuf};
5
/// Where a file was found when the two directories are compared.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Classification {
    /// Present in the first directory only.
    OnlyInDir1,
    /// Present in the second directory only.
    OnlyInDir2,
    /// Present in both the first and the second directory.
    InBoth,
}
16
17/// Compares the content of two files.
18pub struct FileComparer<'a> {
19    path1: &'a Path,
20    path2: &'a Path,
21    pub buffer_size: usize,
22}
23
24impl<'a> FileComparer<'a> {
25    pub const DEFAULT_BUFFER_SIZE: usize = 64 * 1024;
26
27    pub fn new(path1: &'a Path, path2: &'a Path) -> Self {
28        Self {
29            path1,
30            path2,
31            buffer_size: Self::DEFAULT_BUFFER_SIZE,
32        }
33    }
34
35    pub fn metadata(&self) -> io::Result<(fs::Metadata, fs::Metadata)> {
36        let m1 = fs::metadata(self.path1)?;
37        let m2 = fs::metadata(self.path2)?;
38        Ok((m1, m2))
39    }
40
41    pub(crate) fn compare_contents(&self) -> io::Result<bool> {
42        let mut f1 = fs::File::open(self.path1)?;
43        let mut f2 = fs::File::open(self.path2)?;
44
45        if self.buffer_size == 0 {
46            let len1 = f1.metadata()?.len();
47            let len2 = f2.metadata()?.len();
48            if len1 != len2 {
49                return Ok(false);
50            }
51            if len1 == 0 {
52                return Ok(true);
53            }
54
55            let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
56            let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
57            return Ok(mmap1[..] == mmap2[..]);
58        }
59
60        let mut buf1 = vec![0u8; self.buffer_size];
61        let mut buf2 = vec![0u8; self.buffer_size];
62
63        loop {
64            // Safety from Deadlocks: rayon::join is specifically designed for nested parallelism.
65            // It uses work-stealing, meaning if all threads in the pool are busy, the thread
66            // calling join will just execute both tasks itself.
67            let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
68            let n1 = n1?;
69            let n2 = n2?;
70
71            if n1 != n2 || buf1[..n1] != buf2[..n2] {
72                return Ok(false);
73            }
74
75            if n1 == 0 {
76                return Ok(true);
77            }
78        }
79    }
80}
81
82/// Detailed result of comparing a single file.
83#[derive(Debug, Clone)]
84pub struct FileComparisonResult {
85    /// The path relative to the root of the directories.
86    pub relative_path: PathBuf,
87    /// Whether the file exists in one or both directories.
88    pub classification: Classification,
89    /// Comparison of the last modified time, if applicable.
90    pub modified_time_comparison: Option<Ordering>,
91    /// Comparison of the file size, if applicable.
92    pub size_comparison: Option<Ordering>,
93    /// Whether the content is byte-for-byte identical, if applicable.
94    pub is_content_same: Option<bool>,
95}
96
97impl FileComparisonResult {
98    pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
99        Self {
100            relative_path,
101            classification,
102            modified_time_comparison: None,
103            size_comparison: None,
104            is_content_same: None,
105        }
106    }
107
108    pub(crate) fn update(&mut self, comparer: &FileComparer) -> anyhow::Result<()> {
109        let (m1, m2) = comparer.metadata()?;
110        let t1 = m1.modified()?;
111        let t2 = m2.modified()?;
112        self.modified_time_comparison = Some(t1.cmp(&t2));
113
114        let s1 = m1.len();
115        let s2 = m2.len();
116        self.size_comparison = Some(s1.cmp(&s2));
117
118        if s1 == s2 {
119            log::info!("Comparing content: {:?}", self.relative_path);
120            self.is_content_same = Some(comparer.compare_contents()?);
121        }
122        Ok(())
123    }
124
125    pub fn is_identical(&self) -> bool {
126        self.classification == Classification::InBoth
127            && self.modified_time_comparison == Some(Ordering::Equal)
128            && self.size_comparison == Some(Ordering::Equal)
129            && self.is_content_same == Some(true)
130    }
131
132    pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
133        let mut parts = Vec::new();
134        match self.classification {
135            Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
136            Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
137            Classification::InBoth => {}
138        }
139
140        if let Some(comp) = &self.modified_time_comparison {
141            match comp {
142                Ordering::Greater => parts.push(format!("{} is newer", dir1_name)),
143                Ordering::Less => parts.push(format!("{} is newer", dir2_name)),
144                Ordering::Equal => {}
145            }
146        }
147
148        if let Some(comp) = &self.size_comparison {
149            match comp {
150                Ordering::Greater => parts.push(format!("Size of {} is larger", dir1_name)),
151                Ordering::Less => parts.push(format!("Size of {} is larger", dir2_name)),
152                Ordering::Equal => {}
153            }
154        }
155
156        if let Some(same) = self.is_content_same
157            && !same
158        {
159            parts.push("Content differ".to_string());
160        }
161
162        format!("{}: {}", self.relative_path.display(), parts.join(", "))
163    }
164}
165
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Non-zero buffer size used to exercise the streaming code path.
    const STREAM_BUFFER_SIZE: usize = 8192;

    /// Creates a temporary file containing exactly `contents`.
    fn temp_file_with(contents: &[u8]) -> io::Result<NamedTempFile> {
        let mut file = NamedTempFile::new()?;
        file.write_all(contents)?;
        Ok(file)
    }

    /// Writes `left` and `right` to two temporary files and asserts that
    /// `compare_contents` yields `expected` on the stream path and then on
    /// the mmap path.
    fn check_both_paths(left: &[u8], right: &[u8], expected: bool) -> io::Result<()> {
        let f1 = temp_file_with(left)?;
        let f2 = temp_file_with(right)?;
        let mut comparer = FileComparer::new(f1.path(), f2.path());

        // A buffer size of 0 selects the mmap code path; the other value streams.
        for buffer_size in [STREAM_BUFFER_SIZE, 0] {
            comparer.buffer_size = buffer_size;
            assert_eq!(comparer.compare_contents()?, expected);
        }
        Ok(())
    }

    #[test]
    fn test_compare_contents_identical() -> io::Result<()> {
        check_both_paths(b"hello world", b"hello world", true)
    }

    #[test]
    fn test_compare_contents_different() -> io::Result<()> {
        check_both_paths(b"hello world", b"hello rust", false)
    }

    #[test]
    fn test_compare_contents_different_size() -> io::Result<()> {
        check_both_paths(b"hello world", b"hello", false)
    }

    #[test]
    fn test_compare_contents_empty_files() -> io::Result<()> {
        check_both_paths(b"", b"", true)
    }
}