Skip to main content

compare_dir/
file_comparer.rs

1use std::cmp::Ordering;
2use std::fs;
3use std::io::{self, Read};
4use std::path::{Path, PathBuf};
5
/// Where a file was found when the two directory trees were compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Classification {
    /// Present in the first directory and absent from the second.
    OnlyInDir1,
    /// Present in the second directory and absent from the first.
    OnlyInDir2,
    /// Present in both the first and the second directory.
    InBoth,
}
16
/// Byte-for-byte comparer for a pair of files identified by borrowed paths.
pub struct FileComparer<'a> {
    /// Path of the first file.
    path1: &'a Path,
    /// Path of the second file.
    path2: &'a Path,
    /// Size in bytes of each read buffer used when streaming the two files.
    /// A value of `0` makes the content comparison use memory-mapped files
    /// instead of buffered reads.
    pub buffer_size: usize,
}
23
24impl<'a> FileComparer<'a> {
25    pub const DEFAULT_BUFFER_SIZE: usize = 64 * 1024;
26
27    pub fn new(path1: &'a Path, path2: &'a Path) -> Self {
28        Self {
29            path1,
30            path2,
31            buffer_size: Self::DEFAULT_BUFFER_SIZE,
32        }
33    }
34
35    pub fn metadata(&self) -> io::Result<(fs::Metadata, fs::Metadata)> {
36        let m1 = fs::metadata(self.path1)?;
37        let m2 = fs::metadata(self.path2)?;
38        Ok((m1, m2))
39    }
40
41    pub(crate) fn compare_contents(&self) -> io::Result<bool> {
42        let mut f1 = fs::File::open(self.path1)?;
43        let mut f2 = fs::File::open(self.path2)?;
44
45        if self.buffer_size == 0 {
46            let len1 = f1.metadata()?.len();
47            let len2 = f2.metadata()?.len();
48            if len1 != len2 {
49                return Ok(false);
50            }
51            if len1 == 0 {
52                return Ok(true);
53            }
54
55            let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
56            let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
57            return Ok(mmap1[..] == mmap2[..]);
58        }
59
60        let mut buf1 = vec![0u8; self.buffer_size];
61        let mut buf2 = vec![0u8; self.buffer_size];
62
63        loop {
64            // Safety from Deadlocks: rayon::join is specifically designed for nested parallelism.
65            // It uses work-stealing, meaning if all threads in the pool are busy, the thread
66            // calling join will just execute both tasks itself.
67            let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
68            let n1 = n1?;
69            let n2 = n2?;
70
71            if n1 != n2 || buf1[..n1] != buf2[..n2] {
72                return Ok(false);
73            }
74
75            if n1 == 0 {
76                return Ok(true);
77            }
78        }
79    }
80}
81
82/// Detailed result of comparing a single file.
83#[derive(Debug, Clone)]
84pub struct FileComparisonResult {
85    /// The path relative to the root of the directories.
86    pub relative_path: PathBuf,
87    /// Whether the file exists in one or both directories.
88    pub classification: Classification,
89    /// Comparison of the last modified time, if applicable.
90    pub modified_time_comparison: Option<Ordering>,
91    /// Comparison of the file size, if applicable.
92    pub size_comparison: Option<Ordering>,
93    /// Whether the content is byte-for-byte identical, if applicable.
94    pub is_content_same: Option<bool>,
95}
96
97impl FileComparisonResult {
98    pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
99        Self {
100            relative_path,
101            classification,
102            modified_time_comparison: None,
103            size_comparison: None,
104            is_content_same: None,
105        }
106    }
107
108    pub fn update(&mut self, comparer: &FileComparer) -> anyhow::Result<()> {
109        let (m1, m2) = comparer.metadata()?;
110        let t1 = m1.modified()?;
111        let t2 = m2.modified()?;
112        self.modified_time_comparison = Some(t1.cmp(&t2));
113
114        let s1 = m1.len();
115        let s2 = m2.len();
116        self.size_comparison = Some(s1.cmp(&s2));
117
118        if s1 == s2 {
119            log::info!("Comparing content: {:?}", self.relative_path);
120            self.is_content_same = Some(comparer.compare_contents()?);
121        }
122        Ok(())
123    }
124
125    pub fn is_identical(&self) -> bool {
126        self.classification == Classification::InBoth
127            && self.modified_time_comparison == Some(Ordering::Equal)
128            && self.size_comparison == Some(Ordering::Equal)
129            && self.is_content_same == Some(true)
130    }
131
132    pub fn to_symbol_string(&self) -> String {
133        let c1 = match self.classification {
134            Classification::OnlyInDir1 => '>',
135            Classification::OnlyInDir2 => '<',
136            Classification::InBoth => '=',
137        };
138        let c2 = match self.modified_time_comparison {
139            None => ' ',
140            Some(Ordering::Greater) => '>',
141            Some(Ordering::Less) => '<',
142            Some(Ordering::Equal) => '=',
143        };
144        let c3 = match self.size_comparison {
145            None => ' ',
146            Some(Ordering::Greater) => '>',
147            Some(Ordering::Less) => '<',
148            Some(Ordering::Equal) => {
149                if self.is_content_same == Some(false) {
150                    '!'
151                } else {
152                    '='
153                }
154            }
155        };
156        format!("{}{}{}", c1, c2, c3)
157    }
158
159    pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
160        let mut parts = Vec::new();
161        match self.classification {
162            Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
163            Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
164            Classification::InBoth => {}
165        }
166
167        if let Some(comp) = &self.modified_time_comparison {
168            match comp {
169                Ordering::Greater => parts.push(format!("{} is newer", dir1_name)),
170                Ordering::Less => parts.push(format!("{} is newer", dir2_name)),
171                Ordering::Equal => {}
172            }
173        }
174
175        if let Some(comp) = &self.size_comparison {
176            match comp {
177                Ordering::Greater => parts.push(format!("Size of {} is larger", dir1_name)),
178                Ordering::Less => parts.push(format!("Size of {} is larger", dir2_name)),
179                Ordering::Equal => {}
180            }
181        }
182
183        if let Some(same) = self.is_content_same
184            && !same
185        {
186            parts.push("Content differ".to_string());
187        }
188
189        if parts.is_empty() {
190            "Identical".to_string()
191        } else {
192            parts.join(", ")
193        }
194    }
195}
196
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Non-zero buffer size: exercises the streaming branch of `compare_contents`.
    const STREAM_BUFFER_SIZE: usize = 8192;
    /// Zero buffer size: exercises the memory-mapped branch of `compare_contents`.
    const MMAP_BUFFER_SIZE: usize = 0;

    /// Writes `left` and `right` to two temporary files and asserts that
    /// `compare_contents` returns `expect_same` on the stream code path and
    /// then on the mmap code path.
    fn check_both_code_paths(left: &[u8], right: &[u8], expect_same: bool) -> io::Result<()> {
        let mut file1 = NamedTempFile::new()?;
        let mut file2 = NamedTempFile::new()?;
        file1.write_all(left)?;
        file2.write_all(right)?;

        let mut comparer = FileComparer::new(file1.path(), file2.path());
        for buffer_size in [STREAM_BUFFER_SIZE, MMAP_BUFFER_SIZE] {
            comparer.buffer_size = buffer_size;
            assert_eq!(
                comparer.compare_contents()?,
                expect_same,
                "buffer_size = {buffer_size}"
            );
        }
        Ok(())
    }

    #[test]
    fn test_compare_contents_identical() -> io::Result<()> {
        check_both_code_paths(b"hello world", b"hello world", true)
    }

    #[test]
    fn test_compare_contents_different() -> io::Result<()> {
        check_both_code_paths(b"hello world", b"hello rust", false)
    }

    #[test]
    fn test_compare_contents_different_size() -> io::Result<()> {
        check_both_code_paths(b"hello world", b"hello", false)
    }

    #[test]
    fn test_compare_contents_empty_files() -> io::Result<()> {
        check_both_code_paths(b"", b"", true)
    }
}