1use crate::file_hasher::FileHasher;
2use std::cmp::Ordering;
3use std::fs;
4use std::io::{self, Read};
5use std::path::{Path, PathBuf};
6
/// Which of the two compared directories contains a given entry.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Classification {
    /// The entry exists only in the first directory.
    OnlyInDir1,
    /// The entry exists only in the second directory.
    OnlyInDir2,
    /// The entry exists in both directories.
    InBoth,
}
17
18pub struct FileComparer<'a> {
20 path1: &'a Path,
21 path2: &'a Path,
22 pub buffer_size: usize,
23 pub hashers: Option<(&'a FileHasher, &'a FileHasher)>,
24}
25
26impl<'a> FileComparer<'a> {
27 pub const DEFAULT_BUFFER_SIZE_KB: usize = 64;
28 pub const DEFAULT_BUFFER_SIZE: usize = Self::DEFAULT_BUFFER_SIZE_KB * 1024;
29
30 pub fn new(path1: &'a Path, path2: &'a Path) -> Self {
31 Self {
32 path1,
33 path2,
34 buffer_size: Self::DEFAULT_BUFFER_SIZE,
35 hashers: None,
36 }
37 }
38
39 pub fn metadata(&self) -> io::Result<(fs::Metadata, fs::Metadata)> {
40 let m1 = fs::metadata(self.path1)?;
41 let m2 = fs::metadata(self.path2)?;
42 Ok((m1, m2))
43 }
44
45 pub(crate) fn compare_contents(&self) -> io::Result<bool> {
46 if let Some((hasher1, hasher2)) = self.hashers {
47 let (hash1, hash2) = rayon::join(
48 || hasher1.get_hash(self.path1),
49 || hasher2.get_hash(self.path2),
50 );
51 return Ok(hash1? == hash2?);
52 }
53
54 let start_time = std::time::Instant::now();
55 let mut f1 = fs::File::open(self.path1)?;
56 let mut f2 = fs::File::open(self.path2)?;
57 if self.buffer_size == 0 {
58 let len1 = f1.metadata()?.len();
59 let len2 = f2.metadata()?.len();
60 if len1 != len2 {
61 return Ok(false);
62 }
63 if len1 == 0 {
64 return Ok(true);
65 }
66 let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
67 let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
68 let result = mmap1[..] == mmap2[..];
69 log::debug!("Compared in {:?}: {:?}", start_time.elapsed(), self.path1);
70 return Ok(result);
71 }
72
73 let mut buf1 = vec![0u8; self.buffer_size];
74 let mut buf2 = vec![0u8; self.buffer_size];
75 loop {
76 let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
80 let n1 = n1?;
81 let n2 = n2?;
82 if n1 != n2 || buf1[..n1] != buf2[..n2] {
83 log::debug!("Compared in {:?}: {:?}", start_time.elapsed(), self.path1);
84 return Ok(false);
85 }
86 if n1 == 0 {
87 log::debug!("Compared in {:?}: {:?}", start_time.elapsed(), self.path1);
88 return Ok(true);
89 }
90 }
91 }
92}
93
94#[derive(Debug, Clone)]
96pub struct FileComparisonResult {
97 pub relative_path: PathBuf,
99 pub classification: Classification,
101 pub modified_time_comparison: Option<Ordering>,
103 pub size_comparison: Option<Ordering>,
105 pub is_content_same: Option<bool>,
107}
108
109impl FileComparisonResult {
110 pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
111 Self {
112 relative_path,
113 classification,
114 modified_time_comparison: None,
115 size_comparison: None,
116 is_content_same: None,
117 }
118 }
119
120 pub fn update(
121 &mut self,
122 comparer: &FileComparer,
123 should_compare_content: bool,
124 ) -> anyhow::Result<()> {
125 let (m1, m2) = comparer.metadata()?;
126 let t1 = m1.modified()?;
127 let t2 = m2.modified()?;
128 self.modified_time_comparison = Some(t1.cmp(&t2));
129
130 let s1 = m1.len();
131 let s2 = m2.len();
132 self.size_comparison = Some(s1.cmp(&s2));
133
134 if should_compare_content && s1 == s2 {
135 self.is_content_same = Some(comparer.compare_contents()?);
136 }
137 Ok(())
138 }
139
140 pub fn is_identical(&self) -> bool {
143 self.classification == Classification::InBoth
144 && self.modified_time_comparison == Some(Ordering::Equal)
145 && self.size_comparison == Some(Ordering::Equal)
146 && self.is_content_same != Some(false)
147 }
148
149 pub fn to_symbol_string(&self) -> String {
150 String::from_iter([
151 match self.classification {
152 Classification::OnlyInDir1 => '>',
153 Classification::OnlyInDir2 => '<',
154 Classification::InBoth => '=',
155 },
156 match self.modified_time_comparison {
157 None => ' ',
158 Some(Ordering::Greater) => '>',
159 Some(Ordering::Less) => '<',
160 Some(Ordering::Equal) => '=',
161 },
162 match self.size_comparison {
163 None => ' ',
164 Some(Ordering::Greater) => '>',
165 Some(Ordering::Less) => '<',
166 Some(Ordering::Equal) => {
167 if self.is_content_same == Some(false) {
168 '!'
169 } else {
170 '='
171 }
172 }
173 },
174 ])
175 }
176
177 pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
178 let mut parts = Vec::new();
179 match self.classification {
180 Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
181 Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
182 Classification::InBoth => {}
183 }
184 let mut has_equals = false;
185 match self.modified_time_comparison {
186 Some(Ordering::Greater) => parts.push(format!("{} is newer", dir1_name)),
187 Some(Ordering::Less) => parts.push(format!("{} is newer", dir2_name)),
188 Some(Ordering::Equal) => has_equals = true,
189 None => {}
190 }
191 match self.size_comparison {
192 Some(Ordering::Greater) => parts.push(format!("Size of {} is larger", dir1_name)),
193 Some(Ordering::Less) => parts.push(format!("Size of {} is larger", dir2_name)),
194 Some(Ordering::Equal) => has_equals = true,
195 None => {}
196 }
197 match self.is_content_same {
198 Some(false) => parts.push("Contents differ".to_string()),
199 Some(true) => has_equals = true,
200 None => {}
201 }
202
203 if parts.is_empty() {
204 if !has_equals {
205 return "Unknown".to_string();
206 }
207 return "Identical".to_string();
208 }
209 parts.join(", ")
210 }
211}
212
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes the two contents to files in separate temporary directories and
    /// asserts that every comparison strategy (buffered, memory-mapped, hashed)
    /// returns `expected`.
    fn check_compare(content1: &[u8], content2: &[u8], expected: bool) -> io::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;
        let f1_path = dir1.path().join("file");
        let f2_path = dir2.path().join("file");
        fs::write(&f1_path, content1)?;
        fs::write(&f2_path, content2)?;

        let mut comparer = FileComparer::new(&f1_path, &f2_path);

        // Buffered reads.
        comparer.buffer_size = 8192;
        assert_eq!(comparer.compare_contents()?, expected);

        // A buffer size of 0 selects the memory-mapped comparison.
        comparer.buffer_size = 0;
        assert_eq!(comparer.compare_contents()?, expected);

        // Hash-based comparison.
        let hasher1 = FileHasher::new(dir1.path().to_path_buf());
        let hasher2 = FileHasher::new(dir2.path().to_path_buf());
        comparer.hashers = Some((&hasher1, &hasher2));
        assert_eq!(comparer.compare_contents()?, expected);

        Ok(())
    }

    #[test]
    fn compare_contents_identical() -> io::Result<()> {
        check_compare(b"hello world", b"hello world", true)
    }

    #[test]
    fn compare_contents_different() -> io::Result<()> {
        check_compare(b"hello world", b"hello rust", false)
    }

    #[test]
    fn compare_contents_different_size() -> io::Result<()> {
        check_compare(b"hello world", b"hello", false)
    }

    #[test]
    fn compare_contents_empty_files() -> io::Result<()> {
        check_compare(b"", b"", true)
    }

    /// Contents larger than the 8192-byte buffer exercise several loop
    /// iterations, including a difference located in the final chunk.
    #[test]
    fn compare_contents_spanning_multiple_buffers() -> io::Result<()> {
        let original = vec![7u8; 20_000];
        let mut altered = original.clone();
        check_compare(&original, &altered, true)?;
        altered[19_999] = 8;
        check_compare(&original, &altered, false)
    }

    #[test]
    fn comparison_result_empty() {
        let result = FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        assert!(!result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Unknown");
        // Always three symbols: presence, then one space for each missing comparison.
        assert_eq!(result.to_symbol_string(), "=  ");
    }

    #[test]
    fn comparison_result_contents_skipped() {
        let mut result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        result.modified_time_comparison = Some(Ordering::Equal);
        result.size_comparison = Some(Ordering::Equal);
        assert!(result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Identical");
        assert_eq!(result.to_symbol_string(), "===");
    }
}