1use crate::{FileHasher, FileItem, SystemTimeExt};
2use indicatif::FormattedDuration;
3use std::cmp::Ordering;
4use std::fs;
5use std::io::Read;
6use std::path::PathBuf;
7use std::time::SystemTime;
8
/// Where a relative path was found when two directory trees are compared.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Classification {
    /// The entry was found only in the first directory.
    OnlyInDir1,
    /// The entry was found only in the second directory.
    OnlyInDir2,
    /// The entry was found in both directories.
    InBoth,
}
19
20pub struct FileComparer<'a> {
22 file1: &'a FileItem,
23 file2: &'a FileItem,
24 pub buffer_size: usize,
25 pub hashers: Option<(&'a FileHasher, &'a FileHasher)>,
26}
27
28impl<'a> FileComparer<'a> {
29 pub const DEFAULT_BUFFER_SIZE_KB: usize = 2 * 1024;
30 pub const DEFAULT_BUFFER_SIZE: usize = Self::DEFAULT_BUFFER_SIZE_KB * 1024;
31
32 pub fn new(file1: &'a FileItem, file2: &'a FileItem) -> Self {
33 Self {
34 file1,
35 file2,
36 buffer_size: Self::DEFAULT_BUFFER_SIZE,
37 hashers: None,
38 }
39 }
40
41 pub fn sizes(&self) -> (u64, u64) {
42 (self.file1.size(), self.file2.size())
43 }
44
45 pub fn modified(&self) -> (std::time::SystemTime, std::time::SystemTime) {
46 (self.file1.modified(), self.file2.modified())
47 }
48
49 pub(crate) fn compare_contents(&self) -> anyhow::Result<bool> {
50 let len1 = self.file1.size();
51 let len2 = self.file2.size();
52 if len1 != len2 {
53 return Ok(false);
54 }
55 if len1 == 0 {
56 return Ok(true);
57 }
58
59 if let Some((hasher1, hasher2)) = self.hashers {
60 let (hash1, hash2) = rayon::join(
61 || hasher1.get_hash(self.file1),
62 || hasher2.get_hash(self.file2),
63 );
64 return Ok(hash1? == hash2?);
65 }
66
67 let start_time = std::time::Instant::now();
68 let mut f1 = fs::File::open(self.file1.path())?;
69 let mut f2 = fs::File::open(self.file2.path())?;
70 if self.buffer_size == 0 {
71 let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
72 let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
73 let result = mmap1[..] == mmap2[..];
74 log::debug!(
75 "Compared in {}: '{}'",
76 FormattedDuration(start_time.elapsed()),
77 self.file1
78 );
79 return Ok(result);
80 }
81
82 let mut buf1 = vec![0u8; self.buffer_size];
83 let mut buf2 = vec![0u8; self.buffer_size];
84 loop {
85 let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
89 let n1 = n1?;
90 let n2 = n2?;
91 if n1 != n2 || buf1[..n1] != buf2[..n2] {
92 log::debug!(
93 "Compared in {}: '{}'",
94 FormattedDuration(start_time.elapsed()),
95 self.file1
96 );
97 return Ok(false);
98 }
99 if n1 == 0 {
100 log::debug!(
101 "Compared in {}: '{}'",
102 FormattedDuration(start_time.elapsed()),
103 self.file1
104 );
105 return Ok(true);
106 }
107 }
108 }
109}
110
111#[derive(Debug, Clone)]
113pub struct FileComparisonResult {
114 pub relative_path: PathBuf,
116 pub classification: Classification,
118 pub modified_time_comparison: Option<Ordering>,
120 pub size_comparison: Option<Ordering>,
122 pub is_content_same: Option<bool>,
124}
125
126impl FileComparisonResult {
127 pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
128 Self {
129 relative_path,
130 classification,
131 modified_time_comparison: None,
132 size_comparison: None,
133 is_content_same: None,
134 }
135 }
136
137 pub fn update(
138 &mut self,
139 comparer: &FileComparer,
140 should_compare_content: bool,
141 ) -> anyhow::Result<()> {
142 let (t1, t2) = comparer.modified();
143 self.modified_time_comparison = Some(t1.cmp(&t2));
144
145 let (s1, s2) = comparer.sizes();
146 self.size_comparison = Some(s1.cmp(&s2));
147
148 if should_compare_content && s1 == s2 {
149 self.is_content_same = Some(comparer.compare_contents()?);
150 }
151 Ok(())
152 }
153
154 pub(crate) fn update_moodified(&mut self, t1: SystemTime, t2: SystemTime) {
155 self.modified_time_comparison = Some(if t1.eq_nearly(t2) {
156 Ordering::Equal
157 } else {
158 t1.cmp(&t2)
159 })
160 }
161
162 pub(crate) fn update_size(&mut self, s1: u64, s2: u64) {
163 self.size_comparison = Some(s1.cmp(&s2));
164 }
165
166 pub fn is_identical(&self) -> bool {
169 self.classification == Classification::InBoth
170 && self.modified_time_comparison == Some(Ordering::Equal)
171 && self.size_comparison == Some(Ordering::Equal)
172 && self.is_content_same != Some(false)
173 }
174
175 pub(crate) fn is_identical_content(&self) -> Option<bool> {
176 match self.size_comparison {
177 None | Some(Ordering::Equal) => self.is_content_same,
178 _ => Some(false),
179 }
180 }
181
182 pub fn to_symbol_string(&self) -> String {
183 String::from_iter([
184 match self.classification {
185 Classification::OnlyInDir1 => '>',
186 Classification::OnlyInDir2 => '<',
187 Classification::InBoth => '=',
188 },
189 match self.modified_time_comparison {
190 None => ' ',
191 Some(Ordering::Greater) => '>',
192 Some(Ordering::Less) => '<',
193 Some(Ordering::Equal) => '=',
194 },
195 match self.size_comparison {
196 None => ' ',
197 Some(Ordering::Greater) => '>',
198 Some(Ordering::Less) => '<',
199 Some(Ordering::Equal) => {
200 if self.is_content_same == Some(false) {
201 '!'
202 } else {
203 '='
204 }
205 }
206 },
207 ])
208 }
209
210 pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
211 let mut parts = Vec::new();
212 match self.classification {
213 Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
214 Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
215 Classification::InBoth => {}
216 }
217 let mut has_equals = false;
218 match self.modified_time_comparison {
219 Some(Ordering::Greater) => parts.push(format!("{} is newer", dir1_name)),
220 Some(Ordering::Less) => parts.push(format!("{} is newer", dir2_name)),
221 Some(Ordering::Equal) => has_equals = true,
222 None => {}
223 }
224 match self.size_comparison {
225 Some(Ordering::Greater) => parts.push(format!("Size of {} is larger", dir1_name)),
226 Some(Ordering::Less) => parts.push(format!("Size of {} is larger", dir2_name)),
227 Some(Ordering::Equal) => has_equals = true,
228 None => {}
229 }
230 match self.is_content_same {
231 Some(false) => parts.push("Contents differ".to_string()),
232 Some(true) => has_equals = true,
233 None => {}
234 }
235
236 if parts.is_empty() {
237 if !has_equals {
238 return "Unknown".to_string();
239 }
240 return "Identical".to_string();
241 }
242 parts.join(", ")
243 }
244}
245
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes `content1` and `content2` to files in two temporary directories
    /// and asserts that every content-comparison strategy returns `expected`.
    fn check_compare(content1: &[u8], content2: &[u8], expected: bool) -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;
        let file1_path = dir1.path().join("file");
        let file2_path = dir2.path().join("file");
        fs::write(&file1_path, content1)?;
        fs::write(&file2_path, content2)?;
        let file1 = FileItem::try_from(file1_path.as_path())?;
        let file2 = FileItem::try_from(file2_path.as_path())?;

        let mut comparer = FileComparer::new(&file1, &file2);

        // Buffered comparison where one buffer holds the whole file.
        comparer.buffer_size = 8192;
        assert_eq!(comparer.compare_contents()?, expected);

        // Buffered comparison with a tiny buffer, so the read loop runs for
        // several iterations and ends on a partial chunk.
        comparer.buffer_size = 4;
        assert_eq!(comparer.compare_contents()?, expected);

        // A buffer size of zero selects the memory-mapped comparison.
        comparer.buffer_size = 0;
        assert_eq!(comparer.compare_contents()?, expected);

        // Hash-based comparison.
        let hasher1 = FileHasher::new_with_cache(&[dir1.path()])?;
        let hasher2 = FileHasher::new_with_cache(&[dir2.path()])?;
        comparer.hashers = Some((&hasher1, &hasher2));
        assert_eq!(comparer.compare_contents()?, expected);

        Ok(())
    }

    #[test]
    fn compare_contents_identical() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello world", true)
    }

    #[test]
    fn compare_contents_different() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello rust", false)
    }

    /// Same length, different bytes: the size shortcut cannot decide, so the
    /// contents themselves have to be compared.
    #[test]
    fn compare_contents_different_same_size() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello worlD", false)
    }

    #[test]
    fn compare_contents_different_size() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello", false)
    }

    #[test]
    fn compare_contents_empty_files() -> anyhow::Result<()> {
        check_compare(b"", b"", true)
    }

    #[test]
    fn comparison_result_empty() {
        let result = FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        assert!(!result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Unknown");
        // Always three symbols: classification, then blank time and size.
        assert_eq!(result.to_symbol_string(), "=  ");
    }

    #[test]
    fn comparison_result_contents_skipped() {
        let mut result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        result.modified_time_comparison = Some(Ordering::Equal);
        result.size_comparison = Some(Ordering::Equal);
        assert!(result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Identical");
        assert_eq!(result.to_symbol_string(), "===");
    }
}