1use crate::{FileHasher, FileItem, OutputFormat, SystemTimeExt};
2use indicatif::FormattedDuration;
3use std::cmp::Ordering;
4use std::fs;
5use std::io::Read;
6use std::path::PathBuf;
7use std::time::SystemTime;
8
/// Which of the two compared directories contains an entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Classification {
    /// Reported as "Only in <dir1>" and rendered as `>` in symbol output.
    OnlyInDir1,
    /// Reported as "Only in <dir2>" and rendered as `<` in symbol output.
    OnlyInDir2,
    /// Present on both sides; rendered as `=` in symbol output.
    InBoth,
}
19
/// Compares a pair of files by size, modification time and content.
pub struct FileComparer<'a> {
    /// First file of the pair (the "dir1" side in reports).
    file1: &'a FileItem,
    /// Second file of the pair (the "dir2" side in reports).
    file2: &'a FileItem,
    /// Size in bytes of each of the two read buffers used by the chunked
    /// content comparison. `0` selects a memory-mapped comparison instead.
    pub buffer_size: usize,
    /// Optional hashers, one per file. When set, contents are compared by
    /// hash instead of byte by byte.
    pub hashers: Option<(&'a FileHasher, &'a FileHasher)>,
}
27
28impl<'a> FileComparer<'a> {
29 pub const DEFAULT_BUFFER_SIZE_KB: usize = 2 * 1024;
30 pub const DEFAULT_BUFFER_SIZE: usize = Self::DEFAULT_BUFFER_SIZE_KB * 1024;
31
32 pub fn new(file1: &'a FileItem, file2: &'a FileItem) -> Self {
33 Self {
34 file1,
35 file2,
36 buffer_size: Self::DEFAULT_BUFFER_SIZE,
37 hashers: None,
38 }
39 }
40
41 pub fn sizes(&self) -> (u64, u64) {
42 (self.file1.size(), self.file2.size())
43 }
44
45 pub fn modified(&self) -> (std::time::SystemTime, std::time::SystemTime) {
46 (self.file1.modified(), self.file2.modified())
47 }
48
49 pub(crate) fn compare_contents(&self) -> anyhow::Result<bool> {
50 let len1 = self.file1.size();
51 let len2 = self.file2.size();
52 if len1 != len2 {
53 return Ok(false);
54 }
55 if let Some((hasher1, hasher2)) = self.hashers {
56 let (hash1, hash2) = rayon::join(
57 || hasher1.get_hash(self.file1),
58 || hasher2.get_hash(self.file2),
59 );
60 return Ok(hash1? == hash2?);
61 }
62 if len1 == 0 {
63 return Ok(true);
66 }
67
68 let start_time = std::time::Instant::now();
69 let mut f1 = fs::File::open(self.file1.path())?;
70 let mut f2 = fs::File::open(self.file2.path())?;
71 if self.buffer_size == 0 {
72 let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
73 let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
74 let result = mmap1[..] == mmap2[..];
75 log::debug!(
76 "Compared in {}: '{}'",
77 FormattedDuration(start_time.elapsed()),
78 self.file1
79 );
80 return Ok(result);
81 }
82
83 let mut buf1 = vec![0u8; self.buffer_size];
84 let mut buf2 = vec![0u8; self.buffer_size];
85 loop {
86 let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
90 let n1 = n1?;
91 let n2 = n2?;
92 if n1 != n2 || buf1[..n1] != buf2[..n2] {
93 log::debug!(
94 "Compared in {}: '{}'",
95 FormattedDuration(start_time.elapsed()),
96 self.file1
97 );
98 return Ok(false);
99 }
100 if n1 == 0 {
101 log::debug!(
102 "Compared in {}: '{}'",
103 FormattedDuration(start_time.elapsed()),
104 self.file1
105 );
106 return Ok(true);
107 }
108 }
109 }
110}
111
/// Outcome of comparing one entry between the two directories.
#[derive(Debug, Clone)]
pub struct FileComparisonResult {
    /// Path printed for this entry.
    // NOTE(review): presumably relative to the compared directory roots, per
    // the field name; confirm against the caller that builds it.
    pub relative_path: PathBuf,
    /// Which side(s) the entry exists on.
    pub classification: Classification,
    /// How the dir1 file's modification time orders against the dir2 file's
    /// (`Greater` is reported as dir1 being newer); `None` if not compared.
    pub modified_time_comparison: Option<Ordering>,
    /// How the dir1 file's size orders against the dir2 file's (`Greater` is
    /// reported as dir1 being larger); `None` if not compared.
    pub size_comparison: Option<Ordering>,
    /// Result of the content comparison; `None` if contents were not compared.
    pub is_content_same: Option<bool>,
}
126
127impl FileComparisonResult {
128 pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
129 Self {
130 relative_path,
131 classification,
132 modified_time_comparison: None,
133 size_comparison: None,
134 is_content_same: None,
135 }
136 }
137
138 pub fn update(
139 &mut self,
140 comparer: &FileComparer,
141 should_compare_content: bool,
142 ) -> anyhow::Result<()> {
143 let (t1, t2) = comparer.modified();
144 self.modified_time_comparison = Some(t1.cmp(&t2));
145
146 let (s1, s2) = comparer.sizes();
147 self.size_comparison = Some(s1.cmp(&s2));
148
149 if should_compare_content && s1 == s2 {
150 self.is_content_same = Some(comparer.compare_contents()?);
151 }
152 Ok(())
153 }
154
155 pub(crate) fn update_moodified(&mut self, t1: SystemTime, t2: SystemTime) {
156 self.modified_time_comparison = Some(if t1.eq_nearly(t2) {
157 Ordering::Equal
158 } else {
159 t1.cmp(&t2)
160 })
161 }
162
163 pub(crate) fn update_size(&mut self, s1: u64, s2: u64) {
164 self.size_comparison = Some(s1.cmp(&s2));
165 }
166
167 pub fn is_identical(&self) -> bool {
170 self.classification == Classification::InBoth
171 && self.modified_time_comparison == Some(Ordering::Equal)
172 && self.size_comparison == Some(Ordering::Equal)
173 && self.is_content_same != Some(false)
174 }
175
176 pub(crate) fn is_identical_content(&self) -> Option<bool> {
177 match self.size_comparison {
178 None | Some(Ordering::Equal) => self.is_content_same,
179 _ => Some(false),
180 }
181 }
182
183 pub(crate) fn print(&self, output_format: OutputFormat, dir1_name: &str, dir2_name: &str) {
184 match output_format {
185 OutputFormat::Default => {
186 if !self.is_identical() {
187 println!(
188 "{}: {}",
189 self.relative_path.display(),
190 self.to_string(dir1_name, dir2_name)
191 )
192 }
193 }
194 OutputFormat::Symbol => println!(
195 "{} {}",
196 self.to_symbol_string(),
197 self.relative_path.display()
198 ),
199 _ => unreachable!(),
200 }
201 }
202
203 pub fn to_symbol_string(&self) -> String {
204 String::from_iter([
205 match self.classification {
206 Classification::OnlyInDir1 => '>',
207 Classification::OnlyInDir2 => '<',
208 Classification::InBoth => '=',
209 },
210 match self.modified_time_comparison {
211 None => ' ',
212 Some(Ordering::Greater) => '>',
213 Some(Ordering::Less) => '<',
214 Some(Ordering::Equal) => '=',
215 },
216 match self.size_comparison {
217 None => ' ',
218 Some(Ordering::Greater) => '>',
219 Some(Ordering::Less) => '<',
220 Some(Ordering::Equal) => {
221 if self.is_content_same == Some(false) {
222 '!'
223 } else {
224 '='
225 }
226 }
227 },
228 ])
229 }
230
231 pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
232 let mut parts = Vec::new();
233 match self.classification {
234 Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
235 Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
236 Classification::InBoth => {}
237 }
238 let mut has_equals = false;
239 match self.modified_time_comparison {
240 Some(Ordering::Greater) => parts.push(format!("{} is newer", dir1_name)),
241 Some(Ordering::Less) => parts.push(format!("{} is newer", dir2_name)),
242 Some(Ordering::Equal) => has_equals = true,
243 None => {}
244 }
245 match self.size_comparison {
246 Some(Ordering::Greater) => parts.push(format!("Size of {} is larger", dir1_name)),
247 Some(Ordering::Less) => parts.push(format!("Size of {} is larger", dir2_name)),
248 Some(Ordering::Equal) => has_equals = true,
249 None => {}
250 }
251 match self.is_content_same {
252 Some(false) => parts.push("Contents differ".to_string()),
253 Some(true) => has_equals = true,
254 None => {}
255 }
256
257 if parts.is_empty() {
258 if !has_equals {
259 return "Unknown".to_string();
260 }
261 return "Identical".to_string();
262 }
263 parts.join(", ")
264 }
265}
266
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes `content1` and `content2` to files in two temporary
    /// directories and asserts that every comparison strategy of
    /// `compare_contents` returns `expected`.
    fn check_compare(content1: &[u8], content2: &[u8], expected: bool) -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;
        let file1_path = dir1.path().join("file");
        let file2_path = dir2.path().join("file");
        fs::write(&file1_path, content1)?;
        fs::write(&file2_path, content2)?;
        let file1 = FileItem::try_from(file1_path.as_path())?;
        let file2 = FileItem::try_from(file2_path.as_path())?;

        let mut comparer = FileComparer::new(&file1, &file2);

        // Chunked reads where one buffer holds the whole file.
        comparer.buffer_size = 8192;
        assert_eq!(comparer.compare_contents()?, expected);

        // Chunked reads with a tiny buffer, so the comparison spans several
        // chunks and ends on a partially filled one.
        comparer.buffer_size = 4;
        assert_eq!(comparer.compare_contents()?, expected);

        // A zero buffer size selects the memory-mapped comparison.
        comparer.buffer_size = 0;
        assert_eq!(comparer.compare_contents()?, expected);

        // Hash-based comparison.
        let hasher1 = FileHasher::new_with_cache(&[dir1.path()])?;
        let hasher2 = FileHasher::new_with_cache(&[dir2.path()])?;
        comparer.hashers = Some((&hasher1, &hasher2));
        assert_eq!(comparer.compare_contents()?, expected);

        Ok(())
    }

    #[test]
    fn compare_contents_identical() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello world", true)
    }

    #[test]
    fn compare_contents_different() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello rust", false)
    }

    #[test]
    fn compare_contents_different_size() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello", false)
    }

    #[test]
    fn compare_contents_empty_files() -> anyhow::Result<()> {
        check_compare(b"", b"", true)
    }

    #[test]
    fn comparison_result_empty() {
        let result = FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        assert!(!result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Unknown");
        // Always three characters: classification, then one space each for
        // the modification-time and size comparisons that were not made.
        assert_eq!(result.to_symbol_string(), "=  ");
    }

    #[test]
    fn comparison_result_contents_skipped() {
        let mut result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        result.modified_time_comparison = Some(Ordering::Equal);
        result.size_comparison = Some(Ordering::Equal);
        assert!(result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Identical");
        assert_eq!(result.to_symbol_string(), "===");
    }
}