1use crate::{FileHasher, FileItem};
2use indicatif::FormattedDuration;
3use std::cmp::Ordering;
4use std::fs;
5use std::io::Read;
6use std::path::PathBuf;
7
/// Where a file was found when two directory trees are compared.
///
/// `Hash` is derived alongside the comparison traits so the value can be used
/// as a key in `HashMap`/`HashSet` (e.g. to group results by classification).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Classification {
    /// The file exists only in the first directory.
    OnlyInDir1,
    /// The file exists only in the second directory.
    OnlyInDir2,
    /// The file exists in both directories.
    InBoth,
}
18
19pub struct FileComparer<'a> {
21 file1: &'a FileItem,
22 file2: &'a FileItem,
23 pub buffer_size: usize,
24 pub hashers: Option<(&'a FileHasher, &'a FileHasher)>,
25}
26
27impl<'a> FileComparer<'a> {
28 pub const DEFAULT_BUFFER_SIZE_KB: usize = 2 * 1024;
29 pub const DEFAULT_BUFFER_SIZE: usize = Self::DEFAULT_BUFFER_SIZE_KB * 1024;
30
31 pub fn new(file1: &'a FileItem, file2: &'a FileItem) -> Self {
32 Self {
33 file1,
34 file2,
35 buffer_size: Self::DEFAULT_BUFFER_SIZE,
36 hashers: None,
37 }
38 }
39
40 pub fn sizes(&self) -> (u64, u64) {
41 (self.file1.size(), self.file2.size())
42 }
43
44 pub fn modified(&self) -> (std::time::SystemTime, std::time::SystemTime) {
45 (self.file1.modified(), self.file2.modified())
46 }
47
48 pub(crate) fn compare_contents(&self) -> anyhow::Result<bool> {
49 let len1 = self.file1.size();
50 let len2 = self.file2.size();
51 if len1 != len2 {
52 return Ok(false);
53 }
54 if len1 == 0 {
55 return Ok(true);
56 }
57
58 if let Some((hasher1, hasher2)) = self.hashers {
59 let (hash1, hash2) = rayon::join(
60 || hasher1.get_hash(self.file1),
61 || hasher2.get_hash(self.file2),
62 );
63 return Ok(hash1? == hash2?);
64 }
65
66 let start_time = std::time::Instant::now();
67 let mut f1 = fs::File::open(self.file1.path())?;
68 let mut f2 = fs::File::open(self.file2.path())?;
69 if self.buffer_size == 0 {
70 let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
71 let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
72 let result = mmap1[..] == mmap2[..];
73 log::debug!(
74 "Compared in {}: '{}'",
75 FormattedDuration(start_time.elapsed()),
76 self.file1
77 );
78 return Ok(result);
79 }
80
81 let mut buf1 = vec![0u8; self.buffer_size];
82 let mut buf2 = vec![0u8; self.buffer_size];
83 loop {
84 let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
88 let n1 = n1?;
89 let n2 = n2?;
90 if n1 != n2 || buf1[..n1] != buf2[..n2] {
91 log::debug!(
92 "Compared in {}: '{}'",
93 FormattedDuration(start_time.elapsed()),
94 self.file1
95 );
96 return Ok(false);
97 }
98 if n1 == 0 {
99 log::debug!(
100 "Compared in {}: '{}'",
101 FormattedDuration(start_time.elapsed()),
102 self.file1
103 );
104 return Ok(true);
105 }
106 }
107 }
108}
109
110#[derive(Debug, Clone)]
112pub struct FileComparisonResult {
113 pub relative_path: PathBuf,
115 pub classification: Classification,
117 pub modified_time_comparison: Option<Ordering>,
119 pub size_comparison: Option<Ordering>,
121 pub is_content_same: Option<bool>,
123}
124
125impl FileComparisonResult {
126 pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
127 Self {
128 relative_path,
129 classification,
130 modified_time_comparison: None,
131 size_comparison: None,
132 is_content_same: None,
133 }
134 }
135
136 pub fn update(
137 &mut self,
138 comparer: &FileComparer,
139 should_compare_content: bool,
140 ) -> anyhow::Result<()> {
141 let (t1, t2) = comparer.modified();
142 self.modified_time_comparison = Some(t1.cmp(&t2));
143
144 let (s1, s2) = comparer.sizes();
145 self.size_comparison = Some(s1.cmp(&s2));
146
147 if should_compare_content && s1 == s2 {
148 self.is_content_same = Some(comparer.compare_contents()?);
149 }
150 Ok(())
151 }
152
153 pub fn is_identical(&self) -> bool {
156 self.classification == Classification::InBoth
157 && self.modified_time_comparison == Some(Ordering::Equal)
158 && self.size_comparison == Some(Ordering::Equal)
159 && self.is_content_same != Some(false)
160 }
161
162 pub fn to_symbol_string(&self) -> String {
163 String::from_iter([
164 match self.classification {
165 Classification::OnlyInDir1 => '>',
166 Classification::OnlyInDir2 => '<',
167 Classification::InBoth => '=',
168 },
169 match self.modified_time_comparison {
170 None => ' ',
171 Some(Ordering::Greater) => '>',
172 Some(Ordering::Less) => '<',
173 Some(Ordering::Equal) => '=',
174 },
175 match self.size_comparison {
176 None => ' ',
177 Some(Ordering::Greater) => '>',
178 Some(Ordering::Less) => '<',
179 Some(Ordering::Equal) => {
180 if self.is_content_same == Some(false) {
181 '!'
182 } else {
183 '='
184 }
185 }
186 },
187 ])
188 }
189
190 pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
191 let mut parts = Vec::new();
192 match self.classification {
193 Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
194 Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
195 Classification::InBoth => {}
196 }
197 let mut has_equals = false;
198 match self.modified_time_comparison {
199 Some(Ordering::Greater) => parts.push(format!("{} is newer", dir1_name)),
200 Some(Ordering::Less) => parts.push(format!("{} is newer", dir2_name)),
201 Some(Ordering::Equal) => has_equals = true,
202 None => {}
203 }
204 match self.size_comparison {
205 Some(Ordering::Greater) => parts.push(format!("Size of {} is larger", dir1_name)),
206 Some(Ordering::Less) => parts.push(format!("Size of {} is larger", dir2_name)),
207 Some(Ordering::Equal) => has_equals = true,
208 None => {}
209 }
210 match self.is_content_same {
211 Some(false) => parts.push("Contents differ".to_string()),
212 Some(true) => has_equals = true,
213 None => {}
214 }
215
216 if parts.is_empty() {
217 if !has_equals {
218 return "Unknown".to_string();
219 }
220 return "Identical".to_string();
221 }
222 parts.join(", ")
223 }
224}
225
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes the two contents to files in separate temporary directories and
    /// checks that every comparison strategy yields `expected`.
    fn check_compare(content1: &[u8], content2: &[u8], expected: bool) -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;
        let file1_path = dir1.path().join("file");
        let file2_path = dir2.path().join("file");
        fs::write(&file1_path, content1)?;
        fs::write(&file2_path, content2)?;
        let file1 = FileItem::try_from(file1_path.as_path())?;
        let file2 = FileItem::try_from(file2_path.as_path())?;

        let mut comparer = FileComparer::new(&file1, &file2);

        // Buffered reads.
        comparer.buffer_size = 8192;
        assert_eq!(comparer.compare_contents()?, expected);

        // Memory-mapped comparison (buffer size 0).
        comparer.buffer_size = 0;
        assert_eq!(comparer.compare_contents()?, expected);

        // Hash-based comparison.
        let hasher1 = FileHasher::new_with_cache(&[dir1.path()])?;
        let hasher2 = FileHasher::new_with_cache(&[dir2.path()])?;
        comparer.hashers = Some((&hasher1, &hasher2));
        assert_eq!(comparer.compare_contents()?, expected);

        Ok(())
    }

    #[test]
    fn compare_contents_identical() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello world", true)
    }

    #[test]
    fn compare_contents_different() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello rust", false)
    }

    #[test]
    fn compare_contents_different_size() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello", false)
    }

    #[test]
    fn compare_contents_empty_files() -> anyhow::Result<()> {
        check_compare(b"", b"", true)
    }

    #[test]
    fn comparison_result_empty() {
        let result = FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        assert!(!result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Unknown");
        // The symbol string always has three columns; the two comparisons
        // that were never recorded render as blanks.
        assert_eq!(result.to_symbol_string(), "=  ");
    }

    #[test]
    fn comparison_result_contents_skipped() {
        let mut result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        result.modified_time_comparison = Some(Ordering::Equal);
        result.size_comparison = Some(Ordering::Equal);
        assert!(result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Identical");
        assert_eq!(result.to_symbol_string(), "===");
    }
}