1use crate::file_hasher::FileHasher;
2use indicatif::FormattedDuration;
3use std::cmp::Ordering;
4use std::fs;
5use std::io::{self, Read};
6use std::path::{Path, PathBuf};
7
/// Where a file was found when two directories are compared.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Classification {
    /// The file is present only in the first directory.
    OnlyInDir1,
    /// The file is present only in the second directory.
    OnlyInDir2,
    /// The file is present in both directories.
    InBoth,
}
18
19pub struct FileComparer<'a> {
21 path1: &'a Path,
22 path2: &'a Path,
23 pub buffer_size: usize,
24 pub hashers: Option<(&'a FileHasher, &'a FileHasher)>,
25}
26
27impl<'a> FileComparer<'a> {
28 pub const DEFAULT_BUFFER_SIZE_KB: usize = 2 * 1024;
29 pub const DEFAULT_BUFFER_SIZE: usize = Self::DEFAULT_BUFFER_SIZE_KB * 1024;
30
31 pub fn new(path1: &'a Path, path2: &'a Path) -> Self {
32 Self {
33 path1,
34 path2,
35 buffer_size: Self::DEFAULT_BUFFER_SIZE,
36 hashers: None,
37 }
38 }
39
40 pub fn metadata(&self) -> io::Result<(fs::Metadata, fs::Metadata)> {
41 let m1 = fs::metadata(self.path1)?;
42 let m2 = fs::metadata(self.path2)?;
43 Ok((m1, m2))
44 }
45
46 pub(crate) fn compare_contents(&self) -> io::Result<bool> {
47 if let Some((hasher1, hasher2)) = self.hashers {
48 let (hash1, hash2) = rayon::join(
49 || hasher1.get_hash(self.path1),
50 || hasher2.get_hash(self.path2),
51 );
52 return Ok(hash1? == hash2?);
53 }
54
55 let start_time = std::time::Instant::now();
56 let mut f1 = fs::File::open(self.path1)?;
57 let mut f2 = fs::File::open(self.path2)?;
58 if self.buffer_size == 0 {
59 let len1 = f1.metadata()?.len();
60 let len2 = f2.metadata()?.len();
61 if len1 != len2 {
62 return Ok(false);
63 }
64 if len1 == 0 {
65 return Ok(true);
66 }
67 let mmap1 = unsafe { memmap2::MmapOptions::new().map(&f1)? };
68 let mmap2 = unsafe { memmap2::MmapOptions::new().map(&f2)? };
69 let result = mmap1[..] == mmap2[..];
70 log::debug!(
71 "Compared in {}: {:?}",
72 FormattedDuration(start_time.elapsed()),
73 self.path1
74 );
75 return Ok(result);
76 }
77
78 let mut buf1 = vec![0u8; self.buffer_size];
79 let mut buf2 = vec![0u8; self.buffer_size];
80 loop {
81 let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
85 let n1 = n1?;
86 let n2 = n2?;
87 if n1 != n2 || buf1[..n1] != buf2[..n2] {
88 log::debug!(
89 "Compared in {}: {:?}",
90 FormattedDuration(start_time.elapsed()),
91 self.path1
92 );
93 return Ok(false);
94 }
95 if n1 == 0 {
96 log::debug!(
97 "Compared in {}: {:?}",
98 FormattedDuration(start_time.elapsed()),
99 self.path1
100 );
101 return Ok(true);
102 }
103 }
104 }
105}
106
107#[derive(Debug, Clone)]
109pub struct FileComparisonResult {
110 pub relative_path: PathBuf,
112 pub classification: Classification,
114 pub modified_time_comparison: Option<Ordering>,
116 pub size_comparison: Option<Ordering>,
118 pub is_content_same: Option<bool>,
120}
121
122impl FileComparisonResult {
123 pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
124 Self {
125 relative_path,
126 classification,
127 modified_time_comparison: None,
128 size_comparison: None,
129 is_content_same: None,
130 }
131 }
132
133 pub fn update(
134 &mut self,
135 comparer: &FileComparer,
136 should_compare_content: bool,
137 ) -> anyhow::Result<()> {
138 let (m1, m2) = comparer.metadata()?;
139 let t1 = m1.modified()?;
140 let t2 = m2.modified()?;
141 self.modified_time_comparison = Some(t1.cmp(&t2));
142
143 let s1 = m1.len();
144 let s2 = m2.len();
145 self.size_comparison = Some(s1.cmp(&s2));
146
147 if should_compare_content && s1 == s2 {
148 self.is_content_same = Some(comparer.compare_contents()?);
149 }
150 Ok(())
151 }
152
153 pub fn is_identical(&self) -> bool {
156 self.classification == Classification::InBoth
157 && self.modified_time_comparison == Some(Ordering::Equal)
158 && self.size_comparison == Some(Ordering::Equal)
159 && self.is_content_same != Some(false)
160 }
161
162 pub fn to_symbol_string(&self) -> String {
163 String::from_iter([
164 match self.classification {
165 Classification::OnlyInDir1 => '>',
166 Classification::OnlyInDir2 => '<',
167 Classification::InBoth => '=',
168 },
169 match self.modified_time_comparison {
170 None => ' ',
171 Some(Ordering::Greater) => '>',
172 Some(Ordering::Less) => '<',
173 Some(Ordering::Equal) => '=',
174 },
175 match self.size_comparison {
176 None => ' ',
177 Some(Ordering::Greater) => '>',
178 Some(Ordering::Less) => '<',
179 Some(Ordering::Equal) => {
180 if self.is_content_same == Some(false) {
181 '!'
182 } else {
183 '='
184 }
185 }
186 },
187 ])
188 }
189
190 pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
191 let mut parts = Vec::new();
192 match self.classification {
193 Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
194 Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
195 Classification::InBoth => {}
196 }
197 let mut has_equals = false;
198 match self.modified_time_comparison {
199 Some(Ordering::Greater) => parts.push(format!("{} is newer", dir1_name)),
200 Some(Ordering::Less) => parts.push(format!("{} is newer", dir2_name)),
201 Some(Ordering::Equal) => has_equals = true,
202 None => {}
203 }
204 match self.size_comparison {
205 Some(Ordering::Greater) => parts.push(format!("Size of {} is larger", dir1_name)),
206 Some(Ordering::Less) => parts.push(format!("Size of {} is larger", dir2_name)),
207 Some(Ordering::Equal) => has_equals = true,
208 None => {}
209 }
210 match self.is_content_same {
211 Some(false) => parts.push("Contents differ".to_string()),
212 Some(true) => has_equals = true,
213 None => {}
214 }
215
216 if parts.is_empty() {
217 if !has_equals {
218 return "Unknown".to_string();
219 }
220 return "Identical".to_string();
221 }
222 parts.join(", ")
223 }
224}
225
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes the two contents to files in separate temporary directories and
    /// checks that every comparison strategy yields `expected`.
    fn check_compare(content1: &[u8], content2: &[u8], expected: bool) -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;
        let f1_path = dir1.path().join("file");
        let f2_path = dir2.path().join("file");
        fs::write(&f1_path, content1)?;
        fs::write(&f2_path, content2)?;

        let mut comparer = FileComparer::new(&f1_path, &f2_path);
        // Buffered comparison with a buffer larger than the content.
        comparer.buffer_size = 8192;
        assert_eq!(comparer.compare_contents()?, expected);

        // Buffered comparison with a tiny buffer, so several chunks are read.
        comparer.buffer_size = 3;
        assert_eq!(comparer.compare_contents()?, expected);

        // A zero buffer size selects the memory-mapped comparison.
        comparer.buffer_size = 0;
        assert_eq!(comparer.compare_contents()?, expected);

        // Hash-based comparison.
        let hasher1 = FileHasher::new(&[dir1.path()])?;
        let hasher2 = FileHasher::new(&[dir2.path()])?;
        comparer.hashers = Some((&hasher1, &hasher2));
        assert_eq!(comparer.compare_contents()?, expected);

        Ok(())
    }

    #[test]
    fn compare_contents_identical() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello world", true)
    }

    #[test]
    fn compare_contents_different() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello rust", false)
    }

    #[test]
    fn compare_contents_different_size() -> anyhow::Result<()> {
        check_compare(b"hello world", b"hello", false)
    }

    #[test]
    fn compare_contents_empty_files() -> anyhow::Result<()> {
        check_compare(b"", b"", true)
    }

    #[test]
    fn comparison_result_empty() {
        let result = FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        assert!(!result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Unknown");
        // Always three symbols: classification, then blank time and size columns.
        assert_eq!(result.to_symbol_string(), "=  ");
    }

    #[test]
    fn comparison_result_contents_skipped() {
        let mut result =
            FileComparisonResult::new(PathBuf::from("test.txt"), Classification::InBoth);
        result.modified_time_comparison = Some(Ordering::Equal);
        result.size_comparison = Some(Ordering::Equal);
        // Content was never compared, so it is not known to differ.
        assert!(result.is_identical());
        assert_eq!(result.to_string("dir1", "dir2"), "Identical");
        assert_eq!(result.to_symbol_string(), "===");
    }
}