1use crate::{
2 Classification, FileComparer, FileComparisonResult, FileHasher, FileIterator, ProgressReporter,
3};
4
5use std::cmp::Ordering;
6use std::collections::HashMap;
7use std::path::{Path, PathBuf};
8use std::sync::mpsc;
9
/// Events sent over the progress channel while a directory comparison runs.
#[derive(Debug, Clone)]
enum CompareProgress {
    /// Sent once, immediately before the merge walk over both directories starts.
    StartOfComparison,
    /// Sent after every `Result`; the receiver in `run` advances its progress
    /// counter by one for each of these.
    FileDone,
    /// Number of results produced by the walk; sent once the walk loop has ended.
    TotalFiles(usize),
    /// One comparison result, tagged with its sequence index in walk order so
    /// that out-of-order results can be put back in sequence.
    Result(usize, FileComparisonResult),
}
17
/// Running tally of comparison results, grouped by outcome.
///
/// All counters start at zero via `Default`. `Debug`, `Clone`, `PartialEq`
/// and `Eq` are derived so a summary can be logged, snapshotted and compared
/// as a whole (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct ComparisonSummary {
    /// Files present on both sides.
    pub in_both: usize,
    /// Files present only on the first side.
    pub only_in_dir1: usize,
    /// Files present only on the second side.
    pub only_in_dir2: usize,
    /// Files on both sides whose modification-time comparison is `Greater`.
    pub dir1_newer: usize,
    /// Files on both sides whose modification-time comparison is `Less`.
    pub dir2_newer: usize,
    /// Files on both sides, neither side newer, whose sizes are not known to be equal.
    pub same_time_diff_size: usize,
    /// Files on both sides, neither side newer, equal sizes, content known to differ.
    pub same_time_size_diff_content: usize,
}
28
29impl ComparisonSummary {
30 pub fn update(&mut self, result: &FileComparisonResult) {
31 match result.classification {
32 Classification::OnlyInDir1 => self.only_in_dir1 += 1,
33 Classification::OnlyInDir2 => self.only_in_dir2 += 1,
34 Classification::InBoth => {
35 self.in_both += 1;
36 match result.modified_time_comparison {
37 Some(Ordering::Greater) => self.dir1_newer += 1,
38 Some(Ordering::Less) => self.dir2_newer += 1,
39 _ => {
40 if result.size_comparison != Some(Ordering::Equal) {
41 self.same_time_diff_size += 1;
42 } else if result.is_content_same == Some(false) {
43 self.same_time_size_diff_content += 1;
44 }
45 }
46 }
47 }
48 }
49 }
50
51 pub fn print(
52 &self,
53 mut writer: impl std::io::Write,
54 dir1_name: &str,
55 dir2_name: &str,
56 ) -> std::io::Result<()> {
57 writeln!(writer, "Files in both: {}", self.in_both)?;
58 writeln!(writer, "Files only in {}: {}", dir1_name, self.only_in_dir1)?;
59 writeln!(writer, "Files only in {}: {}", dir2_name, self.only_in_dir2)?;
60 writeln!(
61 writer,
62 "Files in both ({} is newer): {}",
63 dir1_name, self.dir1_newer
64 )?;
65 writeln!(
66 writer,
67 "Files in both ({} is newer): {}",
68 dir2_name, self.dir2_newer
69 )?;
70 writeln!(
71 writer,
72 "Files in both (same time, different size): {}",
73 self.same_time_diff_size
74 )?;
75 writeln!(
76 writer,
77 "Files in both (same time and size, different content): {}",
78 self.same_time_size_diff_content
79 )?;
80 Ok(())
81 }
82}
83
/// Strategy used to decide whether two files found on both sides are the same.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Do not request a content comparison; only the non-content checks run.
    Size,
    /// Attach a `FileHasher` for each side to the comparison.
    /// This is the method `DirectoryComparer::new` selects.
    Hash,
    /// Like `Hash`, but cached hash data is dropped first: the whole cache for a
    /// directory run, or only the two files' entries for a single-file run.
    Rehash,
    /// Request a content comparison without attaching hashers.
    /// NOTE(review): presumably a direct byte comparison - confirm in `FileComparer`.
    Full,
}
96
97pub struct DirectoryComparer {
99 dir1: PathBuf,
100 dir2: PathBuf,
101 pub is_symbols_format: bool,
102 pub buffer_size: usize,
103 pub comparison_method: FileComparisonMethod,
104}
105
106impl DirectoryComparer {
107 pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
109 Self {
110 dir1,
111 dir2,
112 is_symbols_format: false,
113 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
114 comparison_method: FileComparisonMethod::Hash,
115 }
116 }
117
118 pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
121 rayon::ThreadPoolBuilder::new()
122 .num_threads(parallel)
123 .build_global()
124 .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
125 Ok(())
126 }
127
128 pub fn run(&self) -> anyhow::Result<()> {
131 if self.dir1.is_file() {
132 return self.run_file_comparer();
133 }
134
135 let progress = ProgressReporter::new();
136 progress.set_message("Scanning directories...");
137 let start_time = std::time::Instant::now();
138 let mut summary = ComparisonSummary::default();
139 let dir1_str = self.dir1.to_str().unwrap_or("dir1");
140 let dir2_str = self.dir2.to_str().unwrap_or("dir2");
141 let (tx, rx) = mpsc::channel();
142 std::thread::scope(|scope| {
143 scope.spawn(move || {
144 if let Err(e) = self.compare_streaming_ordered(tx) {
145 log::error!("Error during comparison: {}", e);
146 }
147 });
148
149 while let Ok(event) = rx.recv() {
151 match event {
152 CompareProgress::StartOfComparison => {
153 progress.set_message("Comparing files...");
154 }
155 CompareProgress::TotalFiles(total_files) => {
156 progress.set_length(total_files as u64);
157 progress.set_message("");
158 }
159 CompareProgress::Result(_, result) => {
160 summary.update(&result);
161 if self.is_symbols_format {
162 progress.suspend(|| {
163 println!(
164 "{} {}",
165 result.to_symbol_string(),
166 result.relative_path.display()
167 );
168 })
169 } else if !result.is_identical() {
170 progress.suspend(|| {
171 println!(
172 "{}: {}",
173 result.relative_path.display(),
174 result.to_string(dir1_str, dir2_str)
175 );
176 });
177 }
178 }
179 CompareProgress::FileDone => progress.inc(1),
180 }
181 }
182 });
183 progress.finish();
184 eprintln!("\n--- Comparison Summary ---");
185 summary.print(&mut std::io::stderr(), dir1_str, dir2_str)?;
186 eprintln!("Comparison finished in {:?}.", start_time.elapsed());
187 Ok(())
188 }
189
190 fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
195 let (tx_unordered, rx_unordered) = mpsc::channel();
196 std::thread::scope(|scope| {
197 scope.spawn(move || {
198 if let Err(e) = self.compare_streaming_unordered(tx_unordered) {
199 log::error!("Error during unordered comparison: {}", e);
200 }
201 });
202
203 let mut buffer = HashMap::new();
204 let mut next_index = 0;
205 for event in rx_unordered {
206 if let CompareProgress::Result(i, _) = &event {
207 let index = *i;
208 if index == next_index {
209 tx.send(event)?;
210 next_index += 1;
211 while let Some(buffered) = buffer.remove(&next_index) {
212 tx.send(buffered)?;
213 next_index += 1;
214 }
215 } else {
216 buffer.insert(index, event);
217 }
218 } else {
219 tx.send(event)?;
220 }
221 }
222 Ok::<(), anyhow::Error>(())
223 })?;
224 Ok(())
225 }
226
/// Walks both directories in lockstep and sends comparison events on `tx`.
///
/// Entries found on one side only are reported directly from the walking
/// thread. Entries found on both sides are compared in tasks spawned on the
/// rayon scope, so their `Result` events may arrive out of index order. Every
/// `Result` is followed by a `FileDone`.
///
/// NOTE(review): the walk assumes both iterators yield relative paths in the
/// same sorted order - confirm in `FileIterator`.
///
/// # Errors
/// Returns an error when creating the hashers, clearing or saving a hash cache,
/// or sending on `tx` from the walking thread fails. Failures inside spawned
/// tasks are only logged.
fn compare_streaming_unordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
    let mut it1 = FileIterator::new(self.dir1.clone());
    let mut it2 = FileIterator::new(self.dir2.clone());
    let hashers = self.get_hashers(&self.dir1, &self.dir2)?;
    if let Some((h1, h2)) = &hashers {
        it1.hasher = Some(h1);
        it2.hasher = Some(h2);
        // Rehash mode starts from empty caches.
        if self.comparison_method == FileComparisonMethod::Rehash {
            h1.clear_cache()?;
            h2.clear_cache()?;
        }
    }

    let mut cur1 = it1.next();
    let mut cur2 = it2.next();
    // Sequence number given to each result, in walk order.
    let mut index = 0;
    tx.send(CompareProgress::StartOfComparison)?;
    rayon::scope(|scope| {
        loop {
            // Decide which side is "behind"; an exhausted side always loses.
            let cmp = match (&cur1, &cur2) {
                (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
                (Some(_), None) => Ordering::Less,
                (None, Some(_)) => Ordering::Greater,
                (None, None) => break,
            };
            match cmp {
                // Current entry of the first side has no counterpart.
                Ordering::Less => {
                    let (rel1, _) = cur1.take().unwrap();
                    let result = FileComparisonResult::new(rel1, Classification::OnlyInDir1);
                    tx.send(CompareProgress::Result(index, result))?;
                    tx.send(CompareProgress::FileDone)?;
                    index += 1;
                    cur1 = it1.next();
                }
                // Current entry of the second side has no counterpart.
                Ordering::Greater => {
                    let (rel2, _) = cur2.take().unwrap();
                    let result = FileComparisonResult::new(rel2, Classification::OnlyInDir2);
                    tx.send(CompareProgress::Result(index, result))?;
                    tx.send(CompareProgress::FileDone)?;
                    index += 1;
                    cur2 = it2.next();
                }
                // Same relative path on both sides: compare in a spawned task.
                Ordering::Equal => {
                    let (rel_path, path1) = cur1.take().unwrap();
                    let (_, path2) = cur2.take().unwrap();
                    let buffer_size = self.buffer_size;
                    let tx_clone = tx.clone();
                    let i = index;
                    // Size mode does not request a content comparison.
                    let should_compare = self.comparison_method != FileComparisonMethod::Size;
                    let hashers_ref = hashers.as_ref();
                    scope.spawn(move |_| {
                        let mut comparer = FileComparer::new(&path1, &path2);
                        comparer.buffer_size = buffer_size;
                        if let Some((h1, h2)) = hashers_ref {
                            comparer.hashers = Some((h1, h2));
                        }
                        let mut result =
                            FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
                        // A failed comparison is logged; the result is still sent.
                        if let Err(error) = result.update(&comparer, should_compare) {
                            log::error!("Error during comparison of {:?}: {}", rel_path, error);
                        }
                        if tx_clone.send(CompareProgress::Result(i, result)).is_err()
                            || tx_clone.send(CompareProgress::FileDone).is_err()
                        {
                            log::error!("Send failed during comparison of {:?}", rel_path);
                        }
                    });
                    index += 1;
                    cur1 = it1.next();
                    cur2 = it2.next();
                }
            }
        }
        // The walk is finished here; `index` is the number of results issued.
        tx.send(CompareProgress::TotalFiles(index))
    })?;
    Self::save_hashers(hashers)?;
    Ok(())
}
305
306 fn get_hashers(
307 &self,
308 dir1: &Path,
309 dir2: &Path,
310 ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
311 if self.comparison_method == FileComparisonMethod::Hash
312 || self.comparison_method == FileComparisonMethod::Rehash
313 {
314 let (h1, h2) = rayon::join(
315 || FileHasher::new(dir1.to_path_buf()),
316 || FileHasher::new(dir2.to_path_buf()),
317 );
318 return Ok(Some((h1, h2)));
319 }
320 Ok(None)
321 }
322
323 fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
324 if let Some((h1, h2)) = hashers {
325 let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
326 r1?;
327 r2?;
328 }
329 Ok(())
330 }
331
332 fn run_file_comparer(&self) -> anyhow::Result<()> {
333 assert!(self.dir1.is_file());
334 let file1 = &self.dir1;
335 let dir1 = file1.parent().unwrap();
336 let file1_name = file1.file_name().unwrap();
337 let (dir2, file2) = if self.dir2.is_file() {
338 (self.dir2.parent().unwrap(), self.dir2.clone())
339 } else {
340 (self.dir2.as_path(), self.dir2.join(file1_name))
341 };
342
343 let mut comparer = FileComparer::new(file1, &file2);
344 comparer.buffer_size = self.buffer_size;
345 let hashers = self.get_hashers(dir1, dir2)?;
346 if let Some((h1, h2)) = &hashers {
347 if self.comparison_method == FileComparisonMethod::Rehash {
348 h1.remove_cache_entry(file1)?;
349 h2.remove_cache_entry(&file2)?;
350 }
351 comparer.hashers = Some((h1, h2));
352 }
353 let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
354 let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
355 result.update(&comparer, should_compare_content)?;
356 let file1_str = file1.to_str().unwrap_or("file1");
357 if self.is_symbols_format {
358 println!("{} {}", result.to_symbol_string(), file1_str);
359 } else {
360 let file2_str = file2.to_str().unwrap_or("file2");
361 println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
362 }
363 Self::save_hashers(hashers)?;
364 Ok(())
365 }
366}
367
368#[cfg(test)]
369mod tests {
370 use super::*;
371 use std::fs;
372 use std::io::Write;
373
/// Feeds one result of each classification into a summary and checks the counters.
#[test]
fn test_comparison_summary() {
    let mut newer_in_dir1 = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
    newer_in_dir1.modified_time_comparison = Some(Ordering::Greater);
    let inputs = [
        FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1),
        FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2),
        newer_in_dir1,
    ];

    let mut summary = ComparisonSummary::default();
    for result in &inputs {
        summary.update(result);
    }

    assert_eq!(summary.only_in_dir1, 1);
    assert_eq!(summary.only_in_dir2, 1);
    assert_eq!(summary.in_both, 1);
    assert_eq!(summary.dir1_newer, 1);
}
391
/// End-to-end check of the ordered comparison on two temporary directories
/// containing an identical file, a differing file and one unique file per side.
#[test]
fn test_directory_comparer_integration() -> anyhow::Result<()> {
    let dir1 = tempfile::tempdir()?;
    let dir2 = tempfile::tempdir()?;

    // Creates a file at `path` holding `contents`.
    let write_file = |path: PathBuf, contents: &[u8]| -> std::io::Result<()> {
        fs::File::create(path)?.write_all(contents)
    };
    write_file(dir1.path().join("same.txt"), b"same content")?;
    write_file(dir1.path().join("only1.txt"), b"only in dir1")?;
    write_file(dir2.path().join("same.txt"), b"same content")?;
    write_file(dir2.path().join("only2.txt"), b"only in dir2")?;
    write_file(dir1.path().join("diff.txt"), b"content 1")?;
    write_file(dir2.path().join("diff.txt"), b"content 222")?;

    let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
    let (tx, rx) = mpsc::channel();

    comparer.compare_streaming_ordered(tx)?;

    // The sender is gone by now, so the iterator ends after the queued events.
    let mut results: Vec<_> = rx
        .iter()
        .filter_map(|event| match event {
            CompareProgress::Result(_, r) => Some(r),
            _ => None,
        })
        .collect();
    results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

    assert_eq!(results.len(), 4);

    let differing = &results[0];
    assert_eq!(differing.relative_path.to_str().unwrap(), "diff.txt");
    assert_eq!(differing.classification, Classification::InBoth);
    assert!(
        differing.is_content_same == Some(false)
            || differing.size_comparison != Some(Ordering::Equal)
    );

    let left_only = &results[1];
    assert_eq!(left_only.relative_path.to_str().unwrap(), "only1.txt");
    assert_eq!(left_only.classification, Classification::OnlyInDir1);

    let right_only = &results[2];
    assert_eq!(right_only.relative_path.to_str().unwrap(), "only2.txt");
    assert_eq!(right_only.classification, Classification::OnlyInDir2);

    let identical = &results[3];
    assert_eq!(identical.relative_path.to_str().unwrap(), "same.txt");
    assert_eq!(identical.classification, Classification::InBoth);
    assert_eq!(identical.size_comparison, Some(Ordering::Equal));

    Ok(())
}
463
/// In `Size` mode two same-sized files with different bytes must be reported
/// with equal size and no content verdict.
#[test]
fn test_directory_comparer_size_mode() -> anyhow::Result<()> {
    let dir1 = tempfile::tempdir()?;
    let dir2 = tempfile::tempdir()?;

    // Same length, different content.
    fs::File::create(dir1.path().join("file.txt"))?.write_all(b"content 1")?;
    fs::File::create(dir2.path().join("file.txt"))?.write_all(b"content 2")?;

    let mut comparer =
        DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
    comparer.comparison_method = FileComparisonMethod::Size;
    let (tx, rx) = mpsc::channel();

    comparer.compare_streaming_ordered(tx)?;

    let results: Vec<_> = rx
        .iter()
        .filter_map(|event| match event {
            CompareProgress::Result(_, r) => Some(r),
            _ => None,
        })
        .collect();

    assert_eq!(results.len(), 1);
    let only = &results[0];
    assert_eq!(only.relative_path.to_str().unwrap(), "file.txt");
    assert_eq!(only.classification, Classification::InBoth);
    assert_eq!(only.size_comparison, Some(Ordering::Equal));
    assert_eq!(only.is_content_same, None);

    Ok(())
}
499}