1use crate::{
2 Classification, FileComparer, FileComparisonResult, FileHasher, FileIterator, ProgressReporter,
3};
4use globset::GlobSet;
5
6use std::cmp::Ordering;
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::mpsc;
10
/// Progress events sent over an `mpsc` channel from the comparison code to the
/// reporting loop in `DirectoryComparer::run`.
#[derive(Debug, Clone)]
enum CompareProgress {
    /// Sent once, after the directory iterators (and hashers, if any) are set
    /// up and just before the merge loop starts.
    StartOfComparison,
    /// Sent after each `Result`; the reporting loop advances the progress bar by one.
    FileDone,
    /// Number of entries visited; sent once after the directory walk has ended.
    TotalFiles(usize),
    /// Result for one entry, tagged with its sequence index in the walk so
    /// that results finishing out of order can be re-sequenced.
    Result(usize, FileComparisonResult),
}
18
19#[derive(Default)]
20struct ComparisonSummary {
21 pub in_both: usize,
22 pub only_in_dir1: usize,
23 pub only_in_dir2: usize,
24 pub dir1_newer: usize,
25 pub dir2_newer: usize,
26 pub same_time_diff_size: usize,
27 pub same_time_size_diff_content: usize,
28}
29
30impl ComparisonSummary {
31 pub fn update(&mut self, result: &FileComparisonResult) {
32 match result.classification {
33 Classification::OnlyInDir1 => self.only_in_dir1 += 1,
34 Classification::OnlyInDir2 => self.only_in_dir2 += 1,
35 Classification::InBoth => {
36 self.in_both += 1;
37 match result.modified_time_comparison {
38 Some(Ordering::Greater) => self.dir1_newer += 1,
39 Some(Ordering::Less) => self.dir2_newer += 1,
40 _ => {
41 if result.size_comparison != Some(Ordering::Equal) {
42 self.same_time_diff_size += 1;
43 } else if result.is_content_same == Some(false) {
44 self.same_time_size_diff_content += 1;
45 }
46 }
47 }
48 }
49 }
50 }
51
52 pub fn print(
53 &self,
54 mut writer: impl std::io::Write,
55 dir1_name: &str,
56 dir2_name: &str,
57 ) -> std::io::Result<()> {
58 writeln!(writer, "Files in both: {}", self.in_both)?;
59 writeln!(writer, "Files only in {}: {}", dir1_name, self.only_in_dir1)?;
60 writeln!(writer, "Files only in {}: {}", dir2_name, self.only_in_dir2)?;
61 writeln!(
62 writer,
63 "Files in both ({} is newer): {}",
64 dir1_name, self.dir1_newer
65 )?;
66 writeln!(
67 writer,
68 "Files in both ({} is newer): {}",
69 dir2_name, self.dir2_newer
70 )?;
71 writeln!(
72 writer,
73 "Files in both (same time, different size): {}",
74 self.same_time_diff_size
75 )?;
76 writeln!(
77 writer,
78 "Files in both (same time and size, different content): {}",
79 self.same_time_size_diff_content
80 )?;
81 Ok(())
82 }
83}
84
/// Selects how the content of files present on both sides is compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Content comparison is skipped; results are updated without comparing content.
    Size,
    /// A `FileHasher` is created per side and handed to the comparer.
    Hash,
    /// Like `Hash`, but the hashers' cached data is cleared first
    /// (the whole cache for a directory run, one entry per file for a file run).
    Rehash,
    /// Content is compared without hashers.
    /// NOTE(review): presumably a direct byte-by-byte comparison — confirm in `FileComparer`.
    Full,
}
97
/// Compares two directories (or two files) and reports the differences.
pub struct DirectoryComparer {
    // First path; when it is a regular file, `run` compares a single file pair.
    dir1: PathBuf,
    // Second path; a directory or, in single-file mode, optionally a file.
    dir2: PathBuf,
    /// When true, every result is printed as a symbol string followed by the
    /// path; otherwise only non-identical results are printed, with a description.
    pub is_symbols_format: bool,
    /// Buffer size handed to each `FileComparer`.
    pub buffer_size: usize,
    /// How the content of files present on both sides is compared.
    pub comparison_method: FileComparisonMethod,
    /// Optional glob set handed to both directory iterators as their `exclude` filter.
    pub exclude: Option<GlobSet>,
}
107
108impl DirectoryComparer {
109 pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
111 Self {
112 dir1,
113 dir2,
114 is_symbols_format: false,
115 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
116 comparison_method: FileComparisonMethod::Hash,
117 exclude: None,
118 }
119 }
120
121 pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
124 rayon::ThreadPoolBuilder::new()
125 .num_threads(parallel)
126 .build_global()
127 .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
128 Ok(())
129 }
130
131 pub fn run(&self) -> anyhow::Result<()> {
134 if self.dir1.is_file() {
135 return self.run_file_comparer();
136 }
137
138 let progress = ProgressReporter::new();
139 progress.set_message("Scanning directories...");
140 let start_time = std::time::Instant::now();
141 let mut summary = ComparisonSummary::default();
142 let dir1_str = self.dir1.to_str().unwrap_or("dir1");
143 let dir2_str = self.dir2.to_str().unwrap_or("dir2");
144 let (tx, rx) = mpsc::channel();
145 std::thread::scope(|scope| {
146 scope.spawn(move || {
147 if let Err(e) = self.compare_streaming_ordered(tx) {
148 log::error!("Error during comparison: {}", e);
149 }
150 });
151
152 while let Ok(event) = rx.recv() {
154 match event {
155 CompareProgress::StartOfComparison => {
156 progress.set_message("Comparing files...");
157 }
158 CompareProgress::TotalFiles(total_files) => {
159 progress.set_length(total_files as u64);
160 progress.set_message("");
161 }
162 CompareProgress::Result(_, result) => {
163 summary.update(&result);
164 if self.is_symbols_format {
165 progress.suspend(|| {
166 println!(
167 "{} {}",
168 result.to_symbol_string(),
169 result.relative_path.display()
170 );
171 })
172 } else if !result.is_identical() {
173 progress.suspend(|| {
174 println!(
175 "{}: {}",
176 result.relative_path.display(),
177 result.to_string(dir1_str, dir2_str)
178 );
179 });
180 }
181 }
182 CompareProgress::FileDone => progress.inc(1),
183 }
184 }
185 });
186 progress.finish();
187 eprintln!("\n--- Comparison Summary ---");
188 summary.print(&mut std::io::stderr(), dir1_str, dir2_str)?;
189 eprintln!("Comparison finished in {:?}.", start_time.elapsed());
190 Ok(())
191 }
192
193 fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
198 let (tx_unordered, rx_unordered) = mpsc::channel();
199 std::thread::scope(|scope| {
200 scope.spawn(move || {
201 if let Err(e) = self.compare_streaming_unordered(tx_unordered) {
202 log::error!("Error during unordered comparison: {}", e);
203 }
204 });
205
206 let mut buffer = HashMap::new();
207 let mut next_index = 0;
208 for event in rx_unordered {
209 if let CompareProgress::Result(i, _) = &event {
210 let index = *i;
211 if index == next_index {
212 tx.send(event)?;
213 next_index += 1;
214 while let Some(buffered) = buffer.remove(&next_index) {
215 tx.send(buffered)?;
216 next_index += 1;
217 }
218 } else {
219 buffer.insert(index, event);
220 }
221 } else {
222 tx.send(event)?;
223 }
224 }
225 Ok::<(), anyhow::Error>(())
226 })?;
227 Ok(())
228 }
229
230 fn compare_streaming_unordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
231 let mut it1 = FileIterator::new(self.dir1.clone());
232 let mut it2 = FileIterator::new(self.dir2.clone());
233 it1.exclude = self.exclude.as_ref();
234 it2.exclude = self.exclude.as_ref();
235 let hashers = self.get_hashers(&self.dir1, &self.dir2)?;
236 if let Some((h1, h2)) = &hashers {
237 it1.hasher = Some(h1);
238 it2.hasher = Some(h2);
239 if self.comparison_method == FileComparisonMethod::Rehash {
240 h1.clear_cache()?;
241 h2.clear_cache()?;
242 }
243 }
244
245 let mut cur1 = it1.next();
246 let mut cur2 = it2.next();
247 let mut index = 0;
248 tx.send(CompareProgress::StartOfComparison)?;
249 rayon::scope(|scope| {
250 loop {
251 let cmp = match (&cur1, &cur2) {
252 (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
253 (Some(_), None) => Ordering::Less,
254 (None, Some(_)) => Ordering::Greater,
255 (None, None) => break,
256 };
257 match cmp {
258 Ordering::Less => {
259 let (rel1, _) = cur1.take().unwrap();
260 let result = FileComparisonResult::new(rel1, Classification::OnlyInDir1);
261 tx.send(CompareProgress::Result(index, result))?;
262 tx.send(CompareProgress::FileDone)?;
263 index += 1;
264 cur1 = it1.next();
265 }
266 Ordering::Greater => {
267 let (rel2, _) = cur2.take().unwrap();
268 let result = FileComparisonResult::new(rel2, Classification::OnlyInDir2);
269 tx.send(CompareProgress::Result(index, result))?;
270 tx.send(CompareProgress::FileDone)?;
271 index += 1;
272 cur2 = it2.next();
273 }
274 Ordering::Equal => {
275 let (rel_path, path1) = cur1.take().unwrap();
276 let (_, path2) = cur2.take().unwrap();
277 let buffer_size = self.buffer_size;
278 let tx_clone = tx.clone();
279 let i = index;
280 let should_compare = self.comparison_method != FileComparisonMethod::Size;
281 let hashers_ref = hashers.as_ref();
282 scope.spawn(move |_| {
283 let mut comparer = FileComparer::new(&path1, &path2);
284 comparer.buffer_size = buffer_size;
285 if let Some((h1, h2)) = hashers_ref {
286 comparer.hashers = Some((h1, h2));
287 }
288 let mut result =
289 FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
290 if let Err(error) = result.update(&comparer, should_compare) {
291 log::error!("Error during comparison of {:?}: {}", rel_path, error);
292 }
293 if tx_clone.send(CompareProgress::Result(i, result)).is_err()
294 || tx_clone.send(CompareProgress::FileDone).is_err()
295 {
296 log::error!("Send failed during comparison of {:?}", rel_path);
297 }
298 });
299 index += 1;
300 cur1 = it1.next();
301 cur2 = it2.next();
302 }
303 }
304 }
305 tx.send(CompareProgress::TotalFiles(index))
306 })?;
307 Self::save_hashers(hashers)?;
308 Ok(())
309 }
310
311 fn get_hashers(
312 &self,
313 dir1: &Path,
314 dir2: &Path,
315 ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
316 if self.comparison_method == FileComparisonMethod::Hash
317 || self.comparison_method == FileComparisonMethod::Rehash
318 {
319 let (h1, h2) = rayon::join(
320 || FileHasher::new(dir1.to_path_buf()),
321 || FileHasher::new(dir2.to_path_buf()),
322 );
323 return Ok(Some((h1, h2)));
324 }
325 Ok(None)
326 }
327
328 fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
329 if let Some((h1, h2)) = hashers {
330 let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
331 r1?;
332 r2?;
333 }
334 Ok(())
335 }
336
337 fn run_file_comparer(&self) -> anyhow::Result<()> {
338 assert!(self.dir1.is_file());
339 let file1 = &self.dir1;
340 let dir1 = file1.parent().unwrap();
341 let file1_name = file1.file_name().unwrap();
342 let (dir2, file2) = if self.dir2.is_file() {
343 (self.dir2.parent().unwrap(), self.dir2.clone())
344 } else {
345 (self.dir2.as_path(), self.dir2.join(file1_name))
346 };
347
348 let mut comparer = FileComparer::new(file1, &file2);
349 comparer.buffer_size = self.buffer_size;
350 let hashers = self.get_hashers(dir1, dir2)?;
351 if let Some((h1, h2)) = &hashers {
352 if self.comparison_method == FileComparisonMethod::Rehash {
353 h1.remove_cache_entry(file1)?;
354 h2.remove_cache_entry(&file2)?;
355 }
356 comparer.hashers = Some((h1, h2));
357 }
358 let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
359 let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
360 result.update(&comparer, should_compare_content)?;
361 let file1_str = file1.to_str().unwrap_or("file1");
362 if self.is_symbols_format {
363 println!("{} {}", result.to_symbol_string(), file1_str);
364 } else {
365 let file2_str = file2.to_str().unwrap_or("file2");
366 println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
367 }
368 Self::save_hashers(hashers)?;
369 Ok(())
370 }
371}
372
373#[cfg(test)]
374mod tests {
375 use super::*;
376 use std::fs;
377 use std::io::Write;
378
379 #[test]
380 fn test_comparison_summary() {
381 let mut summary = ComparisonSummary::default();
382 let res1 = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
383 let res2 = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
384 let mut res3 = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
385 res3.modified_time_comparison = Some(Ordering::Greater);
386
387 summary.update(&res1);
388 summary.update(&res2);
389 summary.update(&res3);
390
391 assert_eq!(summary.only_in_dir1, 1);
392 assert_eq!(summary.only_in_dir2, 1);
393 assert_eq!(summary.in_both, 1);
394 assert_eq!(summary.dir1_newer, 1);
395 }
396
397 #[test]
398 fn test_directory_comparer_integration() -> anyhow::Result<()> {
399 let dir1 = tempfile::tempdir()?;
400 let dir2 = tempfile::tempdir()?;
401
402 let file1_path = dir1.path().join("same.txt");
404 let mut file1 = fs::File::create(&file1_path)?;
405 file1.write_all(b"same content")?;
406
407 let only1_path = dir1.path().join("only1.txt");
408 let mut only1 = fs::File::create(&only1_path)?;
409 only1.write_all(b"only in dir1")?;
410
411 let file2_path = dir2.path().join("same.txt");
413 let mut file2 = fs::File::create(&file2_path)?;
414 file2.write_all(b"same content")?;
415
416 let only2_path = dir2.path().join("only2.txt");
417 let mut only2 = fs::File::create(&only2_path)?;
418 only2.write_all(b"only in dir2")?;
419
420 let diff1_path = dir1.path().join("diff.txt");
422 let mut diff1 = fs::File::create(&diff1_path)?;
423 diff1.write_all(b"content 1")?;
424
425 let diff2_path = dir2.path().join("diff.txt");
426 let mut diff2 = fs::File::create(&diff2_path)?;
427 diff2.write_all(b"content 222")?; let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
430 let (tx, rx) = mpsc::channel();
431
432 comparer.compare_streaming_ordered(tx)?;
433
434 let mut results = Vec::new();
435 while let Ok(res) = rx.recv() {
436 if let CompareProgress::Result(_, r) = res {
437 results.push(r);
438 }
439 }
440
441 results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
442
443 assert_eq!(results.len(), 4);
444
445 assert_eq!(results[0].relative_path.to_str().unwrap(), "diff.txt");
447 assert_eq!(results[0].classification, Classification::InBoth);
448 assert!(
449 results[0].is_content_same == Some(false)
450 || results[0].size_comparison != Some(Ordering::Equal)
451 );
452
453 assert_eq!(results[1].relative_path.to_str().unwrap(), "only1.txt");
455 assert_eq!(results[1].classification, Classification::OnlyInDir1);
456
457 assert_eq!(results[2].relative_path.to_str().unwrap(), "only2.txt");
459 assert_eq!(results[2].classification, Classification::OnlyInDir2);
460
461 assert_eq!(results[3].relative_path.to_str().unwrap(), "same.txt");
463 assert_eq!(results[3].classification, Classification::InBoth);
464 assert_eq!(results[3].size_comparison, Some(Ordering::Equal));
465
466 Ok(())
467 }
468
469 #[test]
470 fn test_directory_comparer_size_mode() -> anyhow::Result<()> {
471 let dir1 = tempfile::tempdir()?;
472 let dir2 = tempfile::tempdir()?;
473
474 let file1_path = dir1.path().join("file.txt");
475 let mut file1 = fs::File::create(&file1_path)?;
476 file1.write_all(b"content 1")?;
477
478 let file2_path = dir2.path().join("file.txt");
479 let mut file2 = fs::File::create(&file2_path)?;
480 file2.write_all(b"content 2")?; let mut comparer =
483 DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
484 comparer.comparison_method = FileComparisonMethod::Size;
485 let (tx, rx) = mpsc::channel();
486
487 comparer.compare_streaming_ordered(tx)?;
488
489 let mut results = Vec::new();
490 while let Ok(res) = rx.recv() {
491 if let CompareProgress::Result(_, r) = res {
492 results.push(r);
493 }
494 }
495
496 assert_eq!(results.len(), 1);
497 assert_eq!(results[0].relative_path.to_str().unwrap(), "file.txt");
498 assert_eq!(results[0].classification, Classification::InBoth);
499 assert_eq!(results[0].size_comparison, Some(Ordering::Equal));
500 assert_eq!(results[0].is_content_same, None);
501
502 Ok(())
503 }
504}