1use crate::{
2 Classification, ColumnFormatter, FileComparer, FileComparisonResult, FileHasher, FileItem,
3 FileIterator, OutputFormat, Progress, ProgressBuilder, ProgressValue,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::{
8 cmp::Ordering,
9 io::{self, stdout},
10 path::{Path, PathBuf},
11 sync::{Arc, mpsc},
12 time,
13};
14
15#[derive(Debug, Clone)]
16enum CompareProgress {
17 StartOfComparison,
18 Progress(ProgressValue),
19 Total(ProgressValue),
20 Result(usize, FileComparisonResult),
21 Error,
22}
23
/// Strategy used to decide whether two files with the same relative path differ.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileComparisonMethod {
    /// Do not compare file content at all.
    Size,
    /// Compare content with the help of cached file hashers.
    Hash,
    /// Like [`FileComparisonMethod::Hash`], but the cached hash entries are
    /// discarded before comparing.
    Rehash,
    /// Compare content without hashers.
    Full,
}
36
37pub struct DirectoryComparer {
39 dir1: PathBuf,
40 dir2: PathBuf,
41 pub output_format: OutputFormat,
42 pub buffer_size: usize,
43 pub comparison_method: FileComparisonMethod,
44 pub exclude: Option<GlobSet>,
45 pub progress: Option<Arc<ProgressBuilder>>,
46 pub jobs: usize,
47}
48
49impl DirectoryComparer {
50 pub const DEFAULT_JOBS: usize = 8;
51
52 pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
54 Self {
55 dir1,
56 dir2,
57 output_format: OutputFormat::Default,
58 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
59 comparison_method: FileComparisonMethod::Hash,
60 exclude: None,
61 progress: None,
62 jobs: Self::DEFAULT_JOBS,
63 }
64 }
65
66 pub fn run(&self) -> anyhow::Result<()> {
69 match self.output_format {
70 OutputFormat::Default | OutputFormat::Symbol => {}
71 _ => anyhow::bail!("Compare mode only supports default or symbol output format."),
72 }
73 if self.dir1.is_file() {
74 return self.run_file_comparer();
75 }
76
77 let mut progress = self
78 .progress
79 .as_ref()
80 .map(|progress| progress.add_spinner())
81 .unwrap_or_else(Progress::none);
82 progress.set_message("Scanning directories...");
83 let start_time = std::time::Instant::now();
84 let mut summary = ComparisonSummary::default();
85 let dir1_str = self.dir1.to_str().unwrap_or("dir1");
86 let dir2_str = self.dir2.to_str().unwrap_or("dir2");
87 let (tx, rx) = mpsc::channel();
88 std::thread::scope(|scope| {
89 scope.spawn(move || {
90 if let Err(e) = self.compare_streaming_ordered(tx) {
91 log::error!("Error during comparison: {}", e);
92 }
93 });
94
95 while let Ok(event) = rx.recv() {
97 match event {
98 CompareProgress::StartOfComparison => {
99 progress.set_message("Comparing files...");
100 }
101 CompareProgress::Total(total) => {
102 progress.set_length(total);
103 progress.set_message("");
104 }
105 CompareProgress::Result(_, result) => {
106 summary.update(&result);
107 match self.output_format {
108 OutputFormat::Symbol => progress.suspend_for(stdout(), || {
109 println!(
110 "{} {}",
111 result.to_symbol_string(),
112 result.relative_path.display()
113 );
114 }),
115 OutputFormat::Default => {
116 if !result.is_identical() {
117 progress.suspend_for(stdout(), || {
118 println!(
119 "{}: {}",
120 result.relative_path.display(),
121 result.to_string(dir1_str, dir2_str)
122 );
123 });
124 }
125 }
126 _ => unreachable!(),
127 }
128 }
129 CompareProgress::Progress(value) => progress.inc(value),
130 CompareProgress::Error => summary.num_errors += 1,
131 }
132 }
133 });
134 progress.finish();
135 eprintln!("\n--- Comparison Summary ---");
136 summary.print(&mut io::stderr(), &start_time, dir1_str, dir2_str)?;
137 Ok(())
138 }
139
140 fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
145 crate::sort_stream(
146 tx,
147 |tx_unordered| self.compare_streaming(tx_unordered),
148 |event| match event {
149 CompareProgress::Result(i, _) => Some(*i),
150 _ => None,
151 },
152 )
153 }
154
155 fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
156 let mut it1 = FileIterator::new(&self.dir1);
157 let mut it2 = FileIterator::new(&self.dir2);
158 it1.exclude = self.exclude.as_ref();
159 it2.exclude = self.exclude.as_ref();
160 let mut hashers = self.get_hashers(&self.dir1, &self.dir2)?;
161 if let Some((h1, h2)) = &mut hashers {
162 it1.cache = Some(h1.cache()?);
163 it2.cache = Some(h2.cache()?);
164 if self.comparison_method == FileComparisonMethod::Rehash {
165 h1.clear_cache()?;
166 h2.clear_cache()?;
167 }
168 }
169 let hashers_ref = hashers.as_ref();
170 std::thread::scope(|global_scope| {
171 let it1_rx = it1.spawn_in_scope(global_scope);
172 let it2_rx = it2.spawn_in_scope(global_scope);
173 let pool = crate::build_thread_pool(self.jobs)?;
174 pool.scope(move |scope| {
175 let mut cur1 = it1_rx.recv().ok();
176 let mut cur2 = it2_rx.recv().ok();
177 let mut index = 0;
178 let mut total = ProgressValue::default();
179 tx.send(CompareProgress::StartOfComparison)?;
180 loop {
181 let cmp = match (&cur1, &cur2) {
182 (Some(f1), Some(f2)) => {
183 let rel1 = f1.relative_path(&self.dir1);
184 let rel2 = f2.relative_path(&self.dir2);
185 rel1.cmp(rel2)
186 }
187 (Some(_), None) => Ordering::Less,
188 (None, Some(_)) => Ordering::Greater,
189 (None, None) => break,
190 };
191 match cmp {
192 Ordering::Less => {
193 let file1 = cur1.take().unwrap();
194 let rel1 = file1.relative_path(&self.dir1);
195 let size = file1.size();
196 total += ProgressValue::with_size(size);
197 let result =
198 FileComparisonResult::new(rel1.into(), Classification::OnlyInDir1);
199 tx.send(CompareProgress::Result(index, result))?;
200 tx.send(CompareProgress::Progress(ProgressValue::with_size(size)))?;
201 index += 1;
202 cur1 = it1_rx.recv().ok();
203 }
204 Ordering::Greater => {
205 let file2 = cur2.take().unwrap();
206 let rel2 = file2.relative_path(&self.dir2);
207 let size = file2.size();
208 total += ProgressValue::with_size(size);
209 let result =
210 FileComparisonResult::new(rel2.into(), Classification::OnlyInDir2);
211 tx.send(CompareProgress::Result(index, result))?;
212 tx.send(CompareProgress::Progress(ProgressValue::with_size(size)))?;
213 index += 1;
214 cur2 = it2_rx.recv().ok();
215 }
216 Ordering::Equal => {
217 let file1 = cur1.take().unwrap();
218 let file2 = cur2.take().unwrap();
219 let buffer_size = self.buffer_size;
220 let tx_clone = tx.clone();
221 let i = index;
222 let should_compare =
223 self.comparison_method != FileComparisonMethod::Size;
224 let size = file1.size();
225 total += ProgressValue::with_size(size);
226 scope.spawn(move |_| {
227 let mut comparer = FileComparer::new(&file1, &file2);
228 comparer.buffer_size = buffer_size;
229 if let Some((h1, h2)) = hashers_ref {
230 comparer.hashers = Some((h1, h2));
231 }
232 let rel_path = file1.relative_path(&self.dir1);
233 let mut result = FileComparisonResult::new(
234 rel_path.into(),
235 Classification::InBoth,
236 );
237 let event = match result.update(&comparer, should_compare) {
238 Ok(_) => CompareProgress::Result(i, result),
239 Err(error) => {
240 log::error!(
241 "Error comparing '{}': {}",
242 result.relative_path.display(),
243 error
244 );
245 CompareProgress::Error
246 }
247 };
248 if tx_clone.send(event).is_err()
249 || tx_clone
250 .send(CompareProgress::Progress(ProgressValue::with_size(
251 size,
252 )))
253 .is_err()
254 {
255 log::error!("Send failed");
256 }
257 });
258 index += 1;
259 cur1 = it1_rx.recv().ok();
260 cur2 = it2_rx.recv().ok();
261 }
262 }
263 }
264 tx.send(CompareProgress::Total(total))
265 })?;
266 Ok::<(), anyhow::Error>(())
267 })?;
268
269 Self::save_hashers(hashers)?;
270 Ok(())
271 }
272
273 fn get_hashers(
274 &self,
275 dir1: &Path,
276 dir2: &Path,
277 ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
278 if self.comparison_method == FileComparisonMethod::Hash
279 || self.comparison_method == FileComparisonMethod::Rehash
280 {
281 let (h1_res, h2_res) = rayon::join(
282 || FileHasher::new_with_cache(&[dir1]),
283 || FileHasher::new_with_cache(&[dir2]),
284 );
285 let mut h1 = h1_res?;
286 let mut h2 = h2_res?;
287 h1.buffer_size = self.buffer_size;
288 h2.buffer_size = self.buffer_size;
289 if let Some(progress) = self.progress.as_ref() {
290 h1.progress = Some(Arc::clone(progress));
291 h2.progress = Some(Arc::clone(progress));
292 }
293 return Ok(Some((h1, h2)));
294 }
295 Ok(None)
296 }
297
298 fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
299 if let Some((h1, h2)) = hashers {
300 let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
301 r1?;
302 r2?;
303 }
304 Ok(())
305 }
306
307 fn run_file_comparer(&self) -> anyhow::Result<()> {
308 assert!(self.dir1.is_file());
309 let file1_path = &self.dir1;
310 let dir1 = file1_path.parent().unwrap();
311 let file1_name = file1_path.file_name().unwrap();
312 let (dir2, file2_path) = if self.dir2.is_file() {
313 (self.dir2.parent().unwrap(), self.dir2.clone())
314 } else {
315 (self.dir2.as_path(), self.dir2.join(file1_name))
316 };
317 let file1 = FileItem::try_from(file1_path.as_path())?;
318 let file2 = FileItem::try_from(file2_path.as_path())?;
319 let mut comparer = FileComparer::new(&file1, &file2);
320 comparer.buffer_size = self.buffer_size;
321 let mut hashers = self.get_hashers(dir1, dir2)?;
322 if let Some((h1, h2)) = &mut hashers {
323 if self.comparison_method == FileComparisonMethod::Rehash {
324 h1.remove_cache_entry(file1_path)?;
325 h2.remove_cache_entry(&file2_path)?;
326 }
327 comparer.hashers = Some((h1, h2));
328 }
329 let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
330 let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
331 result.update(&comparer, should_compare_content)?;
332 let file1_str = file1_path.to_str().unwrap_or("file1");
333 match self.output_format {
334 OutputFormat::Symbol => {
335 println!("{} {}", result.to_symbol_string(), file1_str);
336 }
337 OutputFormat::Default => {
338 let file2_str = file2_path.to_str().unwrap_or("file2");
339 println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
340 }
341 _ => unreachable!(),
342 }
343 Self::save_hashers(hashers)?;
344 Ok(())
345 }
346}
347
/// Running totals of the comparison results, printed at the end of a run.
#[derive(Default)]
struct ComparisonSummary {
    /// Entries present on both sides.
    pub in_both: usize,
    /// Entries present only on the left side.
    pub only_in_dir1: usize,
    /// Entries present only on the right side.
    pub only_in_dir2: usize,
    /// Entries on both sides whose left modification time is newer.
    pub dir1_newer: usize,
    /// Entries on both sides whose right modification time is newer.
    pub dir2_newer: usize,
    /// Entries on both sides whose left file is larger.
    pub dir1_larger: usize,
    /// Entries on both sides whose right file is larger.
    pub dir2_larger: usize,
    /// Entries of equal size whose content differs.
    pub diff_content: usize,
    /// Entries on both sides for which at least one comparison has no result.
    pub not_comparable: usize,
    /// Number of comparisons that failed with an error.
    pub num_errors: usize,
}
361
362impl ComparisonSummary {
363 pub fn update(&mut self, result: &FileComparisonResult) {
364 match result.classification {
365 Classification::OnlyInDir1 => self.only_in_dir1 += 1,
366 Classification::OnlyInDir2 => self.only_in_dir2 += 1,
367 Classification::InBoth => {
368 self.in_both += 1;
369 let mut is_not_comparable = false;
370 match result.modified_time_comparison {
371 Some(Ordering::Greater) => self.dir1_newer += 1,
372 Some(Ordering::Less) => self.dir2_newer += 1,
373 Some(Ordering::Equal) => {}
374 None => is_not_comparable = true,
375 }
376 match result.size_comparison {
377 Some(Ordering::Greater) => self.dir1_larger += 1,
378 Some(Ordering::Less) => self.dir2_larger += 1,
379 Some(Ordering::Equal) => match result.is_content_same {
380 Some(false) => self.diff_content += 1,
381 Some(true) => {}
382 None => is_not_comparable = true,
383 },
384 None => is_not_comparable = true,
385 }
386 if is_not_comparable {
387 self.not_comparable += 1;
388 }
389 }
390 }
391 }
392
393 pub fn print(
394 &self,
395 mut writer: impl std::io::Write,
396 start_time: &time::Instant,
397 dir1_name: &str,
398 dir2_name: &str,
399 ) -> std::io::Result<()> {
400 let values = [
401 ("Elapsed:", 0),
402 ("Files in both:", self.in_both),
403 ("Only in left:", self.only_in_dir1),
404 ("Only in right:", self.only_in_dir2),
405 ("Left is newer:", self.dir1_newer),
406 ("Right is newer:", self.dir2_newer),
407 ("Left is larger:", self.dir1_larger),
408 ("Right is larger:", self.dir2_larger),
409 ("Different content:", self.diff_content),
410 ("Not comparable:", self.not_comparable),
411 ("Errors:", self.num_errors),
412 ];
413 let formatter = ColumnFormatter::new(values.iter().map(|(s, _)| *s));
414 formatter.write_value(&mut writer, "Left:", dir1_name)?;
415 formatter.write_value(&mut writer, "Right:", dir2_name)?;
416 formatter.write_value(
417 &mut writer,
418 values[0].0,
419 FormattedDuration(start_time.elapsed()),
420 )?;
421 formatter.write_values(&mut writer, &values[1..])?;
422 Ok(())
423 }
424}
425
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Write;

    /// Runs the ordered comparison to completion and keeps only the per-file
    /// results, in the order they were received.
    fn collect_results(comparer: &DirectoryComparer) -> anyhow::Result<Vec<FileComparisonResult>> {
        let (tx, rx) = mpsc::channel();
        comparer.compare_streaming_ordered(tx)?;
        // The sender is gone once the call above returns, so this terminates.
        Ok(rx
            .into_iter()
            .filter_map(|event| match event {
                CompareProgress::Result(_, result) => Some(result),
                _ => None,
            })
            .collect())
    }

    #[test]
    fn comparison_summary() {
        let left_only = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let right_only = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut both = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        both.modified_time_comparison = Some(Ordering::Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&left_only, &right_only, &both] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // (directory, file name, content)
        let fixtures: [(&Path, &str, &str); 8] = [
            (dir1.path(), "same.txt", "same content"),
            (dir1.path(), "only1.txt", "only in dir1"),
            (dir2.path(), "same.txt", "same content"),
            (dir2.path(), "only2.txt", "only in dir2"),
            // Different sizes.
            (dir1.path(), "diff.txt", "content 1"),
            (dir2.path(), "diff.txt", "content 222"),
            // Same size, different content.
            (dir1.path(), "diffc.txt", "content 111"),
            (dir2.path(), "diffc.txt", "content 222"),
        ];
        for (dir, name, content) in fixtures {
            fs::write(dir.join(name), content)?;
        }

        let comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let mut results = collect_results(&comparer)?;
        results.sort_by(|lhs, rhs| lhs.relative_path.cmp(&rhs.relative_path));
        assert_eq!(results.len(), 5);

        let diff = &results[0];
        assert_eq!(diff.relative_path.to_str().unwrap(), "diff.txt");
        assert_eq!(diff.classification, Classification::InBoth);
        assert_eq!(diff.size_comparison, Some(Ordering::Less));
        assert_eq!(diff.is_content_same, None);

        let diffc = &results[1];
        assert_eq!(diffc.relative_path.to_str().unwrap(), "diffc.txt");
        assert_eq!(diffc.classification, Classification::InBoth);
        assert_eq!(diffc.size_comparison, Some(Ordering::Equal));
        assert_eq!(diffc.is_content_same, Some(false));

        let only1 = &results[2];
        assert_eq!(only1.relative_path.to_str().unwrap(), "only1.txt");
        assert_eq!(only1.classification, Classification::OnlyInDir1);

        let only2 = &results[3];
        assert_eq!(only2.relative_path.to_str().unwrap(), "only2.txt");
        assert_eq!(only2.classification, Classification::OnlyInDir2);

        let same = &results[4];
        assert_eq!(same.relative_path.to_str().unwrap(), "same.txt");
        assert_eq!(same.classification, Classification::InBoth);
        assert_eq!(same.size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    #[test]
    fn directory_comparer_size_mode() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Same size, different content.
        let left_path = dir1.path().join("file.txt");
        let mut left_file = fs::File::create(&left_path)?;
        left_file.write_all(b"content 1")?;

        let right_path = dir2.path().join("file.txt");
        let mut right_file = fs::File::create(&right_path)?;
        right_file.write_all(b"content 2")?;

        let mut comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;

        let results = collect_results(&comparer)?;

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.relative_path.to_str().unwrap(), "file.txt");
        assert_eq!(only.classification, Classification::InBoth);
        assert_eq!(only.size_comparison, Some(Ordering::Equal));
        // Size mode never looks at the content.
        assert_eq!(only.is_content_same, None);

        Ok(())
    }
}