1use crate::{
2 Classification, ColumnFormatter, FileComparer, FileComparisonResult, FileHasher, FileItem,
3 FileIterator, OutputFormat, Progress, ProgressBuilder, ProgressValue,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::{
8 cmp::Ordering,
9 io::{self, stdout},
10 path::{Path, PathBuf},
11 sync::{Arc, mpsc},
12 time,
13};
14
/// Events sent from the comparison worker to the consumer loop in
/// `DirectoryComparer::run` over an `mpsc` channel.
#[derive(Debug, Clone)]
enum CompareProgress {
    /// Sent once by `compare_streaming` right before the merge loop starts;
    /// `run` reacts by switching the progress message to "Comparing files...".
    StartOfComparison,
    /// An increment of completed work; `run` forwards it to `Progress::inc`.
    Progress(ProgressValue),
    /// The accumulated amount of work, sent after every file has been
    /// dispatched; `run` forwards it to `Progress::set_length`.
    Total(ProgressValue),
    /// One comparison result. The `usize` is the index assigned by the merge
    /// loop and is the key handed to `crate::sort_stream` by
    /// `compare_streaming_ordered`.
    Result(usize, FileComparisonResult),
    /// A file pair could not be compared; `run` counts it in
    /// `ComparisonSummary::num_errors`. The error itself is logged by the sender.
    Error,
}
23
/// Strategy used to decide whether two files present on both sides are equal.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Do not compare content: `FileComparisonResult::update` is called with
    /// its content-comparison flag set to `false`.
    Size,
    /// Compare content with the help of two `FileHasher`s created through
    /// `FileHasher::new_with_cache`.
    Hash,
    /// Same as [`FileComparisonMethod::Hash`], but the hashers' caches are
    /// cleared (directory mode) or the affected entries removed (single-file
    /// mode) before comparing.
    Rehash,
    /// Compare content without any hasher attached to the `FileComparer`
    /// (presumably a direct content comparison - confirm in `FileComparer`).
    Full,
}
36
/// Compares two directory trees (or two single files) and reports the
/// differences.
pub struct DirectoryComparer {
    /// Left-hand side. When this path is a regular file, `run` compares a
    /// single pair of files instead of walking directories.
    dir1: PathBuf,
    /// Right-hand side: a directory, or a file in single-file mode.
    dir2: PathBuf,
    /// How results are printed. `run` accepts only `OutputFormat::Default`
    /// and `OutputFormat::Symbol`.
    pub output_format: OutputFormat,
    /// Buffer size copied into every `FileComparer` and `FileHasher` created
    /// by this comparer (presumably in bytes - confirm in `FileComparer`).
    pub buffer_size: usize,
    /// Strategy used for files that exist on both sides.
    pub comparison_method: FileComparisonMethod,
    /// Optional glob set handed to both `FileIterator`s as their `exclude` filter.
    pub exclude: Option<GlobSet>,
    /// Optional progress builder; `run` adds a spinner to it and the hashers
    /// receive a shared handle.
    pub progress: Option<Arc<ProgressBuilder>>,
    /// Value passed to `crate::build_thread_pool` for the comparison pool.
    pub jobs: usize,
}
48
49impl DirectoryComparer {
50 pub const DEFAULT_JOBS: usize = 8;
51
52 pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
54 Self {
55 dir1,
56 dir2,
57 output_format: OutputFormat::Default,
58 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
59 comparison_method: FileComparisonMethod::Hash,
60 exclude: None,
61 progress: None,
62 jobs: Self::DEFAULT_JOBS,
63 }
64 }
65
66 pub fn run(&self) -> anyhow::Result<()> {
69 match self.output_format {
70 OutputFormat::Default | OutputFormat::Symbol => {}
71 _ => anyhow::bail!("Compare mode only supports default or symbol output format."),
72 }
73 if self.dir1.is_file() {
74 return self.run_file_comparer();
75 }
76
77 let mut progress = self
78 .progress
79 .as_ref()
80 .map(|progress| progress.add_spinner())
81 .unwrap_or_else(Progress::none);
82 progress.set_message("Scanning directories...");
83 let start_time = std::time::Instant::now();
84 let mut summary = ComparisonSummary::default();
85 let dir1_str = self.dir1.to_str().unwrap_or("dir1");
86 let dir2_str = self.dir2.to_str().unwrap_or("dir2");
87 let (tx, rx) = mpsc::channel();
88 std::thread::scope(|scope| {
89 scope.spawn(move || {
90 if let Err(e) = self.compare_streaming_ordered(tx) {
91 log::error!("Error during comparison: {}", e);
92 }
93 });
94
95 while let Ok(event) = rx.recv() {
97 match event {
98 CompareProgress::StartOfComparison => {
99 progress.set_message("Comparing files...");
100 }
101 CompareProgress::Total(total) => {
102 progress.set_length(total);
103 progress.set_message("");
104 }
105 CompareProgress::Result(_, result) => {
106 summary.update(&result);
107 progress.suspend_for(stdout(), || {
108 result.print(self.output_format, dir1_str, dir2_str)
109 });
110 }
111 CompareProgress::Progress(value) => progress.inc(value),
112 CompareProgress::Error => summary.num_errors += 1,
113 }
114 }
115 });
116 progress.finish();
117 eprintln!("\n--- Comparison Summary ---");
118 summary.print(&mut io::stderr(), &start_time, dir1_str, dir2_str)?;
119 Ok(())
120 }
121
122 fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
127 crate::sort_stream(
128 tx,
129 |tx_unordered| self.compare_streaming(tx_unordered),
130 |event| match event {
131 CompareProgress::Result(i, _) => Some(*i),
132 _ => None,
133 },
134 )
135 }
136
137 fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
138 let mut it1 = FileIterator::new(&self.dir1);
139 let mut it2 = FileIterator::new(&self.dir2);
140 it1.exclude = self.exclude.as_ref();
141 it2.exclude = self.exclude.as_ref();
142 let mut hashers = self.get_hashers(&self.dir1, &self.dir2)?;
143 if let Some((h1, h2)) = &mut hashers {
144 it1.cache = Some(h1.cache()?);
145 it2.cache = Some(h2.cache()?);
146 if self.comparison_method == FileComparisonMethod::Rehash {
147 h1.clear_cache()?;
148 h2.clear_cache()?;
149 }
150 }
151 let hashers_ref = hashers.as_ref();
152 std::thread::scope(|global_scope| {
153 let it1_rx = it1.spawn_in_scope(global_scope);
154 let it2_rx = it2.spawn_in_scope(global_scope);
155 let pool = crate::build_thread_pool(self.jobs)?;
156 pool.scope(move |scope| {
157 let mut cur1 = it1_rx.recv().ok();
158 let mut cur2 = it2_rx.recv().ok();
159 let mut index = 0;
160 let mut total = ProgressValue::default();
161 tx.send(CompareProgress::StartOfComparison)?;
162 loop {
163 let cmp = match (&cur1, &cur2) {
164 (Some(f1), Some(f2)) => {
165 let rel1 = f1.relative_path(&self.dir1);
166 let rel2 = f2.relative_path(&self.dir2);
167 rel1.cmp(rel2)
168 }
169 (Some(_), None) => Ordering::Less,
170 (None, Some(_)) => Ordering::Greater,
171 (None, None) => break,
172 };
173 match cmp {
174 Ordering::Less => {
175 let file1 = cur1.take().unwrap();
176 let rel1 = file1.relative_path(&self.dir1);
177 let size = file1.size();
178 total += ProgressValue::with_size(size);
179 let result =
180 FileComparisonResult::new(rel1.into(), Classification::OnlyInDir1);
181 tx.send(CompareProgress::Result(index, result))?;
182 tx.send(CompareProgress::Progress(ProgressValue::with_size(size)))?;
183 index += 1;
184 cur1 = it1_rx.recv().ok();
185 }
186 Ordering::Greater => {
187 let file2 = cur2.take().unwrap();
188 let rel2 = file2.relative_path(&self.dir2);
189 let size = file2.size();
190 total += ProgressValue::with_size(size);
191 let result =
192 FileComparisonResult::new(rel2.into(), Classification::OnlyInDir2);
193 tx.send(CompareProgress::Result(index, result))?;
194 tx.send(CompareProgress::Progress(ProgressValue::with_size(size)))?;
195 index += 1;
196 cur2 = it2_rx.recv().ok();
197 }
198 Ordering::Equal => {
199 let file1 = cur1.take().unwrap();
200 let file2 = cur2.take().unwrap();
201 let buffer_size = self.buffer_size;
202 let tx_clone = tx.clone();
203 let i = index;
204 let should_compare =
205 self.comparison_method != FileComparisonMethod::Size;
206 let size = file1.size();
207 total += ProgressValue::with_size(size);
208 scope.spawn(move |_| {
209 let mut comparer = FileComparer::new(&file1, &file2);
210 comparer.buffer_size = buffer_size;
211 if let Some((h1, h2)) = hashers_ref {
212 comparer.hashers = Some((h1, h2));
213 }
214 let rel_path = file1.relative_path(&self.dir1);
215 let mut result = FileComparisonResult::new(
216 rel_path.into(),
217 Classification::InBoth,
218 );
219 let event = match result.update(&comparer, should_compare) {
220 Ok(_) => CompareProgress::Result(i, result),
221 Err(error) => {
222 log::error!(
223 "Error comparing '{}': {}",
224 result.relative_path.display(),
225 error
226 );
227 CompareProgress::Error
228 }
229 };
230 if tx_clone.send(event).is_err()
231 || tx_clone
232 .send(CompareProgress::Progress(ProgressValue::with_size(
233 size,
234 )))
235 .is_err()
236 {
237 log::error!("Send failed");
238 }
239 });
240 index += 1;
241 cur1 = it1_rx.recv().ok();
242 cur2 = it2_rx.recv().ok();
243 }
244 }
245 }
246 tx.send(CompareProgress::Total(total))
247 })?;
248 Ok::<(), anyhow::Error>(())
249 })?;
250
251 Self::save_hashers(hashers)?;
252 Ok(())
253 }
254
255 fn get_hashers(
256 &self,
257 dir1: &Path,
258 dir2: &Path,
259 ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
260 if self.comparison_method == FileComparisonMethod::Hash
261 || self.comparison_method == FileComparisonMethod::Rehash
262 {
263 let (h1_res, h2_res) = rayon::join(
264 || FileHasher::new_with_cache(&[dir1]),
265 || FileHasher::new_with_cache(&[dir2]),
266 );
267 let mut h1 = h1_res?;
268 let mut h2 = h2_res?;
269 h1.buffer_size = self.buffer_size;
270 h2.buffer_size = self.buffer_size;
271 if let Some(progress) = self.progress.as_ref() {
272 h1.progress = Some(Arc::clone(progress));
273 h2.progress = Some(Arc::clone(progress));
274 }
275 return Ok(Some((h1, h2)));
276 }
277 Ok(None)
278 }
279
280 fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
281 if let Some((h1, h2)) = hashers {
282 let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
283 r1?;
284 r2?;
285 }
286 Ok(())
287 }
288
289 fn run_file_comparer(&self) -> anyhow::Result<()> {
290 assert!(self.dir1.is_file());
291 let file1_path = &self.dir1;
292 let dir1 = file1_path.parent().unwrap();
293 let file1_name = file1_path.file_name().unwrap();
294 let (dir2, file2_path) = if self.dir2.is_file() {
295 (self.dir2.parent().unwrap(), self.dir2.clone())
296 } else {
297 (self.dir2.as_path(), self.dir2.join(file1_name))
298 };
299 let file1 = FileItem::try_from(file1_path.as_path())?;
300 let file2 = FileItem::try_from(file2_path.as_path())?;
301 let mut comparer = FileComparer::new(&file1, &file2);
302 comparer.buffer_size = self.buffer_size;
303 let mut hashers = self.get_hashers(dir1, dir2)?;
304 if let Some((h1, h2)) = &mut hashers {
305 if self.comparison_method == FileComparisonMethod::Rehash {
306 h1.remove_cache_entry(file1_path)?;
307 h2.remove_cache_entry(&file2_path)?;
308 }
309 comparer.hashers = Some((h1, h2));
310 }
311 let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
312 let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
313 result.update(&comparer, should_compare_content)?;
314 let file1_str = file1_path.to_str().unwrap_or("file1");
315 match self.output_format {
316 OutputFormat::Symbol => {
317 println!("{} {}", result.to_symbol_string(), file1_str);
318 }
319 OutputFormat::Default => {
320 let file2_str = file2_path.to_str().unwrap_or("file2");
321 println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
322 }
323 _ => unreachable!(),
324 }
325 Self::save_hashers(hashers)?;
326 Ok(())
327 }
328}
329
/// Running totals over all comparison results, printed once a run finishes.
#[derive(Default)]
struct ComparisonSummary {
    /// Files classified as present on both sides.
    pub in_both: usize,
    /// Files present only on the left side.
    pub only_in_dir1: usize,
    /// Files present only on the right side.
    pub only_in_dir2: usize,
    /// Files on both sides whose modification-time comparison is `Greater`.
    pub dir1_newer: usize,
    /// Files on both sides whose modification-time comparison is `Less`.
    pub dir2_newer: usize,
    /// Files on both sides whose size comparison is `Greater`.
    pub dir1_larger: usize,
    /// Files on both sides whose size comparison is `Less`.
    pub dir2_larger: usize,
    /// Files on both sides with equal size whose content is known to differ.
    pub diff_content: usize,
    /// Files on both sides for which the modification time, the size, or
    /// (for equal sizes) the content comparison is unknown. Each file is
    /// counted at most once.
    pub not_comparable: usize,
    /// Number of `CompareProgress::Error` events received by `run`.
    pub num_errors: usize,
}
343
344impl ComparisonSummary {
345 pub fn update(&mut self, result: &FileComparisonResult) {
346 match result.classification {
347 Classification::OnlyInDir1 => self.only_in_dir1 += 1,
348 Classification::OnlyInDir2 => self.only_in_dir2 += 1,
349 Classification::InBoth => {
350 self.in_both += 1;
351 let mut is_not_comparable = false;
352 match result.modified_time_comparison {
353 Some(Ordering::Greater) => self.dir1_newer += 1,
354 Some(Ordering::Less) => self.dir2_newer += 1,
355 Some(Ordering::Equal) => {}
356 None => is_not_comparable = true,
357 }
358 match result.size_comparison {
359 Some(Ordering::Greater) => self.dir1_larger += 1,
360 Some(Ordering::Less) => self.dir2_larger += 1,
361 Some(Ordering::Equal) => match result.is_content_same {
362 Some(false) => self.diff_content += 1,
363 Some(true) => {}
364 None => is_not_comparable = true,
365 },
366 None => is_not_comparable = true,
367 }
368 if is_not_comparable {
369 self.not_comparable += 1;
370 }
371 }
372 }
373 }
374
375 pub fn print(
376 &self,
377 mut writer: impl std::io::Write,
378 start_time: &time::Instant,
379 dir1_name: &str,
380 dir2_name: &str,
381 ) -> std::io::Result<()> {
382 let values = [
383 ("Elapsed:", 0),
384 ("Files in both:", self.in_both),
385 ("Only in left:", self.only_in_dir1),
386 ("Only in right:", self.only_in_dir2),
387 ("Left is newer:", self.dir1_newer),
388 ("Right is newer:", self.dir2_newer),
389 ("Left is larger:", self.dir1_larger),
390 ("Right is larger:", self.dir2_larger),
391 ("Different content:", self.diff_content),
392 ("Not comparable:", self.not_comparable),
393 ("Errors:", self.num_errors),
394 ];
395 let formatter = ColumnFormatter::new(values.iter().map(|(s, _)| *s));
396 formatter.write_value(&mut writer, "Left:", dir1_name)?;
397 formatter.write_value(&mut writer, "Right:", dir2_name)?;
398 formatter.write_value(
399 &mut writer,
400 values[0].0,
401 FormattedDuration(start_time.elapsed()),
402 )?;
403 formatter.write_values(&mut writer, &values[1..])?;
404 Ok(())
405 }
406}
407
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Write;

    /// Runs the ordered streaming comparison to completion and returns the
    /// payload of every `Result` event in the order it was received.
    fn collect_results(comparer: &DirectoryComparer) -> anyhow::Result<Vec<FileComparisonResult>> {
        let (tx, rx) = mpsc::channel();
        // The sender is moved into the call and dropped when it returns, so
        // the iteration below ends once the buffered events are drained.
        comparer.compare_streaming_ordered(tx)?;
        let results = rx
            .iter()
            .filter_map(|event| match event {
                CompareProgress::Result(_, result) => Some(result),
                CompareProgress::StartOfComparison
                | CompareProgress::Progress(_)
                | CompareProgress::Total(_)
                | CompareProgress::Error => None,
            })
            .collect();
        Ok(results)
    }

    /// One result of each kind increments the matching summary counters.
    #[test]
    fn comparison_summary() {
        let mut left_newer = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        left_newer.modified_time_comparison = Some(Ordering::Greater);
        let inputs = [
            FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1),
            FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2),
            left_newer,
        ];

        let mut summary = ComparisonSummary::default();
        for input in &inputs {
            summary.update(input);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    /// Default settings over two small trees: identical files, one-sided
    /// files, a size difference, and a same-size content difference.
    #[test]
    fn directory_comparer_integration() -> anyhow::Result<()> {
        let left = tempfile::tempdir()?;
        let right = tempfile::tempdir()?;

        let fixtures = [
            (left.path(), "same.txt", "same content"),
            (left.path(), "only1.txt", "only in dir1"),
            (right.path(), "same.txt", "same content"),
            (right.path(), "only2.txt", "only in dir2"),
            // Different sizes.
            (left.path(), "diff.txt", "content 1"),
            (right.path(), "diff.txt", "content 222"),
            // Same size, different content.
            (left.path(), "diffc.txt", "content 111"),
            (right.path(), "diffc.txt", "content 222"),
        ];
        for (root, name, content) in fixtures {
            fs::write(root.join(name), content)?;
        }

        let comparer =
            DirectoryComparer::new(left.path().to_path_buf(), right.path().to_path_buf());
        let mut results = collect_results(&comparer)?;
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        let names: Vec<&str> = results
            .iter()
            .map(|result| result.relative_path.to_str().unwrap())
            .collect();
        assert_eq!(
            names,
            ["diff.txt", "diffc.txt", "only1.txt", "only2.txt", "same.txt"]
        );
        let [diff, diffc, only1, only2, same] = results.as_slice() else {
            unreachable!("the name assertion above fixes the length at five");
        };

        assert_eq!(diff.classification, Classification::InBoth);
        assert_eq!(diff.size_comparison, Some(Ordering::Less));
        assert_eq!(diff.is_content_same, None);

        assert_eq!(diffc.classification, Classification::InBoth);
        assert_eq!(diffc.size_comparison, Some(Ordering::Equal));
        assert_eq!(diffc.is_content_same, Some(false));

        assert_eq!(only1.classification, Classification::OnlyInDir1);
        assert_eq!(only2.classification, Classification::OnlyInDir2);

        assert_eq!(same.classification, Classification::InBoth);
        assert_eq!(same.size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    /// In `Size` mode, same-size files with different content are reported
    /// without any content verdict.
    #[test]
    fn directory_comparer_size_mode() -> anyhow::Result<()> {
        let left = tempfile::tempdir()?;
        let right = tempfile::tempdir()?;

        for (root, content) in [(left.path(), b"content 1"), (right.path(), b"content 2")] {
            let mut file = fs::File::create(root.join("file.txt"))?;
            file.write_all(content)?;
        }

        let comparer = DirectoryComparer {
            comparison_method: FileComparisonMethod::Size,
            ..DirectoryComparer::new(left.path().to_path_buf(), right.path().to_path_buf())
        };
        let results = collect_results(&comparer)?;

        let [only] = results.as_slice() else {
            panic!("expected exactly one result, got {}", results.len());
        };
        assert_eq!(only.relative_path.to_str().unwrap(), "file.txt");
        assert_eq!(only.classification, Classification::InBoth);
        assert_eq!(only.size_comparison, Some(Ordering::Equal));
        assert_eq!(only.is_content_same, None);

        Ok(())
    }
}