1use crate::{
2 Classification, ColumnFormatter, FileComparer, FileComparisonResult, FileHasher, FileItem,
3 FileIterator, OutputFormat, Progress, ProgressBuilder, ProgressValue,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::{
8 cmp::Ordering,
9 io::{self, stdout},
10 path::{Path, PathBuf},
11 sync::{Arc, mpsc},
12 time,
13};
14
/// Events streamed from the comparison worker to the consumer loop in
/// `DirectoryComparer::run`.
#[derive(Debug, Clone)]
enum CompareProgress {
    /// Sent once, right before the merge loop starts producing results.
    StartOfComparison,
    /// Incremental progress; sent after each file (or file pair) has been handled.
    Progress(ProgressValue),
    /// Accumulated total of all files seen; sent after the merge loop has finished.
    Total(ProgressValue),
    /// Result for one relative path, tagged with its position in merge order.
    Result(usize, FileComparisonResult),
    /// A file pair could not be compared; the cause has already been logged by the sender.
    Error,
}
23
/// Strategy used to decide whether two files found at the same relative path match.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Content comparison is skipped for this method and no hashers are created.
    Size,
    /// Compare with cache-backed `FileHasher`s. This is the default set by
    /// `DirectoryComparer::new`.
    Hash,
    /// Like `Hash`, but the hashers' cached entries are cleared/removed before comparing.
    Rehash,
    /// Content comparison without hashers.
    /// NOTE(review): presumably a direct content read by `FileComparer` — confirm.
    Full,
}
36
/// Compares two directory trees (or two single files) and reports the differences.
pub struct DirectoryComparer {
    /// Left-hand path. When it is a regular file, `run` switches to single-file mode.
    dir1: PathBuf,
    /// Right-hand path: a directory, or a file in single-file mode.
    dir2: PathBuf,
    /// How results are printed; `run` accepts only `Default` and `Symbol`.
    pub output_format: OutputFormat,
    /// Buffer size handed to `FileComparer` and to the `FileHasher`s.
    pub buffer_size: usize,
    /// How files present on both sides are compared.
    pub comparison_method: FileComparisonMethod,
    /// Optional glob set handed to both file iterators.
    /// NOTE(review): presumably matching paths are skipped — confirm in `FileIterator`.
    pub exclude: Option<GlobSet>,
    /// Optional progress builder; without it `run` falls back to `Progress::none()`.
    pub progress: Option<Arc<ProgressBuilder>>,
    /// Value passed to `crate::build_thread_pool` for the comparison pool.
    pub jobs: usize,
}
48
49impl DirectoryComparer {
    /// Initial value of `jobs` assigned by [`DirectoryComparer::new`].
    pub const DEFAULT_JOBS: usize = 8;
51
52 pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
54 Self {
55 dir1,
56 dir2,
57 output_format: OutputFormat::Default,
58 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
59 comparison_method: FileComparisonMethod::Hash,
60 exclude: None,
61 progress: None,
62 jobs: Self::DEFAULT_JOBS,
63 }
64 }
65
66 pub fn run(&self) -> anyhow::Result<()> {
69 match self.output_format {
70 OutputFormat::Default | OutputFormat::Symbol => {}
71 _ => anyhow::bail!("Compare mode only supports default or symbol output format."),
72 }
73 if self.dir1.is_file() {
74 return self.run_file_comparer();
75 }
76
77 let mut progress = self
78 .progress
79 .as_ref()
80 .map(|progress| progress.add_spinner())
81 .unwrap_or_else(Progress::none);
82 progress.set_message("Scanning directories...");
83 let start_time = std::time::Instant::now();
84 let mut summary = ComparisonSummary::default();
85 let dir1_str = self.dir1.to_str().unwrap_or("dir1");
86 let dir2_str = self.dir2.to_str().unwrap_or("dir2");
87 let (tx, rx) = mpsc::channel();
88 std::thread::scope(|scope| {
89 scope.spawn(move || {
90 if let Err(e) = self.compare_streaming_ordered(tx) {
91 log::error!("Error during comparison: {}", e);
92 }
93 });
94
95 while let Ok(event) = rx.recv() {
97 match event {
98 CompareProgress::StartOfComparison => {
99 progress.set_message("Comparing files...");
100 }
101 CompareProgress::Total(total) => {
102 progress.set_length(total);
103 progress.set_message("");
104 }
105 CompareProgress::Result(_, result) => {
106 summary.update(&result);
107 if !(self.output_format == OutputFormat::Default && result.is_identical()) {
108 progress.suspend_for(stdout(), || {
109 result.print(self.output_format, dir1_str, dir2_str)
110 });
111 }
112 }
113 CompareProgress::Progress(value) => progress.inc(value),
114 CompareProgress::Error => summary.num_errors += 1,
115 }
116 }
117 });
118 progress.finish();
119 eprintln!("\n--- Comparison Summary ---");
120 summary.print(&mut io::stderr(), &start_time, dir1_str, dir2_str)?;
121 Ok(())
122 }
123
124 fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
129 crate::sort_stream(
130 tx,
131 |tx_unordered| self.compare_streaming(tx_unordered),
132 |event| match event {
133 CompareProgress::Result(i, _) => Some(*i),
134 _ => None,
135 },
136 )
137 }
138
    /// Walks both directories in parallel, merges the two file streams by relative
    /// path and sends the comparison events to `tx`.
    ///
    /// Files present on one side only are reported immediately from the merge loop;
    /// files present on both sides are compared in tasks spawned on a thread pool, so
    /// `Result` events can arrive out of index order. After the merge loop a `Total`
    /// event with the accumulated size is sent, and finally the hashers (if any) are
    /// saved.
    ///
    /// # Errors
    ///
    /// Fails when the hashers or their caches cannot be set up, when the thread pool
    /// cannot be built, when sending from the merge loop fails, or when saving the
    /// hashers fails. Errors of individual file comparisons are logged and reported as
    /// `CompareProgress::Error` events instead.
    fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
        let mut it1 = FileIterator::new(&self.dir1);
        let mut it2 = FileIterator::new(&self.dir2);
        it1.exclude = self.exclude.as_ref();
        it2.exclude = self.exclude.as_ref();
        // `Some` only for the Hash and Rehash methods (see `get_hashers`).
        let mut hashers = self.get_hashers(&self.dir1, &self.dir2)?;
        if let Some((h1, h2)) = &mut hashers {
            // Each iterator gets its hasher's cache handle.
            it1.cache = Some(h1.cache()?);
            it2.cache = Some(h2.cache()?);
            if self.comparison_method == FileComparisonMethod::Rehash {
                h1.clear_cache()?;
                h2.clear_cache()?;
            }
        }
        // Shared, read-only view of the hashers for the spawned comparison tasks.
        let hashers_ref = hashers.as_ref();
        std::thread::scope(|global_scope| {
            // Each iterator is started in the outer scope and feeds its own channel.
            let it1_rx = it1.spawn_in_scope(global_scope);
            let it2_rx = it2.spawn_in_scope(global_scope);
            // NOTE(review): presumably a rayon thread pool sized by `jobs` — confirm.
            let pool = crate::build_thread_pool(self.jobs)?;
            pool.scope(move |scope| {
                // `None` means the corresponding iterator's channel has closed.
                let mut cur1 = it1_rx.recv().ok();
                let mut cur2 = it2_rx.recv().ok();
                // Position of the next result in merge order.
                let mut index = 0;
                let mut total = ProgressValue::default();
                tx.send(CompareProgress::StartOfComparison)?;
                loop {
                    // Decide which side to advance by comparing relative paths.
                    // NOTE(review): this merge assumes both iterators yield entries in
                    // the same ascending order — confirm in `FileIterator`.
                    let cmp = match (&cur1, &cur2) {
                        (Some(f1), Some(f2)) => {
                            let rel1 = f1.relative_path(&self.dir1);
                            let rel2 = f2.relative_path(&self.dir2);
                            rel1.cmp(rel2)
                        }
                        // One side is exhausted: drain the other.
                        (Some(_), None) => Ordering::Less,
                        (None, Some(_)) => Ordering::Greater,
                        (None, None) => break,
                    };
                    match cmp {
                        // The path exists only on the left side.
                        Ordering::Less => {
                            let file1 = cur1.take().unwrap();
                            let rel1 = file1.relative_path(&self.dir1);
                            let size = file1.size();
                            total += ProgressValue::with_size(size);
                            let result =
                                FileComparisonResult::new(rel1.into(), Classification::OnlyInDir1);
                            tx.send(CompareProgress::Result(index, result))?;
                            tx.send(CompareProgress::Progress(ProgressValue::with_size(size)))?;
                            index += 1;
                            cur1 = it1_rx.recv().ok();
                        }
                        // The path exists only on the right side.
                        Ordering::Greater => {
                            let file2 = cur2.take().unwrap();
                            let rel2 = file2.relative_path(&self.dir2);
                            let size = file2.size();
                            total += ProgressValue::with_size(size);
                            let result =
                                FileComparisonResult::new(rel2.into(), Classification::OnlyInDir2);
                            tx.send(CompareProgress::Result(index, result))?;
                            tx.send(CompareProgress::Progress(ProgressValue::with_size(size)))?;
                            index += 1;
                            cur2 = it2_rx.recv().ok();
                        }
                        // The path exists on both sides: compare the pair on the pool.
                        Ordering::Equal => {
                            let file1 = cur1.take().unwrap();
                            let file2 = cur2.take().unwrap();
                            let buffer_size = self.buffer_size;
                            let tx_clone = tx.clone();
                            let i = index;
                            let should_compare =
                                self.comparison_method != FileComparisonMethod::Size;
                            // Progress is accounted using the left file's size.
                            let size = file1.size();
                            total += ProgressValue::with_size(size);
                            scope.spawn(move |_| {
                                let mut comparer = FileComparer::new(&file1, &file2);
                                comparer.buffer_size = buffer_size;
                                if let Some((h1, h2)) = hashers_ref {
                                    comparer.hashers = Some((h1, h2));
                                }
                                let rel_path = file1.relative_path(&self.dir1);
                                let mut result = FileComparisonResult::new(
                                    rel_path.into(),
                                    Classification::InBoth,
                                );
                                let event = match result.update(&comparer, should_compare) {
                                    Ok(_) => CompareProgress::Result(i, result),
                                    Err(error) => {
                                        log::error!(
                                            "Error comparing '{}': {}",
                                            result.relative_path.display(),
                                            error
                                        );
                                        // NOTE(review): no `Result` is sent for index `i`
                                        // in this case — confirm `sort_stream` tolerates
                                        // gaps in the index sequence.
                                        CompareProgress::Error
                                    }
                                };
                                // The task cannot propagate errors, so a failed send is
                                // only logged.
                                if tx_clone.send(event).is_err()
                                    || tx_clone
                                        .send(CompareProgress::Progress(ProgressValue::with_size(
                                            size,
                                        )))
                                        .is_err()
                                {
                                    log::error!("Send failed");
                                }
                            });
                            index += 1;
                            cur1 = it1_rx.recv().ok();
                            cur2 = it2_rx.recv().ok();
                        }
                    }
                }
                // Sent when the merge loop ends; spawned comparison tasks may still be
                // running at this point.
                tx.send(CompareProgress::Total(total))
            })?;
            Ok::<(), anyhow::Error>(())
        })?;

        Self::save_hashers(hashers)?;
        Ok(())
    }
256
257 fn get_hashers(
258 &self,
259 dir1: &Path,
260 dir2: &Path,
261 ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
262 if self.comparison_method == FileComparisonMethod::Hash
263 || self.comparison_method == FileComparisonMethod::Rehash
264 {
265 let (h1_res, h2_res) = rayon::join(
266 || FileHasher::new_with_cache(&[dir1]),
267 || FileHasher::new_with_cache(&[dir2]),
268 );
269 let mut h1 = h1_res?;
270 let mut h2 = h2_res?;
271 h1.buffer_size = self.buffer_size;
272 h2.buffer_size = self.buffer_size;
273 if let Some(progress) = self.progress.as_ref() {
274 h1.progress = Some(Arc::clone(progress));
275 h2.progress = Some(Arc::clone(progress));
276 }
277 return Ok(Some((h1, h2)));
278 }
279 Ok(None)
280 }
281
282 fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
283 if let Some((h1, h2)) = hashers {
284 let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
285 r1?;
286 r2?;
287 }
288 Ok(())
289 }
290
291 fn run_file_comparer(&self) -> anyhow::Result<()> {
292 assert!(self.dir1.is_file());
293 let file1_path = &self.dir1;
294 let dir1 = file1_path.parent().unwrap();
295 let file1_name = file1_path.file_name().unwrap();
296 let (dir2, file2_path) = if self.dir2.is_file() {
297 (self.dir2.parent().unwrap(), self.dir2.clone())
298 } else {
299 (self.dir2.as_path(), self.dir2.join(file1_name))
300 };
301 let file1 = FileItem::try_from(file1_path.as_path())?;
302 let file2 = FileItem::try_from(file2_path.as_path())?;
303 let mut comparer = FileComparer::new(&file1, &file2);
304 comparer.buffer_size = self.buffer_size;
305 let mut hashers = self.get_hashers(dir1, dir2)?;
306 if let Some((h1, h2)) = &mut hashers {
307 if self.comparison_method == FileComparisonMethod::Rehash {
308 h1.remove_cache_entry(file1_path)?;
309 h2.remove_cache_entry(&file2_path)?;
310 }
311 comparer.hashers = Some((h1, h2));
312 }
313 let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
314 let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
315 result.update(&comparer, should_compare_content)?;
316 let file1_str = file1_path.to_str().unwrap_or("file1");
317 let file2_str = file2_path.to_str().unwrap_or("file2");
318 result.print(self.output_format, file1_str, file2_str);
319 Self::save_hashers(hashers)?;
320 Ok(())
321 }
322}
323
/// Counters accumulated over all comparison results and printed as the final summary.
#[derive(Default)]
struct ComparisonSummary {
    /// Files found on both sides.
    pub in_both: usize,
    /// Files found only on the left side.
    pub only_in_dir1: usize,
    /// Files found only on the right side.
    pub only_in_dir2: usize,
    /// Files in both whose modified-time comparison is `Greater`.
    pub dir1_newer: usize,
    /// Files in both whose modified-time comparison is `Less`.
    pub dir2_newer: usize,
    /// Files in both whose size comparison is `Greater`.
    pub dir1_larger: usize,
    /// Files in both whose size comparison is `Less`.
    pub dir2_larger: usize,
    /// Files in both with equal size whose content is known to differ.
    pub diff_content: usize,
    /// Files in both for which a time, size or content verdict is missing.
    pub not_comparable: usize,
    /// Number of comparison errors; incremented by the caller, not by `update`.
    pub num_errors: usize,
}
337
338impl ComparisonSummary {
339 pub fn update(&mut self, result: &FileComparisonResult) {
340 match result.classification {
341 Classification::OnlyInDir1 => self.only_in_dir1 += 1,
342 Classification::OnlyInDir2 => self.only_in_dir2 += 1,
343 Classification::InBoth => {
344 self.in_both += 1;
345 let mut is_not_comparable = false;
346 match result.modified_time_comparison {
347 Some(Ordering::Greater) => self.dir1_newer += 1,
348 Some(Ordering::Less) => self.dir2_newer += 1,
349 Some(Ordering::Equal) => {}
350 None => is_not_comparable = true,
351 }
352 match result.size_comparison {
353 Some(Ordering::Greater) => self.dir1_larger += 1,
354 Some(Ordering::Less) => self.dir2_larger += 1,
355 Some(Ordering::Equal) => match result.is_content_same {
356 Some(false) => self.diff_content += 1,
357 Some(true) => {}
358 None => is_not_comparable = true,
359 },
360 None => is_not_comparable = true,
361 }
362 if is_not_comparable {
363 self.not_comparable += 1;
364 }
365 }
366 }
367 }
368
369 pub fn print(
370 &self,
371 mut writer: impl std::io::Write,
372 start_time: &time::Instant,
373 dir1_name: &str,
374 dir2_name: &str,
375 ) -> std::io::Result<()> {
376 let values = [
377 ("Elapsed:", 0),
378 ("Files in both:", self.in_both),
379 ("Only in left:", self.only_in_dir1),
380 ("Only in right:", self.only_in_dir2),
381 ("Left is newer:", self.dir1_newer),
382 ("Right is newer:", self.dir2_newer),
383 ("Left is larger:", self.dir1_larger),
384 ("Right is larger:", self.dir2_larger),
385 ("Different content:", self.diff_content),
386 ("Not comparable:", self.not_comparable),
387 ("Errors:", self.num_errors),
388 ];
389 let formatter = ColumnFormatter::new(values.iter().map(|(s, _)| *s));
390 formatter.write_value(&mut writer, "Left:", dir1_name)?;
391 formatter.write_value(&mut writer, "Right:", dir2_name)?;
392 formatter.write_value(
393 &mut writer,
394 values[0].0,
395 FormattedDuration(start_time.elapsed()),
396 )?;
397 formatter.write_values(&mut writer, &values[1..])?;
398 Ok(())
399 }
400}
401
402#[cfg(test)]
403mod tests {
404 use super::*;
405 use std::fs;
406 use std::io::Write;
407
408 #[test]
409 fn comparison_summary() {
410 let mut summary = ComparisonSummary::default();
411 let res1 = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
412 let res2 = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
413 let mut res3 = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
414 res3.modified_time_comparison = Some(Ordering::Greater);
415
416 summary.update(&res1);
417 summary.update(&res2);
418 summary.update(&res3);
419
420 assert_eq!(summary.only_in_dir1, 1);
421 assert_eq!(summary.only_in_dir2, 1);
422 assert_eq!(summary.in_both, 1);
423 assert_eq!(summary.dir1_newer, 1);
424 }
425
426 #[test]
427 fn directory_comparer_integration() -> anyhow::Result<()> {
428 let dir1 = tempfile::tempdir()?;
429 let dir2 = tempfile::tempdir()?;
430
431 let file1_path = dir1.path().join("same.txt");
433 fs::write(file1_path, b"same content")?;
434
435 let only1_path = dir1.path().join("only1.txt");
436 fs::write(only1_path, b"only in dir1")?;
437
438 let file2_path = dir2.path().join("same.txt");
440 fs::write(file2_path, b"same content")?;
441
442 let only2_path = dir2.path().join("only2.txt");
443 fs::write(only2_path, b"only in dir2")?;
444
445 let diff1_path = dir1.path().join("diff.txt");
447 fs::write(diff1_path, b"content 1")?;
448 let diff2_path = dir2.path().join("diff.txt");
449 fs::write(diff2_path, b"content 222")?; let diffc1_path = dir1.path().join("diffc.txt");
453 fs::write(diffc1_path, b"content 111")?;
454 let diffc2_path = dir2.path().join("diffc.txt");
455 fs::write(diffc2_path, b"content 222")?; let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
458 let (tx, rx) = mpsc::channel();
459 comparer.compare_streaming_ordered(tx)?;
460 let mut results = Vec::new();
461 while let Ok(res) = rx.recv() {
462 if let CompareProgress::Result(_, r) = res {
463 results.push(r);
464 }
465 }
466 results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
467 assert_eq!(results.len(), 5);
468
469 let diff_result = &results[0];
471 assert_eq!(diff_result.relative_path.to_str().unwrap(), "diff.txt");
472 assert_eq!(diff_result.classification, Classification::InBoth);
473 assert_eq!(diff_result.size_comparison, Some(Ordering::Less));
474 assert_eq!(diff_result.is_content_same, None);
475
476 let diffc_result = &results[1];
478 assert_eq!(diffc_result.relative_path.to_str().unwrap(), "diffc.txt");
479 assert_eq!(diffc_result.classification, Classification::InBoth);
480 assert_eq!(diffc_result.size_comparison, Some(Ordering::Equal));
481 assert_eq!(diffc_result.is_content_same, Some(false));
482
483 let only1_result = &results[2];
485 assert_eq!(only1_result.relative_path.to_str().unwrap(), "only1.txt");
486 assert_eq!(only1_result.classification, Classification::OnlyInDir1);
487
488 let only2_result = &results[3];
490 assert_eq!(only2_result.relative_path.to_str().unwrap(), "only2.txt");
491 assert_eq!(only2_result.classification, Classification::OnlyInDir2);
492
493 let same_result = &results[4];
495 assert_eq!(same_result.relative_path.to_str().unwrap(), "same.txt");
496 assert_eq!(same_result.classification, Classification::InBoth);
497 assert_eq!(same_result.size_comparison, Some(Ordering::Equal));
498
499 Ok(())
500 }
501
502 #[test]
503 fn directory_comparer_size_mode() -> anyhow::Result<()> {
504 let dir1 = tempfile::tempdir()?;
505 let dir2 = tempfile::tempdir()?;
506
507 let file1_path = dir1.path().join("file.txt");
508 let mut file1 = fs::File::create(&file1_path)?;
509 file1.write_all(b"content 1")?;
510
511 let file2_path = dir2.path().join("file.txt");
512 let mut file2 = fs::File::create(&file2_path)?;
513 file2.write_all(b"content 2")?; let mut comparer =
516 DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
517 comparer.comparison_method = FileComparisonMethod::Size;
518 let (tx, rx) = mpsc::channel();
519
520 comparer.compare_streaming_ordered(tx)?;
521
522 let mut results = Vec::new();
523 while let Ok(res) = rx.recv() {
524 if let CompareProgress::Result(_, r) = res {
525 results.push(r);
526 }
527 }
528
529 assert_eq!(results.len(), 1);
530 assert_eq!(results[0].relative_path.to_str().unwrap(), "file.txt");
531 assert_eq!(results[0].classification, Classification::InBoth);
532 assert_eq!(results[0].size_comparison, Some(Ordering::Equal));
533 assert_eq!(results[0].is_content_same, None);
534
535 Ok(())
536 }
537}