1use crate::{
2 Classification, ColumnFormatter, FileComparer, FileComparisonResult, FileHasher, FileItem,
3 FileIterator, OutputFormat, Progress, ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::{
8 cmp::Ordering,
9 io::{self, stdout},
10 path::{Path, PathBuf},
11 sync::{Arc, mpsc},
12 time,
13};
14
/// Events streamed from the comparison worker to the consumer loop in
/// `DirectoryComparer::run`.
#[derive(Debug, Clone)]
enum CompareProgress {
    /// Sent once, right before the merge loop over the two directory listings starts.
    StartOfComparison,
    /// Sent after each file (or file pair) has been handled; `run` advances the progress by one.
    FileDone,
    /// Number of result indices handed out; sent once the last entry has been dispatched.
    TotalFiles(usize),
    /// A finished comparison, tagged with the index assigned when the entry was dispatched.
    Result(usize, FileComparisonResult),
    /// A file pair could not be compared; `run` counts it in the summary's error total.
    Error,
}
23
/// Strategy used to decide whether two files that exist on both sides are the same.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Skip the content comparison; the content result is left unset.
    Size,
    /// Compare with the help of `FileHasher`s that are backed by their hash caches.
    Hash,
    /// Like `Hash`, but previously cached hashes are discarded before comparing.
    Rehash,
    /// Compare content without hashers.
    /// NOTE(review): presumably a direct read of both files — confirm in `FileComparer`.
    Full,
}
36
/// Compares two directory trees (or a single pair of files) and reports the differences.
pub struct DirectoryComparer {
    /// Left-hand side; when this is a regular file, `run` compares a single file pair instead.
    dir1: PathBuf,
    /// Right-hand side; a directory, or a file when a single file pair is compared.
    dir2: PathBuf,
    /// How results are printed; `run` accepts only `Default` and `Symbol`.
    pub output_format: OutputFormat,
    /// Buffer size handed to the file comparers and hashers.
    pub buffer_size: usize,
    /// How files that exist on both sides are compared.
    pub comparison_method: FileComparisonMethod,
    /// Optional glob set handed to both directory iterators as their `exclude` filter.
    pub exclude: Option<GlobSet>,
    /// Optional progress builder; `run` falls back to `Progress::none` when unset.
    pub progress: Option<Arc<ProgressBuilder>>,
    /// Job count passed to the thread pool that runs the file comparisons.
    pub jobs: usize,
}
48
49impl DirectoryComparer {
50 pub const DEFAULT_JOBS: usize = 8;
51
52 pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
54 Self {
55 dir1,
56 dir2,
57 output_format: OutputFormat::Default,
58 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
59 comparison_method: FileComparisonMethod::Hash,
60 exclude: None,
61 progress: None,
62 jobs: Self::DEFAULT_JOBS,
63 }
64 }
65
66 pub fn run(&self) -> anyhow::Result<()> {
69 match self.output_format {
70 OutputFormat::Default | OutputFormat::Symbol => {}
71 _ => anyhow::bail!("Compare mode only supports default or symbol output format."),
72 }
73 if self.dir1.is_file() {
74 return self.run_file_comparer();
75 }
76
77 let progress = self
78 .progress
79 .as_ref()
80 .map(|progress| progress.add_spinner())
81 .unwrap_or_else(Progress::none);
82 progress.set_message("Scanning directories...");
83 let start_time = std::time::Instant::now();
84 let mut summary = ComparisonSummary::default();
85 let dir1_str = self.dir1.to_str().unwrap_or("dir1");
86 let dir2_str = self.dir2.to_str().unwrap_or("dir2");
87 let (tx, rx) = mpsc::channel();
88 std::thread::scope(|scope| {
89 scope.spawn(move || {
90 if let Err(e) = self.compare_streaming_ordered(tx) {
91 log::error!("Error during comparison: {}", e);
92 }
93 });
94
95 while let Ok(event) = rx.recv() {
97 match event {
98 CompareProgress::StartOfComparison => {
99 progress.set_message("Comparing files...");
100 }
101 CompareProgress::TotalFiles(total_files) => {
102 progress.set_length(total_files as u64);
103 progress.set_message("");
104 }
105 CompareProgress::Result(_, result) => {
106 summary.update(&result);
107 match self.output_format {
108 OutputFormat::Symbol => progress.suspend_for(stdout(), || {
109 println!(
110 "{} {}",
111 result.to_symbol_string(),
112 result.relative_path.display()
113 );
114 }),
115 OutputFormat::Default => {
116 if !result.is_identical() {
117 progress.suspend_for(stdout(), || {
118 println!(
119 "{}: {}",
120 result.relative_path.display(),
121 result.to_string(dir1_str, dir2_str)
122 );
123 });
124 }
125 }
126 _ => unreachable!(),
127 }
128 }
129 CompareProgress::FileDone => progress.inc(1),
130 CompareProgress::Error => summary.num_errors += 1,
131 }
132 }
133 });
134 progress.finish();
135 eprintln!("\n--- Comparison Summary ---");
136 summary.print(&mut io::stderr(), &start_time, dir1_str, dir2_str)?;
137 Ok(())
138 }
139
140 fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
145 crate::sort_stream(
146 tx,
147 |tx_unordered| self.compare_streaming(tx_unordered),
148 |event| match event {
149 CompareProgress::Result(i, _) => Some(*i),
150 _ => None,
151 },
152 )
153 }
154
155 fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
156 let mut it1 = FileIterator::new(&self.dir1);
157 let mut it2 = FileIterator::new(&self.dir2);
158 it1.exclude = self.exclude.as_ref();
159 it2.exclude = self.exclude.as_ref();
160 let mut hashers = self.get_hashers(&self.dir1, &self.dir2)?;
161 if let Some((h1, h2)) = &mut hashers {
162 it1.cache = Some(h1.cache()?);
163 it2.cache = Some(h2.cache()?);
164 if self.comparison_method == FileComparisonMethod::Rehash {
165 h1.clear_cache()?;
166 h2.clear_cache()?;
167 }
168 }
169 let hashers_ref = hashers.as_ref();
170 std::thread::scope(|global_scope| {
171 let it1_rx = it1.spawn_in_scope(global_scope);
172 let it2_rx = it2.spawn_in_scope(global_scope);
173 let pool = crate::build_thread_pool(self.jobs)?;
174 pool.scope(move |scope| {
175 let mut cur1 = it1_rx.recv().ok();
176 let mut cur2 = it2_rx.recv().ok();
177 let mut index = 0;
178 tx.send(CompareProgress::StartOfComparison)?;
179 loop {
180 let cmp = match (&cur1, &cur2) {
181 (Some(f1), Some(f2)) => {
182 let rel1 = f1.relative_path(&self.dir1);
183 let rel2 = f2.relative_path(&self.dir2);
184 rel1.cmp(rel2)
185 }
186 (Some(_), None) => Ordering::Less,
187 (None, Some(_)) => Ordering::Greater,
188 (None, None) => break,
189 };
190 match cmp {
191 Ordering::Less => {
192 let file1 = cur1.take().unwrap();
193 let rel1 = file1.relative_path(&self.dir1);
194 let result =
195 FileComparisonResult::new(rel1.into(), Classification::OnlyInDir1);
196 tx.send(CompareProgress::Result(index, result))?;
197 tx.send(CompareProgress::FileDone)?;
198 index += 1;
199 cur1 = it1_rx.recv().ok();
200 }
201 Ordering::Greater => {
202 let file2 = cur2.take().unwrap();
203 let rel2 = file2.relative_path(&self.dir2);
204 let result =
205 FileComparisonResult::new(rel2.into(), Classification::OnlyInDir2);
206 tx.send(CompareProgress::Result(index, result))?;
207 tx.send(CompareProgress::FileDone)?;
208 index += 1;
209 cur2 = it2_rx.recv().ok();
210 }
211 Ordering::Equal => {
212 let file1 = cur1.take().unwrap();
213 let file2 = cur2.take().unwrap();
214 let buffer_size = self.buffer_size;
215 let tx_clone = tx.clone();
216 let i = index;
217 let should_compare =
218 self.comparison_method != FileComparisonMethod::Size;
219 scope.spawn(move |_| {
220 let mut comparer = FileComparer::new(&file1, &file2);
221 comparer.buffer_size = buffer_size;
222 if let Some((h1, h2)) = hashers_ref {
223 comparer.hashers = Some((h1, h2));
224 }
225 let rel_path = file1.relative_path(&self.dir1);
226 let mut result = FileComparisonResult::new(
227 rel_path.into(),
228 Classification::InBoth,
229 );
230 let event = match result.update(&comparer, should_compare) {
231 Ok(_) => CompareProgress::Result(i, result),
232 Err(error) => {
233 log::error!(
234 "Error comparing '{}': {}",
235 result.relative_path.display(),
236 error
237 );
238 CompareProgress::Error
239 }
240 };
241 if tx_clone.send(event).is_err()
242 || tx_clone.send(CompareProgress::FileDone).is_err()
243 {
244 log::error!("Send failed");
245 }
246 });
247 index += 1;
248 cur1 = it1_rx.recv().ok();
249 cur2 = it2_rx.recv().ok();
250 }
251 }
252 }
253 tx.send(CompareProgress::TotalFiles(index))
254 })?;
255 Ok::<(), anyhow::Error>(())
256 })?;
257
258 Self::save_hashers(hashers)?;
259 Ok(())
260 }
261
262 fn get_hashers(
263 &self,
264 dir1: &Path,
265 dir2: &Path,
266 ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
267 if self.comparison_method == FileComparisonMethod::Hash
268 || self.comparison_method == FileComparisonMethod::Rehash
269 {
270 let (h1_res, h2_res) = rayon::join(
271 || FileHasher::new_with_cache(&[dir1]),
272 || FileHasher::new_with_cache(&[dir2]),
273 );
274 let mut h1 = h1_res?;
275 let mut h2 = h2_res?;
276 h1.buffer_size = self.buffer_size;
277 h2.buffer_size = self.buffer_size;
278 if let Some(progress) = self.progress.as_ref() {
279 h1.progress = Some(Arc::clone(progress));
280 h2.progress = Some(Arc::clone(progress));
281 }
282 return Ok(Some((h1, h2)));
283 }
284 Ok(None)
285 }
286
287 fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
288 if let Some((h1, h2)) = hashers {
289 let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
290 r1?;
291 r2?;
292 }
293 Ok(())
294 }
295
296 fn run_file_comparer(&self) -> anyhow::Result<()> {
297 assert!(self.dir1.is_file());
298 let file1_path = &self.dir1;
299 let dir1 = file1_path.parent().unwrap();
300 let file1_name = file1_path.file_name().unwrap();
301 let (dir2, file2_path) = if self.dir2.is_file() {
302 (self.dir2.parent().unwrap(), self.dir2.clone())
303 } else {
304 (self.dir2.as_path(), self.dir2.join(file1_name))
305 };
306 let file1 = FileItem::try_from(file1_path.as_path())?;
307 let file2 = FileItem::try_from(file2_path.as_path())?;
308 let mut comparer = FileComparer::new(&file1, &file2);
309 comparer.buffer_size = self.buffer_size;
310 let mut hashers = self.get_hashers(dir1, dir2)?;
311 if let Some((h1, h2)) = &mut hashers {
312 if self.comparison_method == FileComparisonMethod::Rehash {
313 h1.remove_cache_entry(file1_path)?;
314 h2.remove_cache_entry(&file2_path)?;
315 }
316 comparer.hashers = Some((h1, h2));
317 }
318 let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
319 let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
320 result.update(&comparer, should_compare_content)?;
321 let file1_str = file1_path.to_str().unwrap_or("file1");
322 match self.output_format {
323 OutputFormat::Symbol => {
324 println!("{} {}", result.to_symbol_string(), file1_str);
325 }
326 OutputFormat::Default => {
327 let file2_str = file2_path.to_str().unwrap_or("file2");
328 println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
329 }
330 _ => unreachable!(),
331 }
332 Self::save_hashers(hashers)?;
333 Ok(())
334 }
335}
336
/// Counters accumulated over all comparison results of one run.
#[derive(Default)]
struct ComparisonSummary {
    /// Files present on both sides.
    pub in_both: usize,
    /// Files present only on the left side.
    pub only_in_dir1: usize,
    /// Files present only on the right side.
    pub only_in_dir2: usize,
    /// Files in both whose modification-time comparison is `Greater`.
    pub dir1_newer: usize,
    /// Files in both whose modification-time comparison is `Less`.
    pub dir2_newer: usize,
    /// Files in both whose size comparison is `Greater`.
    pub dir1_larger: usize,
    /// Files in both whose size comparison is `Less`.
    pub dir2_larger: usize,
    /// Files in both with equal size whose content was found to differ.
    pub diff_content: usize,
    /// Files in both for which a time, size or content comparison is missing.
    pub not_comparable: usize,
    /// Comparisons that ended with an error.
    pub num_errors: usize,
}
350
351impl ComparisonSummary {
352 pub fn update(&mut self, result: &FileComparisonResult) {
353 match result.classification {
354 Classification::OnlyInDir1 => self.only_in_dir1 += 1,
355 Classification::OnlyInDir2 => self.only_in_dir2 += 1,
356 Classification::InBoth => {
357 self.in_both += 1;
358 let mut is_not_comparable = false;
359 match result.modified_time_comparison {
360 Some(Ordering::Greater) => self.dir1_newer += 1,
361 Some(Ordering::Less) => self.dir2_newer += 1,
362 Some(Ordering::Equal) => {}
363 None => is_not_comparable = true,
364 }
365 match result.size_comparison {
366 Some(Ordering::Greater) => self.dir1_larger += 1,
367 Some(Ordering::Less) => self.dir2_larger += 1,
368 Some(Ordering::Equal) => match result.is_content_same {
369 Some(false) => self.diff_content += 1,
370 Some(true) => {}
371 None => is_not_comparable = true,
372 },
373 None => is_not_comparable = true,
374 }
375 if is_not_comparable {
376 self.not_comparable += 1;
377 }
378 }
379 }
380 }
381
382 pub fn print(
383 &self,
384 mut writer: impl std::io::Write,
385 start_time: &time::Instant,
386 dir1_name: &str,
387 dir2_name: &str,
388 ) -> std::io::Result<()> {
389 let values = [
390 ("Elapsed:", 0),
391 ("Files in both:", self.in_both),
392 ("Only in left:", self.only_in_dir1),
393 ("Only in right:", self.only_in_dir2),
394 ("Left is newer:", self.dir1_newer),
395 ("Right is newer:", self.dir2_newer),
396 ("Left is larger:", self.dir1_larger),
397 ("Right is larger:", self.dir2_larger),
398 ("Different content:", self.diff_content),
399 ("Not comparable:", self.not_comparable),
400 ("Errors:", self.num_errors),
401 ];
402 let formatter = ColumnFormatter::new(values.iter().map(|(s, _)| *s));
403 formatter.write_value(&mut writer, "Left:", dir1_name)?;
404 formatter.write_value(&mut writer, "Right:", dir2_name)?;
405 formatter.write_value(
406 &mut writer,
407 values[0].0,
408 FormattedDuration(start_time.elapsed()),
409 )?;
410 formatter.write_values(&mut writer, &values[1..])?;
411 Ok(())
412 }
413}
414
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Write;

    /// Runs the ordered streaming comparison and keeps only the `Result` payloads.
    fn collect_results(comparer: &DirectoryComparer) -> anyhow::Result<Vec<FileComparisonResult>> {
        let (tx, rx) = mpsc::channel();
        comparer.compare_streaming_ordered(tx)?;
        let results = rx
            .into_iter()
            .filter_map(|event| match event {
                CompareProgress::Result(_, result) => Some(result),
                _ => None,
            })
            .collect();
        Ok(results)
    }

    #[test]
    fn comparison_summary() {
        let mut newer_in_dir1 =
            FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        newer_in_dir1.modified_time_comparison = Some(Ordering::Greater);
        let inputs = [
            FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1),
            FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2),
            newer_in_dir1,
        ];

        let mut summary = ComparisonSummary::default();
        for input in &inputs {
            summary.update(input);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn directory_comparer_integration() -> anyhow::Result<()> {
        let left = tempfile::tempdir()?;
        let right = tempfile::tempdir()?;

        let fixtures = [
            // Identical on both sides.
            (left.path(), "same.txt", "same content"),
            // Present on one side only.
            (left.path(), "only1.txt", "only in dir1"),
            (right.path(), "same.txt", "same content"),
            (right.path(), "only2.txt", "only in dir2"),
            // Different sizes.
            (left.path(), "diff.txt", "content 1"),
            (right.path(), "diff.txt", "content 222"),
            // Same size, different content.
            (left.path(), "diffc.txt", "content 111"),
            (right.path(), "diffc.txt", "content 222"),
        ];
        for (root, name, content) in fixtures {
            fs::write(root.join(name), content)?;
        }

        let comparer =
            DirectoryComparer::new(left.path().to_path_buf(), right.path().to_path_buf());
        let mut results = collect_results(&comparer)?;
        results.sort_by(|lhs, rhs| lhs.relative_path.cmp(&rhs.relative_path));
        assert_eq!(results.len(), 5);

        let [diff, diffc, only1, only2, same] = results.as_slice() else {
            unreachable!("the length was asserted above");
        };

        assert_eq!(diff.relative_path.to_str().unwrap(), "diff.txt");
        assert_eq!(diff.classification, Classification::InBoth);
        assert_eq!(diff.size_comparison, Some(Ordering::Less));
        assert_eq!(diff.is_content_same, None);

        assert_eq!(diffc.relative_path.to_str().unwrap(), "diffc.txt");
        assert_eq!(diffc.classification, Classification::InBoth);
        assert_eq!(diffc.size_comparison, Some(Ordering::Equal));
        assert_eq!(diffc.is_content_same, Some(false));

        assert_eq!(only1.relative_path.to_str().unwrap(), "only1.txt");
        assert_eq!(only1.classification, Classification::OnlyInDir1);

        assert_eq!(only2.relative_path.to_str().unwrap(), "only2.txt");
        assert_eq!(only2.classification, Classification::OnlyInDir2);

        assert_eq!(same.relative_path.to_str().unwrap(), "same.txt");
        assert_eq!(same.classification, Classification::InBoth);
        assert_eq!(same.size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    #[test]
    fn directory_comparer_size_mode() -> anyhow::Result<()> {
        let left = tempfile::tempdir()?;
        let right = tempfile::tempdir()?;

        // Same length, different bytes: size mode must not look at the content.
        let left_file_path = left.path().join("file.txt");
        let mut left_file = fs::File::create(&left_file_path)?;
        left_file.write_all(b"content 1")?;

        let right_file_path = right.path().join("file.txt");
        let mut right_file = fs::File::create(&right_file_path)?;
        right_file.write_all(b"content 2")?;

        let mut comparer =
            DirectoryComparer::new(left.path().to_path_buf(), right.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;

        let results = collect_results(&comparer)?;

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.relative_path.to_str().unwrap(), "file.txt");
        assert_eq!(only.classification, Classification::InBoth);
        assert_eq!(only.size_comparison, Some(Ordering::Equal));
        assert_eq!(only.is_content_same, None);

        Ok(())
    }
}