1use crate::{
2 Classification, ColumnFormatter, FileComparer, FileComparisonResult, FileHasher, FileIterator,
3 OutputFormat, Progress, ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::{
8 cmp::Ordering,
9 io::{self, stdout},
10 path::{Path, PathBuf},
11 sync::{Arc, mpsc},
12 time,
13};
14
/// Events sent from the comparison worker to the consumer loop in
/// `DirectoryComparer::run`.
#[derive(Debug, Clone)]
enum CompareProgress {
    /// Sent once, just before the merge loop starts pairing files; `run`
    /// reacts by switching the progress message to "Comparing files...".
    StartOfComparison,
    /// Sent after each entry has been handled (whether it produced a
    /// `Result` or an `Error`); `run` advances the progress bar by one.
    FileDone,
    /// Sent after the merge loop ends, carrying the number of entries that
    /// were indexed; `run` uses it as the progress bar length.
    TotalFiles(usize),
    /// The outcome for one entry. The `usize` is the entry's position in the
    /// merged listing and is the key `compare_streaming_ordered` hands to
    /// `crate::sort_stream`.
    Result(usize, FileComparisonResult),
    /// Comparing one pair of files failed; the details were already logged by
    /// the sender. `run` counts it in the summary's error total.
    Error,
}
23
/// Selects how files that exist on both sides are compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Skip the content comparison: `FileComparisonResult::update` is called
    /// with content comparison disabled, so `is_content_same` stays `None`.
    Size,
    /// Compare content with a pair of cache-backed `FileHasher`s (one per
    /// side), created by `DirectoryComparer::get_hashers`.
    Hash,
    /// Like `Hash`, but the cached entries are discarded first
    /// (`clear_cache` for directories, `remove_cache_entry` for a single
    /// file pair).
    Rehash,
    /// Compare content without any hashers attached to the `FileComparer`.
    // NOTE(review): presumably a direct content comparison inside
    // `FileComparer` — confirm against its implementation.
    Full,
}
36
/// Compares two directory trees (or two single files) and reports the
/// differences.
pub struct DirectoryComparer {
    /// Left-hand side. When this path is a regular file, `run` switches to
    /// the single-file comparison.
    dir1: PathBuf,
    /// Right-hand side. In single-file mode this may be a file, or a
    /// directory that is searched for a file with the same name as `dir1`.
    dir2: PathBuf,
    /// How results are printed. `run` accepts only `OutputFormat::Default`
    /// and `OutputFormat::Symbol`.
    pub output_format: OutputFormat,
    /// Buffer size handed to every `FileComparer` and `FileHasher`.
    pub buffer_size: usize,
    /// How files present on both sides are compared.
    pub comparison_method: FileComparisonMethod,
    /// Optional glob set assigned to both `FileIterator`s as their `exclude`
    /// filter.
    pub exclude: Option<GlobSet>,
    /// Optional progress builder; used for the spinner in `run` and shared
    /// with the hashers.
    pub progress: Option<Arc<ProgressBuilder>>,
    /// Value passed to `crate::build_thread_pool` for the comparison pool.
    pub jobs: usize,
}
48
49impl DirectoryComparer {
50 pub const DEFAULT_JOBS: usize = 8;
51
52 pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
54 Self {
55 dir1,
56 dir2,
57 output_format: OutputFormat::Default,
58 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
59 comparison_method: FileComparisonMethod::Hash,
60 exclude: None,
61 progress: None,
62 jobs: Self::DEFAULT_JOBS,
63 }
64 }
65
66 pub fn run(&self) -> anyhow::Result<()> {
69 match self.output_format {
70 OutputFormat::Default | OutputFormat::Symbol => {}
71 _ => anyhow::bail!("Compare mode only supports default or symbol output format."),
72 }
73 if self.dir1.is_file() {
74 return self.run_file_comparer();
75 }
76
77 let progress = self
78 .progress
79 .as_ref()
80 .map(|progress| progress.add_spinner())
81 .unwrap_or_else(Progress::none);
82 progress.set_message("Scanning directories...");
83 let start_time = std::time::Instant::now();
84 let mut summary = ComparisonSummary::default();
85 let dir1_str = self.dir1.to_str().unwrap_or("dir1");
86 let dir2_str = self.dir2.to_str().unwrap_or("dir2");
87 let (tx, rx) = mpsc::channel();
88 std::thread::scope(|scope| {
89 scope.spawn(move || {
90 if let Err(e) = self.compare_streaming_ordered(tx) {
91 log::error!("Error during comparison: {}", e);
92 }
93 });
94
95 while let Ok(event) = rx.recv() {
97 match event {
98 CompareProgress::StartOfComparison => {
99 progress.set_message("Comparing files...");
100 }
101 CompareProgress::TotalFiles(total_files) => {
102 progress.set_length(total_files as u64);
103 progress.set_message("");
104 }
105 CompareProgress::Result(_, result) => {
106 summary.update(&result);
107 match self.output_format {
108 OutputFormat::Symbol => progress.suspend_for(stdout(), || {
109 println!(
110 "{} {}",
111 result.to_symbol_string(),
112 result.relative_path.display()
113 );
114 }),
115 OutputFormat::Default => {
116 if !result.is_identical() {
117 progress.suspend_for(stdout(), || {
118 println!(
119 "{}: {}",
120 result.relative_path.display(),
121 result.to_string(dir1_str, dir2_str)
122 );
123 });
124 }
125 }
126 OutputFormat::Yaml => unreachable!(),
127 }
128 }
129 CompareProgress::FileDone => progress.inc(1),
130 CompareProgress::Error => summary.num_errors += 1,
131 }
132 }
133 });
134 progress.finish();
135 eprintln!("\n--- Comparison Summary ---");
136 summary.print(&mut io::stderr(), &start_time, dir1_str, dir2_str)?;
137 Ok(())
138 }
139
140 fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
145 crate::sort_stream(
146 tx,
147 |tx_unordered| self.compare_streaming(tx_unordered),
148 |event| match event {
149 CompareProgress::Result(i, _) => Some(*i),
150 _ => None,
151 },
152 )
153 }
154
/// Walks both directories in parallel and sends one `CompareProgress`
/// event stream to `tx`.
///
/// The two file listings are merged by relative path: an entry found on one
/// side only is reported immediately, while an entry found on both sides is
/// compared on the thread pool. Because of that, `Result` events may be sent
/// out of index order. After the merge finishes, `TotalFiles` is sent and
/// the hash caches (if any) are saved.
///
/// # Errors
///
/// Fails when the hashers or the thread pool cannot be created, when a send
/// from the merge loop fails, or when saving the hash caches fails.
// NOTE(review): the merge assumes both iterators yield paths in the same
// sorted order — confirm against `FileIterator`.
// NOTE(review): a failed comparison sends `CompareProgress::Error` without
// its index, so the ordered stream never sees that index. Verify that
// `crate::sort_stream` tolerates such gaps.
fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
    let mut it1 = FileIterator::new(&self.dir1);
    let mut it2 = FileIterator::new(&self.dir2);
    it1.exclude = self.exclude.as_ref();
    it2.exclude = self.exclude.as_ref();
    let mut hashers = self.get_hashers(&self.dir1, &self.dir2)?;
    if let Some((h1, h2)) = &mut hashers {
        // Hand each iterator its hasher's cache before (optionally) clearing
        // the cached entries for a forced rehash.
        it1.cache = Some(h1.cache()?);
        it2.cache = Some(h2.cache()?);
        if self.comparison_method == FileComparisonMethod::Rehash {
            h1.clear_cache()?;
            h2.clear_cache()?;
        }
    }
    let hashers_ref = hashers.as_ref();
    std::thread::scope(|global_scope| {
        // Each iterator runs in the outer scope and delivers paths through
        // its own receiver.
        let it1_rx = it1.spawn_in_scope(global_scope);
        let it2_rx = it2.spawn_in_scope(global_scope);
        let pool = crate::build_thread_pool(self.jobs)?;
        pool.scope(move |scope| {
            // `cur1` / `cur2` hold the next unprocessed path of each side;
            // `None` means that side is exhausted.
            let mut cur1 = it1_rx.recv().ok();
            let mut cur2 = it2_rx.recv().ok();
            let mut index = 0;
            tx.send(CompareProgress::StartOfComparison)?;
            loop {
                // Decide which side to advance by comparing relative paths.
                let cmp = match (&cur1, &cur2) {
                    (Some(p1), Some(p2)) => {
                        let rel1 = crate::strip_prefix(p1, &self.dir1).unwrap();
                        let rel2 = crate::strip_prefix(p2, &self.dir2).unwrap();
                        rel1.cmp(rel2)
                    }
                    (Some(_), None) => Ordering::Less,
                    (None, Some(_)) => Ordering::Greater,
                    (None, None) => break,
                };
                match cmp {
                    // Present only on the left side.
                    Ordering::Less => {
                        let path1 = cur1.take().unwrap();
                        let rel1 = crate::strip_prefix(&path1, &self.dir1).unwrap();
                        let result =
                            FileComparisonResult::new(rel1.into(), Classification::OnlyInDir1);
                        tx.send(CompareProgress::Result(index, result))?;
                        tx.send(CompareProgress::FileDone)?;
                        index += 1;
                        cur1 = it1_rx.recv().ok();
                    }
                    // Present only on the right side.
                    Ordering::Greater => {
                        let path2 = cur2.take().unwrap();
                        let rel2 = crate::strip_prefix(&path2, &self.dir2).unwrap();
                        let result =
                            FileComparisonResult::new(rel2.into(), Classification::OnlyInDir2);
                        tx.send(CompareProgress::Result(index, result))?;
                        tx.send(CompareProgress::FileDone)?;
                        index += 1;
                        cur2 = it2_rx.recv().ok();
                    }
                    // Present on both sides: compare on the pool so the
                    // merge loop can keep going.
                    Ordering::Equal => {
                        let path1 = cur1.take().unwrap();
                        let path2 = cur2.take().unwrap();
                        let buffer_size = self.buffer_size;
                        let tx_clone = tx.clone();
                        let i = index;
                        let should_compare =
                            self.comparison_method != FileComparisonMethod::Size;
                        scope.spawn(move |_| {
                            let mut comparer = FileComparer::new(&path1, &path2);
                            comparer.buffer_size = buffer_size;
                            if let Some((h1, h2)) = hashers_ref {
                                comparer.hashers = Some((h1, h2));
                            }
                            let rel_path = crate::strip_prefix(&path1, &self.dir1).unwrap();
                            let mut result = FileComparisonResult::new(
                                rel_path.into(),
                                Classification::InBoth,
                            );
                            let event = match result.update(&comparer, should_compare) {
                                Ok(_) => CompareProgress::Result(i, result),
                                Err(error) => {
                                    log::error!(
                                        "Error comparing {:?}: {}",
                                        result.relative_path,
                                        error
                                    );
                                    CompareProgress::Error
                                }
                            };
                            // A task cannot propagate an error, so a failed
                            // send is only logged.
                            if tx_clone.send(event).is_err()
                                || tx_clone.send(CompareProgress::FileDone).is_err()
                            {
                                log::error!("Send failed");
                            }
                        });
                        index += 1;
                        cur1 = it1_rx.recv().ok();
                        cur2 = it2_rx.recv().ok();
                    }
                }
            }
            // `index` now equals the number of entries handled.
            tx.send(CompareProgress::TotalFiles(index))
        })?;
        Ok::<(), anyhow::Error>(())
    })?;

    Self::save_hashers(hashers)?;
    Ok(())
}
261
262 fn get_hashers(
263 &self,
264 dir1: &Path,
265 dir2: &Path,
266 ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
267 if self.comparison_method == FileComparisonMethod::Hash
268 || self.comparison_method == FileComparisonMethod::Rehash
269 {
270 let (h1_res, h2_res) = rayon::join(
271 || FileHasher::new_with_cache(&[dir1]),
272 || FileHasher::new_with_cache(&[dir2]),
273 );
274 let mut h1 = h1_res?;
275 let mut h2 = h2_res?;
276 h1.buffer_size = self.buffer_size;
277 h2.buffer_size = self.buffer_size;
278 if let Some(progress) = self.progress.as_ref() {
279 h1.progress = Some(Arc::clone(progress));
280 h2.progress = Some(Arc::clone(progress));
281 }
282 return Ok(Some((h1, h2)));
283 }
284 Ok(None)
285 }
286
287 fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
288 if let Some((h1, h2)) = hashers {
289 let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
290 r1?;
291 r2?;
292 }
293 Ok(())
294 }
295
296 fn run_file_comparer(&self) -> anyhow::Result<()> {
297 assert!(self.dir1.is_file());
298 let file1 = &self.dir1;
299 let dir1 = file1.parent().unwrap();
300 let file1_name = file1.file_name().unwrap();
301 let (dir2, file2) = if self.dir2.is_file() {
302 (self.dir2.parent().unwrap(), self.dir2.clone())
303 } else {
304 (self.dir2.as_path(), self.dir2.join(file1_name))
305 };
306
307 let mut comparer = FileComparer::new(file1, &file2);
308 comparer.buffer_size = self.buffer_size;
309 let mut hashers = self.get_hashers(dir1, dir2)?;
310 if let Some((h1, h2)) = &mut hashers {
311 if self.comparison_method == FileComparisonMethod::Rehash {
312 h1.remove_cache_entry(file1)?;
313 h2.remove_cache_entry(&file2)?;
314 }
315 comparer.hashers = Some((h1, h2));
316 }
317 let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
318 let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
319 result.update(&comparer, should_compare_content)?;
320 let file1_str = file1.to_str().unwrap_or("file1");
321 match self.output_format {
322 OutputFormat::Symbol => {
323 println!("{} {}", result.to_symbol_string(), file1_str);
324 }
325 OutputFormat::Default => {
326 let file2_str = file2.to_str().unwrap_or("file2");
327 println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
328 }
329 OutputFormat::Yaml => unreachable!(),
330 }
331 Self::save_hashers(hashers)?;
332 Ok(())
333 }
334}
335
/// Running totals for a directory comparison, printed at the end of a run.
#[derive(Default)]
struct ComparisonSummary {
    /// Entries classified as present on both sides.
    pub in_both: usize,
    /// Entries classified as present only on the left side.
    pub only_in_dir1: usize,
    /// Entries classified as present only on the right side.
    pub only_in_dir2: usize,
    /// Entries in both whose modified-time comparison is `Greater`.
    pub dir1_newer: usize,
    /// Entries in both whose modified-time comparison is `Less`.
    pub dir2_newer: usize,
    /// Entries in both whose size comparison is `Greater`.
    pub dir1_larger: usize,
    /// Entries in both whose size comparison is `Less`.
    pub dir2_larger: usize,
    /// Entries in both with equal size whose content was found to differ.
    pub diff_content: usize,
    /// Entries in both for which the modified-time or size comparison is
    /// missing, or whose sizes are equal but no content result is available.
    pub not_comparable: usize,
    /// Number of failures; `update` never touches it, the caller increments
    /// it directly.
    pub num_errors: usize,
}
349
350impl ComparisonSummary {
351 pub fn update(&mut self, result: &FileComparisonResult) {
352 match result.classification {
353 Classification::OnlyInDir1 => self.only_in_dir1 += 1,
354 Classification::OnlyInDir2 => self.only_in_dir2 += 1,
355 Classification::InBoth => {
356 self.in_both += 1;
357 let mut is_not_comparable = false;
358 match result.modified_time_comparison {
359 Some(Ordering::Greater) => self.dir1_newer += 1,
360 Some(Ordering::Less) => self.dir2_newer += 1,
361 Some(Ordering::Equal) => {}
362 None => is_not_comparable = true,
363 }
364 match result.size_comparison {
365 Some(Ordering::Greater) => self.dir1_larger += 1,
366 Some(Ordering::Less) => self.dir2_larger += 1,
367 Some(Ordering::Equal) => match result.is_content_same {
368 Some(false) => self.diff_content += 1,
369 Some(true) => {}
370 None => is_not_comparable = true,
371 },
372 None => is_not_comparable = true,
373 }
374 if is_not_comparable {
375 self.not_comparable += 1;
376 }
377 }
378 }
379 }
380
381 pub fn print(
382 &self,
383 mut writer: impl std::io::Write,
384 start_time: &time::Instant,
385 dir1_name: &str,
386 dir2_name: &str,
387 ) -> std::io::Result<()> {
388 let values = [
389 ("Elapsed:", 0),
390 ("Files in both:", self.in_both),
391 ("Only in left:", self.only_in_dir1),
392 ("Only in right:", self.only_in_dir2),
393 ("Left is newer:", self.dir1_newer),
394 ("Right is newer:", self.dir2_newer),
395 ("Left is larger:", self.dir1_larger),
396 ("Right is larger:", self.dir2_larger),
397 ("Different content:", self.diff_content),
398 ("Not comparable:", self.not_comparable),
399 ("Errors:", self.num_errors),
400 ];
401 let formatter = ColumnFormatter::new(values.iter().map(|(s, _)| *s));
402 formatter.write_value(&mut writer, "Left:", dir1_name)?;
403 formatter.write_value(&mut writer, "Right:", dir2_name)?;
404 formatter.write_value(
405 &mut writer,
406 values[0].0,
407 FormattedDuration(start_time.elapsed()),
408 )?;
409 formatter.write_values(&mut writer, &values[1..])?;
410 Ok(())
411 }
412}
413
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Write;

    /// Creates `name` inside `dir` and fills it with `contents`.
    fn write_file(dir: &Path, name: &str, contents: &[u8]) -> io::Result<()> {
        fs::File::create(dir.join(name))?.write_all(contents)
    }

    /// Drains the channel and keeps only the comparison results.
    fn collect_results(rx: mpsc::Receiver<CompareProgress>) -> Vec<FileComparisonResult> {
        rx.iter()
            .filter_map(|event| {
                if let CompareProgress::Result(_, result) = event {
                    Some(result)
                } else {
                    None
                }
            })
            .collect()
    }

    #[test]
    fn comparison_summary() {
        let only_left = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let only_right = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut in_both = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        in_both.modified_time_comparison = Some(Ordering::Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&only_left, &only_right, &in_both] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Identical on both sides, one file unique to each side, and one
        // file whose content (and size) differs.
        write_file(dir1.path(), "same.txt", b"same content")?;
        write_file(dir1.path(), "only1.txt", b"only in dir1")?;
        write_file(dir2.path(), "same.txt", b"same content")?;
        write_file(dir2.path(), "only2.txt", b"only in dir2")?;
        write_file(dir1.path(), "diff.txt", b"content 1")?;
        write_file(dir2.path(), "diff.txt", b"content 222")?;

        let comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let (tx, rx) = mpsc::channel();

        comparer.compare_streaming_ordered(tx)?;

        let mut results = collect_results(rx);
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        let expected = [
            ("diff.txt", Classification::InBoth),
            ("only1.txt", Classification::OnlyInDir1),
            ("only2.txt", Classification::OnlyInDir2),
            ("same.txt", Classification::InBoth),
        ];
        assert_eq!(results.len(), 4);
        for (result, (name, classification)) in results.iter().zip(expected) {
            assert_eq!(result.relative_path.to_str().unwrap(), name);
            assert_eq!(result.classification, classification);
        }

        assert!(
            results[0].is_content_same == Some(false)
                || results[0].size_comparison != Some(Ordering::Equal)
        );
        assert_eq!(results[3].size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    #[test]
    fn directory_comparer_size_mode() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Same length, different bytes.
        write_file(dir1.path(), "file.txt", b"content 1")?;
        write_file(dir2.path(), "file.txt", b"content 2")?;

        let mut comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;
        let (tx, rx) = mpsc::channel();

        comparer.compare_streaming_ordered(tx)?;

        let results = collect_results(rx);

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.relative_path.to_str().unwrap(), "file.txt");
        assert_eq!(only.classification, Classification::InBoth);
        assert_eq!(only.size_comparison, Some(Ordering::Equal));
        assert_eq!(only.is_content_same, None);

        Ok(())
    }
}