1use crate::{
2 Classification, ColumnFormatter, FileComparer, FileComparisonResult, FileHasher, FileIterator,
3 OutputFormat, Progress, ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use simple_path::SimplePath;
8use std::{
9 cmp::Ordering,
10 io::{self, stdout},
11 path::{Path, PathBuf},
12 sync::{Arc, mpsc},
13 time,
14};
15
/// Events sent over the channel from the comparison code to the consumer
/// loop in `DirectoryComparer::run`.
#[derive(Debug, Clone)]
enum CompareProgress {
    /// Sent once, right before the merge loop over the two listings starts.
    StartOfComparison,
    /// Sent after every processed entry, following its `Result` or `Error`.
    FileDone,
    /// Number of entries visited; sent once the merge loop has finished.
    TotalFiles(usize),
    /// Outcome for one entry. The `usize` is the entry's sequence index,
    /// which `compare_streaming_ordered` uses as the ordering key.
    Result(usize, FileComparisonResult),
    /// A pair of files could not be compared; the sender has already logged
    /// the cause. Carries no sequence index.
    Error,
}
24
/// How files present on both sides are compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Skip the content comparison: results are updated with content
    /// comparison turned off and no hashers are created.
    Size,
    /// Compare content with cache-backed `FileHasher`s.
    Hash,
    /// Like `Hash`, but the hashers' cached entries are discarded first
    /// (the whole cache for directories, the two entries for single files).
    Rehash,
    /// Compare content without hashers.
    /// NOTE(review): presumably a direct read of both files — confirm in
    /// `FileComparer`.
    Full,
}
37
/// Compares two directory trees, or two single files when `dir1` is a file.
pub struct DirectoryComparer {
    /// Left-hand path; when it is a regular file, `run` compares single files.
    dir1: PathBuf,
    /// Right-hand path; may be a directory or, in single-file mode, a file.
    dir2: PathBuf,
    /// Output style; `run` accepts only `Default` and `Symbol`.
    pub output_format: OutputFormat,
    /// Buffer size handed to every `FileComparer` and `FileHasher`.
    pub buffer_size: usize,
    /// Strategy used for files that exist on both sides.
    pub comparison_method: FileComparisonMethod,
    /// Optional glob set assigned to both directory iterators.
    pub exclude: Option<GlobSet>,
    /// Optional progress builder; supplies the spinner and is shared with
    /// the hashers.
    pub progress: Option<Arc<ProgressBuilder>>,
    /// Job count passed to `crate::build_thread_pool`.
    pub jobs: usize,
}
49
50impl DirectoryComparer {
/// Value `new` assigns to `jobs`.
pub const DEFAULT_JOBS: usize = 8;
52
53 pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
55 Self {
56 dir1,
57 dir2,
58 output_format: OutputFormat::Default,
59 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
60 comparison_method: FileComparisonMethod::Hash,
61 exclude: None,
62 progress: None,
63 jobs: Self::DEFAULT_JOBS,
64 }
65 }
66
67 pub fn run(&self) -> anyhow::Result<()> {
70 match self.output_format {
71 OutputFormat::Default | OutputFormat::Symbol => {}
72 _ => anyhow::bail!("Compare mode only supports default or symbol output format."),
73 }
74 if self.dir1.is_file() {
75 return self.run_file_comparer();
76 }
77
78 let progress = self
79 .progress
80 .as_ref()
81 .map(|progress| progress.add_spinner())
82 .unwrap_or_else(Progress::none);
83 progress.set_message("Scanning directories...");
84 let start_time = std::time::Instant::now();
85 let mut summary = ComparisonSummary::default();
86 let dir1_str = self.dir1.to_str().unwrap_or("dir1");
87 let dir2_str = self.dir2.to_str().unwrap_or("dir2");
88 let (tx, rx) = mpsc::channel();
89 std::thread::scope(|scope| {
90 scope.spawn(move || {
91 if let Err(e) = self.compare_streaming_ordered(tx) {
92 log::error!("Error during comparison: {}", e);
93 }
94 });
95
96 while let Ok(event) = rx.recv() {
98 match event {
99 CompareProgress::StartOfComparison => {
100 progress.set_message("Comparing files...");
101 }
102 CompareProgress::TotalFiles(total_files) => {
103 progress.set_length(total_files as u64);
104 progress.set_message("");
105 }
106 CompareProgress::Result(_, result) => {
107 summary.update(&result);
108 match self.output_format {
109 OutputFormat::Symbol => progress.suspend_for(stdout(), || {
110 println!(
111 "{} {}",
112 result.to_symbol_string(),
113 result.relative_path.display()
114 );
115 }),
116 OutputFormat::Default => {
117 if !result.is_identical() {
118 progress.suspend_for(stdout(), || {
119 println!(
120 "{}: {}",
121 result.relative_path.display(),
122 result.to_string(dir1_str, dir2_str)
123 );
124 });
125 }
126 }
127 OutputFormat::Yaml => unreachable!(),
128 }
129 }
130 CompareProgress::FileDone => progress.inc(1),
131 CompareProgress::Error => summary.num_errors += 1,
132 }
133 }
134 });
135 progress.finish();
136 eprintln!("\n--- Comparison Summary ---");
137 summary.print(&mut io::stderr(), &start_time, dir1_str, dir2_str)?;
138 Ok(())
139 }
140
141 fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
146 crate::sort_stream(
147 tx,
148 |tx_unordered| self.compare_streaming(tx_unordered),
149 |event| match event {
150 CompareProgress::Result(i, _) => Some(*i),
151 _ => None,
152 },
153 )
154 }
155
/// Walks both directories in lock step and sends one event stream to `tx`.
///
/// Entries found on one side only are reported directly from the merge
/// loop; entries found on both sides are compared on the thread pool, so
/// their `Result` events may arrive out of index order. After the loop a
/// `TotalFiles` event carries the number of entries visited, and the
/// hashers' caches (if any) are saved.
///
/// NOTE(review): the merge assumes both `FileIterator`s yield paths in the
/// same sorted order relative to their roots — confirm in `FileIterator`.
///
/// # Errors
///
/// Fails if the hashers or the thread pool cannot be created, if a cache
/// operation fails, or if sending to `tx` fails inside the merge loop.
fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
    let mut it1 = FileIterator::new(&self.dir1);
    let mut it2 = FileIterator::new(&self.dir2);
    it1.exclude = self.exclude.as_ref();
    it2.exclude = self.exclude.as_ref();
    // `None` unless the method is `Hash` or `Rehash`.
    let mut hashers = self.get_hashers(&self.dir1, &self.dir2)?;
    if let Some((h1, h2)) = &mut hashers {
        // The iterators receive the caches before a `Rehash` clears them.
        // NOTE(review): whether the iterators then see the cleared state
        // depends on what `cache()` returns — confirm in `FileHasher`.
        it1.cache = Some(h1.cache()?);
        it2.cache = Some(h2.cache()?);
        if self.comparison_method == FileComparisonMethod::Rehash {
            h1.clear_cache()?;
            h2.clear_cache()?;
        }
    }
    // Shared, copyable view of the hashers for the worker closures.
    let hashers_ref = hashers.as_ref();
    std::thread::scope(|global_scope| {
        let it1_rx = it1.spawn_in_scope(global_scope);
        let it2_rx = it2.spawn_in_scope(global_scope);
        let pool = crate::build_thread_pool(self.jobs)?;
        pool.scope(move |scope| {
            // Current head of each listing; `None` once a side is exhausted.
            let mut cur1 = it1_rx.recv().ok();
            let mut cur2 = it2_rx.recv().ok();
            // Sequence number of the next entry; also the final entry count.
            let mut index = 0;
            tx.send(CompareProgress::StartOfComparison)?;
            loop {
                // Decide which side to advance by comparing relative paths.
                let cmp = match (&cur1, &cur2) {
                    (Some(p1), Some(p2)) => {
                        let rel1 = SimplePath::strip_prefix(p1, &self.dir1).unwrap();
                        let rel2 = SimplePath::strip_prefix(p2, &self.dir2).unwrap();
                        rel1.cmp(rel2)
                    }
                    // One side is exhausted: drain the other.
                    (Some(_), None) => Ordering::Less,
                    (None, Some(_)) => Ordering::Greater,
                    (None, None) => break,
                };
                match cmp {
                    // Head of side 1 has no counterpart on side 2.
                    Ordering::Less => {
                        let path1 = cur1.take().unwrap();
                        let rel1 = SimplePath::strip_prefix(&path1, &self.dir1).unwrap();
                        let result =
                            FileComparisonResult::new(rel1.into(), Classification::OnlyInDir1);
                        tx.send(CompareProgress::Result(index, result))?;
                        tx.send(CompareProgress::FileDone)?;
                        index += 1;
                        cur1 = it1_rx.recv().ok();
                    }
                    // Head of side 2 has no counterpart on side 1.
                    Ordering::Greater => {
                        let path2 = cur2.take().unwrap();
                        let rel2 = SimplePath::strip_prefix(&path2, &self.dir2).unwrap();
                        let result =
                            FileComparisonResult::new(rel2.into(), Classification::OnlyInDir2);
                        tx.send(CompareProgress::Result(index, result))?;
                        tx.send(CompareProgress::FileDone)?;
                        index += 1;
                        cur2 = it2_rx.recv().ok();
                    }
                    // Same relative path on both sides: compare on the pool.
                    Ordering::Equal => {
                        let path1 = cur1.take().unwrap();
                        let path2 = cur2.take().unwrap();
                        let buffer_size = self.buffer_size;
                        let tx_clone = tx.clone();
                        let i = index;
                        // `Size` mode skips the content comparison.
                        let should_compare =
                            self.comparison_method != FileComparisonMethod::Size;
                        scope.spawn(move |_| {
                            let mut comparer = FileComparer::new(&path1, &path2);
                            comparer.buffer_size = buffer_size;
                            if let Some((h1, h2)) = hashers_ref {
                                comparer.hashers = Some((h1, h2));
                            }
                            let rel_path =
                                SimplePath::strip_prefix(&path1, &self.dir1).unwrap();
                            let mut result = FileComparisonResult::new(
                                rel_path.into(),
                                Classification::InBoth,
                            );
                            // NOTE(review): a failed pair consumes index `i`
                            // but emits `Error`, which carries no index —
                            // confirm `sort_stream` tolerates the gap.
                            let event = match result.update(&comparer, should_compare) {
                                Ok(_) => CompareProgress::Result(i, result),
                                Err(error) => {
                                    log::error!(
                                        "Error comparing {:?}: {}",
                                        result.relative_path,
                                        error
                                    );
                                    CompareProgress::Error
                                }
                            };
                            // Workers cannot propagate errors, so a closed
                            // channel is only logged here.
                            if tx_clone.send(event).is_err()
                                || tx_clone.send(CompareProgress::FileDone).is_err()
                            {
                                log::error!("Send failed");
                            }
                        });
                        index += 1;
                        cur1 = it1_rx.recv().ok();
                        cur2 = it2_rx.recv().ok();
                    }
                }
            }
            // Sent from the merge loop's thread; spawned comparisons may
            // still be running at this point.
            tx.send(CompareProgress::TotalFiles(index))
        })?;
        Ok::<(), anyhow::Error>(())
    })?;

    Self::save_hashers(hashers)?;
    Ok(())
}
263
264 fn get_hashers(
265 &self,
266 dir1: &Path,
267 dir2: &Path,
268 ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
269 if self.comparison_method == FileComparisonMethod::Hash
270 || self.comparison_method == FileComparisonMethod::Rehash
271 {
272 let (h1_res, h2_res) = rayon::join(
273 || FileHasher::new_with_cache(&[dir1]),
274 || FileHasher::new_with_cache(&[dir2]),
275 );
276 let mut h1 = h1_res?;
277 let mut h2 = h2_res?;
278 h1.buffer_size = self.buffer_size;
279 h2.buffer_size = self.buffer_size;
280 if let Some(progress) = self.progress.as_ref() {
281 h1.progress = Some(Arc::clone(progress));
282 h2.progress = Some(Arc::clone(progress));
283 }
284 return Ok(Some((h1, h2)));
285 }
286 Ok(None)
287 }
288
289 fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
290 if let Some((h1, h2)) = hashers {
291 let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
292 r1?;
293 r2?;
294 }
295 Ok(())
296 }
297
298 fn run_file_comparer(&self) -> anyhow::Result<()> {
299 assert!(self.dir1.is_file());
300 let file1 = &self.dir1;
301 let dir1 = file1.parent().unwrap();
302 let file1_name = file1.file_name().unwrap();
303 let (dir2, file2) = if self.dir2.is_file() {
304 (self.dir2.parent().unwrap(), self.dir2.clone())
305 } else {
306 (self.dir2.as_path(), self.dir2.join(file1_name))
307 };
308
309 let mut comparer = FileComparer::new(file1, &file2);
310 comparer.buffer_size = self.buffer_size;
311 let mut hashers = self.get_hashers(dir1, dir2)?;
312 if let Some((h1, h2)) = &mut hashers {
313 if self.comparison_method == FileComparisonMethod::Rehash {
314 h1.remove_cache_entry(file1)?;
315 h2.remove_cache_entry(&file2)?;
316 }
317 comparer.hashers = Some((h1, h2));
318 }
319 let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
320 let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
321 result.update(&comparer, should_compare_content)?;
322 let file1_str = file1.to_str().unwrap_or("file1");
323 match self.output_format {
324 OutputFormat::Symbol => {
325 println!("{} {}", result.to_symbol_string(), file1_str);
326 }
327 OutputFormat::Default => {
328 let file2_str = file2.to_str().unwrap_or("file2");
329 println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
330 }
331 OutputFormat::Yaml => unreachable!(),
332 }
333 Self::save_hashers(hashers)?;
334 Ok(())
335 }
336}
337
/// Running totals printed at the end of a directory comparison.
#[derive(Default)]
struct ComparisonSummary {
    /// Entries present on both sides.
    pub in_both: usize,
    /// Entries present only on the first side.
    pub only_in_dir1: usize,
    /// Entries present only on the second side.
    pub only_in_dir2: usize,
    /// Entries in both whose modified-time comparison is `Greater`.
    pub dir1_newer: usize,
    /// Entries in both whose modified-time comparison is `Less`.
    pub dir2_newer: usize,
    /// Entries in both whose size comparison is `Greater`.
    pub dir1_larger: usize,
    /// Entries in both whose size comparison is `Less`.
    pub dir2_larger: usize,
    /// Entries in both with equal size whose content is known to differ.
    pub diff_content: usize,
    /// Entries in both for which a time, size or (at equal size) content
    /// comparison is missing; counted once per entry.
    pub not_comparable: usize,
    /// Number of `CompareProgress::Error` events received.
    pub num_errors: usize,
}
351
352impl ComparisonSummary {
353 pub fn update(&mut self, result: &FileComparisonResult) {
354 match result.classification {
355 Classification::OnlyInDir1 => self.only_in_dir1 += 1,
356 Classification::OnlyInDir2 => self.only_in_dir2 += 1,
357 Classification::InBoth => {
358 self.in_both += 1;
359 let mut is_not_comparable = false;
360 match result.modified_time_comparison {
361 Some(Ordering::Greater) => self.dir1_newer += 1,
362 Some(Ordering::Less) => self.dir2_newer += 1,
363 Some(Ordering::Equal) => {}
364 None => is_not_comparable = true,
365 }
366 match result.size_comparison {
367 Some(Ordering::Greater) => self.dir1_larger += 1,
368 Some(Ordering::Less) => self.dir2_larger += 1,
369 Some(Ordering::Equal) => match result.is_content_same {
370 Some(false) => self.diff_content += 1,
371 Some(true) => {}
372 None => is_not_comparable = true,
373 },
374 None => is_not_comparable = true,
375 }
376 if is_not_comparable {
377 self.not_comparable += 1;
378 }
379 }
380 }
381 }
382
383 pub fn print(
384 &self,
385 mut writer: impl std::io::Write,
386 start_time: &time::Instant,
387 dir1_name: &str,
388 dir2_name: &str,
389 ) -> std::io::Result<()> {
390 let values = [
391 ("Elapsed:", 0),
392 ("Files in both:", self.in_both),
393 ("Only in left:", self.only_in_dir1),
394 ("Only in right:", self.only_in_dir2),
395 ("Left is newer:", self.dir1_newer),
396 ("Right is newer:", self.dir2_newer),
397 ("Left is larger:", self.dir1_larger),
398 ("Right is larger:", self.dir2_larger),
399 ("Different content:", self.diff_content),
400 ("Not comparable:", self.not_comparable),
401 ("Errors:", self.num_errors),
402 ];
403 let formatter = ColumnFormatter::new(values.iter().map(|(s, _)| *s));
404 formatter.write_value(&mut writer, "Left:", dir1_name)?;
405 formatter.write_value(&mut writer, "Right:", dir2_name)?;
406 formatter.write_value(
407 &mut writer,
408 values[0].0,
409 FormattedDuration(start_time.elapsed()),
410 )?;
411 formatter.write_values(&mut writer, &values[1..])?;
412 Ok(())
413 }
414}
415
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Write;

    /// Creates `name` inside `dir` holding exactly `contents`.
    fn write_file(dir: &Path, name: &str, contents: &[u8]) -> io::Result<()> {
        let mut file = fs::File::create(dir.join(name))?;
        file.write_all(contents)
    }

    /// Runs the ordered comparison and keeps only the per-file results.
    fn collect_results(
        comparer: &DirectoryComparer,
    ) -> anyhow::Result<Vec<FileComparisonResult>> {
        let (tx, rx) = mpsc::channel();
        comparer.compare_streaming_ordered(tx)?;
        let results = rx
            .into_iter()
            .filter_map(|event| {
                if let CompareProgress::Result(_, result) = event {
                    Some(result)
                } else {
                    None
                }
            })
            .collect();
        Ok(results)
    }

    #[test]
    fn comparison_summary() {
        let only_left = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let only_right = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut left_newer = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        left_newer.modified_time_comparison = Some(Ordering::Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&only_left, &only_right, &left_newer] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Identical on both sides.
        write_file(dir1.path(), "same.txt", b"same content")?;
        write_file(dir1.path(), "only1.txt", b"only in dir1")?;
        write_file(dir2.path(), "same.txt", b"same content")?;
        write_file(dir2.path(), "only2.txt", b"only in dir2")?;
        // Present on both sides with different content and size.
        write_file(dir1.path(), "diff.txt", b"content 1")?;
        write_file(dir2.path(), "diff.txt", b"content 222")?;

        let comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let mut results = collect_results(&comparer)?;
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);

        let diff = &results[0];
        assert_eq!(diff.relative_path.to_str().unwrap(), "diff.txt");
        assert_eq!(diff.classification, Classification::InBoth);
        assert!(
            diff.is_content_same == Some(false) || diff.size_comparison != Some(Ordering::Equal)
        );

        let only1 = &results[1];
        assert_eq!(only1.relative_path.to_str().unwrap(), "only1.txt");
        assert_eq!(only1.classification, Classification::OnlyInDir1);

        let only2 = &results[2];
        assert_eq!(only2.relative_path.to_str().unwrap(), "only2.txt");
        assert_eq!(only2.classification, Classification::OnlyInDir2);

        let same = &results[3];
        assert_eq!(same.relative_path.to_str().unwrap(), "same.txt");
        assert_eq!(same.classification, Classification::InBoth);
        assert_eq!(same.size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    #[test]
    fn directory_comparer_size_mode() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Same size, different content: size mode must not look at content.
        write_file(dir1.path(), "file.txt", b"content 1")?;
        write_file(dir2.path(), "file.txt", b"content 2")?;

        let mut comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;
        let results = collect_results(&comparer)?;

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.relative_path.to_str().unwrap(), "file.txt");
        assert_eq!(only.classification, Classification::InBoth);
        assert_eq!(only.size_comparison, Some(Ordering::Equal));
        assert_eq!(only.is_content_same, None);

        Ok(())
    }
}