1use crate::{
2 Classification, ColumnFormatter, FileComparer, FileComparisonResult, FileHasher, FileIterator,
3 Progress, ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::{
8 cmp::Ordering,
9 io::{self, stdout},
10 path::{Path, PathBuf},
11 sync::{Arc, mpsc},
12 time,
13};
14
15#[derive(Debug, Clone)]
16enum CompareProgress {
17 StartOfComparison,
18 FileDone,
19 TotalFiles(usize),
20 Result(usize, FileComparisonResult),
21 Error,
22}
23
/// Strategy used to decide whether two files with the same relative path match.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Do not compare file contents at all; only the metadata-level
    /// comparisons (such as size) are filled in.
    Size,
    /// Compare with the help of `FileHasher`s created with their on-disk cache.
    Hash,
    /// Like [`FileComparisonMethod::Hash`], but the affected cache entries are
    /// discarded first so that hashes are computed again.
    Rehash,
    /// Compare contents without any hashers.
    // NOTE(review): presumably a direct byte-by-byte comparison inside
    // `FileComparer` -- confirm against its implementation.
    Full,
}
36
37pub struct DirectoryComparer {
39 dir1: PathBuf,
40 dir2: PathBuf,
41 pub is_symbols_format: bool,
42 pub buffer_size: usize,
43 pub comparison_method: FileComparisonMethod,
44 pub exclude: Option<GlobSet>,
45 pub progress: Option<Arc<ProgressBuilder>>,
46 pub jobs: usize,
47}
48
49impl DirectoryComparer {
50 pub const DEFAULT_JOBS: usize = 8;
51
52 pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
54 Self {
55 dir1,
56 dir2,
57 is_symbols_format: false,
58 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
59 comparison_method: FileComparisonMethod::Hash,
60 exclude: None,
61 progress: None,
62 jobs: Self::DEFAULT_JOBS,
63 }
64 }
65
66 pub fn run(&self) -> anyhow::Result<()> {
69 if self.dir1.is_file() {
70 return self.run_file_comparer();
71 }
72
73 let progress = self
74 .progress
75 .as_ref()
76 .map(|progress| progress.add_spinner())
77 .unwrap_or_else(Progress::none);
78 progress.set_message("Scanning directories...");
79 let start_time = std::time::Instant::now();
80 let mut summary = ComparisonSummary::default();
81 let dir1_str = self.dir1.to_str().unwrap_or("dir1");
82 let dir2_str = self.dir2.to_str().unwrap_or("dir2");
83 let (tx, rx) = mpsc::channel();
84 std::thread::scope(|scope| {
85 scope.spawn(move || {
86 if let Err(e) = self.compare_streaming_ordered(tx) {
87 log::error!("Error during comparison: {}", e);
88 }
89 });
90
91 while let Ok(event) = rx.recv() {
93 match event {
94 CompareProgress::StartOfComparison => {
95 progress.set_message("Comparing files...");
96 }
97 CompareProgress::TotalFiles(total_files) => {
98 progress.set_length(total_files as u64);
99 progress.set_message("");
100 }
101 CompareProgress::Result(_, result) => {
102 summary.update(&result);
103 if self.is_symbols_format {
104 progress.suspend_for(stdout(), || {
105 println!(
106 "{} {}",
107 result.to_symbol_string(),
108 result.relative_path.display()
109 );
110 })
111 } else if !result.is_identical() {
112 progress.suspend_for(stdout(), || {
113 println!(
114 "{}: {}",
115 result.relative_path.display(),
116 result.to_string(dir1_str, dir2_str)
117 );
118 });
119 }
120 }
121 CompareProgress::FileDone => progress.inc(1),
122 CompareProgress::Error => summary.num_errors += 1,
123 }
124 }
125 });
126 progress.finish();
127 eprintln!("\n--- Comparison Summary ---");
128 summary.print(&mut io::stderr(), &start_time, dir1_str, dir2_str)?;
129 Ok(())
130 }
131
132 fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
137 crate::sort_stream(
138 tx,
139 |tx_unordered| self.compare_streaming(tx_unordered),
140 |event| match event {
141 CompareProgress::Result(i, _) => Some(*i),
142 _ => None,
143 },
144 )
145 }
146
147 fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
148 let mut it1 = FileIterator::new(&self.dir1);
149 let mut it2 = FileIterator::new(&self.dir2);
150 it1.exclude = self.exclude.as_ref();
151 it2.exclude = self.exclude.as_ref();
152 let mut hashers = self.get_hashers(&self.dir1, &self.dir2)?;
153 if let Some((h1, h2)) = &mut hashers {
154 it1.cache = Some(h1.cache()?);
155 it2.cache = Some(h2.cache()?);
156 if self.comparison_method == FileComparisonMethod::Rehash {
157 h1.clear_cache()?;
158 h2.clear_cache()?;
159 }
160 }
161 let hashers_ref = hashers.as_ref();
162 std::thread::scope(|global_scope| {
163 let it1_rx = it1.spawn_in_scope(global_scope);
164 let it2_rx = it2.spawn_in_scope(global_scope);
165 let pool = crate::build_thread_pool(self.jobs)?;
166 pool.scope(move |scope| {
167 let mut cur1 = it1_rx.recv().ok();
168 let mut cur2 = it2_rx.recv().ok();
169 let mut index = 0;
170 tx.send(CompareProgress::StartOfComparison)?;
171 loop {
172 let cmp = match (&cur1, &cur2) {
173 (Some(p1), Some(p2)) => {
174 let rel1 = crate::strip_prefix(p1, &self.dir1).unwrap();
175 let rel2 = crate::strip_prefix(p2, &self.dir2).unwrap();
176 rel1.cmp(rel2)
177 }
178 (Some(_), None) => Ordering::Less,
179 (None, Some(_)) => Ordering::Greater,
180 (None, None) => break,
181 };
182 match cmp {
183 Ordering::Less => {
184 let path1 = cur1.take().unwrap();
185 let rel1 = crate::strip_prefix(&path1, &self.dir1).unwrap();
186 let result =
187 FileComparisonResult::new(rel1.into(), Classification::OnlyInDir1);
188 tx.send(CompareProgress::Result(index, result))?;
189 tx.send(CompareProgress::FileDone)?;
190 index += 1;
191 cur1 = it1_rx.recv().ok();
192 }
193 Ordering::Greater => {
194 let path2 = cur2.take().unwrap();
195 let rel2 = crate::strip_prefix(&path2, &self.dir2).unwrap();
196 let result =
197 FileComparisonResult::new(rel2.into(), Classification::OnlyInDir2);
198 tx.send(CompareProgress::Result(index, result))?;
199 tx.send(CompareProgress::FileDone)?;
200 index += 1;
201 cur2 = it2_rx.recv().ok();
202 }
203 Ordering::Equal => {
204 let path1 = cur1.take().unwrap();
205 let path2 = cur2.take().unwrap();
206 let buffer_size = self.buffer_size;
207 let tx_clone = tx.clone();
208 let i = index;
209 let should_compare =
210 self.comparison_method != FileComparisonMethod::Size;
211 scope.spawn(move |_| {
212 let mut comparer = FileComparer::new(&path1, &path2);
213 comparer.buffer_size = buffer_size;
214 if let Some((h1, h2)) = hashers_ref {
215 comparer.hashers = Some((h1, h2));
216 }
217 let rel_path = crate::strip_prefix(&path1, &self.dir1).unwrap();
218 let mut result = FileComparisonResult::new(
219 rel_path.into(),
220 Classification::InBoth,
221 );
222 let event = match result.update(&comparer, should_compare) {
223 Ok(_) => CompareProgress::Result(i, result),
224 Err(error) => {
225 log::error!(
226 "Error comparing {:?}: {}",
227 result.relative_path,
228 error
229 );
230 CompareProgress::Error
231 }
232 };
233 if tx_clone.send(event).is_err()
234 || tx_clone.send(CompareProgress::FileDone).is_err()
235 {
236 log::error!("Send failed");
237 }
238 });
239 index += 1;
240 cur1 = it1_rx.recv().ok();
241 cur2 = it2_rx.recv().ok();
242 }
243 }
244 }
245 tx.send(CompareProgress::TotalFiles(index))
246 })?;
247 Ok::<(), anyhow::Error>(())
248 })?;
249
250 Self::save_hashers(hashers)?;
251 Ok(())
252 }
253
254 fn get_hashers(
255 &self,
256 dir1: &Path,
257 dir2: &Path,
258 ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
259 if self.comparison_method == FileComparisonMethod::Hash
260 || self.comparison_method == FileComparisonMethod::Rehash
261 {
262 let (h1_res, h2_res) = rayon::join(
263 || FileHasher::new_with_cache(&[dir1]),
264 || FileHasher::new_with_cache(&[dir2]),
265 );
266 let mut h1 = h1_res?;
267 let mut h2 = h2_res?;
268 h1.buffer_size = self.buffer_size;
269 h2.buffer_size = self.buffer_size;
270 if let Some(progress) = self.progress.as_ref() {
271 h1.progress = Some(Arc::clone(progress));
272 h2.progress = Some(Arc::clone(progress));
273 }
274 return Ok(Some((h1, h2)));
275 }
276 Ok(None)
277 }
278
279 fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
280 if let Some((h1, h2)) = hashers {
281 let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
282 r1?;
283 r2?;
284 }
285 Ok(())
286 }
287
288 fn run_file_comparer(&self) -> anyhow::Result<()> {
289 assert!(self.dir1.is_file());
290 let file1 = &self.dir1;
291 let dir1 = file1.parent().unwrap();
292 let file1_name = file1.file_name().unwrap();
293 let (dir2, file2) = if self.dir2.is_file() {
294 (self.dir2.parent().unwrap(), self.dir2.clone())
295 } else {
296 (self.dir2.as_path(), self.dir2.join(file1_name))
297 };
298
299 let mut comparer = FileComparer::new(file1, &file2);
300 comparer.buffer_size = self.buffer_size;
301 let mut hashers = self.get_hashers(dir1, dir2)?;
302 if let Some((h1, h2)) = &mut hashers {
303 if self.comparison_method == FileComparisonMethod::Rehash {
304 h1.remove_cache_entry(file1)?;
305 h2.remove_cache_entry(&file2)?;
306 }
307 comparer.hashers = Some((h1, h2));
308 }
309 let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
310 let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
311 result.update(&comparer, should_compare_content)?;
312 let file1_str = file1.to_str().unwrap_or("file1");
313 if self.is_symbols_format {
314 println!("{} {}", result.to_symbol_string(), file1_str);
315 } else {
316 let file2_str = file2.to_str().unwrap_or("file2");
317 println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
318 }
319 Self::save_hashers(hashers)?;
320 Ok(())
321 }
322}
323
/// Running totals accumulated while results arrive, printed at the end of a
/// directory comparison.
#[derive(Debug, Default)]
struct ComparisonSummary {
    /// Entries present on both sides.
    pub in_both: usize,
    /// Entries present only on the left side.
    pub only_in_dir1: usize,
    /// Entries present only on the right side.
    pub only_in_dir2: usize,
    /// Entries on both sides whose left modification time is newer.
    pub dir1_newer: usize,
    /// Entries on both sides whose right modification time is newer.
    pub dir2_newer: usize,
    /// Entries on both sides whose left file is larger.
    pub dir1_larger: usize,
    /// Entries on both sides whose right file is larger.
    pub dir2_larger: usize,
    /// Entries on both sides with equal size but different content.
    pub diff_content: usize,
    /// Entries on both sides for which the modification time or the size
    /// comparison is missing, or whose sizes are equal but whose content
    /// comparison is missing.
    pub not_comparable: usize,
    /// Number of errors reported during the comparison.
    pub num_errors: usize,
}
337
338impl ComparisonSummary {
339 pub fn update(&mut self, result: &FileComparisonResult) {
340 match result.classification {
341 Classification::OnlyInDir1 => self.only_in_dir1 += 1,
342 Classification::OnlyInDir2 => self.only_in_dir2 += 1,
343 Classification::InBoth => {
344 self.in_both += 1;
345 let mut is_not_comparable = false;
346 match result.modified_time_comparison {
347 Some(Ordering::Greater) => self.dir1_newer += 1,
348 Some(Ordering::Less) => self.dir2_newer += 1,
349 Some(Ordering::Equal) => {}
350 None => is_not_comparable = true,
351 }
352 match result.size_comparison {
353 Some(Ordering::Greater) => self.dir1_larger += 1,
354 Some(Ordering::Less) => self.dir2_larger += 1,
355 Some(Ordering::Equal) => match result.is_content_same {
356 Some(false) => self.diff_content += 1,
357 Some(true) => {}
358 None => is_not_comparable = true,
359 },
360 None => is_not_comparable = true,
361 }
362 if is_not_comparable {
363 self.not_comparable += 1;
364 }
365 }
366 }
367 }
368
369 pub fn print(
370 &self,
371 mut writer: impl std::io::Write,
372 start_time: &time::Instant,
373 dir1_name: &str,
374 dir2_name: &str,
375 ) -> std::io::Result<()> {
376 let values = [
377 ("Elapsed:", 0),
378 ("Files in both:", self.in_both),
379 ("Only in left:", self.only_in_dir1),
380 ("Only in right:", self.only_in_dir2),
381 ("Left is newer:", self.dir1_newer),
382 ("Right is newer:", self.dir2_newer),
383 ("Left is larger:", self.dir1_larger),
384 ("Right is larger:", self.dir2_larger),
385 ("Different content:", self.diff_content),
386 ("Not comparable:", self.not_comparable),
387 ("Errors:", self.num_errors),
388 ];
389 let formatter = ColumnFormatter::new(values.iter().map(|(s, _)| *s));
390 formatter.write_value(&mut writer, "Left:", dir1_name)?;
391 formatter.write_value(&mut writer, "Right:", dir2_name)?;
392 formatter.write_value(
393 &mut writer,
394 values[0].0,
395 FormattedDuration(start_time.elapsed()),
396 )?;
397 formatter.write_values(&mut writer, &values[1..])?;
398 Ok(())
399 }
400}
401
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Write;

    /// One-sided entries and the "left is newer" counter are tallied.
    #[test]
    fn comparison_summary() {
        let mut summary = ComparisonSummary::default();
        let res1 = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let res2 = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut res3 = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        res3.modified_time_comparison = Some(Ordering::Greater);

        summary.update(&res1);
        summary.update(&res2);
        summary.update(&res3);

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    /// Size, content and "not comparable" counters for entries on both sides.
    /// Every field consulted by `update` is set explicitly so the test does
    /// not depend on the defaults chosen by `FileComparisonResult::new`.
    #[test]
    fn comparison_summary_in_both_details() {
        let mut summary = ComparisonSummary::default();

        let mut differs = FileComparisonResult::new(PathBuf::from("d"), Classification::InBoth);
        differs.modified_time_comparison = Some(Ordering::Equal);
        differs.size_comparison = Some(Ordering::Equal);
        differs.is_content_same = Some(false);

        let mut larger = FileComparisonResult::new(PathBuf::from("l"), Classification::InBoth);
        larger.modified_time_comparison = Some(Ordering::Less);
        larger.size_comparison = Some(Ordering::Greater);
        larger.is_content_same = None;

        let mut unknown = FileComparisonResult::new(PathBuf::from("u"), Classification::InBoth);
        unknown.modified_time_comparison = None;
        unknown.size_comparison = None;
        unknown.is_content_same = None;

        summary.update(&differs);
        summary.update(&larger);
        summary.update(&unknown);

        assert_eq!(summary.in_both, 3);
        assert_eq!(summary.diff_content, 1);
        assert_eq!(summary.dir1_newer, 0);
        assert_eq!(summary.dir2_newer, 1);
        assert_eq!(summary.dir1_larger, 1);
        assert_eq!(summary.dir2_larger, 0);
        // Missing time and missing size on the same entry count only once.
        assert_eq!(summary.not_comparable, 1);
    }

    #[test]
    fn directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Identical file on both sides.
        let file1_path = dir1.path().join("same.txt");
        let mut file1 = fs::File::create(&file1_path)?;
        file1.write_all(b"same content")?;

        let only1_path = dir1.path().join("only1.txt");
        let mut only1 = fs::File::create(&only1_path)?;
        only1.write_all(b"only in dir1")?;

        let file2_path = dir2.path().join("same.txt");
        let mut file2 = fs::File::create(&file2_path)?;
        file2.write_all(b"same content")?;

        let only2_path = dir2.path().join("only2.txt");
        let mut only2 = fs::File::create(&only2_path)?;
        only2.write_all(b"only in dir2")?;

        // Same name on both sides, different content and size.
        let diff1_path = dir1.path().join("diff.txt");
        let mut diff1 = fs::File::create(&diff1_path)?;
        diff1.write_all(b"content 1")?;

        let diff2_path = dir2.path().join("diff.txt");
        let mut diff2 = fs::File::create(&diff2_path)?;
        diff2.write_all(b"content 222")?;

        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let (tx, rx) = mpsc::channel();

        comparer.compare_streaming_ordered(tx)?;

        let mut results = Vec::new();
        while let Ok(res) = rx.recv() {
            if let CompareProgress::Result(_, r) = res {
                results.push(r);
            }
        }

        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);

        assert_eq!(results[0].relative_path.to_str().unwrap(), "diff.txt");
        assert_eq!(results[0].classification, Classification::InBoth);
        assert!(
            results[0].is_content_same == Some(false)
                || results[0].size_comparison != Some(Ordering::Equal)
        );

        assert_eq!(results[1].relative_path.to_str().unwrap(), "only1.txt");
        assert_eq!(results[1].classification, Classification::OnlyInDir1);

        assert_eq!(results[2].relative_path.to_str().unwrap(), "only2.txt");
        assert_eq!(results[2].classification, Classification::OnlyInDir2);

        assert_eq!(results[3].relative_path.to_str().unwrap(), "same.txt");
        assert_eq!(results[3].classification, Classification::InBoth);
        assert_eq!(results[3].size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    #[test]
    fn directory_comparer_size_mode() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Same length, different bytes: size mode must not look at content.
        let file1_path = dir1.path().join("file.txt");
        let mut file1 = fs::File::create(&file1_path)?;
        file1.write_all(b"content 1")?;

        let file2_path = dir2.path().join("file.txt");
        let mut file2 = fs::File::create(&file2_path)?;
        file2.write_all(b"content 2")?;

        let mut comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;
        let (tx, rx) = mpsc::channel();

        comparer.compare_streaming_ordered(tx)?;

        let mut results = Vec::new();
        while let Ok(res) = rx.recv() {
            if let CompareProgress::Result(_, r) = res {
                results.push(r);
            }
        }

        assert_eq!(results.len(), 1);
        assert_eq!(results[0].relative_path.to_str().unwrap(), "file.txt");
        assert_eq!(results[0].classification, Classification::InBoth);
        assert_eq!(results[0].size_comparison, Some(Ordering::Equal));
        assert_eq!(results[0].is_content_same, None);

        Ok(())
    }
}