1use crate::{
2 Classification, ColumnFormatter, FileComparer, FileComparisonResult, FileHasher, FileIterator,
3 Progress, ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::{
8 cmp::Ordering,
9 io::{self, stdout},
10 path::{Path, PathBuf},
11 sync::{Arc, mpsc},
12 time,
13};
14
/// Events sent over the `mpsc` channel from the comparison worker to the consumer loop in
/// `DirectoryComparer::run`.
#[derive(Debug, Clone)]
enum CompareProgress {
    /// Sent once by `compare_streaming`, right before it starts pairing up the two listings.
    StartOfComparison,
    /// Sent after every `Result` or `Error` event; `run` advances the progress by one.
    FileDone,
    /// Number of entries that were enumerated; sent once, after both listings are exhausted.
    TotalFiles(usize),
    /// Outcome for one entry. The `usize` is the zero-based enumeration index, which
    /// `compare_streaming_ordered` hands to `crate::sort_stream` as the ordering key.
    Result(usize, FileComparisonResult),
    /// A file pair could not be compared; the details are logged where the failure occurred.
    Error,
}
23
/// Selects how files that exist on both sides are compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// No hashers are created and `FileComparisonResult::update` is called with its
    /// content flag set to `false`, so file contents are not compared.
    Size,
    /// A `FileHasher` is created for each side and handed to the iterators and comparers.
    /// NOTE(review): presumably compares cached content hashes — confirm in `FileHasher`.
    Hash,
    /// Like `Hash`, but cached hash data is discarded first: the whole cache for a directory
    /// comparison (`clear_cache`), or only the two affected entries for a single-file
    /// comparison (`remove_cache_entry`).
    Rehash,
    /// No hashers are created, but the content flag passed to `FileComparisonResult::update`
    /// is `true`.
    /// NOTE(review): presumably a direct content comparison by `FileComparer` — confirm.
    Full,
}
36
/// Compares two directory trees (or two single files) and reports the differences.
pub struct DirectoryComparer {
    /// Left-hand input. When this is a regular file, `run` compares a single file pair
    /// instead of walking directories.
    dir1: PathBuf,
    /// Right-hand input: a directory, or (in single-file mode) possibly a file.
    dir2: PathBuf,
    /// When `true`, every result is printed as `<symbols> <path>`; otherwise only entries
    /// that are not identical are printed, as `<path>: <description>`.
    pub is_symbols_format: bool,
    /// Buffer size copied into every `FileComparer` and `FileHasher` this comparer creates.
    pub buffer_size: usize,
    /// How files that exist on both sides are compared.
    pub comparison_method: FileComparisonMethod,
    /// Optional glob set assigned to the `exclude` field of both file iterators.
    pub exclude: Option<GlobSet>,
    /// Optional progress builder; `run` adds a spinner to it and the hashers share it.
    pub progress: Option<Arc<ProgressBuilder>>,
    /// Value passed to `crate::build_thread_pool` for the pool that compares file pairs.
    pub jobs: usize,
}
48
49impl DirectoryComparer {
/// Value that `new` assigns to the `jobs` field.
pub const DEFAULT_JOBS: usize = 8;
51
52 pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
54 Self {
55 dir1,
56 dir2,
57 is_symbols_format: false,
58 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
59 comparison_method: FileComparisonMethod::Hash,
60 exclude: None,
61 progress: None,
62 jobs: Self::DEFAULT_JOBS,
63 }
64 }
65
66 pub fn run(&self) -> anyhow::Result<()> {
69 if self.dir1.is_file() {
70 return self.run_file_comparer();
71 }
72
73 let progress = self
74 .progress
75 .as_ref()
76 .map(|progress| progress.add_spinner())
77 .unwrap_or_else(Progress::none);
78 progress.set_message("Scanning directories...");
79 let start_time = std::time::Instant::now();
80 let mut summary = ComparisonSummary::default();
81 let dir1_str = self.dir1.to_str().unwrap_or("dir1");
82 let dir2_str = self.dir2.to_str().unwrap_or("dir2");
83 let (tx, rx) = mpsc::channel();
84 std::thread::scope(|scope| {
85 scope.spawn(move || {
86 if let Err(e) = self.compare_streaming_ordered(tx) {
87 log::error!("Error during comparison: {}", e);
88 }
89 });
90
91 while let Ok(event) = rx.recv() {
93 match event {
94 CompareProgress::StartOfComparison => {
95 progress.set_message("Comparing files...");
96 }
97 CompareProgress::TotalFiles(total_files) => {
98 progress.set_length(total_files as u64);
99 progress.set_message("");
100 }
101 CompareProgress::Result(_, result) => {
102 summary.update(&result);
103 if self.is_symbols_format {
104 progress.suspend_for(stdout(), || {
105 println!(
106 "{} {}",
107 result.to_symbol_string(),
108 result.relative_path.display()
109 );
110 })
111 } else if !result.is_identical() {
112 progress.suspend_for(stdout(), || {
113 println!(
114 "{}: {}",
115 result.relative_path.display(),
116 result.to_string(dir1_str, dir2_str)
117 );
118 });
119 }
120 }
121 CompareProgress::FileDone => progress.inc(1),
122 CompareProgress::Error => summary.num_errors += 1,
123 }
124 }
125 });
126 progress.finish();
127 eprintln!("\n--- Comparison Summary ---");
128 summary.print(&mut io::stderr(), &start_time, dir1_str, dir2_str)?;
129 Ok(())
130 }
131
132 fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
137 crate::sort_stream(
138 tx,
139 |tx_unordered| self.compare_streaming(tx_unordered),
140 |event| match event {
141 CompareProgress::Result(i, _) => Some(*i),
142 _ => None,
143 },
144 )
145 }
146
/// Lists both trees concurrently, pairs the entries by relative path and sends the
/// resulting `CompareProgress` events on `tx`.
///
/// Entries found on one side only are reported directly from the pairing loop. Entries
/// found on both sides are compared by tasks spawned on a thread pool of `self.jobs`,
/// each of which sends its own events, so `Result` events are not necessarily sent in
/// index order.
///
/// # Errors
///
/// Fails if the hashers cannot be created, cleared or saved, if the thread pool cannot
/// be built, or if a send from the pairing loop fails.
///
/// # Panics
///
/// Panics if `crate::strip_prefix` fails for a path produced by one of the iterators.
fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
    let mut it1 = FileIterator::new(self.dir1.clone());
    let mut it2 = FileIterator::new(self.dir2.clone());
    it1.exclude = self.exclude.as_ref();
    it2.exclude = self.exclude.as_ref();
    // `None` unless the comparison method is `Hash` or `Rehash`.
    let hashers = self.get_hashers(&self.dir1, &self.dir2)?;
    if let Some((h1, h2)) = &hashers {
        it1.hasher = Some(h1);
        it2.hasher = Some(h2);
        if self.comparison_method == FileComparisonMethod::Rehash {
            h1.clear_cache()?;
            h2.clear_cache()?;
        }
    }
    let hashers_ref = hashers.as_ref();
    std::thread::scope(|global_scope| {
        // Each iterator is spawned in the outer scope and delivers its paths on a channel.
        let it1_rx = it1.spawn_in_scope(global_scope);
        let it2_rx = it2.spawn_in_scope(global_scope);
        let pool = crate::build_thread_pool(self.jobs)?;
        pool.scope(move |scope| {
            // `None` means the corresponding listing is exhausted.
            let mut cur1 = it1_rx.recv().ok();
            let mut cur2 = it2_rx.recv().ok();
            let mut index = 0;
            tx.send(CompareProgress::StartOfComparison)?;
            // Merge the two listings by relative path.
            // NOTE(review): this assumes both iterators yield paths in the same sorted
            // order — confirm in `FileIterator`.
            loop {
                let cmp = match (&cur1, &cur2) {
                    (Some(p1), Some(p2)) => {
                        let rel1 = crate::strip_prefix(p1, &self.dir1).unwrap();
                        let rel2 = crate::strip_prefix(p2, &self.dir2).unwrap();
                        rel1.cmp(rel2)
                    }
                    // One side is exhausted: everything left on the other side is unmatched.
                    (Some(_), None) => Ordering::Less,
                    (None, Some(_)) => Ordering::Greater,
                    (None, None) => break,
                };
                match cmp {
                    // The left path sorts first, so it has no counterpart on the right.
                    Ordering::Less => {
                        let path1 = cur1.take().unwrap();
                        let rel1 = crate::strip_prefix(&path1, &self.dir1).unwrap();
                        let result =
                            FileComparisonResult::new(rel1.into(), Classification::OnlyInDir1);
                        tx.send(CompareProgress::Result(index, result))?;
                        tx.send(CompareProgress::FileDone)?;
                        index += 1;
                        cur1 = it1_rx.recv().ok();
                    }
                    // The right path sorts first, so it has no counterpart on the left.
                    Ordering::Greater => {
                        let path2 = cur2.take().unwrap();
                        let rel2 = crate::strip_prefix(&path2, &self.dir2).unwrap();
                        let result =
                            FileComparisonResult::new(rel2.into(), Classification::OnlyInDir2);
                        tx.send(CompareProgress::Result(index, result))?;
                        tx.send(CompareProgress::FileDone)?;
                        index += 1;
                        cur2 = it2_rx.recv().ok();
                    }
                    // Same relative path on both sides: compare the pair on the pool.
                    Ordering::Equal => {
                        let path1 = cur1.take().unwrap();
                        let path2 = cur2.take().unwrap();
                        let buffer_size = self.buffer_size;
                        let tx_clone = tx.clone();
                        let i = index;
                        let should_compare =
                            self.comparison_method != FileComparisonMethod::Size;
                        scope.spawn(move |_| {
                            let mut comparer = FileComparer::new(&path1, &path2);
                            comparer.buffer_size = buffer_size;
                            if let Some((h1, h2)) = hashers_ref {
                                comparer.hashers = Some((h1, h2));
                            }
                            let rel_path = crate::strip_prefix(&path1, &self.dir1).unwrap();
                            let mut result = FileComparisonResult::new(
                                rel_path.into(),
                                Classification::InBoth,
                            );
                            // NOTE(review): `Error` carries no index, so on failure no
                            // `Result` is ever sent for `i` — confirm that
                            // `crate::sort_stream` tolerates such gaps.
                            let event = match result.update(&comparer, should_compare) {
                                Ok(_) => CompareProgress::Result(i, result),
                                Err(error) => {
                                    log::error!(
                                        "Error comparing {:?}: {}",
                                        result.relative_path,
                                        error
                                    );
                                    CompareProgress::Error
                                }
                            };
                            // A task cannot propagate the failure, so it is only logged.
                            if tx_clone.send(event).is_err()
                                || tx_clone.send(CompareProgress::FileDone).is_err()
                            {
                                log::error!("Send failed");
                            }
                        });
                        index += 1;
                        cur1 = it1_rx.recv().ok();
                        cur2 = it2_rx.recv().ok();
                    }
                }
            }
            // `index` now equals the number of entries enumerated.
            tx.send(CompareProgress::TotalFiles(index))
        })?;
        Ok::<(), anyhow::Error>(())
    })?;

    Self::save_hashers(hashers)?;
    Ok(())
}
253
254 fn get_hashers(
255 &self,
256 dir1: &Path,
257 dir2: &Path,
258 ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
259 if self.comparison_method == FileComparisonMethod::Hash
260 || self.comparison_method == FileComparisonMethod::Rehash
261 {
262 let (h1_res, h2_res) =
263 rayon::join(|| FileHasher::new(&[dir1]), || FileHasher::new(&[dir2]));
264 let mut h1 = h1_res?;
265 let mut h2 = h2_res?;
266 h1.buffer_size = self.buffer_size;
267 h2.buffer_size = self.buffer_size;
268 if let Some(progress) = self.progress.as_ref() {
269 h1.progress = Some(Arc::clone(progress));
270 h2.progress = Some(Arc::clone(progress));
271 }
272 return Ok(Some((h1, h2)));
273 }
274 Ok(None)
275 }
276
277 fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
278 if let Some((h1, h2)) = hashers {
279 let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
280 r1?;
281 r2?;
282 }
283 Ok(())
284 }
285
286 fn run_file_comparer(&self) -> anyhow::Result<()> {
287 assert!(self.dir1.is_file());
288 let file1 = &self.dir1;
289 let dir1 = file1.parent().unwrap();
290 let file1_name = file1.file_name().unwrap();
291 let (dir2, file2) = if self.dir2.is_file() {
292 (self.dir2.parent().unwrap(), self.dir2.clone())
293 } else {
294 (self.dir2.as_path(), self.dir2.join(file1_name))
295 };
296
297 let mut comparer = FileComparer::new(file1, &file2);
298 comparer.buffer_size = self.buffer_size;
299 let hashers = self.get_hashers(dir1, dir2)?;
300 if let Some((h1, h2)) = &hashers {
301 if self.comparison_method == FileComparisonMethod::Rehash {
302 h1.remove_cache_entry(file1)?;
303 h2.remove_cache_entry(&file2)?;
304 }
305 comparer.hashers = Some((h1, h2));
306 }
307 let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
308 let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
309 result.update(&comparer, should_compare_content)?;
310 let file1_str = file1.to_str().unwrap_or("file1");
311 if self.is_symbols_format {
312 println!("{} {}", result.to_symbol_string(), file1_str);
313 } else {
314 let file2_str = file2.to_str().unwrap_or("file2");
315 println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
316 }
317 Self::save_hashers(hashers)?;
318 Ok(())
319 }
320}
321
/// Running totals over all comparison results, printed once the comparison has finished.
#[derive(Default)]
struct ComparisonSummary {
    /// Entries present on both sides.
    pub in_both: usize,
    /// Entries present only on the left side.
    pub only_in_dir1: usize,
    /// Entries present only on the right side.
    pub only_in_dir2: usize,
    /// Entries on both sides whose modification time compares as greater on the left.
    pub dir1_newer: usize,
    /// Entries on both sides whose modification time compares as greater on the right.
    pub dir2_newer: usize,
    /// Entries on both sides whose size compares as greater on the left.
    pub dir1_larger: usize,
    /// Entries on both sides whose size compares as greater on the right.
    pub dir2_larger: usize,
    /// Entries on both sides with equal size whose content was found to differ.
    pub diff_content: usize,
    /// Entries on both sides where the modification-time comparison, the size comparison,
    /// or (for equal sizes) the content comparison is unavailable.
    pub not_comparable: usize,
    /// Number of `CompareProgress::Error` events received by `DirectoryComparer::run`.
    pub num_errors: usize,
}
335
336impl ComparisonSummary {
337 pub fn update(&mut self, result: &FileComparisonResult) {
338 match result.classification {
339 Classification::OnlyInDir1 => self.only_in_dir1 += 1,
340 Classification::OnlyInDir2 => self.only_in_dir2 += 1,
341 Classification::InBoth => {
342 self.in_both += 1;
343 let mut is_not_comparable = false;
344 match result.modified_time_comparison {
345 Some(Ordering::Greater) => self.dir1_newer += 1,
346 Some(Ordering::Less) => self.dir2_newer += 1,
347 Some(Ordering::Equal) => {}
348 None => is_not_comparable = true,
349 }
350 match result.size_comparison {
351 Some(Ordering::Greater) => self.dir1_larger += 1,
352 Some(Ordering::Less) => self.dir2_larger += 1,
353 Some(Ordering::Equal) => match result.is_content_same {
354 Some(false) => self.diff_content += 1,
355 Some(true) => {}
356 None => is_not_comparable = true,
357 },
358 None => is_not_comparable = true,
359 }
360 if is_not_comparable {
361 self.not_comparable += 1;
362 }
363 }
364 }
365 }
366
367 pub fn print(
368 &self,
369 mut writer: impl std::io::Write,
370 start_time: &time::Instant,
371 dir1_name: &str,
372 dir2_name: &str,
373 ) -> std::io::Result<()> {
374 let values = [
375 ("Elapsed:", 0),
376 ("Files in both:", self.in_both),
377 ("Only in left:", self.only_in_dir1),
378 ("Only in right:", self.only_in_dir2),
379 ("Left is newer:", self.dir1_newer),
380 ("Right is newer:", self.dir2_newer),
381 ("Left is larger:", self.dir1_larger),
382 ("Right is larger:", self.dir2_larger),
383 ("Different content:", self.diff_content),
384 ("Not comparable:", self.not_comparable),
385 ("Errors:", self.num_errors),
386 ];
387 let formatter = ColumnFormatter::new(values.iter().map(|(s, _)| *s));
388 formatter.write_value(&mut writer, "Left:", dir1_name)?;
389 formatter.write_value(&mut writer, "Right:", dir2_name)?;
390 formatter.write_value(
391 &mut writer,
392 values[0].0,
393 FormattedDuration(start_time.elapsed()),
394 )?;
395 formatter.write_values(&mut writer, &values[1..])?;
396 Ok(())
397 }
398}
399
400#[cfg(test)]
401mod tests {
402 use super::*;
403 use std::fs;
404 use std::io::Write;
405
/// Feeds one result of each classification into a summary and checks the counters.
#[test]
fn comparison_summary() {
    let left_only = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
    let right_only = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
    let mut shared = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
    shared.modified_time_comparison = Some(Ordering::Greater);

    let mut summary = ComparisonSummary::default();
    for result in [&left_only, &right_only, &shared] {
        summary.update(result);
    }

    let counters = (
        summary.only_in_dir1,
        summary.only_in_dir2,
        summary.in_both,
        summary.dir1_newer,
    );
    assert_eq!(counters, (1, 1, 1, 1));
}
423
/// Compares two temporary directories holding an identical file, a differing file and one
/// unique file per side, and checks the classification of every reported entry.
#[test]
fn directory_comparer_integration() -> anyhow::Result<()> {
    let dir1 = tempfile::tempdir()?;
    let dir2 = tempfile::tempdir()?;

    let write_file = |root: &Path, name: &str, bytes: &[u8]| -> std::io::Result<()> {
        fs::File::create(root.join(name))?.write_all(bytes)
    };
    write_file(dir1.path(), "same.txt", b"same content")?;
    write_file(dir1.path(), "only1.txt", b"only in dir1")?;
    write_file(dir2.path(), "same.txt", b"same content")?;
    write_file(dir2.path(), "only2.txt", b"only in dir2")?;
    write_file(dir1.path(), "diff.txt", b"content 1")?;
    write_file(dir2.path(), "diff.txt", b"content 222")?;

    let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
    let (tx, rx) = mpsc::channel();
    comparer.compare_streaming_ordered(tx)?;

    // The sender is gone by now, so the iterator ends once the channel is drained.
    let mut results: Vec<FileComparisonResult> = rx
        .iter()
        .filter_map(|event| match event {
            CompareProgress::Result(_, result) => Some(result),
            _ => None,
        })
        .collect();
    results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

    assert_eq!(results.len(), 4);
    let expected = [
        ("diff.txt", Classification::InBoth),
        ("only1.txt", Classification::OnlyInDir1),
        ("only2.txt", Classification::OnlyInDir2),
        ("same.txt", Classification::InBoth),
    ];
    for (result, (name, classification)) in results.iter().zip(expected) {
        assert_eq!(result.relative_path.to_str().unwrap(), name);
        assert_eq!(result.classification, classification);
    }

    let differing = &results[0];
    assert!(
        differing.is_content_same == Some(false)
            || differing.size_comparison != Some(Ordering::Equal)
    );
    let identical = &results[3];
    assert_eq!(identical.size_comparison, Some(Ordering::Equal));

    Ok(())
}
495
/// In `Size` mode two same-sized files with different bytes are reported as equal in size
/// with no content verdict.
#[test]
fn directory_comparer_size_mode() -> anyhow::Result<()> {
    let dir1 = tempfile::tempdir()?;
    let dir2 = tempfile::tempdir()?;
    fs::File::create(dir1.path().join("file.txt"))?.write_all(b"content 1")?;
    fs::File::create(dir2.path().join("file.txt"))?.write_all(b"content 2")?;

    let mut comparer =
        DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
    comparer.comparison_method = FileComparisonMethod::Size;
    let (tx, rx) = mpsc::channel();
    comparer.compare_streaming_ordered(tx)?;

    // The sender is gone by now, so the iterator ends once the channel is drained.
    let results: Vec<FileComparisonResult> = rx
        .iter()
        .filter_map(|event| match event {
            CompareProgress::Result(_, result) => Some(result),
            _ => None,
        })
        .collect();

    assert_eq!(results.len(), 1);
    let only = &results[0];
    assert_eq!(only.relative_path.to_str().unwrap(), "file.txt");
    assert_eq!(only.classification, Classification::InBoth);
    assert_eq!(only.size_comparison, Some(Ordering::Equal));
    assert_eq!(only.is_content_same, None);

    Ok(())
}
531}