1use crate::{
2 Classification, FileComparer, FileComparisonResult, FileHasher, FileIterator, Progress,
3 ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::cmp::Ordering;
8use std::collections::HashMap;
9use std::path::{Path, PathBuf};
10use std::sync::{Arc, mpsc};
11
12#[derive(Debug, Clone)]
13enum CompareProgress {
14 StartOfComparison,
15 FileDone,
16 TotalFiles(usize),
17 Result(usize, FileComparisonResult),
18}
19
/// Selects how files that exist on both sides are compared.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileComparisonMethod {
    /// Content comparison is switched off when results are updated.
    Size,
    /// A pair of hashers is created and handed to the iterators and comparers.
    Hash,
    /// Like `Hash`, but cached hash data is discarded first (the whole cache for a
    /// directory run, the affected entries for a single-file run).
    Rehash,
    /// No hashers are created while content comparison stays enabled.
    Full,
}
32
33pub struct DirectoryComparer {
35 dir1: PathBuf,
36 dir2: PathBuf,
37 pub is_symbols_format: bool,
38 pub buffer_size: usize,
39 pub comparison_method: FileComparisonMethod,
40 pub exclude: Option<GlobSet>,
41 pub progress: Option<Arc<ProgressBuilder>>,
42 pub jobs: usize,
43}
44
45impl DirectoryComparer {
46 pub const DEFAULT_JOBS: usize = 8;
47
48 pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
50 Self {
51 dir1,
52 dir2,
53 is_symbols_format: false,
54 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
55 comparison_method: FileComparisonMethod::Hash,
56 exclude: None,
57 progress: None,
58 jobs: Self::DEFAULT_JOBS,
59 }
60 }
61
62 pub fn run(&self) -> anyhow::Result<()> {
65 if self.dir1.is_file() {
66 return self.run_file_comparer();
67 }
68
69 let progress = self
70 .progress
71 .as_ref()
72 .map(|progress| progress.add_spinner())
73 .unwrap_or_else(Progress::none);
74 progress.set_message("Scanning directories...");
75 let start_time = std::time::Instant::now();
76 let mut summary = ComparisonSummary::default();
77 let dir1_str = self.dir1.to_str().unwrap_or("dir1");
78 let dir2_str = self.dir2.to_str().unwrap_or("dir2");
79 let (tx, rx) = mpsc::channel();
80 std::thread::scope(|scope| {
81 scope.spawn(move || {
82 if let Err(e) = self.compare_streaming_ordered(tx) {
83 log::error!("Error during comparison: {}", e);
84 }
85 });
86
87 while let Ok(event) = rx.recv() {
89 match event {
90 CompareProgress::StartOfComparison => {
91 progress.set_message("Comparing files...");
92 }
93 CompareProgress::TotalFiles(total_files) => {
94 progress.set_length(total_files as u64);
95 progress.set_message("");
96 }
97 CompareProgress::Result(_, result) => {
98 summary.update(&result);
99 if self.is_symbols_format {
100 progress.suspend(|| {
101 println!(
102 "{} {}",
103 result.to_symbol_string(),
104 result.relative_path.display()
105 );
106 })
107 } else if !result.is_identical() {
108 progress.suspend(|| {
109 println!(
110 "{}: {}",
111 result.relative_path.display(),
112 result.to_string(dir1_str, dir2_str)
113 );
114 });
115 }
116 }
117 CompareProgress::FileDone => progress.inc(1),
118 }
119 }
120 });
121 progress.finish();
122 eprintln!("\n--- Comparison Summary ---");
123 summary.print(&mut std::io::stderr(), dir1_str, dir2_str)?;
124 eprintln!(
125 "Comparison finished in {}.",
126 FormattedDuration(start_time.elapsed())
127 );
128 Ok(())
129 }
130
131 fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
136 let (tx_unordered, rx_unordered) = mpsc::channel();
137 std::thread::scope(|scope| {
138 scope.spawn(move || {
139 if let Err(e) = self.compare_streaming_unordered(tx_unordered) {
140 log::error!("Error during unordered comparison: {}", e);
141 }
142 });
143
144 let mut buffer = HashMap::new();
145 let mut next_index = 0;
146 for event in rx_unordered {
147 if let CompareProgress::Result(i, _) = &event {
148 let index = *i;
149 if index == next_index {
150 tx.send(event)?;
151 next_index += 1;
152 while let Some(buffered) = buffer.remove(&next_index) {
153 tx.send(buffered)?;
154 next_index += 1;
155 }
156 } else {
157 buffer.insert(index, event);
158 }
159 } else {
160 tx.send(event)?;
161 }
162 }
163 Ok::<(), anyhow::Error>(())
164 })?;
165 Ok(())
166 }
167
168 fn compare_streaming_unordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
169 let mut it1 = FileIterator::new(self.dir1.clone());
170 let mut it2 = FileIterator::new(self.dir2.clone());
171 it1.exclude = self.exclude.as_ref();
172 it2.exclude = self.exclude.as_ref();
173 let hashers = self.get_hashers(&self.dir1, &self.dir2)?;
174 if let Some((h1, h2)) = &hashers {
175 it1.hasher = Some(h1);
176 it2.hasher = Some(h2);
177 if self.comparison_method == FileComparisonMethod::Rehash {
178 h1.clear_cache()?;
179 h2.clear_cache()?;
180 }
181 }
182 let hashers_ref = hashers.as_ref();
183 std::thread::scope(|global_scope| {
184 let it1_rx = it1.spawn_in_scope(global_scope);
185 let it2_rx = it2.spawn_in_scope(global_scope);
186 let pool = crate::build_thread_pool(self.jobs)?;
187 pool.scope(move |scope| {
188 let mut cur1 = it1_rx.recv().ok();
189 let mut cur2 = it2_rx.recv().ok();
190 let mut index = 0;
191 tx.send(CompareProgress::StartOfComparison)?;
192 loop {
193 let cmp = match (&cur1, &cur2) {
194 (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
195 (Some(_), None) => Ordering::Less,
196 (None, Some(_)) => Ordering::Greater,
197 (None, None) => break,
198 };
199 match cmp {
200 Ordering::Less => {
201 let (rel1, _) = cur1.take().unwrap();
202 let result =
203 FileComparisonResult::new(rel1, Classification::OnlyInDir1);
204 tx.send(CompareProgress::Result(index, result))?;
205 tx.send(CompareProgress::FileDone)?;
206 index += 1;
207 cur1 = it1_rx.recv().ok();
208 }
209 Ordering::Greater => {
210 let (rel2, _) = cur2.take().unwrap();
211 let result =
212 FileComparisonResult::new(rel2, Classification::OnlyInDir2);
213 tx.send(CompareProgress::Result(index, result))?;
214 tx.send(CompareProgress::FileDone)?;
215 index += 1;
216 cur2 = it2_rx.recv().ok();
217 }
218 Ordering::Equal => {
219 let (rel_path, path1) = cur1.take().unwrap();
220 let (_, path2) = cur2.take().unwrap();
221 let buffer_size = self.buffer_size;
222 let tx_clone = tx.clone();
223 let i = index;
224 let should_compare =
225 self.comparison_method != FileComparisonMethod::Size;
226 scope.spawn(move |_| {
227 let mut comparer = FileComparer::new(&path1, &path2);
228 comparer.buffer_size = buffer_size;
229 if let Some((h1, h2)) = hashers_ref {
230 comparer.hashers = Some((h1, h2));
231 }
232 let mut result = FileComparisonResult::new(
233 rel_path.clone(),
234 Classification::InBoth,
235 );
236 if let Err(error) = result.update(&comparer, should_compare) {
237 log::error!(
238 "Error during comparison of {:?}: {}",
239 rel_path,
240 error
241 );
242 }
243 if tx_clone.send(CompareProgress::Result(i, result)).is_err()
244 || tx_clone.send(CompareProgress::FileDone).is_err()
245 {
246 log::error!("Send failed during comparison of {:?}", rel_path);
247 }
248 });
249 index += 1;
250 cur1 = it1_rx.recv().ok();
251 cur2 = it2_rx.recv().ok();
252 }
253 }
254 }
255 tx.send(CompareProgress::TotalFiles(index))
256 })?;
257 Ok::<(), anyhow::Error>(())
258 })?;
259
260 Self::save_hashers(hashers)?;
261 Ok(())
262 }
263
264 fn get_hashers(
265 &self,
266 dir1: &Path,
267 dir2: &Path,
268 ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
269 if self.comparison_method == FileComparisonMethod::Hash
270 || self.comparison_method == FileComparisonMethod::Rehash
271 {
272 let (mut h1, mut h2) = rayon::join(
273 || FileHasher::new(dir1.to_path_buf()),
274 || FileHasher::new(dir2.to_path_buf()),
275 );
276 h1.buffer_size = self.buffer_size;
277 h2.buffer_size = self.buffer_size;
278 if let Some(progress) = self.progress.as_ref() {
279 h1.progress = Some(Arc::clone(progress));
280 h2.progress = Some(Arc::clone(progress));
281 }
282 return Ok(Some((h1, h2)));
283 }
284 Ok(None)
285 }
286
287 fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
288 if let Some((h1, h2)) = hashers {
289 let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
290 r1?;
291 r2?;
292 }
293 Ok(())
294 }
295
296 fn run_file_comparer(&self) -> anyhow::Result<()> {
297 assert!(self.dir1.is_file());
298 let file1 = &self.dir1;
299 let dir1 = file1.parent().unwrap();
300 let file1_name = file1.file_name().unwrap();
301 let (dir2, file2) = if self.dir2.is_file() {
302 (self.dir2.parent().unwrap(), self.dir2.clone())
303 } else {
304 (self.dir2.as_path(), self.dir2.join(file1_name))
305 };
306
307 let mut comparer = FileComparer::new(file1, &file2);
308 comparer.buffer_size = self.buffer_size;
309 let hashers = self.get_hashers(dir1, dir2)?;
310 if let Some((h1, h2)) = &hashers {
311 if self.comparison_method == FileComparisonMethod::Rehash {
312 h1.remove_cache_entry(file1)?;
313 h2.remove_cache_entry(&file2)?;
314 }
315 comparer.hashers = Some((h1, h2));
316 }
317 let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
318 let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
319 result.update(&comparer, should_compare_content)?;
320 let file1_str = file1.to_str().unwrap_or("file1");
321 if self.is_symbols_format {
322 println!("{} {}", result.to_symbol_string(), file1_str);
323 } else {
324 let file2_str = file2.to_str().unwrap_or("file2");
325 println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
326 }
327 Self::save_hashers(hashers)?;
328 Ok(())
329 }
330}
331
/// Running totals over all comparison results; every counter starts at zero.
#[derive(Default)]
struct ComparisonSummary {
    /// Entries present on both sides.
    pub in_both: usize,
    /// Entries present on the left side only.
    pub only_in_dir1: usize,
    /// Entries present on the right side only.
    pub only_in_dir2: usize,
    /// Entries on both sides whose modification-time comparison is `Greater`.
    pub dir1_newer: usize,
    /// Entries on both sides whose modification-time comparison is `Less`.
    pub dir2_newer: usize,
    /// Entries on both sides whose size comparison is `Greater`.
    pub dir1_larger: usize,
    /// Entries on both sides whose size comparison is `Less`.
    pub dir2_larger: usize,
    /// Entries of equal size whose content was found to differ.
    pub diff_content: usize,
    /// Entries on both sides lacking a time, size or (for equal sizes) content verdict.
    pub not_comparable: usize,
}
344
345impl ComparisonSummary {
346 pub fn update(&mut self, result: &FileComparisonResult) {
347 match result.classification {
348 Classification::OnlyInDir1 => self.only_in_dir1 += 1,
349 Classification::OnlyInDir2 => self.only_in_dir2 += 1,
350 Classification::InBoth => {
351 self.in_both += 1;
352 let mut is_not_comparable = false;
353 match result.modified_time_comparison {
354 Some(Ordering::Greater) => self.dir1_newer += 1,
355 Some(Ordering::Less) => self.dir2_newer += 1,
356 Some(Ordering::Equal) => {}
357 None => is_not_comparable = true,
358 }
359 match result.size_comparison {
360 Some(Ordering::Greater) => self.dir1_larger += 1,
361 Some(Ordering::Less) => self.dir2_larger += 1,
362 Some(Ordering::Equal) => match result.is_content_same {
363 Some(false) => self.diff_content += 1,
364 Some(true) => {}
365 None => is_not_comparable = true,
366 },
367 None => is_not_comparable = true,
368 }
369 if is_not_comparable {
370 self.not_comparable += 1;
371 }
372 }
373 }
374 }
375
376 pub fn print(
377 &self,
378 mut writer: impl std::io::Write,
379 dir1_name: &str,
380 dir2_name: &str,
381 ) -> std::io::Result<()> {
382 let values = [
383 ("Files in both:", self.in_both),
384 ("Only in left:", self.only_in_dir1),
385 ("Only in right:", self.only_in_dir2),
386 ("Left is newer:", self.dir1_newer),
387 ("Right is newer:", self.dir2_newer),
388 ("Left is larger:", self.dir1_larger),
389 ("Right is larger:", self.dir2_larger),
390 ("Different content:", self.diff_content),
391 ("Not comparable:", self.not_comparable),
392 ];
393 let max_len = values.iter().map(|(s, _)| s.len()).max().unwrap();
394 writeln!(writer, "{:width$} {}", "Left:", dir1_name, width = max_len)?;
395 writeln!(writer, "{:width$} {}", "Right:", dir2_name, width = max_len)?;
396 for (label, value) in values {
397 writeln!(writer, "{:width$} {}", label, value, width = max_len)?;
398 }
399 Ok(())
400 }
401}
402
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Write;

    /// Creates `dir/name` and writes `contents` into it.
    fn write_file(dir: &Path, name: &str, contents: &[u8]) -> std::io::Result<()> {
        let mut file = fs::File::create(dir.join(name))?;
        file.write_all(contents)
    }

    /// Runs the ordered comparison to completion and returns the emitted results
    /// in the order they were received.
    fn collect_results(comparer: &DirectoryComparer) -> anyhow::Result<Vec<FileComparisonResult>> {
        let (tx, rx) = mpsc::channel();
        comparer.compare_streaming_ordered(tx)?;
        // The sender is gone by now, so draining the receiver terminates.
        let results = rx
            .into_iter()
            .filter_map(|event| match event {
                CompareProgress::Result(_, result) => Some(result),
                CompareProgress::StartOfComparison
                | CompareProgress::FileDone
                | CompareProgress::TotalFiles(_) => None,
            })
            .collect();
        Ok(results)
    }

    #[test]
    fn comparison_summary() {
        let left_only = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let right_only = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut left_newer = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        left_newer.modified_time_comparison = Some(Ordering::Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&left_only, &right_only, &left_newer] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Identical on both sides, plus one file unique to each side.
        write_file(dir1.path(), "same.txt", b"same content")?;
        write_file(dir1.path(), "only1.txt", b"only in dir1")?;
        write_file(dir2.path(), "same.txt", b"same content")?;
        write_file(dir2.path(), "only2.txt", b"only in dir2")?;

        // Same name, different content and size.
        write_file(dir1.path(), "diff.txt", b"content 1")?;
        write_file(dir2.path(), "diff.txt", b"content 222")?;

        let comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let mut results = collect_results(&comparer)?;
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);

        let expected = [
            ("diff.txt", Classification::InBoth),
            ("only1.txt", Classification::OnlyInDir1),
            ("only2.txt", Classification::OnlyInDir2),
            ("same.txt", Classification::InBoth),
        ];
        for (result, (name, classification)) in results.iter().zip(expected) {
            assert_eq!(result.relative_path.to_str().unwrap(), name);
            assert_eq!(result.classification, classification);
        }

        let diff = &results[0];
        assert!(
            diff.is_content_same == Some(false) || diff.size_comparison != Some(Ordering::Equal)
        );
        assert_eq!(results[3].size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    #[test]
    fn directory_comparer_size_mode() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Equal length, different bytes.
        write_file(dir1.path(), "file.txt", b"content 1")?;
        write_file(dir2.path(), "file.txt", b"content 2")?;

        let mut comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;
        let results = collect_results(&comparer)?;

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.relative_path.to_str().unwrap(), "file.txt");
        assert_eq!(only.classification, Classification::InBoth);
        assert_eq!(only.size_comparison, Some(Ordering::Equal));
        assert_eq!(only.is_content_same, None);

        Ok(())
    }
}