1use crate::{
2 Classification, FileComparer, FileComparisonResult, FileHasher, FileIterator, Progress,
3 ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::cmp::Ordering;
8use std::path::{Path, PathBuf};
9use std::sync::{Arc, mpsc};
10
11#[derive(Debug, Clone)]
12enum CompareProgress {
13 StartOfComparison,
14 FileDone,
15 TotalFiles(usize),
16 Result(usize, FileComparisonResult),
17}
18
/// Strategy used to decide whether two files present on both sides match.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileComparisonMethod {
    /// Skip the content comparison; `is_content_same` is left unset.
    Size,
    /// Compare content with a pair of `FileHasher`s (one per side).
    Hash,
    /// Like `Hash`, but the hashers' caches are cleared (directory mode) or the
    /// affected cache entries removed (single-file mode) before comparing.
    Rehash,
    /// Compare content without creating hashers.
    Full,
}
31
32pub struct DirectoryComparer {
34 dir1: PathBuf,
35 dir2: PathBuf,
36 pub is_symbols_format: bool,
37 pub buffer_size: usize,
38 pub comparison_method: FileComparisonMethod,
39 pub exclude: Option<GlobSet>,
40 pub progress: Option<Arc<ProgressBuilder>>,
41 pub jobs: usize,
42}
43
44impl DirectoryComparer {
45 pub const DEFAULT_JOBS: usize = 8;
46
47 pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
49 Self {
50 dir1,
51 dir2,
52 is_symbols_format: false,
53 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
54 comparison_method: FileComparisonMethod::Hash,
55 exclude: None,
56 progress: None,
57 jobs: Self::DEFAULT_JOBS,
58 }
59 }
60
61 pub fn run(&self) -> anyhow::Result<()> {
64 if self.dir1.is_file() {
65 return self.run_file_comparer();
66 }
67
68 let progress = self
69 .progress
70 .as_ref()
71 .map(|progress| progress.add_spinner())
72 .unwrap_or_else(Progress::none);
73 progress.set_message("Scanning directories...");
74 let start_time = std::time::Instant::now();
75 let mut summary = ComparisonSummary::default();
76 let dir1_str = self.dir1.to_str().unwrap_or("dir1");
77 let dir2_str = self.dir2.to_str().unwrap_or("dir2");
78 let (tx, rx) = mpsc::channel();
79 std::thread::scope(|scope| {
80 scope.spawn(move || {
81 if let Err(e) = self.compare_streaming_ordered(tx) {
82 log::error!("Error during comparison: {}", e);
83 }
84 });
85
86 while let Ok(event) = rx.recv() {
88 match event {
89 CompareProgress::StartOfComparison => {
90 progress.set_message("Comparing files...");
91 }
92 CompareProgress::TotalFiles(total_files) => {
93 progress.set_length(total_files as u64);
94 progress.set_message("");
95 }
96 CompareProgress::Result(_, result) => {
97 summary.update(&result);
98 if self.is_symbols_format {
99 progress.suspend(|| {
100 println!(
101 "{} {}",
102 result.to_symbol_string(),
103 result.relative_path.display()
104 );
105 })
106 } else if !result.is_identical() {
107 progress.suspend(|| {
108 println!(
109 "{}: {}",
110 result.relative_path.display(),
111 result.to_string(dir1_str, dir2_str)
112 );
113 });
114 }
115 }
116 CompareProgress::FileDone => progress.inc(1),
117 }
118 }
119 });
120 progress.finish();
121 eprintln!("\n--- Comparison Summary ---");
122 summary.print(&mut std::io::stderr(), dir1_str, dir2_str)?;
123 eprintln!(
124 "Comparison finished in {}.",
125 FormattedDuration(start_time.elapsed())
126 );
127 Ok(())
128 }
129
130 fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
135 crate::sort_stream(
136 tx,
137 |tx_unordered| self.compare_streaming(tx_unordered),
138 |event| match event {
139 CompareProgress::Result(i, _) => Some(*i),
140 _ => None,
141 },
142 )
143 }
144
145 fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
146 let mut it1 = FileIterator::new(self.dir1.clone());
147 let mut it2 = FileIterator::new(self.dir2.clone());
148 it1.exclude = self.exclude.as_ref();
149 it2.exclude = self.exclude.as_ref();
150 let hashers = self.get_hashers(&self.dir1, &self.dir2)?;
151 if let Some((h1, h2)) = &hashers {
152 it1.hasher = Some(h1);
153 it2.hasher = Some(h2);
154 if self.comparison_method == FileComparisonMethod::Rehash {
155 h1.clear_cache()?;
156 h2.clear_cache()?;
157 }
158 }
159 let hashers_ref = hashers.as_ref();
160 std::thread::scope(|global_scope| {
161 let it1_rx = it1.spawn_in_scope(global_scope);
162 let it2_rx = it2.spawn_in_scope(global_scope);
163 let pool = crate::build_thread_pool(self.jobs)?;
164 pool.scope(move |scope| {
165 let mut cur1 = it1_rx.recv().ok();
166 let mut cur2 = it2_rx.recv().ok();
167 let mut index = 0;
168 tx.send(CompareProgress::StartOfComparison)?;
169 loop {
170 let cmp = match (&cur1, &cur2) {
171 (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
172 (Some(_), None) => Ordering::Less,
173 (None, Some(_)) => Ordering::Greater,
174 (None, None) => break,
175 };
176 match cmp {
177 Ordering::Less => {
178 let (rel1, _) = cur1.take().unwrap();
179 let result =
180 FileComparisonResult::new(rel1, Classification::OnlyInDir1);
181 tx.send(CompareProgress::Result(index, result))?;
182 tx.send(CompareProgress::FileDone)?;
183 index += 1;
184 cur1 = it1_rx.recv().ok();
185 }
186 Ordering::Greater => {
187 let (rel2, _) = cur2.take().unwrap();
188 let result =
189 FileComparisonResult::new(rel2, Classification::OnlyInDir2);
190 tx.send(CompareProgress::Result(index, result))?;
191 tx.send(CompareProgress::FileDone)?;
192 index += 1;
193 cur2 = it2_rx.recv().ok();
194 }
195 Ordering::Equal => {
196 let (rel_path, path1) = cur1.take().unwrap();
197 let (_, path2) = cur2.take().unwrap();
198 let buffer_size = self.buffer_size;
199 let tx_clone = tx.clone();
200 let i = index;
201 let should_compare =
202 self.comparison_method != FileComparisonMethod::Size;
203 scope.spawn(move |_| {
204 let mut comparer = FileComparer::new(&path1, &path2);
205 comparer.buffer_size = buffer_size;
206 if let Some((h1, h2)) = hashers_ref {
207 comparer.hashers = Some((h1, h2));
208 }
209 let mut result = FileComparisonResult::new(
210 rel_path.clone(),
211 Classification::InBoth,
212 );
213 if let Err(error) = result.update(&comparer, should_compare) {
214 log::error!(
215 "Error during comparison of {:?}: {}",
216 rel_path,
217 error
218 );
219 }
220 if tx_clone.send(CompareProgress::Result(i, result)).is_err()
221 || tx_clone.send(CompareProgress::FileDone).is_err()
222 {
223 log::error!("Send failed during comparison of {:?}", rel_path);
224 }
225 });
226 index += 1;
227 cur1 = it1_rx.recv().ok();
228 cur2 = it2_rx.recv().ok();
229 }
230 }
231 }
232 tx.send(CompareProgress::TotalFiles(index))
233 })?;
234 Ok::<(), anyhow::Error>(())
235 })?;
236
237 Self::save_hashers(hashers)?;
238 Ok(())
239 }
240
241 fn get_hashers(
242 &self,
243 dir1: &Path,
244 dir2: &Path,
245 ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
246 if self.comparison_method == FileComparisonMethod::Hash
247 || self.comparison_method == FileComparisonMethod::Rehash
248 {
249 let (mut h1, mut h2) = rayon::join(
250 || FileHasher::new(dir1.to_path_buf()),
251 || FileHasher::new(dir2.to_path_buf()),
252 );
253 h1.buffer_size = self.buffer_size;
254 h2.buffer_size = self.buffer_size;
255 if let Some(progress) = self.progress.as_ref() {
256 h1.progress = Some(Arc::clone(progress));
257 h2.progress = Some(Arc::clone(progress));
258 }
259 return Ok(Some((h1, h2)));
260 }
261 Ok(None)
262 }
263
264 fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
265 if let Some((h1, h2)) = hashers {
266 let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
267 r1?;
268 r2?;
269 }
270 Ok(())
271 }
272
273 fn run_file_comparer(&self) -> anyhow::Result<()> {
274 assert!(self.dir1.is_file());
275 let file1 = &self.dir1;
276 let dir1 = file1.parent().unwrap();
277 let file1_name = file1.file_name().unwrap();
278 let (dir2, file2) = if self.dir2.is_file() {
279 (self.dir2.parent().unwrap(), self.dir2.clone())
280 } else {
281 (self.dir2.as_path(), self.dir2.join(file1_name))
282 };
283
284 let mut comparer = FileComparer::new(file1, &file2);
285 comparer.buffer_size = self.buffer_size;
286 let hashers = self.get_hashers(dir1, dir2)?;
287 if let Some((h1, h2)) = &hashers {
288 if self.comparison_method == FileComparisonMethod::Rehash {
289 h1.remove_cache_entry(file1)?;
290 h2.remove_cache_entry(&file2)?;
291 }
292 comparer.hashers = Some((h1, h2));
293 }
294 let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
295 let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
296 result.update(&comparer, should_compare_content)?;
297 let file1_str = file1.to_str().unwrap_or("file1");
298 if self.is_symbols_format {
299 println!("{} {}", result.to_symbol_string(), file1_str);
300 } else {
301 let file2_str = file2.to_str().unwrap_or("file2");
302 println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
303 }
304 Self::save_hashers(hashers)?;
305 Ok(())
306 }
307}
308
/// Running totals gathered from the individual comparison results.
#[derive(Default)]
struct ComparisonSummary {
    /// Entries present on both sides.
    pub in_both: usize,
    /// Entries present on the left side only.
    pub only_in_dir1: usize,
    /// Entries present on the right side only.
    pub only_in_dir2: usize,
    /// Entries whose left copy has the later modification time.
    pub dir1_newer: usize,
    /// Entries whose right copy has the later modification time.
    pub dir2_newer: usize,
    /// Entries whose left copy is larger.
    pub dir1_larger: usize,
    /// Entries whose right copy is larger.
    pub dir2_larger: usize,
    /// Entries of equal size whose content differs.
    pub diff_content: usize,
    /// Entries on both sides for which a time, size or content verdict is missing.
    pub not_comparable: usize,
}
321
322impl ComparisonSummary {
323 pub fn update(&mut self, result: &FileComparisonResult) {
324 match result.classification {
325 Classification::OnlyInDir1 => self.only_in_dir1 += 1,
326 Classification::OnlyInDir2 => self.only_in_dir2 += 1,
327 Classification::InBoth => {
328 self.in_both += 1;
329 let mut is_not_comparable = false;
330 match result.modified_time_comparison {
331 Some(Ordering::Greater) => self.dir1_newer += 1,
332 Some(Ordering::Less) => self.dir2_newer += 1,
333 Some(Ordering::Equal) => {}
334 None => is_not_comparable = true,
335 }
336 match result.size_comparison {
337 Some(Ordering::Greater) => self.dir1_larger += 1,
338 Some(Ordering::Less) => self.dir2_larger += 1,
339 Some(Ordering::Equal) => match result.is_content_same {
340 Some(false) => self.diff_content += 1,
341 Some(true) => {}
342 None => is_not_comparable = true,
343 },
344 None => is_not_comparable = true,
345 }
346 if is_not_comparable {
347 self.not_comparable += 1;
348 }
349 }
350 }
351 }
352
353 pub fn print(
354 &self,
355 mut writer: impl std::io::Write,
356 dir1_name: &str,
357 dir2_name: &str,
358 ) -> std::io::Result<()> {
359 let values = [
360 ("Files in both:", self.in_both),
361 ("Only in left:", self.only_in_dir1),
362 ("Only in right:", self.only_in_dir2),
363 ("Left is newer:", self.dir1_newer),
364 ("Right is newer:", self.dir2_newer),
365 ("Left is larger:", self.dir1_larger),
366 ("Right is larger:", self.dir2_larger),
367 ("Different content:", self.diff_content),
368 ("Not comparable:", self.not_comparable),
369 ];
370 let max_len = values.iter().map(|(s, _)| s.len()).max().unwrap();
371 writeln!(writer, "{:width$} {}", "Left:", dir1_name, width = max_len)?;
372 writeln!(writer, "{:width$} {}", "Right:", dir2_name, width = max_len)?;
373 for (label, value) in values {
374 writeln!(writer, "{:width$} {}", label, value, width = max_len)?;
375 }
376 Ok(())
377 }
378}
379
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Runs the ordered streaming comparison to completion and returns the
    /// payload of every `Result` event in the order it was received.
    fn collect_results(
        comparer: &DirectoryComparer,
    ) -> anyhow::Result<Vec<FileComparisonResult>> {
        let (tx, rx) = mpsc::channel();
        // The sender is moved in and dropped on return, so draining terminates.
        comparer.compare_streaming_ordered(tx)?;
        let results = rx
            .into_iter()
            .filter_map(|event| match event {
                CompareProgress::Result(_, result) => Some(result),
                _ => None,
            })
            .collect();
        Ok(results)
    }

    #[test]
    fn comparison_summary() {
        let only_left = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let only_right = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut newer_left = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        newer_left.modified_time_comparison = Some(Ordering::Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&only_left, &only_right, &newer_left] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // One identical pair, one differing pair, and one unique file per side.
        fs::write(dir1.path().join("same.txt"), b"same content")?;
        fs::write(dir1.path().join("only1.txt"), b"only in dir1")?;
        fs::write(dir2.path().join("same.txt"), b"same content")?;
        fs::write(dir2.path().join("only2.txt"), b"only in dir2")?;
        fs::write(dir1.path().join("diff.txt"), b"content 1")?;
        fs::write(dir2.path().join("diff.txt"), b"content 222")?;

        let comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let mut results = collect_results(&comparer)?;
        results.sort_by(|lhs, rhs| lhs.relative_path.cmp(&rhs.relative_path));

        assert_eq!(results.len(), 4);

        let differing = &results[0];
        assert_eq!(differing.relative_path.to_str().unwrap(), "diff.txt");
        assert_eq!(differing.classification, Classification::InBoth);
        assert!(
            differing.is_content_same == Some(false)
                || differing.size_comparison != Some(Ordering::Equal)
        );

        let left_only = &results[1];
        assert_eq!(left_only.relative_path.to_str().unwrap(), "only1.txt");
        assert_eq!(left_only.classification, Classification::OnlyInDir1);

        let right_only = &results[2];
        assert_eq!(right_only.relative_path.to_str().unwrap(), "only2.txt");
        assert_eq!(right_only.classification, Classification::OnlyInDir2);

        let identical = &results[3];
        assert_eq!(identical.relative_path.to_str().unwrap(), "same.txt");
        assert_eq!(identical.classification, Classification::InBoth);
        assert_eq!(identical.size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    #[test]
    fn directory_comparer_size_mode() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Same length, different bytes: size mode must not look at content.
        fs::write(dir1.path().join("file.txt"), b"content 1")?;
        fs::write(dir2.path().join("file.txt"), b"content 2")?;

        let mut comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;
        let results = collect_results(&comparer)?;

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.relative_path.to_str().unwrap(), "file.txt");
        assert_eq!(only.classification, Classification::InBoth);
        assert_eq!(only.size_comparison, Some(Ordering::Equal));
        assert_eq!(only.is_content_same, None);

        Ok(())
    }
}