1use crate::{
2 Classification, ColumnFormatter, FileComparer, FileComparisonResult, FileHasher, FileIterator,
3 Progress, ProgressBuilder,
4};
5use globset::GlobSet;
6use indicatif::FormattedDuration;
7use std::cmp::Ordering;
8use std::path::{Path, PathBuf};
9use std::sync::{Arc, mpsc};
10
11#[derive(Debug, Clone)]
12enum CompareProgress {
13 StartOfComparison,
14 FileDone,
15 TotalFiles(usize),
16 Result(usize, FileComparisonResult),
17}
18
/// Strategy used to decide whether two files present on both sides are the same.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileComparisonMethod {
    /// Do not request a content comparison for files present on both sides.
    Size,
    /// Request a content comparison and attach a `FileHasher` for each side;
    /// the hashers' caches are saved once the run completes.
    Hash,
    /// Same as `Hash`, but previously cached hashes are discarded first.
    Rehash,
    /// Request a content comparison without attaching any hashers.
    Full,
}
31
32pub struct DirectoryComparer {
34 dir1: PathBuf,
35 dir2: PathBuf,
36 pub is_symbols_format: bool,
37 pub buffer_size: usize,
38 pub comparison_method: FileComparisonMethod,
39 pub exclude: Option<GlobSet>,
40 pub progress: Option<Arc<ProgressBuilder>>,
41 pub jobs: usize,
42}
43
44impl DirectoryComparer {
45 pub const DEFAULT_JOBS: usize = 8;
46
47 pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
49 Self {
50 dir1,
51 dir2,
52 is_symbols_format: false,
53 buffer_size: FileComparer::DEFAULT_BUFFER_SIZE,
54 comparison_method: FileComparisonMethod::Hash,
55 exclude: None,
56 progress: None,
57 jobs: Self::DEFAULT_JOBS,
58 }
59 }
60
61 pub fn run(&self) -> anyhow::Result<()> {
64 if self.dir1.is_file() {
65 return self.run_file_comparer();
66 }
67
68 let progress = self
69 .progress
70 .as_ref()
71 .map(|progress| progress.add_spinner())
72 .unwrap_or_else(Progress::none);
73 progress.set_message("Scanning directories...");
74 let start_time = std::time::Instant::now();
75 let mut summary = ComparisonSummary::default();
76 let dir1_str = self.dir1.to_str().unwrap_or("dir1");
77 let dir2_str = self.dir2.to_str().unwrap_or("dir2");
78 let (tx, rx) = mpsc::channel();
79 std::thread::scope(|scope| {
80 scope.spawn(move || {
81 if let Err(e) = self.compare_streaming_ordered(tx) {
82 log::error!("Error during comparison: {}", e);
83 }
84 });
85
86 while let Ok(event) = rx.recv() {
88 match event {
89 CompareProgress::StartOfComparison => {
90 progress.set_message("Comparing files...");
91 }
92 CompareProgress::TotalFiles(total_files) => {
93 progress.set_length(total_files as u64);
94 progress.set_message("");
95 }
96 CompareProgress::Result(_, result) => {
97 summary.update(&result);
98 if self.is_symbols_format {
99 progress.suspend(|| {
100 println!(
101 "{} {}",
102 result.to_symbol_string(),
103 result.relative_path.display()
104 );
105 })
106 } else if !result.is_identical() {
107 progress.suspend(|| {
108 println!(
109 "{}: {}",
110 result.relative_path.display(),
111 result.to_string(dir1_str, dir2_str)
112 );
113 });
114 }
115 }
116 CompareProgress::FileDone => progress.inc(1),
117 }
118 }
119 });
120 progress.finish();
121 eprintln!("\n--- Comparison Summary ---");
122 summary.print(&mut std::io::stderr(), dir1_str, dir2_str)?;
123 eprintln!(
124 "Comparison finished in {}.",
125 FormattedDuration(start_time.elapsed())
126 );
127 Ok(())
128 }
129
130 fn compare_streaming_ordered(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
135 crate::sort_stream(
136 tx,
137 |tx_unordered| self.compare_streaming(tx_unordered),
138 |event| match event {
139 CompareProgress::Result(i, _) => Some(*i),
140 _ => None,
141 },
142 )
143 }
144
145 fn compare_streaming(&self, tx: mpsc::Sender<CompareProgress>) -> anyhow::Result<()> {
146 let mut it1 = FileIterator::new(self.dir1.clone());
147 let mut it2 = FileIterator::new(self.dir2.clone());
148 it1.exclude = self.exclude.as_ref();
149 it2.exclude = self.exclude.as_ref();
150 let hashers = self.get_hashers(&self.dir1, &self.dir2)?;
151 if let Some((h1, h2)) = &hashers {
152 it1.hasher = Some(h1);
153 it2.hasher = Some(h2);
154 if self.comparison_method == FileComparisonMethod::Rehash {
155 h1.clear_cache()?;
156 h2.clear_cache()?;
157 }
158 }
159 let hashers_ref = hashers.as_ref();
160 std::thread::scope(|global_scope| {
161 let it1_rx = it1.spawn_in_scope(global_scope);
162 let it2_rx = it2.spawn_in_scope(global_scope);
163 let pool = crate::build_thread_pool(self.jobs)?;
164 pool.scope(move |scope| {
165 let mut cur1 = it1_rx.recv().ok();
166 let mut cur2 = it2_rx.recv().ok();
167 let mut index = 0;
168 tx.send(CompareProgress::StartOfComparison)?;
169 loop {
170 let cmp = match (&cur1, &cur2) {
171 (Some((rel1, _)), Some((rel2, _))) => rel1.cmp(rel2),
172 (Some(_), None) => Ordering::Less,
173 (None, Some(_)) => Ordering::Greater,
174 (None, None) => break,
175 };
176 match cmp {
177 Ordering::Less => {
178 let (rel1, _) = cur1.take().unwrap();
179 let result =
180 FileComparisonResult::new(rel1, Classification::OnlyInDir1);
181 tx.send(CompareProgress::Result(index, result))?;
182 tx.send(CompareProgress::FileDone)?;
183 index += 1;
184 cur1 = it1_rx.recv().ok();
185 }
186 Ordering::Greater => {
187 let (rel2, _) = cur2.take().unwrap();
188 let result =
189 FileComparisonResult::new(rel2, Classification::OnlyInDir2);
190 tx.send(CompareProgress::Result(index, result))?;
191 tx.send(CompareProgress::FileDone)?;
192 index += 1;
193 cur2 = it2_rx.recv().ok();
194 }
195 Ordering::Equal => {
196 let (rel_path, path1) = cur1.take().unwrap();
197 let (_, path2) = cur2.take().unwrap();
198 let buffer_size = self.buffer_size;
199 let tx_clone = tx.clone();
200 let i = index;
201 let should_compare =
202 self.comparison_method != FileComparisonMethod::Size;
203 scope.spawn(move |_| {
204 let mut comparer = FileComparer::new(&path1, &path2);
205 comparer.buffer_size = buffer_size;
206 if let Some((h1, h2)) = hashers_ref {
207 comparer.hashers = Some((h1, h2));
208 }
209 let mut result = FileComparisonResult::new(
210 rel_path.clone(),
211 Classification::InBoth,
212 );
213 if let Err(error) = result.update(&comparer, should_compare) {
214 log::error!(
215 "Error during comparison of {:?}: {}",
216 rel_path,
217 error
218 );
219 }
220 if tx_clone.send(CompareProgress::Result(i, result)).is_err()
221 || tx_clone.send(CompareProgress::FileDone).is_err()
222 {
223 log::error!("Send failed during comparison of {:?}", rel_path);
224 }
225 });
226 index += 1;
227 cur1 = it1_rx.recv().ok();
228 cur2 = it2_rx.recv().ok();
229 }
230 }
231 }
232 tx.send(CompareProgress::TotalFiles(index))
233 })?;
234 Ok::<(), anyhow::Error>(())
235 })?;
236
237 Self::save_hashers(hashers)?;
238 Ok(())
239 }
240
241 fn get_hashers(
242 &self,
243 dir1: &Path,
244 dir2: &Path,
245 ) -> anyhow::Result<Option<(FileHasher, FileHasher)>> {
246 if self.comparison_method == FileComparisonMethod::Hash
247 || self.comparison_method == FileComparisonMethod::Rehash
248 {
249 let (mut h1, mut h2) = rayon::join(
250 || FileHasher::new(dir1.to_path_buf()),
251 || FileHasher::new(dir2.to_path_buf()),
252 );
253 h1.buffer_size = self.buffer_size;
254 h2.buffer_size = self.buffer_size;
255 if let Some(progress) = self.progress.as_ref() {
256 h1.progress = Some(Arc::clone(progress));
257 h2.progress = Some(Arc::clone(progress));
258 }
259 return Ok(Some((h1, h2)));
260 }
261 Ok(None)
262 }
263
264 fn save_hashers(hashers: Option<(FileHasher, FileHasher)>) -> anyhow::Result<()> {
265 if let Some((h1, h2)) = hashers {
266 let (r1, r2) = rayon::join(|| h1.save_cache(), || h2.save_cache());
267 r1?;
268 r2?;
269 }
270 Ok(())
271 }
272
273 fn run_file_comparer(&self) -> anyhow::Result<()> {
274 assert!(self.dir1.is_file());
275 let file1 = &self.dir1;
276 let dir1 = file1.parent().unwrap();
277 let file1_name = file1.file_name().unwrap();
278 let (dir2, file2) = if self.dir2.is_file() {
279 (self.dir2.parent().unwrap(), self.dir2.clone())
280 } else {
281 (self.dir2.as_path(), self.dir2.join(file1_name))
282 };
283
284 let mut comparer = FileComparer::new(file1, &file2);
285 comparer.buffer_size = self.buffer_size;
286 let hashers = self.get_hashers(dir1, dir2)?;
287 if let Some((h1, h2)) = &hashers {
288 if self.comparison_method == FileComparisonMethod::Rehash {
289 h1.remove_cache_entry(file1)?;
290 h2.remove_cache_entry(&file2)?;
291 }
292 comparer.hashers = Some((h1, h2));
293 }
294 let mut result = FileComparisonResult::new(PathBuf::new(), Classification::InBoth);
295 let should_compare_content = self.comparison_method != FileComparisonMethod::Size;
296 result.update(&comparer, should_compare_content)?;
297 let file1_str = file1.to_str().unwrap_or("file1");
298 if self.is_symbols_format {
299 println!("{} {}", result.to_symbol_string(), file1_str);
300 } else {
301 let file2_str = file2.to_str().unwrap_or("file2");
302 println!("{}: {}", file1_str, result.to_string(file1_str, file2_str));
303 }
304 Self::save_hashers(hashers)?;
305 Ok(())
306 }
307}
308
/// Running totals accumulated over all comparison results of one run.
#[derive(Default)]
struct ComparisonSummary {
    /// Entries present on both sides.
    pub in_both: usize,
    /// Entries present only on the left side.
    pub only_in_dir1: usize,
    /// Entries present only on the right side.
    pub only_in_dir2: usize,
    /// Entries on both sides whose left copy has the later modification time.
    pub dir1_newer: usize,
    /// Entries on both sides whose right copy has the later modification time.
    pub dir2_newer: usize,
    /// Entries on both sides whose left copy is larger.
    pub dir1_larger: usize,
    /// Entries on both sides whose right copy is larger.
    pub dir2_larger: usize,
    /// Entries of equal size whose content is known to differ.
    pub diff_content: usize,
    /// Entries on both sides for which the time, size or (for equal sizes)
    /// content comparison is unavailable.
    pub not_comparable: usize,
}
321
322impl ComparisonSummary {
323 pub fn update(&mut self, result: &FileComparisonResult) {
324 match result.classification {
325 Classification::OnlyInDir1 => self.only_in_dir1 += 1,
326 Classification::OnlyInDir2 => self.only_in_dir2 += 1,
327 Classification::InBoth => {
328 self.in_both += 1;
329 let mut is_not_comparable = false;
330 match result.modified_time_comparison {
331 Some(Ordering::Greater) => self.dir1_newer += 1,
332 Some(Ordering::Less) => self.dir2_newer += 1,
333 Some(Ordering::Equal) => {}
334 None => is_not_comparable = true,
335 }
336 match result.size_comparison {
337 Some(Ordering::Greater) => self.dir1_larger += 1,
338 Some(Ordering::Less) => self.dir2_larger += 1,
339 Some(Ordering::Equal) => match result.is_content_same {
340 Some(false) => self.diff_content += 1,
341 Some(true) => {}
342 None => is_not_comparable = true,
343 },
344 None => is_not_comparable = true,
345 }
346 if is_not_comparable {
347 self.not_comparable += 1;
348 }
349 }
350 }
351 }
352
353 pub fn print(
354 &self,
355 mut writer: impl std::io::Write,
356 dir1_name: &str,
357 dir2_name: &str,
358 ) -> std::io::Result<()> {
359 let values = [
360 ("Files in both:", self.in_both),
361 ("Only in left:", self.only_in_dir1),
362 ("Only in right:", self.only_in_dir2),
363 ("Left is newer:", self.dir1_newer),
364 ("Right is newer:", self.dir2_newer),
365 ("Left is larger:", self.dir1_larger),
366 ("Right is larger:", self.dir2_larger),
367 ("Different content:", self.diff_content),
368 ("Not comparable:", self.not_comparable),
369 ];
370 let formatter = ColumnFormatter::new(values.iter().map(|(s, _)| *s));
371 formatter.write_value(&mut writer, "Left:", dir1_name)?;
372 formatter.write_value(&mut writer, "Right:", dir2_name)?;
373 formatter.write_values(&mut writer, values)?;
374 Ok(())
375 }
376}
377
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Write;

    /// Creates the file `name` inside `dir` containing exactly `contents`.
    fn write_file(dir: &Path, name: &str, contents: &[u8]) -> std::io::Result<()> {
        let mut file = fs::File::create(dir.join(name))?;
        file.write_all(contents)
    }

    /// Runs the ordered streaming comparison to completion and returns only
    /// the comparison results, in the order they were received.
    fn collect_results(
        comparer: &DirectoryComparer,
    ) -> anyhow::Result<Vec<FileComparisonResult>> {
        let (tx, rx) = mpsc::channel();
        comparer.compare_streaming_ordered(tx)?;
        let results = rx
            .into_iter()
            .filter_map(|event| match event {
                CompareProgress::Result(_, result) => Some(result),
                _ => None,
            })
            .collect();
        Ok(results)
    }

    #[test]
    fn comparison_summary() {
        let newer_on_left = {
            let mut result =
                FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
            result.modified_time_comparison = Some(Ordering::Greater);
            result
        };
        let inputs = [
            FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1),
            FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2),
            newer_on_left,
        ];

        let mut summary = ComparisonSummary::default();
        for result in &inputs {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Identical on both sides.
        write_file(dir1.path(), "same.txt", b"same content")?;
        // Left side only.
        write_file(dir1.path(), "only1.txt", b"only in dir1")?;
        write_file(dir2.path(), "same.txt", b"same content")?;
        // Right side only.
        write_file(dir2.path(), "only2.txt", b"only in dir2")?;
        // Present on both sides with different content and size.
        write_file(dir1.path(), "diff.txt", b"content 1")?;
        write_file(dir2.path(), "diff.txt", b"content 222")?;

        let comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let mut results = collect_results(&comparer)?;
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);

        let expected = [
            ("diff.txt", Classification::InBoth),
            ("only1.txt", Classification::OnlyInDir1),
            ("only2.txt", Classification::OnlyInDir2),
            ("same.txt", Classification::InBoth),
        ];
        for (result, (name, classification)) in results.iter().zip(expected) {
            assert_eq!(result.relative_path.to_str(), Some(name));
            assert_eq!(result.classification, classification);
        }

        let differing = &results[0];
        assert!(
            differing.is_content_same == Some(false)
                || differing.size_comparison != Some(Ordering::Equal)
        );
        assert_eq!(results[3].size_comparison, Some(Ordering::Equal));

        Ok(())
    }

    #[test]
    fn directory_comparer_size_mode() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Same length, different bytes: size mode must not look at the content.
        write_file(dir1.path(), "file.txt", b"content 1")?;
        write_file(dir2.path(), "file.txt", b"content 2")?;

        let mut comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        comparer.comparison_method = FileComparisonMethod::Size;
        let results = collect_results(&comparer)?;

        assert_eq!(results.len(), 1);
        let only = &results[0];
        assert_eq!(only.relative_path.to_str(), Some("file.txt"));
        assert_eq!(only.classification, Classification::InBoth);
        assert_eq!(only.size_comparison, Some(Ordering::Equal));
        assert_eq!(only.is_content_same, None);

        Ok(())
    }
}