1use indicatif::{ProgressBar, ProgressStyle};
2use log::info;
3use rayon::prelude::*;
4use std::cmp::Ordering;
5use std::collections::HashMap;
6use std::fs;
7use std::io::{self, Read};
8use std::path::{Path, PathBuf};
9use std::sync::{Arc, Mutex, mpsc};
10use walkdir::WalkDir;
11
/// Where a relative path was found when two directory trees are compared.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Classification {
    /// The path exists only under the first directory.
    OnlyInDir1,
    /// The path exists only under the second directory.
    OnlyInDir2,
    /// The path exists under both directories.
    InBoth,
}
22
23#[derive(Debug, Clone)]
25pub struct FileComparisonResult {
26 pub relative_path: PathBuf,
28 pub classification: Classification,
30 pub modified_time_comparison: Option<Ordering>,
32 pub size_comparison: Option<Ordering>,
34 pub is_content_same: Option<bool>,
36}
37
38impl FileComparisonResult {
39 pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
40 Self {
41 relative_path,
42 classification,
43 modified_time_comparison: None,
44 size_comparison: None,
45 is_content_same: None,
46 }
47 }
48
49 fn update(&mut self, path1: &Path, path2: &Path, buffer_size: usize) -> anyhow::Result<()> {
50 let m1 = fs::metadata(path1)?;
51 let m2 = fs::metadata(path2)?;
52 let t1 = m1.modified()?;
53 let t2 = m2.modified()?;
54 self.modified_time_comparison = Some(t1.cmp(&t2));
55
56 let s1 = m1.len();
57 let s2 = m2.len();
58 self.size_comparison = Some(s1.cmp(&s2));
59
60 if s1 == s2 {
61 log::info!("Comparing content: {:?}", self.relative_path);
62 self.is_content_same = Some(Self::compare_contents(path1, path2, buffer_size)?);
63 }
64 Ok(())
65 }
66
67 fn compare_contents(path1: &Path, path2: &Path, buffer_size: usize) -> io::Result<bool> {
68 let mut f1 = fs::File::open(path1)?;
69 let mut f2 = fs::File::open(path2)?;
70
71 let mut buf1 = vec![0u8; buffer_size];
72 let mut buf2 = vec![0u8; buffer_size];
73
74 loop {
75 let (n1, n2) = rayon::join(|| f1.read(&mut buf1), || f2.read(&mut buf2));
79 let n1 = n1?;
80 let n2 = n2?;
81
82 if n1 != n2 || buf1[..n1] != buf2[..n2] {
83 return Ok(false);
84 }
85
86 if n1 == 0 {
87 return Ok(true);
88 }
89 }
90 }
91
92 pub fn is_identical(&self) -> bool {
93 self.classification == Classification::InBoth
94 && self.modified_time_comparison == Some(Ordering::Equal)
95 && self.size_comparison == Some(Ordering::Equal)
96 && self.is_content_same == Some(true)
97 }
98
99 pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
100 let mut parts = Vec::new();
101 match self.classification {
102 Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
103 Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
104 Classification::InBoth => {}
105 }
106
107 if let Some(comp) = &self.modified_time_comparison {
108 match comp {
109 Ordering::Greater => parts.push(format!("{} is newer", dir1_name)),
110 Ordering::Less => parts.push(format!("{} is newer", dir2_name)),
111 Ordering::Equal => {}
112 }
113 }
114
115 if let Some(comp) = &self.size_comparison {
116 match comp {
117 Ordering::Greater => parts.push(format!("Size of {} is larger", dir1_name)),
118 Ordering::Less => parts.push(format!("Size of {} is larger", dir2_name)),
119 Ordering::Equal => {}
120 }
121 }
122
123 if let Some(same) = self.is_content_same
124 && !same
125 {
126 parts.push("Content differ".to_string());
127 }
128
129 format!("{}: {}", self.relative_path.display(), parts.join(", "))
130 }
131}
132
/// Running totals accumulated over a set of file comparison results.
///
/// All counters start at zero via `Default`.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct ComparisonSummary {
    /// Files present in both directories.
    pub in_both: usize,
    /// Files present only in the first directory.
    pub only_in_dir1: usize,
    /// Files present only in the second directory.
    pub only_in_dir2: usize,
    /// Files in both directories where the first directory's copy is newer.
    pub dir1_newer: usize,
    /// Files in both directories where the second directory's copy is newer.
    pub dir2_newer: usize,
    /// Files in both directories with equal modification time but different size.
    pub same_time_diff_size: usize,
    /// Files in both directories with equal time and size but different content.
    pub same_time_size_diff_content: usize,
}
143
144impl ComparisonSummary {
145 pub fn update(&mut self, result: &FileComparisonResult) {
146 match result.classification {
147 Classification::OnlyInDir1 => self.only_in_dir1 += 1,
148 Classification::OnlyInDir2 => self.only_in_dir2 += 1,
149 Classification::InBoth => {
150 self.in_both += 1;
151 match result.modified_time_comparison {
152 Some(Ordering::Greater) => self.dir1_newer += 1,
153 Some(Ordering::Less) => self.dir2_newer += 1,
154 _ => {
155 if result.size_comparison != Some(Ordering::Equal) {
156 self.same_time_diff_size += 1;
157 } else if result.is_content_same == Some(false) {
158 self.same_time_size_diff_content += 1;
159 }
160 }
161 }
162 }
163 }
164 }
165
166 pub fn print(&self, dir1_name: &str, dir2_name: &str) {
167 println!("Files in both: {}", self.in_both);
168 println!("Files only in {}: {}", dir1_name, self.only_in_dir1);
169 println!("Files only in {}: {}", dir2_name, self.only_in_dir2);
170 println!(
171 "Files in both ({} is newer): {}",
172 dir1_name, self.dir1_newer
173 );
174 println!(
175 "Files in both ({} is newer): {}",
176 dir2_name, self.dir2_newer
177 );
178 println!(
179 "Files in both (same time, different size): {}",
180 self.same_time_diff_size
181 );
182 println!(
183 "Files in both (same time and size, different content): {}",
184 self.same_time_size_diff_content
185 );
186 }
187}
188
/// Compares the files found under two directory trees.
///
/// Cloning is cheap with respect to `total_files`: clones share the same
/// counter through the `Arc`.
#[derive(Clone)]
pub struct DirectoryComparer {
    /// Root of the first directory tree.
    dir1: PathBuf,
    /// Root of the second directory tree.
    dir2: PathBuf,
    /// Number of distinct relative paths found; shared between clones.
    total_files: Arc<Mutex<usize>>,
    /// Size in bytes of each read buffer used when comparing file contents.
    buffer_size: usize,
}
197
198impl DirectoryComparer {
199 pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
201 Self {
202 dir1,
203 dir2,
204 total_files: Arc::new(Mutex::new(0)),
205 buffer_size: 64 * 1024,
206 }
207 }
208
209 pub fn set_buffer_size(&mut self, size: usize) {
211 self.buffer_size = size;
212 }
213
214 pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
217 rayon::ThreadPoolBuilder::new()
218 .num_threads(parallel)
219 .build_global()
220 .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
221 Ok(())
222 }
223
224 pub fn run(&self) -> anyhow::Result<()> {
227 let pb = ProgressBar::new_spinner();
228 pb.enable_steady_tick(std::time::Duration::from_millis(120));
229 pb.set_style(
230 ProgressStyle::with_template("{spinner:.green} [{elapsed_precise}] {msg}").unwrap(),
231 );
232 pb.set_message("Scanning directories...");
233
234 let start_time = std::time::Instant::now();
235 let mut summary = ComparisonSummary::default();
236 let dir1_str = self.dir1.to_str().unwrap_or("dir1");
237 let dir2_str = self.dir2.to_str().unwrap_or("dir2");
238
239 let (tx, rx) = mpsc::channel();
240 let comparer = self.clone();
241
242 std::thread::scope(|s| {
243 s.spawn(move || {
244 if let Err(e) = comparer.compare_streaming(tx) {
245 log::error!("Error during comparison: {}", e);
246 }
247 });
248
249 let mut length_set = false;
251 while let Ok(result) = rx.recv() {
252 if !length_set {
253 let total_files = *self.total_files.lock().unwrap();
254 if total_files > 0 {
255 pb.set_length(total_files as u64);
256 pb.set_style(
257 ProgressStyle::with_template(
258 "[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} ({percent}%) {msg}",
259 )
260 .unwrap(),
261 );
262 pb.set_message("");
263 length_set = true;
264 }
265 }
266 summary.update(&result);
267 if !result.is_identical() {
268 pb.suspend(|| {
269 println!("{}", result.to_string(dir1_str, dir2_str));
270 });
271 }
272 pb.inc(1);
273 }
274 });
275
276 pb.finish_and_clear();
277
278 eprintln!("\n--- Comparison Summary ---");
279 summary.print(dir1_str, dir2_str);
280 eprintln!("Comparison finished in {:?}.", start_time.elapsed());
281 Ok(())
282 }
283
284 fn get_files(dir: &Path) -> anyhow::Result<HashMap<PathBuf, PathBuf>> {
285 let mut files = HashMap::new();
286 for entry in WalkDir::new(dir).into_iter().filter_map(|e| e.ok()) {
287 if entry.file_type().is_file() {
288 let rel_path = entry.path().strip_prefix(dir)?.to_path_buf();
289 files.insert(rel_path, entry.path().to_path_buf());
290 }
291 }
292 Ok(files)
293 }
294
295 fn compare_streaming(&self, tx: mpsc::Sender<FileComparisonResult>) -> anyhow::Result<()> {
300 let (tx_unordered, rx_unordered) = mpsc::channel();
301 let comparer = self.clone();
302
303 std::thread::scope(|s| {
304 s.spawn(move || {
305 if let Err(e) = comparer.compare_unordered_streaming(tx_unordered) {
306 log::error!("Error during unordered comparison: {}", e);
307 }
308 });
309
310 let mut buffer = HashMap::new();
311 let mut next_index = 0;
312 let mut total_len: Option<usize> = None;
313
314 while total_len.is_none() || next_index < total_len.unwrap() {
315 match rx_unordered.recv() {
316 Ok((i, result)) => {
317 if total_len.is_none() {
318 total_len = Some(*self.total_files.lock().unwrap());
319 }
320
321 if i == next_index {
322 if tx.send(result).is_err() {
323 break; }
325 next_index += 1;
326 while let Some(result) = buffer.remove(&next_index) {
327 if tx.send(result).is_err() {
328 break;
329 }
330 next_index += 1;
331 }
332 } else {
333 buffer.insert(i, result);
334 }
335 }
336 Err(_) => {
337 break;
339 }
340 }
341 }
342 });
343
344 Ok(())
345 }
346
347 fn compare_unordered_streaming(
348 &self,
349 tx: mpsc::Sender<(usize, FileComparisonResult)>,
350 ) -> anyhow::Result<()> {
351 let (dir1_files, dir2_files) = rayon::join(
352 || {
353 info!("Scanning directory: {:?}", self.dir1);
354 Self::get_files(&self.dir1)
355 },
356 || {
357 info!("Scanning directory: {:?}", self.dir2);
358 Self::get_files(&self.dir2)
359 },
360 );
361 let dir1_files = dir1_files?;
362 let dir2_files = dir2_files?;
363
364 let mut all_rel_paths: Vec<_> = dir1_files
365 .keys()
366 .cloned()
367 .chain(dir2_files.keys().cloned())
368 .collect();
369 all_rel_paths.sort();
370 all_rel_paths.dedup();
371
372 *self.total_files.lock().unwrap() = all_rel_paths.len();
373
374 all_rel_paths
375 .into_par_iter()
376 .enumerate()
377 .for_each(|(i, rel_path)| {
378 let in_dir1 = dir1_files.get(&rel_path);
379 let in_dir2 = dir2_files.get(&rel_path);
380
381 let result = match (in_dir1, in_dir2) {
382 (Some(_), None) => {
383 FileComparisonResult::new(rel_path.clone(), Classification::OnlyInDir1)
384 }
385 (None, Some(_)) => {
386 FileComparisonResult::new(rel_path.clone(), Classification::OnlyInDir2)
387 }
388 (Some(path1), Some(path2)) => {
389 let mut result =
390 FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
391 if let Err(error) = result.update(path1, path2, self.buffer_size) {
392 log::error!("Error during comparison of {:?}: {}", rel_path, error);
393 }
394 result
395 }
396 (None, None) => unreachable!(),
397 };
398 if tx.send((i, result)).is_err() {
399 log::error!("Receiver dropped, stopping comparison of {:?}", rel_path);
400 }
401 });
402 Ok(())
403 }
404}
405
406#[cfg(test)]
407mod tests {
408 use super::*;
409 use std::io::Write;
410 use tempfile::NamedTempFile;
411
412 #[test]
413 fn test_compare_contents_identical() -> io::Result<()> {
414 let mut f1 = NamedTempFile::new()?;
415 let mut f2 = NamedTempFile::new()?;
416 f1.write_all(b"hello world")?;
417 f2.write_all(b"hello world")?;
418 assert!(FileComparisonResult::compare_contents(
419 f1.path(),
420 f2.path(),
421 8192
422 )?);
423 Ok(())
424 }
425
426 #[test]
427 fn test_compare_contents_different() -> io::Result<()> {
428 let mut f1 = NamedTempFile::new()?;
429 let mut f2 = NamedTempFile::new()?;
430 f1.write_all(b"hello world")?;
431 f2.write_all(b"hello rust")?;
432 assert!(!FileComparisonResult::compare_contents(
433 f1.path(),
434 f2.path(),
435 8192
436 )?);
437 Ok(())
438 }
439
440 #[test]
441 fn test_compare_contents_different_size() -> io::Result<()> {
442 let mut f1 = NamedTempFile::new()?;
443 let mut f2 = NamedTempFile::new()?;
444 f1.write_all(b"hello world")?;
445 f2.write_all(b"hello")?;
446 assert!(!FileComparisonResult::compare_contents(
448 f1.path(),
449 f2.path(),
450 8192
451 )?);
452 Ok(())
453 }
454
455 #[test]
456 fn test_comparison_summary() {
457 let mut summary = ComparisonSummary::default();
458 let res1 = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
459 let res2 = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
460 let mut res3 = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
461 res3.modified_time_comparison = Some(Ordering::Greater);
462
463 summary.update(&res1);
464 summary.update(&res2);
465 summary.update(&res3);
466
467 assert_eq!(summary.only_in_dir1, 1);
468 assert_eq!(summary.only_in_dir2, 1);
469 assert_eq!(summary.in_both, 1);
470 assert_eq!(summary.dir1_newer, 1);
471 }
472
473 #[test]
474 fn test_directory_comparer_integration() -> anyhow::Result<()> {
475 let dir1 = tempfile::tempdir()?;
476 let dir2 = tempfile::tempdir()?;
477
478 let file1_path = dir1.path().join("same.txt");
480 let mut file1 = fs::File::create(&file1_path)?;
481 file1.write_all(b"same content")?;
482
483 let only1_path = dir1.path().join("only1.txt");
484 let mut only1 = fs::File::create(&only1_path)?;
485 only1.write_all(b"only in dir1")?;
486
487 let file2_path = dir2.path().join("same.txt");
489 let mut file2 = fs::File::create(&file2_path)?;
490 file2.write_all(b"same content")?;
491
492 let only2_path = dir2.path().join("only2.txt");
493 let mut only2 = fs::File::create(&only2_path)?;
494 only2.write_all(b"only in dir2")?;
495
496 let diff1_path = dir1.path().join("diff.txt");
498 let mut diff1 = fs::File::create(&diff1_path)?;
499 diff1.write_all(b"content 1")?;
500
501 let diff2_path = dir2.path().join("diff.txt");
502 let mut diff2 = fs::File::create(&diff2_path)?;
503 diff2.write_all(b"content 222")?; let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
506 let (tx, rx) = mpsc::channel();
507
508 comparer.compare_streaming(tx)?;
509
510 let mut results = Vec::new();
511 while let Ok(res) = rx.recv() {
512 results.push(res);
513 }
514
515 results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
516
517 assert_eq!(results.len(), 4);
518
519 assert_eq!(results[0].relative_path.to_str().unwrap(), "diff.txt");
521 assert_eq!(results[0].classification, Classification::InBoth);
522 assert!(
523 results[0].is_content_same == Some(false)
524 || results[0].size_comparison != Some(Ordering::Equal)
525 );
526
527 assert_eq!(results[1].relative_path.to_str().unwrap(), "only1.txt");
529 assert_eq!(results[1].classification, Classification::OnlyInDir1);
530
531 assert_eq!(results[2].relative_path.to_str().unwrap(), "only2.txt");
533 assert_eq!(results[2].classification, Classification::OnlyInDir2);
534
535 assert_eq!(results[3].relative_path.to_str().unwrap(), "same.txt");
537 assert_eq!(results[3].classification, Classification::InBoth);
538 assert_eq!(results[3].size_comparison, Some(Ordering::Equal));
539
540 Ok(())
541 }
542}