1use indicatif::{ProgressBar, ProgressStyle};
2use log::info;
3use rayon::prelude::*;
4use std::cmp::Ordering;
5use std::collections::HashMap;
6use std::fs;
7use std::io::{self, Read};
8use std::path::{Path, PathBuf};
9use std::sync::{Arc, Mutex, mpsc};
10use walkdir::WalkDir;
11
/// Where a relative path was found when two directory trees are compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Classification {
    /// The path was found only under the first directory.
    OnlyInDir1,
    /// The path was found only under the second directory.
    OnlyInDir2,
    /// The path was found under both directories.
    InBoth,
}
22
/// Outcome of comparing one relative path across the two directories.
///
/// The three comparison fields stay `None` until a comparison has actually
/// been performed for them.
#[derive(Debug, Clone)]
pub struct FileComparisonResult {
    /// Path of the file relative to the compared directory roots.
    pub relative_path: PathBuf,
    /// Which of the two directories contain the file.
    pub classification: Classification,
    /// Modification time of the first directory's file compared with the
    /// second's (`Greater` means the first directory's file is newer).
    pub modified_time_comparison: Option<Ordering>,
    /// Size of the first directory's file compared with the second's
    /// (`Greater` means the first directory's file is larger).
    pub size_comparison: Option<Ordering>,
    /// Whether the file contents were found to be the same.
    pub is_content_same: Option<bool>,
}
37
38impl FileComparisonResult {
39 pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
40 Self {
41 relative_path,
42 classification,
43 modified_time_comparison: None,
44 size_comparison: None,
45 is_content_same: None,
46 }
47 }
48
49 pub fn is_identical(&self) -> bool {
50 self.classification == Classification::InBoth
51 && self.modified_time_comparison == Some(Ordering::Equal)
52 && self.size_comparison == Some(Ordering::Equal)
53 && self.is_content_same == Some(true)
54 }
55
56 pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
57 let mut parts = Vec::new();
58 match self.classification {
59 Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
60 Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
61 Classification::InBoth => {}
62 }
63
64 if let Some(comp) = &self.modified_time_comparison {
65 match comp {
66 Ordering::Greater => parts.push(format!("{} is newer", dir1_name)),
67 Ordering::Less => parts.push(format!("{} is newer", dir2_name)),
68 Ordering::Equal => {}
69 }
70 }
71
72 if let Some(comp) = &self.size_comparison {
73 match comp {
74 Ordering::Greater => parts.push(format!("Size of {} is larger", dir1_name)),
75 Ordering::Less => parts.push(format!("Size of {} is larger", dir2_name)),
76 Ordering::Equal => {}
77 }
78 }
79
80 if let Some(same) = self.is_content_same
81 && !same
82 {
83 parts.push("Content differ".to_string());
84 }
85
86 format!("{}: {}", self.relative_path.display(), parts.join(", "))
87 }
88}
89
/// Running totals gathered while comparing two directories.
///
/// All counters start at zero via `Default`.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ComparisonSummary {
    /// Number of files found in both directories.
    pub in_both: usize,
    /// Number of files found only in the first directory.
    pub only_in_dir1: usize,
    /// Number of files found only in the second directory.
    pub only_in_dir2: usize,
    /// Files in both directories whose copy in the first directory is newer.
    pub dir1_newer: usize,
    /// Files in both directories whose copy in the second directory is newer.
    pub dir2_newer: usize,
    /// Files in both directories that are not newer on either side but differ in size.
    pub same_time_diff_size: usize,
    /// Files in both directories that are not newer on either side, have the
    /// same size, and differ in content.
    pub same_time_size_diff_content: usize,
}
100
101impl ComparisonSummary {
102 pub fn update(&mut self, result: &FileComparisonResult) {
103 match result.classification {
104 Classification::OnlyInDir1 => self.only_in_dir1 += 1,
105 Classification::OnlyInDir2 => self.only_in_dir2 += 1,
106 Classification::InBoth => {
107 self.in_both += 1;
108 match result.modified_time_comparison {
109 Some(Ordering::Greater) => self.dir1_newer += 1,
110 Some(Ordering::Less) => self.dir2_newer += 1,
111 _ => {
112 if result.size_comparison != Some(Ordering::Equal) {
113 self.same_time_diff_size += 1;
114 } else if result.is_content_same == Some(false) {
115 self.same_time_size_diff_content += 1;
116 }
117 }
118 }
119 }
120 }
121 }
122
123 pub fn print(&self, dir1_name: &str, dir2_name: &str) {
124 println!("Files in both: {}", self.in_both);
125 println!("Files only in {}: {}", dir1_name, self.only_in_dir1);
126 println!("Files only in {}: {}", dir2_name, self.only_in_dir2);
127 println!(
128 "Files in both ({} is newer): {}",
129 dir1_name, self.dir1_newer
130 );
131 println!(
132 "Files in both ({} is newer): {}",
133 dir2_name, self.dir2_newer
134 );
135 println!(
136 "Files in both (same time, different size): {}",
137 self.same_time_diff_size
138 );
139 println!(
140 "Files in both (same time and size, different content): {}",
141 self.same_time_size_diff_content
142 );
143 }
144}
145
/// Compares the files found under two directory roots.
///
/// Cloning is cheap with respect to the counter: clones share the same
/// `total_files` value through the `Arc`.
#[derive(Debug, Clone)]
pub struct DirectoryComparer {
    /// Root of the first directory tree.
    dir1: PathBuf,
    /// Root of the second directory tree.
    dir2: PathBuf,
    /// Number of distinct relative paths found by the last scan; written by
    /// the comparison and read by `run` to size the progress bar.
    total_files: Arc<Mutex<usize>>,
}
153
154impl DirectoryComparer {
155 pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
157 Self {
158 dir1,
159 dir2,
160 total_files: Arc::new(Mutex::new(0)),
161 }
162 }
163
164 pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
167 rayon::ThreadPoolBuilder::new()
168 .num_threads(parallel)
169 .build_global()
170 .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
171 Ok(())
172 }
173
174 pub fn run(&self) -> anyhow::Result<()> {
177 let pb = ProgressBar::new_spinner();
178 pb.enable_steady_tick(std::time::Duration::from_millis(120));
179 pb.set_style(
180 ProgressStyle::with_template("{spinner:.green} [{elapsed_precise}] {msg}").unwrap(),
181 );
182 pb.set_message("Scanning directories...");
183
184 let start_time = std::time::Instant::now();
185 let mut summary = ComparisonSummary::default();
186 let dir1_str = self.dir1.to_str().unwrap_or("dir1");
187 let dir2_str = self.dir2.to_str().unwrap_or("dir2");
188
189 let (tx, rx) = mpsc::channel();
190 let comparer = self.clone();
191
192 std::thread::scope(|s| {
193 s.spawn(move || {
194 if let Err(e) = comparer.compare_streaming(tx) {
195 eprintln!("Error during comparison: {}", e);
196 }
197 });
198
199 let mut length_set = false;
201 while let Ok(result) = rx.recv() {
202 if !length_set {
203 let total_files = *self.total_files.lock().unwrap();
204 if total_files > 0 {
205 pb.set_length(total_files as u64);
206 pb.set_style(
207 ProgressStyle::with_template(
208 "[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} ({percent}%) {msg}",
209 )
210 .unwrap(),
211 );
212 pb.set_message("");
213 length_set = true;
214 }
215 }
216 summary.update(&result);
217 if !result.is_identical() {
218 pb.suspend(|| {
219 println!("{}", result.to_string(dir1_str, dir2_str));
220 });
221 }
222 pb.inc(1);
223 }
224 });
225
226 pb.finish_and_clear();
227
228 eprintln!("\n--- Comparison Summary ---");
229 summary.print(dir1_str, dir2_str);
230 eprintln!("Comparison finished in {:?}.", start_time.elapsed());
231 Ok(())
232 }
233
234 fn get_files(dir: &Path) -> anyhow::Result<HashMap<PathBuf, PathBuf>> {
235 let mut files = HashMap::new();
236 for entry in WalkDir::new(dir).into_iter().filter_map(|e| e.ok()) {
237 if entry.file_type().is_file() {
238 let rel_path = entry.path().strip_prefix(dir)?.to_path_buf();
239 files.insert(rel_path, entry.path().to_path_buf());
240 }
241 }
242 Ok(files)
243 }
244
245 fn compare_streaming(&self, tx: mpsc::Sender<FileComparisonResult>) -> anyhow::Result<()> {
250 let (dir1_files, dir2_files) = rayon::join(
251 || {
252 info!("Scanning directory: {:?}", self.dir1);
253 Self::get_files(&self.dir1)
254 },
255 || {
256 info!("Scanning directory: {:?}", self.dir2);
257 Self::get_files(&self.dir2)
258 },
259 );
260 let dir1_files = dir1_files?;
261 let dir2_files = dir2_files?;
262
263 let mut all_rel_paths: Vec<_> = dir1_files.keys().chain(dir2_files.keys()).collect();
264 all_rel_paths.sort();
265 all_rel_paths.dedup();
266 let total_len = all_rel_paths.len();
267
268 *self.total_files.lock().unwrap() = all_rel_paths.len();
269
270 let (tx_unordered, rx_unordered) = mpsc::channel();
271
272 std::thread::scope(|s| {
273 s.spawn(|| {
274 self.compare_unordered_streaming(
275 tx_unordered,
276 all_rel_paths,
277 &dir1_files,
278 &dir2_files,
279 );
280 });
281
282 let mut buffer = HashMap::new();
283 let mut next_index = 0;
284 while next_index < total_len {
285 match rx_unordered.recv() {
286 Ok((i, result)) => {
287 if i == next_index {
288 if tx.send(result).is_err() {
289 break;
290 }
291 next_index += 1;
292 while let Some(result) = buffer.remove(&next_index) {
293 if tx.send(result).is_err() {
294 break;
295 }
296 next_index += 1;
297 }
298 } else {
299 buffer.insert(i, result);
300 }
301 }
302 Err(_) => {
303 break;
304 }
305 }
306 }
307 });
308
309 Ok(())
310 }
311
312 fn compare_unordered_streaming<'a>(
313 &self,
314 tx: mpsc::Sender<(usize, FileComparisonResult)>,
315 all_rel_paths: Vec<&'a PathBuf>,
316 dir1_files: &'a HashMap<PathBuf, PathBuf>,
317 dir2_files: &'a HashMap<PathBuf, PathBuf>,
318 ) {
319 all_rel_paths
320 .into_par_iter()
321 .enumerate()
322 .for_each(|(i, rel_path)| {
323 let in_dir1 = dir1_files.get(rel_path);
324 let in_dir2 = dir2_files.get(rel_path);
325
326 let result = match (in_dir1, in_dir2) {
327 (Some(_), None) => {
328 FileComparisonResult::new(rel_path.clone(), Classification::OnlyInDir1)
329 }
330 (None, Some(_)) => {
331 FileComparisonResult::new(rel_path.clone(), Classification::OnlyInDir2)
332 }
333 (Some(p1), Some(p2)) => {
334 let mut result =
335 FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
336 let m1 = fs::metadata(p1).ok();
337 let m2 = fs::metadata(p2).ok();
338
339 if let (Some(m1), Some(m2)) = (m1, m2) {
340 let t1 = m1.modified().ok();
341 let t2 = m2.modified().ok();
342 if let (Some(t1), Some(t2)) = (t1, t2) {
343 result.modified_time_comparison = Some(t1.cmp(&t2));
344 }
345
346 let s1 = m1.len();
347 let s2 = m2.len();
348 result.size_comparison = Some(s1.cmp(&s2));
349
350 if s1 == s2 {
351 info!("Comparing content: {:?}", rel_path);
352 result.is_content_same =
353 Some(compare_contents(p1, p2).unwrap_or(false));
354 }
355 }
356 result
357 }
358 (None, None) => unreachable!(),
359 };
360 let _ = tx.send((i, result));
361 });
362 }
363}
364
/// Reads from `reader` until `buf` is full or end-of-file is reached and
/// returns the number of bytes placed in `buf`.
///
/// Reads that fail with `ErrorKind::Interrupted` are retried.
fn fill_buffer<R: Read>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut filled = 0;
    while filled < buf.len() {
        match reader.read(&mut buf[filled..]) {
            Ok(0) => break,
            Ok(n) => filled += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(filled)
}

/// Returns `Ok(true)` when the files at `p1` and `p2` hold exactly the same
/// bytes, `Ok(false)` otherwise.
///
/// The files are read in fixed-size chunks. Each chunk is filled completely
/// (or up to end-of-file) before comparing, because a single `read` call may
/// return fewer bytes than requested and the two files need not be split at
/// the same offsets.
///
/// # Errors
///
/// Returns any I/O error raised while opening or reading either file.
fn compare_contents(p1: &Path, p2: &Path) -> io::Result<bool> {
    let mut f1 = fs::File::open(p1)?;
    let mut f2 = fs::File::open(p2)?;

    let mut buf1 = [0u8; 8192];
    let mut buf2 = [0u8; 8192];

    loop {
        let n1 = fill_buffer(&mut f1, &mut buf1)?;
        let n2 = fill_buffer(&mut f2, &mut buf2)?;

        // Slices of different lengths never compare equal, so this also
        // covers one file ending before the other.
        if buf1[..n1] != buf2[..n2] {
            return Ok(false);
        }

        // A partially filled (or empty) chunk means both files ended here.
        if n1 < buf1.len() {
            return Ok(true);
        }
    }
}
385
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a named temporary file holding exactly `bytes`.
    fn temp_file_with(bytes: &[u8]) -> io::Result<NamedTempFile> {
        let mut file = NamedTempFile::new()?;
        file.write_all(bytes)?;
        Ok(file)
    }

    /// Creates `name` inside `dir` and fills it with `bytes`.
    fn write_file(dir: &Path, name: &str, bytes: &[u8]) -> io::Result<()> {
        let mut file = fs::File::create(dir.join(name))?;
        file.write_all(bytes)
    }

    #[test]
    fn test_compare_contents_identical() -> io::Result<()> {
        let left = temp_file_with(b"hello world")?;
        let right = temp_file_with(b"hello world")?;
        assert!(compare_contents(left.path(), right.path())?);
        Ok(())
    }

    #[test]
    fn test_compare_contents_different() -> io::Result<()> {
        let left = temp_file_with(b"hello world")?;
        let right = temp_file_with(b"hello rust")?;
        assert!(!compare_contents(left.path(), right.path())?);
        Ok(())
    }

    #[test]
    fn test_compare_contents_different_size() -> io::Result<()> {
        let left = temp_file_with(b"hello world")?;
        let right = temp_file_with(b"hello")?;
        assert!(!compare_contents(left.path(), right.path())?);
        Ok(())
    }

    #[test]
    fn test_comparison_summary() {
        let only_first = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let only_second = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut newer_in_first =
            FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        newer_in_first.modified_time_comparison = Some(Ordering::Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&only_first, &only_second, &newer_in_first] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn test_directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Same creation order as the fixture has always used.
        write_file(dir1.path(), "same.txt", b"same content")?;
        write_file(dir1.path(), "only1.txt", b"only in dir1")?;
        write_file(dir2.path(), "same.txt", b"same content")?;
        write_file(dir2.path(), "only2.txt", b"only in dir2")?;
        write_file(dir1.path(), "diff.txt", b"content 1")?;
        write_file(dir2.path(), "diff.txt", b"content 222")?;

        let comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let (tx, rx) = mpsc::channel();

        // The sender is consumed by the call, so the receiver drains and ends.
        comparer.compare_streaming(tx)?;

        let mut results: Vec<FileComparisonResult> = rx.iter().collect();
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);

        let diff = &results[0];
        assert_eq!(diff.relative_path.to_str().unwrap(), "diff.txt");
        assert_eq!(diff.classification, Classification::InBoth);
        assert!(
            diff.is_content_same == Some(false) || diff.size_comparison != Some(Ordering::Equal)
        );

        let only1 = &results[1];
        assert_eq!(only1.relative_path.to_str().unwrap(), "only1.txt");
        assert_eq!(only1.classification, Classification::OnlyInDir1);

        let only2 = &results[2];
        assert_eq!(only2.relative_path.to_str().unwrap(), "only2.txt");
        assert_eq!(only2.classification, Classification::OnlyInDir2);

        let same = &results[3];
        assert_eq!(same.relative_path.to_str().unwrap(), "same.txt");
        assert_eq!(same.classification, Classification::InBoth);
        assert_eq!(same.size_comparison, Some(Ordering::Equal));

        Ok(())
    }
}