use std::cmp::Ordering;
use std::collections::HashMap;
use std::fs;
use std::io::{self, Read};
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex, mpsc};

use indicatif::{ProgressBar, ProgressStyle};
use log::info;
use rayon::prelude::*;
use walkdir::WalkDir;

/// Where a file, identified by its path relative to the compared roots, was found.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Classification {
    /// The file exists only under the first directory.
    OnlyInDir1,
    /// The file exists only under the second directory.
    OnlyInDir2,
    /// The file exists under both directories.
    InBoth,
}

/// Outcome of comparing one attribute (e.g. modification time or size) of the
/// same file in the two directories.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Comparison {
    /// The value taken from the first directory is the greater one.
    Dir1Greater,
    /// The value taken from the second directory is the greater one.
    Dir2Greater,
    /// Neither value is greater than the other.
    Same,
}

impl Comparison {
    /// Compares `v1` (first directory) with `v2` (second directory).
    ///
    /// Values that cannot be ordered against each other (for example a
    /// floating-point NaN) are reported as [`Comparison::Same`], because
    /// neither side is greater.
    pub fn from_values<T: PartialOrd>(v1: T, v2: T) -> Self {
        match v1.partial_cmp(&v2) {
            Some(Ordering::Greater) => Comparison::Dir1Greater,
            Some(Ordering::Less) => Comparison::Dir2Greater,
            Some(Ordering::Equal) | None => Comparison::Same,
        }
    }
}

45#[derive(Debug, Clone)]
47pub struct FileComparisonResult {
48 pub relative_path: PathBuf,
50 pub classification: Classification,
52 pub modified_time_comparison: Option<Comparison>,
54 pub size_comparison: Option<Comparison>,
56 pub is_content_same: Option<bool>,
58}
59
60impl FileComparisonResult {
61 pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
62 Self {
63 relative_path,
64 classification,
65 modified_time_comparison: None,
66 size_comparison: None,
67 is_content_same: None,
68 }
69 }
70
71 pub fn is_identical(&self) -> bool {
72 self.classification == Classification::InBoth
73 && self.modified_time_comparison == Some(Comparison::Same)
74 && self.size_comparison == Some(Comparison::Same)
75 && self.is_content_same == Some(true)
76 }
77
78 pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
79 let mut parts = Vec::new();
80 match self.classification {
81 Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
82 Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
83 Classification::InBoth => {}
84 }
85
86 if let Some(comp) = &self.modified_time_comparison {
87 match comp {
88 Comparison::Dir1Greater => parts.push(format!("{} is newer", dir1_name)),
89 Comparison::Dir2Greater => parts.push(format!("{} is newer", dir2_name)),
90 Comparison::Same => {}
91 }
92 }
93
94 if let Some(comp) = &self.size_comparison {
95 match comp {
96 Comparison::Dir1Greater => parts.push(format!("Size of {} is larger", dir1_name)),
97 Comparison::Dir2Greater => parts.push(format!("Size of {} is larger", dir2_name)),
98 Comparison::Same => {}
99 }
100 }
101
102 if let Some(same) = self.is_content_same {
103 if !same {
104 parts.push("Content differ".to_string());
105 }
106 }
107
108 format!("{}: {}", self.relative_path.display(), parts.join(", "))
109 }
110}
111
/// Running totals accumulated over a set of file comparison results.
///
/// `Default` yields a summary with every counter at zero.
#[derive(Debug, Default)]
pub struct ComparisonSummary {
    /// Files present in both directories.
    pub in_both: usize,
    /// Files present only in the first directory.
    pub only_in_dir1: usize,
    /// Files present only in the second directory.
    pub only_in_dir2: usize,
    /// Files present in both directories whose first-directory copy is newer.
    pub dir1_newer: usize,
    /// Files present in both directories whose second-directory copy is newer.
    pub dir2_newer: usize,
    /// Files present in both directories, not newer on either side, with different sizes.
    pub same_time_diff_size: usize,
    /// Files present in both directories, not newer on either side, with equal
    /// sizes but different content.
    pub same_time_size_diff_content: usize,
}

136impl ComparisonSummary {
137 pub fn update(&mut self, result: &FileComparisonResult) {
138 match result.classification {
139 Classification::OnlyInDir1 => self.only_in_dir1 += 1,
140 Classification::OnlyInDir2 => self.only_in_dir2 += 1,
141 Classification::InBoth => {
142 self.in_both += 1;
143 match result.modified_time_comparison {
144 Some(Comparison::Dir1Greater) => self.dir1_newer += 1,
145 Some(Comparison::Dir2Greater) => self.dir2_newer += 1,
146 _ => {
147 if result.size_comparison != Some(Comparison::Same) {
148 self.same_time_diff_size += 1;
149 } else if result.is_content_same == Some(false) {
150 self.same_time_size_diff_content += 1;
151 }
152 }
153 }
154 }
155 }
156 }
157
158 pub fn print(&self, dir1_name: &str, dir2_name: &str) {
159 println!("Files in both: {}", self.in_both);
160 println!("Files only in {}: {}", dir1_name, self.only_in_dir1);
161 println!("Files only in {}: {}", dir2_name, self.only_in_dir2);
162 println!("Files in both ({} is newer): {}", dir1_name, self.dir1_newer);
163 println!("Files in both ({} is newer): {}", dir2_name, self.dir2_newer);
164 println!(
165 "Files in both (same time, different size): {}",
166 self.same_time_diff_size
167 );
168 println!(
169 "Files in both (same time and size, different content): {}",
170 self.same_time_size_diff_content
171 );
172 }
173}
174
/// Compares the files found beneath two directory roots.
#[derive(Debug, Clone)]
pub struct DirectoryComparer {
    /// Root of the first directory tree.
    dir1: PathBuf,
    /// Root of the second directory tree.
    dir2: PathBuf,
}

181impl DirectoryComparer {
182 pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
184 Self { dir1, dir2 }
185 }
186
187 pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
190 rayon::ThreadPoolBuilder::new()
191 .num_threads(parallel)
192 .build_global()
193 .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
194 Ok(())
195 }
196
197 pub fn run(dir1: PathBuf, dir2: PathBuf) -> anyhow::Result<()> {
200
201 let pb_holder: Arc<Mutex<Option<ProgressBar>>> = Arc::new(Mutex::new(None));
202
203 let start_time = std::time::Instant::now();
204 let mut summary = ComparisonSummary::default();
205 let dir1_str = dir1.to_str().unwrap_or("dir1");
206 let dir2_str = dir2.to_str().unwrap_or("dir2");
207
208 let (tx, rx) = mpsc::channel();
209
210 let dir1_c = dir1.clone();
212 let dir2_c = dir2.clone();
213 let pb_holder_c = pb_holder.clone();
214
215 std::thread::spawn(move || {
216 let comparer = Self::new(dir1_c, dir2_c);
217 let on_total = move |total: usize| {
218 let pb = ProgressBar::new(total as u64);
219 pb.set_style(
220 ProgressStyle::with_template(
221 "[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
222 )
223 .unwrap()
224 .progress_chars("##-"),
225 );
226 *pb_holder_c.lock().unwrap() = Some(pb);
227 };
228
229 if let Err(e) = comparer.compare_streaming(on_total, tx) {
230 eprintln!("Error during comparison: {}", e);
231 }
232 });
233
234 while let Ok(result) = rx.recv() {
236 summary.update(&result);
237 if let Some(pb) = pb_holder.lock().unwrap().as_ref() {
238 if !result.is_identical() {
239 pb.suspend(|| {
240 println!("{}", result.to_string(dir1_str, dir2_str));
241 });
242 }
243 pb.inc(1);
244 } else {
245 if !result.is_identical() {
246 println!("{}", result.to_string(dir1_str, dir2_str));
247 }
248 }
249 }
250
251 if let Some(pb) = pb_holder.lock().unwrap().as_ref() {
252 pb.finish_and_clear();
253 }
254
255 eprintln!("\n--- Comparison Summary ---");
256 summary.print(dir1_str, dir2_str);
257 eprintln!("Comparison finished in {:?}.", start_time.elapsed());
258 Ok(())
259 }
260
261 fn get_files(dir: &Path) -> anyhow::Result<HashMap<PathBuf, PathBuf>> {
262 let mut files = HashMap::new();
263 for entry in WalkDir::new(dir).into_iter().filter_map(|e| e.ok()) {
264 if entry.file_type().is_file() {
265 let rel_path = entry.path().strip_prefix(dir)?.to_path_buf();
266 files.insert(rel_path, entry.path().to_path_buf());
267 }
268 }
269 Ok(files)
270 }
271
272 pub fn compare_streaming<F>(&self, on_total: F, tx: mpsc::Sender<FileComparisonResult>) -> anyhow::Result<()>
278 where
279 F: FnOnce(usize),
280 {
281 let (dir1_files, dir2_files) = rayon::join(
282 || {
283 info!("Scanning directory: {:?}", self.dir1);
284 Self::get_files(&self.dir1)
285 },
286 || {
287 info!("Scanning directory: {:?}", self.dir2);
288 Self::get_files(&self.dir2)
289 },
290 );
291 let dir1_files = dir1_files?;
292 let dir2_files = dir2_files?;
293
294 let mut all_rel_paths: Vec<_> = dir1_files.keys().chain(dir2_files.keys()).collect();
295 all_rel_paths.sort();
296 all_rel_paths.dedup();
297
298 on_total(all_rel_paths.len());
299
300 all_rel_paths.into_par_iter().for_each(|rel_path| {
301 let in_dir1 = dir1_files.get(rel_path);
302 let in_dir2 = dir2_files.get(rel_path);
303
304 let result = match (in_dir1, in_dir2) {
305 (Some(_), None) => {
306 FileComparisonResult::new(rel_path.clone(), Classification::OnlyInDir1)
307 }
308 (None, Some(_)) => {
309 FileComparisonResult::new(rel_path.clone(), Classification::OnlyInDir2)
310 }
311 (Some(p1), Some(p2)) => {
312 let mut result = FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
313 let m1 = fs::metadata(p1).ok();
314 let m2 = fs::metadata(p2).ok();
315
316 if let (Some(m1), Some(m2)) = (m1, m2) {
317 let t1 = m1.modified().ok();
318 let t2 = m2.modified().ok();
319 if let (Some(t1), Some(t2)) = (t1, t2) {
320 result.modified_time_comparison = Some(Comparison::from_values(t1, t2));
321 }
322
323 let s1 = m1.len();
324 let s2 = m2.len();
325 result.size_comparison = Some(Comparison::from_values(s1, s2));
326
327 if s1 == s2 {
328 info!("Comparing content: {:?}", rel_path);
329 result.is_content_same = Some(compare_contents(p1, p2).unwrap_or(false));
330 }
331 }
332 result
333 }
334 (None, None) => unreachable!(),
335 };
336 let _ = tx.send(result);
337 });
338
339 Ok(())
340 }
341}
342
/// Returns `Ok(true)` when the files at `p1` and `p2` hold exactly the same bytes.
///
/// The files are read in fixed-size chunks. Each chunk is filled completely
/// (or up to end of file) before it is compared, so a `read` call that returns
/// fewer bytes than requested cannot make identical files look different.
///
/// # Errors
///
/// Returns the I/O error if either file cannot be opened or read.
fn compare_contents(p1: &Path, p2: &Path) -> io::Result<bool> {
    const CHUNK_SIZE: usize = 8192;

    let mut f1 = fs::File::open(p1)?;
    let mut f2 = fs::File::open(p2)?;

    let mut buf1 = [0u8; CHUNK_SIZE];
    let mut buf2 = [0u8; CHUNK_SIZE];

    loop {
        let n1 = read_full(&mut f1, &mut buf1)?;
        let n2 = read_full(&mut f2, &mut buf2)?;

        // Slices of different lengths compare unequal, which covers `n1 != n2`.
        if buf1[..n1] != buf2[..n2] {
            return Ok(false);
        }

        // A partially filled chunk means both files ended here (`n1 == n2`).
        if n1 < CHUNK_SIZE {
            return Ok(true);
        }
    }
}

/// Reads from `reader` until `buf` is full or end of input is reached, and
/// returns the number of bytes stored. Interrupted reads are retried.
fn read_full<R: Read>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut filled = 0;
    while filled < buf.len() {
        match reader.read(&mut buf[filled..]) {
            Ok(0) => break,
            Ok(n) => filled += n,
            Err(err) if err.kind() == io::ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        }
    }
    Ok(filled)
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temporary file holding `contents`.
    fn temp_file_with(contents: &[u8]) -> io::Result<NamedTempFile> {
        let mut file = NamedTempFile::new()?;
        file.write_all(contents)?;
        file.flush()?;
        Ok(file)
    }

    #[test]
    fn test_compare_contents_identical() -> io::Result<()> {
        let f1 = temp_file_with(b"hello world")?;
        let f2 = temp_file_with(b"hello world")?;
        assert!(compare_contents(f1.path(), f2.path())?);
        Ok(())
    }

    #[test]
    fn test_compare_contents_different() -> io::Result<()> {
        let f1 = temp_file_with(b"hello world")?;
        let f2 = temp_file_with(b"hello rust")?;
        assert!(!compare_contents(f1.path(), f2.path())?);
        Ok(())
    }

    #[test]
    fn test_compare_contents_different_size() -> io::Result<()> {
        let f1 = temp_file_with(b"hello world")?;
        let f2 = temp_file_with(b"hello")?;
        assert!(!compare_contents(f1.path(), f2.path())?);
        Ok(())
    }

    #[test]
    fn test_comparison_summary() {
        let mut summary = ComparisonSummary::default();
        let res1 = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let res2 = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut res3 = FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        res3.modified_time_comparison = Some(Comparison::Dir1Greater);

        summary.update(&res1);
        summary.update(&res2);
        summary.update(&res3);

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
        assert_eq!(summary.dir2_newer, 0);
    }

    #[test]
    fn test_directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Present in both directories with identical content.
        fs::write(dir1.path().join("same.txt"), b"same content")?;
        fs::write(dir2.path().join("same.txt"), b"same content")?;

        // Present in only one directory each.
        fs::write(dir1.path().join("only1.txt"), b"only in dir1")?;
        fs::write(dir2.path().join("only2.txt"), b"only in dir2")?;

        // Present in both directories; the copy in dir2 is two bytes longer.
        fs::write(dir1.path().join("diff.txt"), b"content 1")?;
        fs::write(dir2.path().join("diff.txt"), b"content 222")?;

        let comparer =
            DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let (tx, rx) = mpsc::channel();

        comparer.compare_streaming(|_| {}, tx)?;

        // The sender was dropped by `compare_streaming`, so this terminates.
        let mut results: Vec<FileComparisonResult> = rx.iter().collect();
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);

        assert_eq!(results[0].relative_path, PathBuf::from("diff.txt"));
        assert_eq!(results[0].classification, Classification::InBoth);
        assert_eq!(results[0].size_comparison, Some(Comparison::Dir2Greater));
        // Content is only compared when the sizes match.
        assert_eq!(results[0].is_content_same, None);

        assert_eq!(results[1].relative_path, PathBuf::from("only1.txt"));
        assert_eq!(results[1].classification, Classification::OnlyInDir1);

        assert_eq!(results[2].relative_path, PathBuf::from("only2.txt"));
        assert_eq!(results[2].classification, Classification::OnlyInDir2);

        assert_eq!(results[3].relative_path, PathBuf::from("same.txt"));
        assert_eq!(results[3].classification, Classification::InBoth);
        assert_eq!(results[3].size_comparison, Some(Comparison::Same));
        assert_eq!(results[3].is_content_same, Some(true));

        Ok(())
    }
}