Skip to main content

compare_dir/
lib.rs

1use indicatif::{ProgressBar, ProgressStyle};
2use log::info;
3use rayon::prelude::*;
4use std::cmp::Ordering;
5use std::collections::HashMap;
6use std::fs;
7use std::io::{self, Read};
8use std::path::{Path, PathBuf};
9use std::sync::{Arc, Mutex, mpsc};
10use walkdir::WalkDir;
11
/// Where a relative path was found when the two directory trees were compared.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Classification {
    /// The path is present in the first directory and absent from the second.
    OnlyInDir1,
    /// The path is present in the second directory and absent from the first.
    OnlyInDir2,
    /// The path is present in both directories.
    InBoth,
}
22
23/// Detailed result of comparing a single file.
24#[derive(Debug, Clone)]
25pub struct FileComparisonResult {
26    /// The path relative to the root of the directories.
27    pub relative_path: PathBuf,
28    /// Whether the file exists in one or both directories.
29    pub classification: Classification,
30    /// Comparison of the last modified time, if applicable.
31    pub modified_time_comparison: Option<Ordering>,
32    /// Comparison of the file size, if applicable.
33    pub size_comparison: Option<Ordering>,
34    /// Whether the content is byte-for-byte identical, if applicable.
35    pub is_content_same: Option<bool>,
36}
37
38impl FileComparisonResult {
39    pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
40        Self {
41            relative_path,
42            classification,
43            modified_time_comparison: None,
44            size_comparison: None,
45            is_content_same: None,
46        }
47    }
48
49    pub fn is_identical(&self) -> bool {
50        self.classification == Classification::InBoth
51            && self.modified_time_comparison == Some(Ordering::Equal)
52            && self.size_comparison == Some(Ordering::Equal)
53            && self.is_content_same == Some(true)
54    }
55
56    pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
57        let mut parts = Vec::new();
58        match self.classification {
59            Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
60            Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
61            Classification::InBoth => {}
62        }
63
64        if let Some(comp) = &self.modified_time_comparison {
65            match comp {
66                Ordering::Greater => parts.push(format!("{} is newer", dir1_name)),
67                Ordering::Less => parts.push(format!("{} is newer", dir2_name)),
68                Ordering::Equal => {}
69            }
70        }
71
72        if let Some(comp) = &self.size_comparison {
73            match comp {
74                Ordering::Greater => parts.push(format!("Size of {} is larger", dir1_name)),
75                Ordering::Less => parts.push(format!("Size of {} is larger", dir2_name)),
76                Ordering::Equal => {}
77            }
78        }
79
80        if let Some(same) = self.is_content_same
81            && !same
82        {
83            parts.push("Content differ".to_string());
84        }
85
86        format!("{}: {}", self.relative_path.display(), parts.join(", "))
87    }
88}
89
/// Running counters describing the outcome of a directory comparison.
///
/// All counters start at zero (`Default`) and are advanced by
/// `ComparisonSummary::update`, one call per compared file.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ComparisonSummary {
    /// Number of files present in both directories.
    pub in_both: usize,
    /// Number of files present only in the first directory.
    pub only_in_dir1: usize,
    /// Number of files present only in the second directory.
    pub only_in_dir2: usize,
    /// Number of files in both directories whose first-directory copy is newer.
    pub dir1_newer: usize,
    /// Number of files in both directories whose second-directory copy is newer.
    pub dir2_newer: usize,
    /// Number of files in both directories that are not newer on either side
    /// but differ in size.
    pub same_time_diff_size: usize,
    /// Number of files in both directories that are not newer on either side,
    /// have equal size, and differ in content.
    pub same_time_size_diff_content: usize,
}
100
101impl ComparisonSummary {
102    pub fn update(&mut self, result: &FileComparisonResult) {
103        match result.classification {
104            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
105            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
106            Classification::InBoth => {
107                self.in_both += 1;
108                match result.modified_time_comparison {
109                    Some(Ordering::Greater) => self.dir1_newer += 1,
110                    Some(Ordering::Less) => self.dir2_newer += 1,
111                    _ => {
112                        if result.size_comparison != Some(Ordering::Equal) {
113                            self.same_time_diff_size += 1;
114                        } else if result.is_content_same == Some(false) {
115                            self.same_time_size_diff_content += 1;
116                        }
117                    }
118                }
119            }
120        }
121    }
122
123    pub fn print(&self, dir1_name: &str, dir2_name: &str) {
124        println!("Files in both: {}", self.in_both);
125        println!("Files only in {}: {}", dir1_name, self.only_in_dir1);
126        println!("Files only in {}: {}", dir2_name, self.only_in_dir2);
127        println!(
128            "Files in both ({} is newer): {}",
129            dir1_name, self.dir1_newer
130        );
131        println!(
132            "Files in both ({} is newer): {}",
133            dir2_name, self.dir2_newer
134        );
135        println!(
136            "Files in both (same time, different size): {}",
137            self.same_time_diff_size
138        );
139        println!(
140            "Files in both (same time and size, different content): {}",
141            self.same_time_size_diff_content
142        );
143    }
144}
145
/// A tool for comparing the contents of two directories.
///
/// Cloning is cheap with respect to the file counter: clones share the same
/// `total_files` value through the `Arc`.
#[derive(Debug, Clone)]
pub struct DirectoryComparer {
    /// Root of the first directory tree.
    dir1: PathBuf,
    /// Root of the second directory tree.
    dir2: PathBuf,
    /// Number of distinct relative paths found across both directories;
    /// written once scanning finishes and read by the progress display.
    total_files: Arc<Mutex<usize>>,
}
153
154impl DirectoryComparer {
155    /// Creates a new `DirectoryComparer` for the two given directories.
156    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
157        Self {
158            dir1,
159            dir2,
160            total_files: Arc::new(Mutex::new(0)),
161        }
162    }
163
164    /// Sets the maximum number of threads for parallel processing.
165    /// This initializes the global Rayon thread pool.
166    pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
167        rayon::ThreadPoolBuilder::new()
168            .num_threads(parallel)
169            .build_global()
170            .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
171        Ok(())
172    }
173
174    /// Executes the directory comparison and prints results to stdout.
175    /// This is a convenience method for CLI usage.
176    pub fn run(&self) -> anyhow::Result<()> {
177        let pb = ProgressBar::new_spinner();
178        pb.enable_steady_tick(std::time::Duration::from_millis(120));
179        pb.set_style(
180            ProgressStyle::with_template("{spinner:.green} [{elapsed_precise}] {msg}").unwrap(),
181        );
182        pb.set_message("Scanning directories...");
183
184        let start_time = std::time::Instant::now();
185        let mut summary = ComparisonSummary::default();
186        let dir1_str = self.dir1.to_str().unwrap_or("dir1");
187        let dir2_str = self.dir2.to_str().unwrap_or("dir2");
188
189        let (tx, rx) = mpsc::channel();
190        let comparer = self.clone();
191
192        std::thread::scope(|s| {
193            s.spawn(move || {
194                if let Err(e) = comparer.compare_streaming(tx) {
195                    eprintln!("Error during comparison: {}", e);
196                }
197            });
198
199            // Receive results and update summary/UI
200            let mut length_set = false;
201            while let Ok(result) = rx.recv() {
202                if !length_set {
203                    let total_files = *self.total_files.lock().unwrap();
204                    if total_files > 0 {
205                        pb.set_length(total_files as u64);
206                        pb.set_style(
207                            ProgressStyle::with_template(
208                                "[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} ({percent}%) {msg}",
209                            )
210                            .unwrap(),
211                        );
212                        pb.set_message("");
213                        length_set = true;
214                    }
215                }
216                summary.update(&result);
217                if !result.is_identical() {
218                    pb.suspend(|| {
219                        println!("{}", result.to_string(dir1_str, dir2_str));
220                    });
221                }
222                pb.inc(1);
223            }
224        });
225
226        pb.finish_and_clear();
227
228        eprintln!("\n--- Comparison Summary ---");
229        summary.print(dir1_str, dir2_str);
230        eprintln!("Comparison finished in {:?}.", start_time.elapsed());
231        Ok(())
232    }
233
234    fn get_files(dir: &Path) -> anyhow::Result<HashMap<PathBuf, PathBuf>> {
235        let mut files = HashMap::new();
236        for entry in WalkDir::new(dir).into_iter().filter_map(|e| e.ok()) {
237            if entry.file_type().is_file() {
238                let rel_path = entry.path().strip_prefix(dir)?.to_path_buf();
239                files.insert(rel_path, entry.path().to_path_buf());
240            }
241        }
242        Ok(files)
243    }
244
245    /// Performs the directory comparison and streams results via a channel.
246    ///
247    /// # Arguments
248    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
249    fn compare_streaming(&self, tx: mpsc::Sender<FileComparisonResult>) -> anyhow::Result<()> {
250        let (dir1_files, dir2_files) = rayon::join(
251            || {
252                info!("Scanning directory: {:?}", self.dir1);
253                Self::get_files(&self.dir1)
254            },
255            || {
256                info!("Scanning directory: {:?}", self.dir2);
257                Self::get_files(&self.dir2)
258            },
259        );
260        let dir1_files = dir1_files?;
261        let dir2_files = dir2_files?;
262
263        let mut all_rel_paths: Vec<_> = dir1_files.keys().chain(dir2_files.keys()).collect();
264        all_rel_paths.sort();
265        all_rel_paths.dedup();
266        let total_len = all_rel_paths.len();
267
268        *self.total_files.lock().unwrap() = all_rel_paths.len();
269
270        let (tx_unordered, rx_unordered) = mpsc::channel();
271
272        std::thread::scope(|s| {
273            s.spawn(|| {
274                self.compare_unordered_streaming(
275                    tx_unordered,
276                    all_rel_paths,
277                    &dir1_files,
278                    &dir2_files,
279                );
280            });
281
282            let mut buffer = HashMap::new();
283            let mut next_index = 0;
284            while next_index < total_len {
285                match rx_unordered.recv() {
286                    Ok((i, result)) => {
287                        if i == next_index {
288                            if tx.send(result).is_err() {
289                                break;
290                            }
291                            next_index += 1;
292                            while let Some(result) = buffer.remove(&next_index) {
293                                if tx.send(result).is_err() {
294                                    break;
295                                }
296                                next_index += 1;
297                            }
298                        } else {
299                            buffer.insert(i, result);
300                        }
301                    }
302                    Err(_) => {
303                        break;
304                    }
305                }
306            }
307        });
308
309        Ok(())
310    }
311
312    fn compare_unordered_streaming<'a>(
313        &self,
314        tx: mpsc::Sender<(usize, FileComparisonResult)>,
315        all_rel_paths: Vec<&'a PathBuf>,
316        dir1_files: &'a HashMap<PathBuf, PathBuf>,
317        dir2_files: &'a HashMap<PathBuf, PathBuf>,
318    ) {
319        all_rel_paths
320            .into_par_iter()
321            .enumerate()
322            .for_each(|(i, rel_path)| {
323                let in_dir1 = dir1_files.get(rel_path);
324                let in_dir2 = dir2_files.get(rel_path);
325
326                let result = match (in_dir1, in_dir2) {
327                    (Some(_), None) => {
328                        FileComparisonResult::new(rel_path.clone(), Classification::OnlyInDir1)
329                    }
330                    (None, Some(_)) => {
331                        FileComparisonResult::new(rel_path.clone(), Classification::OnlyInDir2)
332                    }
333                    (Some(p1), Some(p2)) => {
334                        let mut result =
335                            FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
336                        let m1 = fs::metadata(p1).ok();
337                        let m2 = fs::metadata(p2).ok();
338
339                        if let (Some(m1), Some(m2)) = (m1, m2) {
340                            let t1 = m1.modified().ok();
341                            let t2 = m2.modified().ok();
342                            if let (Some(t1), Some(t2)) = (t1, t2) {
343                                result.modified_time_comparison = Some(t1.cmp(&t2));
344                            }
345
346                            let s1 = m1.len();
347                            let s2 = m2.len();
348                            result.size_comparison = Some(s1.cmp(&s2));
349
350                            if s1 == s2 {
351                                info!("Comparing content: {:?}", rel_path);
352                                result.is_content_same =
353                                    Some(compare_contents(p1, p2).unwrap_or(false));
354                            }
355                        }
356                        result
357                    }
358                    (None, None) => unreachable!(),
359                };
360                let _ = tx.send((i, result));
361            });
362    }
363}
364
/// Returns `Ok(true)` when the files at `p1` and `p2` hold identical bytes.
///
/// The files are read in fixed-size chunks and compared chunk by chunk, so
/// files of different length compare as different.
///
/// # Errors
/// Returns any I/O error raised while opening or reading either file.
fn compare_contents(p1: &Path, p2: &Path) -> io::Result<bool> {
    let mut f1 = fs::File::open(p1)?;
    let mut f2 = fs::File::open(p2)?;

    let mut buf1 = [0u8; 8192];
    let mut buf2 = [0u8; 8192];

    loop {
        // A single `read` may legally return fewer bytes than requested, so
        // fill both buffers completely (or to EOF) to keep the chunks aligned.
        let n1 = read_full(&mut f1, &mut buf1)?;
        let n2 = read_full(&mut f2, &mut buf2)?;

        // Slice equality also fails when the lengths differ.
        if buf1[..n1] != buf2[..n2] {
            return Ok(false);
        }

        // A short chunk means both files hit EOF at the same offset.
        if n1 < buf1.len() {
            return Ok(true);
        }
    }
}

/// Reads from `reader` until `buf` is full or EOF is reached, retrying on
/// `Interrupted`; returns the number of bytes placed in `buf`.
fn read_full(reader: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut filled = 0;
    while filled < buf.len() {
        match reader.read(&mut buf[filled..]) {
            Ok(0) => break,
            Ok(n) => filled += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(filled)
}
385
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a named temporary file holding `bytes`. The returned handle
    /// must stay alive for as long as the file is needed.
    fn temp_with(bytes: &[u8]) -> io::Result<NamedTempFile> {
        let mut file = NamedTempFile::new()?;
        file.write_all(bytes)?;
        Ok(file)
    }

    /// Writes `bytes` to a new file called `name` inside `dir`.
    fn put(dir: &Path, name: &str, bytes: &[u8]) -> io::Result<()> {
        fs::File::create(dir.join(name))?.write_all(bytes)
    }

    #[test]
    fn test_compare_contents_identical() -> io::Result<()> {
        let left = temp_with(b"hello world")?;
        let right = temp_with(b"hello world")?;
        assert!(compare_contents(left.path(), right.path())?);
        Ok(())
    }

    #[test]
    fn test_compare_contents_different() -> io::Result<()> {
        let left = temp_with(b"hello world")?;
        let right = temp_with(b"hello rust")?;
        assert!(!compare_contents(left.path(), right.path())?);
        Ok(())
    }

    #[test]
    fn test_compare_contents_different_size() -> io::Result<()> {
        let left = temp_with(b"hello world")?;
        let right = temp_with(b"hello")?;
        // Callers only compare equal-sized files, but a length mismatch must
        // still be reported as a difference.
        assert!(!compare_contents(left.path(), right.path())?);
        Ok(())
    }

    #[test]
    fn test_comparison_summary() {
        let only_first = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let only_second = FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut newer_in_first =
            FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        newer_in_first.modified_time_comparison = Some(Ordering::Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&only_first, &only_second, &newer_in_first] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn test_directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // First directory: one shared file, one unique file, one differing file.
        put(dir1.path(), "same.txt", b"same content")?;
        put(dir1.path(), "only1.txt", b"only in dir1")?;
        put(dir1.path(), "diff.txt", b"content 1")?;

        // Second directory: the counterparts.
        put(dir2.path(), "same.txt", b"same content")?;
        put(dir2.path(), "only2.txt", b"only in dir2")?;
        put(dir2.path(), "diff.txt", b"content 222")?; // different length and content

        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let (tx, rx) = mpsc::channel();

        // The sender is consumed here, so the receiver drains and then ends.
        comparer.compare_streaming(tx)?;

        let mut results: Vec<FileComparisonResult> = rx.iter().collect();
        results.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));

        assert_eq!(results.len(), 4);

        // diff.txt
        let diff = &results[0];
        assert_eq!(diff.relative_path.to_str().unwrap(), "diff.txt");
        assert_eq!(diff.classification, Classification::InBoth);
        assert!(
            diff.is_content_same == Some(false) || diff.size_comparison != Some(Ordering::Equal)
        );

        // only1.txt
        let only1 = &results[1];
        assert_eq!(only1.relative_path.to_str().unwrap(), "only1.txt");
        assert_eq!(only1.classification, Classification::OnlyInDir1);

        // only2.txt
        let only2 = &results[2];
        assert_eq!(only2.relative_path.to_str().unwrap(), "only2.txt");
        assert_eq!(only2.classification, Classification::OnlyInDir2);

        // same.txt
        let same = &results[3];
        assert_eq!(same.relative_path.to_str().unwrap(), "same.txt");
        assert_eq!(same.classification, Classification::InBoth);
        assert_eq!(same.size_comparison, Some(Ordering::Equal));

        Ok(())
    }
}
510}