Skip to main content

compare_dir/
lib.rs

1use indicatif::{ProgressBar, ProgressStyle};
2use log::info;
3use rayon::prelude::*;
4use std::collections::HashMap;
5use std::fs;
6use std::io::{self, Read};
7use std::path::{Path, PathBuf};
8use std::sync::{Arc, Mutex, mpsc};
9use walkdir::WalkDir;
10
/// Where a relative path was found when the two directories were matched up.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Classification {
    /// Present in the first directory and absent from the second.
    OnlyInDir1,
    /// Present in the second directory and absent from the first.
    OnlyInDir2,
    /// Present in both directories.
    InBoth,
}
21
/// The result of comparing two values (e.g., size or modified time).
///
/// This is a field-less tag, so it is `Copy` and can be passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Comparison {
    /// The value in the first directory is greater.
    Dir1Greater,
    /// The value in the second directory is greater.
    Dir2Greater,
    /// The values are equal.
    Same,
}

impl Comparison {
    /// Compares `v1` (the value from the first directory) with `v2` (the value from the
    /// second directory).
    ///
    /// Returns [`Comparison::Dir1Greater`] when `v1 > v2`, [`Comparison::Dir2Greater`] when
    /// `v1 < v2`, and [`Comparison::Same`] otherwise. Values that cannot be ordered at all
    /// (for example a floating-point NaN) therefore also produce [`Comparison::Same`].
    pub fn from_values<T: PartialOrd>(v1: T, v2: T) -> Self {
        match v1.partial_cmp(&v2) {
            Some(std::cmp::Ordering::Greater) => Comparison::Dir1Greater,
            Some(std::cmp::Ordering::Less) => Comparison::Dir2Greater,
            // Equal, or not comparable: neither side is greater.
            Some(std::cmp::Ordering::Equal) | None => Comparison::Same,
        }
    }
}
44
45/// Detailed result of comparing a single file.
46#[derive(Debug, Clone)]
47pub struct FileComparisonResult {
48    /// The path relative to the root of the directories.
49    pub relative_path: PathBuf,
50    /// Whether the file exists in one or both directories.
51    pub classification: Classification,
52    /// Comparison of the last modified time, if applicable.
53    pub modified_time_comparison: Option<Comparison>,
54    /// Comparison of the file size, if applicable.
55    pub size_comparison: Option<Comparison>,
56    /// Whether the content is byte-for-byte identical, if applicable.
57    pub is_content_same: Option<bool>,
58}
59
60impl FileComparisonResult {
61    pub fn new(relative_path: PathBuf, classification: Classification) -> Self {
62        Self {
63            relative_path,
64            classification,
65            modified_time_comparison: None,
66            size_comparison: None,
67            is_content_same: None,
68        }
69    }
70
71    pub fn is_identical(&self) -> bool {
72        self.classification == Classification::InBoth
73            && self.modified_time_comparison == Some(Comparison::Same)
74            && self.size_comparison == Some(Comparison::Same)
75            && self.is_content_same == Some(true)
76    }
77
78    pub fn to_string(&self, dir1_name: &str, dir2_name: &str) -> String {
79        let mut parts = Vec::new();
80        match self.classification {
81            Classification::OnlyInDir1 => parts.push(format!("Only in {}", dir1_name)),
82            Classification::OnlyInDir2 => parts.push(format!("Only in {}", dir2_name)),
83            Classification::InBoth => {}
84        }
85
86        if let Some(comp) = &self.modified_time_comparison {
87            match comp {
88                Comparison::Dir1Greater => parts.push(format!("{} is newer", dir1_name)),
89                Comparison::Dir2Greater => parts.push(format!("{} is newer", dir2_name)),
90                Comparison::Same => {}
91            }
92        }
93
94        if let Some(comp) = &self.size_comparison {
95            match comp {
96                Comparison::Dir1Greater => parts.push(format!("Size of {} is larger", dir1_name)),
97                Comparison::Dir2Greater => parts.push(format!("Size of {} is larger", dir2_name)),
98                Comparison::Same => {}
99            }
100        }
101
102        if let Some(same) = self.is_content_same
103            && !same
104        {
105            parts.push("Content differ".to_string());
106        }
107
108        format!("{}: {}", self.relative_path.display(), parts.join(", "))
109    }
110}
111
112#[derive(Default)]
113pub struct ComparisonSummary {
114    pub in_both: usize,
115    pub only_in_dir1: usize,
116    pub only_in_dir2: usize,
117    pub dir1_newer: usize,
118    pub dir2_newer: usize,
119    pub same_time_diff_size: usize,
120    pub same_time_size_diff_content: usize,
121}
122
123impl ComparisonSummary {
124    pub fn update(&mut self, result: &FileComparisonResult) {
125        match result.classification {
126            Classification::OnlyInDir1 => self.only_in_dir1 += 1,
127            Classification::OnlyInDir2 => self.only_in_dir2 += 1,
128            Classification::InBoth => {
129                self.in_both += 1;
130                match result.modified_time_comparison {
131                    Some(Comparison::Dir1Greater) => self.dir1_newer += 1,
132                    Some(Comparison::Dir2Greater) => self.dir2_newer += 1,
133                    _ => {
134                        if result.size_comparison != Some(Comparison::Same) {
135                            self.same_time_diff_size += 1;
136                        } else if result.is_content_same == Some(false) {
137                            self.same_time_size_diff_content += 1;
138                        }
139                    }
140                }
141            }
142        }
143    }
144
145    pub fn print(&self, dir1_name: &str, dir2_name: &str) {
146        println!("Files in both: {}", self.in_both);
147        println!("Files only in {}: {}", dir1_name, self.only_in_dir1);
148        println!("Files only in {}: {}", dir2_name, self.only_in_dir2);
149        println!(
150            "Files in both ({} is newer): {}",
151            dir1_name, self.dir1_newer
152        );
153        println!(
154            "Files in both ({} is newer): {}",
155            dir2_name, self.dir2_newer
156        );
157        println!(
158            "Files in both (same time, different size): {}",
159            self.same_time_diff_size
160        );
161        println!(
162            "Files in both (same time and size, different content): {}",
163            self.same_time_size_diff_content
164        );
165    }
166}
167
/// A tool for comparing the contents of two directories.
///
/// Holds the two directory roots; the comparison itself is driven by the methods of
/// this type.
#[derive(Debug, Clone)]
pub struct DirectoryComparer {
    /// Root of the first directory.
    dir1: PathBuf,
    /// Root of the second directory.
    dir2: PathBuf,
}
173
174impl DirectoryComparer {
175    /// Creates a new `DirectoryComparer` for the two given directories.
176    pub fn new(dir1: PathBuf, dir2: PathBuf) -> Self {
177        Self { dir1, dir2 }
178    }
179
180    /// Sets the maximum number of threads for parallel processing.
181    /// This initializes the global Rayon thread pool.
182    pub fn set_max_threads(parallel: usize) -> anyhow::Result<()> {
183        rayon::ThreadPoolBuilder::new()
184            .num_threads(parallel)
185            .build_global()
186            .map_err(|e| anyhow::anyhow!("Failed to initialize thread pool: {}", e))?;
187        Ok(())
188    }
189
190    /// Executes the directory comparison and prints results to stdout.
191    /// This is a convenience method for CLI usage.
192    pub fn run(dir1: PathBuf, dir2: PathBuf) -> anyhow::Result<()> {
193        let pb_holder: Arc<Mutex<Option<ProgressBar>>> = Arc::new(Mutex::new(None));
194
195        let start_time = std::time::Instant::now();
196        let mut summary = ComparisonSummary::default();
197        let dir1_str = dir1.to_str().unwrap_or("dir1");
198        let dir2_str = dir2.to_str().unwrap_or("dir2");
199
200        let (tx, rx) = mpsc::channel();
201
202        // Run comparison in a separate thread or use rayon::spawn
203        let dir1_c = dir1.clone();
204        let dir2_c = dir2.clone();
205        let pb_holder_c = pb_holder.clone();
206
207        std::thread::spawn(move || {
208            let comparer = Self::new(dir1_c, dir2_c);
209            let on_total = move |total: usize| {
210                let pb = ProgressBar::new(total as u64);
211                pb.set_style(
212                    ProgressStyle::with_template(
213                        "[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}",
214                    )
215                    .unwrap()
216                    .progress_chars("##-"),
217                );
218                *pb_holder_c.lock().unwrap() = Some(pb);
219            };
220
221            if let Err(e) = comparer.compare_streaming(on_total, tx) {
222                eprintln!("Error during comparison: {}", e);
223            }
224        });
225
226        // Receive results and update summary/UI
227        while let Ok(result) = rx.recv() {
228            summary.update(&result);
229            if let Some(pb) = pb_holder.lock().unwrap().as_ref() {
230                if !result.is_identical() {
231                    pb.suspend(|| {
232                        println!("{}", result.to_string(dir1_str, dir2_str));
233                    });
234                }
235                pb.inc(1);
236            } else if !result.is_identical() {
237                println!("{}", result.to_string(dir1_str, dir2_str));
238            }
239        }
240
241        if let Some(pb) = pb_holder.lock().unwrap().as_ref() {
242            pb.finish_and_clear();
243        }
244
245        eprintln!("\n--- Comparison Summary ---");
246        summary.print(dir1_str, dir2_str);
247        eprintln!("Comparison finished in {:?}.", start_time.elapsed());
248        Ok(())
249    }
250
251    fn get_files(dir: &Path) -> anyhow::Result<HashMap<PathBuf, PathBuf>> {
252        let mut files = HashMap::new();
253        for entry in WalkDir::new(dir).into_iter().filter_map(|e| e.ok()) {
254            if entry.file_type().is_file() {
255                let rel_path = entry.path().strip_prefix(dir)?.to_path_buf();
256                files.insert(rel_path, entry.path().to_path_buf());
257            }
258        }
259        Ok(files)
260    }
261
262    /// Performs the directory comparison and streams results via a channel.
263    ///
264    /// # Arguments
265    /// * `on_total` - A callback triggered with the total number of files to be compared.
266    /// * `tx` - A sender to transmit `FileComparisonResult` as they are computed.
267    pub fn compare_streaming<F>(
268        &self,
269        on_total: F,
270        tx: mpsc::Sender<FileComparisonResult>,
271    ) -> anyhow::Result<()>
272    where
273        F: FnOnce(usize),
274    {
275        let (dir1_files, dir2_files) = rayon::join(
276            || {
277                info!("Scanning directory: {:?}", self.dir1);
278                Self::get_files(&self.dir1)
279            },
280            || {
281                info!("Scanning directory: {:?}", self.dir2);
282                Self::get_files(&self.dir2)
283            },
284        );
285        let dir1_files = dir1_files?;
286        let dir2_files = dir2_files?;
287
288        let mut all_rel_paths: Vec<_> = dir1_files.keys().chain(dir2_files.keys()).collect();
289        all_rel_paths.sort();
290        all_rel_paths.dedup();
291
292        on_total(all_rel_paths.len());
293
294        all_rel_paths.into_par_iter().for_each(|rel_path| {
295            let in_dir1 = dir1_files.get(rel_path);
296            let in_dir2 = dir2_files.get(rel_path);
297
298            let result = match (in_dir1, in_dir2) {
299                (Some(_), None) => {
300                    FileComparisonResult::new(rel_path.clone(), Classification::OnlyInDir1)
301                }
302                (None, Some(_)) => {
303                    FileComparisonResult::new(rel_path.clone(), Classification::OnlyInDir2)
304                }
305                (Some(p1), Some(p2)) => {
306                    let mut result =
307                        FileComparisonResult::new(rel_path.clone(), Classification::InBoth);
308                    let m1 = fs::metadata(p1).ok();
309                    let m2 = fs::metadata(p2).ok();
310
311                    if let (Some(m1), Some(m2)) = (m1, m2) {
312                        let t1 = m1.modified().ok();
313                        let t2 = m2.modified().ok();
314                        if let (Some(t1), Some(t2)) = (t1, t2) {
315                            result.modified_time_comparison = Some(Comparison::from_values(t1, t2));
316                        }
317
318                        let s1 = m1.len();
319                        let s2 = m2.len();
320                        result.size_comparison = Some(Comparison::from_values(s1, s2));
321
322                        if s1 == s2 {
323                            info!("Comparing content: {:?}", rel_path);
324                            result.is_content_same =
325                                Some(compare_contents(p1, p2).unwrap_or(false));
326                        }
327                    }
328                    result
329                }
330                (None, None) => unreachable!(),
331            };
332            let _ = tx.send(result);
333        });
334
335        Ok(())
336    }
337}
338
/// Size in bytes of each of the two buffers used while comparing contents.
const COMPARE_BUF_SIZE: usize = 8192;

/// Reads from `reader` until `buf` is full or end of input is reached.
///
/// Returns the number of bytes placed in `buf`; a value smaller than `buf.len()` means
/// the end of input was reached. Reads interrupted by a signal are retried.
fn read_full<R: Read>(reader: &mut R, buf: &mut [u8]) -> io::Result<usize> {
    let mut filled = 0;
    while filled < buf.len() {
        match reader.read(&mut buf[filled..]) {
            Ok(0) => break,
            Ok(n) => filled += n,
            Err(e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(filled)
}

/// Returns `Ok(true)` when both readers yield exactly the same sequence of bytes.
fn readers_equal<A: Read, B: Read>(mut a: A, mut b: B) -> io::Result<bool> {
    let mut buf_a = [0u8; COMPARE_BUF_SIZE];
    let mut buf_b = [0u8; COMPARE_BUF_SIZE];

    loop {
        // A single `read` may return fewer bytes than requested, so each buffer is
        // filled completely before the chunks are compared.
        let n_a = read_full(&mut a, &mut buf_a)?;
        let n_b = read_full(&mut b, &mut buf_b)?;

        // Slices of different lengths compare unequal, which covers a length mismatch.
        if buf_a[..n_a] != buf_b[..n_b] {
            return Ok(false);
        }

        // Equal chunks shorter than the buffer: both inputs ended at the same point.
        if n_a < COMPARE_BUF_SIZE {
            return Ok(true);
        }
    }
}

/// Compares the files at `p1` and `p2` byte for byte.
///
/// Returns `Ok(true)` when the contents are identical and `Ok(false)` otherwise,
/// including when the files have different lengths.
///
/// # Errors
/// Returns the I/O error raised while opening or reading either file.
fn compare_contents(p1: &Path, p2: &Path) -> io::Result<bool> {
    let f1 = fs::File::open(p1)?;
    let f2 = fs::File::open(p2)?;
    readers_equal(f1, f2)
}
359
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a named temporary file holding exactly `bytes`.
    fn temp_with(bytes: &[u8]) -> io::Result<NamedTempFile> {
        let mut file = NamedTempFile::new()?;
        file.write_all(bytes)?;
        Ok(file)
    }

    /// Creates the file `name` inside `dir` and fills it with `bytes`.
    fn put(dir: &Path, name: &str, bytes: &[u8]) -> io::Result<()> {
        fs::File::create(dir.join(name))?.write_all(bytes)
    }

    #[test]
    fn test_compare_contents_identical() -> io::Result<()> {
        let left = temp_with(b"hello world")?;
        let right = temp_with(b"hello world")?;
        assert!(compare_contents(left.path(), right.path())?);
        Ok(())
    }

    #[test]
    fn test_compare_contents_different() -> io::Result<()> {
        let left = temp_with(b"hello world")?;
        let right = temp_with(b"hello rust")?;
        assert!(!compare_contents(left.path(), right.path())?);
        Ok(())
    }

    #[test]
    fn test_compare_contents_different_size() -> io::Result<()> {
        let left = temp_with(b"hello world")?;
        let right = temp_with(b"hello")?;
        // The comparer only calls compare_contents for equally sized files; this pins
        // what it reports when the sizes differ anyway.
        assert!(!compare_contents(left.path(), right.path())?);
        Ok(())
    }

    #[test]
    fn test_comparison_summary() {
        let only_first = FileComparisonResult::new(PathBuf::from("a"), Classification::OnlyInDir1);
        let only_second =
            FileComparisonResult::new(PathBuf::from("b"), Classification::OnlyInDir2);
        let mut newer_in_first =
            FileComparisonResult::new(PathBuf::from("c"), Classification::InBoth);
        newer_in_first.modified_time_comparison = Some(Comparison::Dir1Greater);

        let mut summary = ComparisonSummary::default();
        for result in [&only_first, &only_second, &newer_in_first] {
            summary.update(result);
        }

        assert_eq!(summary.only_in_dir1, 1);
        assert_eq!(summary.only_in_dir2, 1);
        assert_eq!(summary.in_both, 1);
        assert_eq!(summary.dir1_newer, 1);
    }

    #[test]
    fn test_directory_comparer_integration() -> anyhow::Result<()> {
        let dir1 = tempfile::tempdir()?;
        let dir2 = tempfile::tempdir()?;

        // Files that exist on the first side.
        put(dir1.path(), "same.txt", b"same content")?;
        put(dir1.path(), "only1.txt", b"only in dir1")?;

        // Files that exist on the second side.
        put(dir2.path(), "same.txt", b"same content")?;
        put(dir2.path(), "only2.txt", b"only in dir2")?;

        // A file present on both sides with different length and content.
        put(dir1.path(), "diff.txt", b"content 1")?;
        put(dir2.path(), "diff.txt", b"content 222")?;

        let comparer = DirectoryComparer::new(dir1.path().to_path_buf(), dir2.path().to_path_buf());
        let (tx, rx) = mpsc::channel();

        comparer.compare_streaming(|_| {}, tx)?;

        // The sender was consumed above, so the iterator ends after the last result.
        let mut results: Vec<FileComparisonResult> = rx.iter().collect();
        results.sort_by_key(|result| result.relative_path.clone());

        let names: Vec<&str> = results
            .iter()
            .map(|result| result.relative_path.to_str().unwrap())
            .collect();
        assert_eq!(names, ["diff.txt", "only1.txt", "only2.txt", "same.txt"]);

        let diff = &results[0];
        assert_eq!(diff.classification, Classification::InBoth);
        assert!(
            diff.is_content_same == Some(false) || diff.size_comparison != Some(Comparison::Same)
        );

        assert_eq!(results[1].classification, Classification::OnlyInDir1);
        assert_eq!(results[2].classification, Classification::OnlyInDir2);

        let same = &results[3];
        assert_eq!(same.classification, Classification::InBoth);
        assert_eq!(same.size_comparison, Some(Comparison::Same));

        Ok(())
    }
}